From 249c69a06968b1d04f50c999c773cd71df5930c3 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 28 Mar 2022 23:36:10 -0400 Subject: [PATCH 01/53] add json serialize for int16_t, uint16_t --- src/esp/io/JsonBuiltinTypes.h | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/esp/io/JsonBuiltinTypes.h b/src/esp/io/JsonBuiltinTypes.h index e714bf26a9..0de0643dc5 100644 --- a/src/esp/io/JsonBuiltinTypes.h +++ b/src/esp/io/JsonBuiltinTypes.h @@ -78,6 +78,14 @@ inline JsonGenericValue toJsonValue(bool x, JsonAllocator&) { return JsonGenericValue(x, nullptr); } +inline JsonGenericValue toJsonValue(int16_t x, JsonAllocator&) { + return JsonGenericValue(x); +} + +inline JsonGenericValue toJsonValue(uint16_t x, JsonAllocator&) { + return JsonGenericValue(x); +} + inline JsonGenericValue toJsonValue(int x, JsonAllocator&) { return JsonGenericValue(x); } @@ -121,6 +129,34 @@ inline bool fromJsonValue(const JsonGenericValue& obj, bool& val) { return false; } +inline bool fromJsonValue(const JsonGenericValue& obj, int16_t& val) { + if (obj.IsNumber()) { + int valAsInt = obj.Get(); + if (valAsInt < std::numeric_limits::min() || valAsInt > std::numeric_limits::max()) { + ESP_ERROR() << "Value " << valAsInt << " is out of range for int16_t"; + return false; + } + val = valAsInt; + return true; + } + ESP_ERROR() << "Invalid int16_t value"; + return false; +} + +inline bool fromJsonValue(const JsonGenericValue& obj, uint16_t& val) { + if (obj.IsNumber()) { + int valAsInt = obj.Get(); + if (valAsInt < std::numeric_limits::min() || valAsInt > std::numeric_limits::max()) { + ESP_ERROR() << "Value " << valAsInt << " is out of range for uint16_t"; + return false; + } + val = valAsInt; + return true; + } + ESP_ERROR() << "Invalid uint16_t value"; + return false; +} + /** * @brief Populate passed @p val with value. Returns whether successfully * populated, or not. Logs an error if inappropriate type. 
From ce33e63e331169d17a2dd1e05b819ba0fded5630 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 28 Mar 2022 23:37:17 -0400 Subject: [PATCH 02/53] add json serialize for Magnum types: Matrix3x3, Range3D, Vector2 --- src/esp/io/JsonMagnumTypes.cpp | 108 +++++++++++++++++++++++++++++++++ src/esp/io/JsonMagnumTypes.h | 81 +++++++------------------ src/tests/IOTest.cpp | 13 ++++ 3 files changed, 144 insertions(+), 58 deletions(-) diff --git a/src/esp/io/JsonMagnumTypes.cpp b/src/esp/io/JsonMagnumTypes.cpp index 747816c72b..00356e6930 100644 --- a/src/esp/io/JsonMagnumTypes.cpp +++ b/src/esp/io/JsonMagnumTypes.cpp @@ -8,6 +8,60 @@ namespace esp { namespace io { +namespace { + +template +JsonGenericValue toJsonValueMatrixHelper(const T& mat, + JsonAllocator& allocator) { + constexpr auto numItems = T::Cols * T::Rows; + return toJsonArrayHelper(mat.data(), numItems, allocator); +} + +template +bool fromJsonValueMatrixHelper(const JsonGenericValue& obj, T& val) { + constexpr auto numItems = T::Cols * T::Rows; + if (obj.IsArray() && obj.Size() == numItems) { + for (rapidjson::SizeType i = 0; i < numItems; ++i) { + if (obj[i].IsNumber()) { + val.data()[i] = obj[i].GetDouble(); + } else { + ESP_ERROR() << "Invalid numeric value specified in JSON Matrix, index :" + << i; + return false; + } + } + return true; + } + return false; +} + +template +JsonGenericValue toJsonValueVectorHelper(const T& vec, + JsonAllocator& allocator) { + constexpr auto numItems = T::Size; + return toJsonArrayHelper(vec.data(), numItems, allocator); +} + +template +bool fromJsonValueVectorHelper(const JsonGenericValue& obj, T& val) { + constexpr auto numItems = T::Size; + if (obj.IsArray() && obj.Size() == numItems) { + for (rapidjson::SizeType i = 0; i < numItems; ++i) { + if (obj[i].IsNumber()) { + val[i] = obj[i].GetDouble(); + } else { + ESP_ERROR() << "Invalid numeric value specified in JSON Vector, index :" + << i; + return false; + } + } + return true; + } + return false; +} + +} + 
JsonGenericValue toJsonValue(const Magnum::Quaternion& quat, JsonAllocator& allocator) { JsonGenericValue arr(rapidjson::kArrayType); @@ -40,5 +94,59 @@ bool fromJsonValue(const JsonGenericValue& obj, Magnum::Quaternion& val) { return false; } + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Matrix3x3& val) { + + return fromJsonValueMatrixHelper(obj, val); +} + +JsonGenericValue toJsonValue(const Magnum::Matrix3x3& mat, + JsonAllocator& allocator) { + return toJsonValueMatrixHelper(mat, allocator); +} + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Range3D& val) { + bool success = true; + success &= readMember(obj, "min", val.min()); + success &= readMember(obj, "max", val.max()); + return success; +} + +JsonGenericValue toJsonValue(const Magnum::Range3D& x, + JsonAllocator& allocator) { + JsonGenericValue obj(rapidjson::kObjectType); + addMember(obj, "min", x.min(), allocator); + addMember(obj, "max", x.max(), allocator); + return obj; +} + +JsonGenericValue toJsonValue(const Magnum::Vector3& vec, + JsonAllocator& allocator) { + return toJsonValueVectorHelper(vec, allocator); +} + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Vector3& val) { + return fromJsonValueVectorHelper(obj, val); +} + +JsonGenericValue toJsonValue(const Magnum::Vector2& vec, + JsonAllocator& allocator) { + return toJsonValueVectorHelper(vec, allocator); +} + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Vector2& val) { + return fromJsonValueVectorHelper(obj, val); +} + +JsonGenericValue toJsonValue(const Magnum::Color4& color, + JsonAllocator& allocator) { + return toJsonValueVectorHelper(color, allocator); +} + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Color4& val) { + return fromJsonValueVectorHelper(obj, val); +} + + } // namespace io } // namespace esp diff --git a/src/esp/io/JsonMagnumTypes.h b/src/esp/io/JsonMagnumTypes.h index 50adb181d4..e740c4dec8 100644 --- a/src/esp/io/JsonMagnumTypes.h +++ b/src/esp/io/JsonMagnumTypes.h 
@@ -22,67 +22,21 @@ namespace esp { namespace io { -inline JsonGenericValue toJsonValue(const Magnum::Vector3& vec, - JsonAllocator& allocator) { - return toJsonArrayHelper(vec.data(), 3, allocator); -} +JsonGenericValue toJsonValue(const Magnum::Vector3& vec, + JsonAllocator& allocator); -/** - * @brief Specialization to handle Magnum::Vector3 values. Populate passed @p - * val with value. Returns whether successfully populated, or not. Logs an error - * if inappropriate type. - * - * @param obj json value to parse - * @param val destination value to be populated - * @return whether successful or not - */ -inline bool fromJsonValue(const JsonGenericValue& obj, Magnum::Vector3& val) { - if (obj.IsArray() && obj.Size() == 3) { - for (rapidjson::SizeType i = 0; i < 3; ++i) { - if (obj[i].IsNumber()) { - val[i] = obj[i].GetDouble(); - } else { - ESP_ERROR() << "Invalid numeric value specified in JSON Vec3, index :" - << i; - return false; - } - } - return true; - } - return false; -} +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Vector3& val); -inline JsonGenericValue toJsonValue(const Magnum::Color4& color, - JsonAllocator& allocator) { - return toJsonArrayHelper(color.data(), 4, allocator); -} +JsonGenericValue toJsonValue(const Magnum::Vector2& vec, + JsonAllocator& allocator); -/** - * @brief Specialization to handle Magnum::Color4 values. Populate passed @p - * val with value. Returns whether successfully populated, or not. Logs an error - * if inappropriate type. 
- * - * @param obj json value to parse - * @param val destination value to be populated - * @return whether successful or not - */ -inline bool fromJsonValue(const JsonGenericValue& obj, Magnum::Color4& val) { - if (obj.IsArray() && obj.Size() == 4) { - Magnum::Vector4 vec4; - for (rapidjson::SizeType i = 0; i < 4; ++i) { - if (obj[i].IsNumber()) { - vec4[i] = obj[i].GetDouble(); - } else { - ESP_ERROR() << "Invalid numeric value specified in JSON Color4, index :" - << i; - return false; - } - } - val = Magnum::Color4(vec4); - return true; - } - return false; -} +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Vector2& val); + + +JsonGenericValue toJsonValue(const Magnum::Color4& color, + JsonAllocator& allocator); + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Color4& val); JsonGenericValue toJsonValue(const Magnum::Quaternion& quat, JsonAllocator& allocator); @@ -142,6 +96,17 @@ inline bool fromJsonValue(const JsonGenericValue& obj, Magnum::Rad& val) { return true; } + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Matrix3x3& val); + +JsonGenericValue toJsonValue(const Magnum::Matrix3x3& mat, + JsonAllocator& allocator); + +bool fromJsonValue(const JsonGenericValue& obj, Magnum::Range3D& val); + +JsonGenericValue toJsonValue(const Magnum::Range3D& val, + JsonAllocator& allocator); + } // namespace io } // namespace esp diff --git a/src/tests/IOTest.cpp b/src/tests/IOTest.cpp index 01d977fee5..d6e2055b57 100644 --- a/src/tests/IOTest.cpp +++ b/src/tests/IOTest.cpp @@ -5,6 +5,7 @@ #include #include #include +#include "Magnum/Math/Matrix3.h" #include "esp/assets/RenderAssetInstanceCreationInfo.h" #include "esp/core/esp.h" #include "esp/io/JsonAllTypes.h" @@ -365,6 +366,18 @@ TEST(IOTest, JsonMagnumTypesTest) { EXPECT_TRUE(readMember(d, "myquat", quat2)); EXPECT_EQ(quat2, quat); + Magnum::Matrix3x3 mat33{Magnum::Vector3(1, 2, 3), Magnum::Vector3(4, 5, 6), Magnum::Vector3(7, 8, 9)}; + addMember(d, "mymat33", mat33, allocator); + 
Magnum::Matrix3x3 mat332; + EXPECT_TRUE(readMember(d, "mymat33", mat332)); + EXPECT_EQ(mat33, mat332); + + Magnum::Range3D range3{Magnum::Vector3(1, 2, 3), Magnum::Vector3(4, 5, 6)}; + addMember(d, "myrange3", range3, allocator); + Magnum::Range3D range32; + EXPECT_TRUE(readMember(d, "myrange3", range32)); + EXPECT_EQ(range3, range32); + // test reading the wrong type (wrong number of fields) Magnum::Quaternion quat3; EXPECT_FALSE(readMember(d, "myvec", quat3)); From c9f4cca62aca50f3cbea2950266de398dda99057 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 28 Mar 2022 23:46:36 -0400 Subject: [PATCH 03/53] add episode json save/load; add agent start pos/yaw to episode data --- src/esp/batched_sim/BatchedSimulator.cpp | 105 ++++++------ src/esp/batched_sim/BatchedSimulator.h | 5 +- src/esp/batched_sim/EpisodeSet.cpp | 201 +++++++++++++++++++++++ src/esp/batched_sim/EpisodeSet.h | 17 +- src/esp/bindings/BatchedSimBindings.cpp | 5 +- src/tests/BatchedSimTest.cpp | 4 +- 6 files changed, 275 insertions(+), 62 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 5706eb6654..e909c3df8a 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -109,53 +109,6 @@ constexpr bool disableStageVisualsForEnv(const BatchedSimulatorConfig&, int) { } // namespace -void BatchedSimulator::randomizeRobotsForCurrentStep() { - BATCHED_SIM_ASSERT(currRolloutSubstep_ >= 0); - int numEnvs = bpsWrapper_->envs_.size(); - int numPosVars = robot_.numPosVars; - - float* yaws = &safeVectorGet(rollouts_.yaws_, currRolloutSubstep_ * numEnvs); - Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_,currRolloutSubstep_ * numEnvs); - float* jointPositions = - &safeVectorGet(rollouts_.jointPositions_,currRolloutSubstep_ * numEnvs * numPosVars); - - auto random = core::Random(/*seed*/ 3); - - for (int b = 0; b < numEnvs; b++) { - // hand-authored start location and yaw range that 
works for stages 0-12 - positions[b] = Mn::Vector2(2.59f, 0.f); - yaws[b] = random.uniform_float(-float(Mn::Rad(Mn::Deg(135.f))), -float(Mn::Rad(Mn::Deg(45.f)))); - - // temp move robot out of scene (hide/disable robot) - // positions[b].y() -= 1000.f; - - for (int j = 0; j < robot_.numPosVars; j++) { - auto& pos = jointPositions[b * robot_.numPosVars + j]; - pos = 0.f; - pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], - robot_.jointPositionLimits.second[j]); - } - - // 7 shoulder, + is down - // 8 twist, + is twist to right - // 9 elbow, + is down - // 10 elbow twist, + is twst to right - // 11 wrist, + is down - jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); - } - - if (config_.doPairedDebugEnvs) { - for (int b = 1; b < numEnvs; b += 2) { - BATCHED_SIM_ASSERT(isPairedDebugEnv(config_, b)); - positions[b] = positions[b - 1]; - yaws[b] = yaws[b - 1]; - for (int j = 0; j < robot_.numPosVars; j++) { - auto& pos = jointPositions[b * robot_.numPosVars + j]; - pos = jointPositions[(b - 1) * robot_.numPosVars + j]; - } - } - } -} RobotInstanceSet::RobotInstanceSet(Robot* robot, const BatchedSimulatorConfig* config, @@ -801,7 +754,10 @@ void BatchedSimulator::updateCollision() { if (robots_.collisionResults_[b]) { // todo: more robust handling of this, maybe at episode-load time - ESP_CHECK(currRolloutSubstep_ > 1, "For env " << b << ", the robot is in collision on the first step of the episode."); + const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + ESP_CHECK(currRolloutSubstep_ > 1, "For episode " << episodeInstance.episodeIndex_ + << ", the robot is in collision after the first step of the episode. 
In your " + << "episode set, revise agentStartPos/agentStartYaw or rearrange the scene."); recentStats_.numStepsInCollision_++; } } @@ -1352,6 +1308,34 @@ void BatchedSimulator::resetEpisodeInstance(int b) { const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + // init robot agent + { + const int numEnvs = bpsWrapper_->envs_.size(); + const int numPosVars = robot_.numPosVars; + float* yaws = &safeVectorGet(rollouts_.yaws_, currRolloutSubstep_ * numEnvs); + Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_,currRolloutSubstep_ * numEnvs); + float* jointPositions = + &safeVectorGet(rollouts_.jointPositions_,currRolloutSubstep_ * numEnvs * numPosVars); + + constexpr float groundY = 0.f; + positions[b] = episode.agentStartPos_; + yaws[b] = episode.agentStartYaw_; + + for (int j = 0; j < robot_.numPosVars; j++) { + auto& pos = jointPositions[b * robot_.numPosVars + j]; + pos = 0.f; + pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], + robot_.jointPositionLimits.second[j]); + } + + // 7 shoulder, + is down + // 8 twist, + is twist to right + // 9 elbow, + is down + // 10 elbow twist, + is twst to right + // 11 wrist, + is down + jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); + } + for (int i = 0; i < episodeInstance.movedFreeObjectIndexes_.size(); i++) { int freeObjectIndex = episodeInstance.movedFreeObjectIndexes_[i]; @@ -1465,14 +1449,26 @@ void BatchedSimulator::initEpisodeSet() { const int numEnvs = config_.numEnvs; - // generate exactly as many episodes as envs (this is not required) - episodeSet_ = generateBenchmarkEpisodeSet(numEnvs, sceneMapping_, serializeCollection_); + if (config_.doProceduralEpisodeSet) { + ESP_CHECK(config_.episodeSetFilepath.empty(), + "For BatchedSimulatorConfig::doProceduralEpisodeSet==true, don't specify episodeSetFilepath"); + + // generate exactly as many episodes as envs (this is not required) + episodeSet_ = generateBenchmarkEpisodeSet(numEnvs, 
sceneMapping_, serializeCollection_); + episodeSet_.saveToFile("generated.episode_set.json"); + } else { + ESP_CHECK(!config_.episodeSetFilepath.empty(), + "For BatchedSimulatorConfig::doProceduralEpisodeSet==false, you must specify episodeSetFilepath"); + episodeSet_ = EpisodeSet::loadFromFile(config_.episodeSetFilepath); + postLoadFixup(episodeSet_, sceneMapping_, serializeCollection_); + } episodeInstanceSet_.episodeInstanceByEnv_.resize(numEnvs); for (int b = 0; b < numEnvs; b++) { - auto episodeIndex = b * episodeSet_.episodes_.size() / numEnvs; // distribute episodes across instances - if (config_.doPairedDebugEnvs) { - episodeIndex /= 2; + // temp: statically assigned episodes to envs + auto episodeIndex = b * episodeSet_.episodes_.size() / numEnvs; + if (config_.doPairedDebugEnvs && (b % 2 == 1)) { + episodeIndex = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b - 1).episodeIndex_; } instantiateEpisode(b, episodeIndex); } @@ -1518,11 +1514,10 @@ void BatchedSimulator::reset() { currRolloutSubstep_ = 0; prevRolloutSubstep_ = -1; - randomizeRobotsForCurrentStep(); - robots_.updateLinkTransforms(currRolloutSubstep_, /*updateforPhysics*/ false, /*updateForRender*/ true); for (int b = 0; b < numEnvs; b++) { resetEpisodeInstance(b); } + robots_.updateLinkTransforms(currRolloutSubstep_, /*updateforPhysics*/ false, /*updateForRender*/ true); updateRenderInstances(/*forceUpdate*/true); updatePythonEnvironmentState(); diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 7089bcf075..863a3bfd29 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -41,6 +41,9 @@ struct BatchedSimulatorConfig { // if enabled, for every odd env, we don't render any visuals except debug stuff, // e.g. collision visualization. We should keep even and odd envs in sync (e.g. same actions). 
bool doPairedDebugEnvs = false; + bool doProceduralEpisodeSet = true; + // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) + std::string episodeSetFilepath; ESP_SMART_POINTERS(BatchedSimulatorConfig); }; @@ -243,8 +246,6 @@ class BatchedSimulator { void reinsertFreeObject(int b, int freeObjectIndex, const Magnum::Vector3& pos, const Magnum::Quaternion& rotation); - void randomizeRobotsForCurrentStep(); - void initEpisodeSet(); void instantiateEpisode(int b, int episodeIndex); // We don't yet support changing the episode for an env. You can only reset to the diff --git a/src/esp/batched_sim/EpisodeSet.cpp b/src/esp/batched_sim/EpisodeSet.cpp index 5af44adcf2..567b785283 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -8,6 +8,9 @@ #include "esp/core/random.h" #include "esp/core/Check.h" +#include "esp/io/json.h" + +#include namespace Cr = Corrade; namespace Mn = Magnum; @@ -76,6 +79,10 @@ void addEpisode(EpisodeSet& set, const serialize::Collection& collection, int st episode.stageFixedObjIndex = stageFixedObjectIndex; episode.firstFreeObjectSpawnIndex_ = set.freeObjectSpawns_.size(); + // place robot agent + episode.agentStartPos_ = Mn::Vector2(2.59f, 0.f); + episode.agentStartYaw_ = random.uniform_float(-float(Mn::Rad(Mn::Deg(135.f))), -float(Mn::Rad(Mn::Deg(45.f)))); + // keep object count close to 28 (from Hab 2.0 benchmark), but include variation int targetNumSpawns = random.uniform_int(28, 33); episode.numFreeObjectSpawns_ = 0; @@ -346,5 +353,199 @@ void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& } } +void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const serialize::Collection& collection) { + + for (auto& freeObj : set.freeObjects_) { + freeObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(freeObj.name_); + freeObj.needsPostLoadFixup_ = false; + } + + for (auto& fixedObj : set.fixedObjects_) { + fixedObj.instanceBlueprint_ = 
sceneMapping.findInstanceBlueprint(fixedObj.name_); + + // sloppy: copy-pasted from addStageFixedObject + std::string columnGridFilepathBase = "../data/columngrids/" + fixedObj.name_ + "_stage_only"; + fixedObj.columnGridSet_.load(columnGridFilepathBase); + + ESP_CHECK(fixedObj.columnGridSet_.getSphereRadii() == collection.collisionRadiusWorkingSet, + "ColumnGridSet " << fixedObj.name_ << " with radii " << fixedObj.columnGridSet_.getSphereRadii() + << " doesn't match collection collision radius working set " + << collection.collisionRadiusWorkingSet); + + fixedObj.needsPostLoadFixup_ = false; + } + + set.maxFreeObjects_ = 0; + for (const auto& episode : set.episodes_) { + set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); + } + set.needsPostLoadFixup_ = false; + + updateFromSerializeCollection(set, collection); +} + + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + FreeObject& val) { + // sloppy: some fields here are populated by updateFromSerializeCollection so they + // shouldn't be saved/loaded + esp::io::readMember(obj, "name", val.name_); + // esp::io::readMember(obj, "aabb", val.aabb_); + esp::io::readMember(obj, "startRotations", val.startRotations_); + // esp::io::readMember(obj, "heldRotationIndex", val.heldRotationIndex_); + // esp::io::readMember(obj, "collisionSpheres", val.collisionSpheres_); + + // for instanceBlueprint_, aabb_, heldRotationIndex_, collisionSpheres_ + val.needsPostLoadFixup_ = true; + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const FreeObject& x, + esp::io::JsonAllocator& allocator) { + // sloppy: some fields here are populated by updateFromSerializeCollection so they + // shouldn't be saved/loaded + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "name", x.name_, allocator); + //esp::io::addMember(obj, "aabb", x.aabb_, allocator); + esp::io::addMember(obj, "startRotations", x.startRotations_, allocator); + 
//esp::io::addMember(obj, "heldRotationIndex", x.heldRotationIndex_, allocator); + //esp::io::addMember(obj, "collisionSpheres", x.collisionSpheres_, allocator); + + return obj; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + CollisionSphere& val) { + esp::io::readMember(obj, "origin", val.origin); + esp::io::readMember(obj, "radiusIdx", val.radiusIdx); + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const CollisionSphere& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "origin", x.origin, allocator); + esp::io::addMember(obj, "radiusIdx", x.radiusIdx, allocator); + + return obj; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + FixedObject& val) { + esp::io::readMember(obj, "name", val.name_); + + // for instanceBlueprint_ and columnGridSet_ + val.needsPostLoadFixup_ = true; + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const FixedObject& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "name", x.name_, allocator); + return obj; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + FreeObjectSpawn& val) { + esp::io::readMember(obj, "freeObjIndex", val.freeObjIndex_); + esp::io::readMember(obj, "startRotationIndex", val.startRotationIndex_); + esp::io::readMember(obj, "startPos", val.startPos_); + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const FreeObjectSpawn& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "freeObjIndex", x.freeObjIndex_, allocator); + esp::io::addMember(obj, "startRotationIndex", x.startRotationIndex_, allocator); + esp::io::addMember(obj, "startPos", x.startPos_, allocator); + + return obj; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + Episode& val) { + esp::io::readMember(obj, "stageFixedObjIndex", 
val.stageFixedObjIndex); + esp::io::readMember(obj, "numFreeObjectSpawns", val.numFreeObjectSpawns_); + esp::io::readMember(obj, "targetObjIndex", val.targetObjIndex_); + esp::io::readMember(obj, "firstFreeObjectSpawnIndex", val.firstFreeObjectSpawnIndex_); + esp::io::readMember(obj, "agentStartPos", val.agentStartPos_); + esp::io::readMember(obj, "agentStartYaw", val.agentStartYaw_); + esp::io::readMember(obj, "targetObjGoalPos", val.targetObjGoalPos_); + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const Episode& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "stageFixedObjIndex", x.stageFixedObjIndex, allocator); + esp::io::addMember(obj, "numFreeObjectSpawns", x.numFreeObjectSpawns_, allocator); + esp::io::addMember(obj, "targetObjIndex", x.targetObjIndex_, allocator); + esp::io::addMember(obj, "firstFreeObjectSpawnIndex", x.firstFreeObjectSpawnIndex_, allocator); + esp::io::addMember(obj, "agentStartPos", x.agentStartPos_, allocator); + esp::io::addMember(obj, "agentStartYaw", x.agentStartYaw_, allocator); + esp::io::addMember(obj, "targetObjGoalPos", x.targetObjGoalPos_, allocator); + + return obj; +} + + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + EpisodeSet& val) { + esp::io::readMember(obj, "episodes", val.episodes_); + esp::io::readMember(obj, "fixedObjects", val.fixedObjects_); + esp::io::readMember(obj, "freeObjectSpawns", val.freeObjectSpawns_); + esp::io::readMember(obj, "freeObjects", val.freeObjects_); + + // for maxFreeObjects_ + val.needsPostLoadFixup_ = true; + val.maxFreeObjects_ = -1; + + return true; +} + + +esp::io::JsonGenericValue toJsonValue(const EpisodeSet& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "episodes", x.episodes_, allocator); + esp::io::addMember(obj, "fixedObjects", x.fixedObjects_, allocator); + esp::io::addMember(obj, "freeObjectSpawns", 
x.freeObjectSpawns_, allocator); + esp::io::addMember(obj, "freeObjects", x.freeObjects_, allocator); + // don't write maxFreeObjects_ + + return obj; +} + +EpisodeSet EpisodeSet::loadFromFile(const std::string& filepath) { + EpisodeSet episodeSet; + ESP_CHECK(Cr::Utility::Directory::exists(filepath), "couldn't find EpisodeSet file " << filepath); + auto newDoc = esp::io::parseJsonFile(filepath); + esp::io::readMember(newDoc, "episodeSet", episodeSet); + + return episodeSet; +} + + +void EpisodeSet::saveToFile(const std::string& filepath)const { + + rapidjson::Document document(rapidjson::kObjectType); + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + esp::io::addMember(document, "episodeSet", *this, allocator); + + // EpisodeSet use floats (not doubles) so this is plenty of precision + const float maxDecimalPlaces = 7; + constexpr bool usePrettyWriter = false; + bool success = esp::io::writeJsonToFile(document, filepath, usePrettyWriter, + maxDecimalPlaces); + ESP_CHECK(success, "failed to save EpisodeSet to " << filepath); +} + } // namespace batched_sim } // namespace esp diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 1652fa8563..41dbb2dd6b 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -36,11 +36,13 @@ class FreeObject { std::vector startRotations_; int heldRotationIndex_; // index into startRotations_ std::vector collisionSpheres_; + bool needsPostLoadFixup_ = true; }; class FixedObject { public: std::string name_; + bool needsPostLoadFixup_ = true; BpsSceneMapping::InstanceBlueprint instanceBlueprint_; esp::batched_sim::ColumnGridSet columnGridSet_; }; @@ -58,7 +60,11 @@ class Episode { int16_t numFreeObjectSpawns_ = 0; int16_t targetObjIndex_ = -1; // 0..numFreeObjectSpawns - 1, see also freeObjectIndex int32_t firstFreeObjectSpawnIndex_ = -1; // index into EpisodeSet::freeObjectSpawns_ - Mn::Vector3 targetObjGoalPos_; + Mn::Vector2 agentStartPos_; + 
float agentStartYaw_ = 0.f; // radians + + // task-specific + Magnum::Vector3 targetObjGoalPos_; }; class EpisodeSet { @@ -67,7 +73,11 @@ class EpisodeSet { std::vector fixedObjects_; // ~100, max 32K std::vector freeObjectSpawns_; // num episodes * num-spawns-per-episode, ~5,000,000 std::vector freeObjects_; // ~100, max 32K - int maxFreeObjects_ = 0; + int maxFreeObjects_ = -1; + bool needsPostLoadFixup_ = true; + + static EpisodeSet loadFromFile(const std::string& filepath); + void saveToFile(const std::string& filepath) const; }; class EpisodeInstance { @@ -95,6 +105,9 @@ void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& EpisodeSet generateBenchmarkEpisodeSet(int numEpisodes, const BpsSceneMapping& sceneMapping, const serialize::Collection& collection); +void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, + const serialize::Collection& collection); + } // namespace batched_sim } // namespace esp diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index e6d7dbbb9b..3c15a39dc4 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -45,8 +45,9 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("force_random_actions", &BatchedSimulatorConfig::forceRandomActions, R"(Todo)") .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") - .def_readwrite("max_episode_length", &BatchedSimulatorConfig::maxEpisodeLength, R"(Todo)"); - + .def_readwrite("max_episode_length", &BatchedSimulatorConfig::maxEpisodeLength, R"(Todo)") + .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") + .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); py::class_( m, "EnvironmentState") diff --git a/src/tests/BatchedSimTest.cpp 
b/src/tests/BatchedSimTest.cpp index abe4a9eb2a..95d7cedf0e 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -209,7 +209,9 @@ TEST_F(BatchedSimulatorTest, basic) { .doAsyncPhysicsStep = true, .maxEpisodeLength = 500, .numSubsteps = 1, - .doPairedDebugEnvs = doPairedDebugEnvs + .doPairedDebugEnvs = doPairedDebugEnvs, + .doProceduralEpisodeSet = true, + //.episodeSetFilepath = "test.episode_set.json" }; BatchedSimulator bsim(config); From 05499d9de4ad6246ea7e4a23dd466456132e09a1 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Tue, 29 Mar 2022 00:48:24 -0400 Subject: [PATCH 04/53] update googletest commit to avoid maybe-uninitialized warning --- src/deps/googletest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/deps/googletest b/src/deps/googletest index 00938b2b22..4679637f1c 160000 --- a/src/deps/googletest +++ b/src/deps/googletest @@ -1 +1 @@ -Subproject commit 00938b2b228f3b70d3d9e51f29a1505bdad43f1e +Subproject commit 4679637f1c9d5a0728bdc347a531737fad0b1ca3 From 721357fb859b77f78283de3fdaedcafb4519953b Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Wed, 30 Mar 2022 01:13:28 -0400 Subject: [PATCH 05/53] per-env resetting and startStepPhysicsOrReset API --- src/esp/batched_sim/BatchedSimulator.cpp | 307 +++++++++++++---------- src/esp/batched_sim/BatchedSimulator.h | 38 +-- src/esp/bindings/BatchedSimBindings.cpp | 12 +- src/tests/BatchedSimTest.cpp | 45 ++-- 4 files changed, 230 insertions(+), 172 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index e909c3df8a..e973cce32c 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -175,21 +175,21 @@ RobotInstanceSet::RobotInstanceSet(Robot* robot, } void BatchedSimulator::reverseActionsForEnvironment(int b) { - BATCHED_SIM_ASSERT(prevRolloutSubstep_ != -1); + BATCHED_SIM_ASSERT(prevStorageStep_ != -1); const int numEnvs = config_.numEnvs; 
const int numPosVars = robot_.numPosVars; - const float* prevYaws = &rollouts_.yaws_[prevRolloutSubstep_ * numEnvs]; - float* yaws = &rollouts_.yaws_[currRolloutSubstep_ * numEnvs]; + const float* prevYaws = &rollouts_.yaws_[prevStorageStep_ * numEnvs]; + float* yaws = &rollouts_.yaws_[currStorageStep_ * numEnvs]; const Mn::Vector2* prevPositions = - &rollouts_.positions_[prevRolloutSubstep_ * numEnvs]; - Mn::Vector2* positions = &rollouts_.positions_[currRolloutSubstep_ * numEnvs]; + &rollouts_.positions_[prevStorageStep_ * numEnvs]; + Mn::Vector2* positions = &rollouts_.positions_[currStorageStep_ * numEnvs]; Mn::Matrix4* rootTransforms = - &rollouts_.rootTransforms_[currRolloutSubstep_ * numEnvs]; + &rollouts_.rootTransforms_[currStorageStep_ * numEnvs]; const float* prevJointPositions = - &rollouts_.jointPositions_[prevRolloutSubstep_ * numEnvs * numPosVars]; + &rollouts_.jointPositions_[prevStorageStep_ * numEnvs * numPosVars]; float* jointPositions = - &rollouts_.jointPositions_[currRolloutSubstep_ * numEnvs * numPosVars]; + &rollouts_.jointPositions_[currStorageStep_ * numEnvs * numPosVars]; yaws[b] = prevYaws[b]; positions[b] = prevPositions[b]; @@ -201,35 +201,40 @@ void BatchedSimulator::reverseActionsForEnvironment(int b) { } } -void RobotInstanceSet::updateLinkTransforms(int currRolloutSubstep, bool updateForPhysics, bool updateForRender) { +void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateForPhysics, bool updateForRender, bool includeResettingEnvs) { //ProfilingScope scope("BSim updateLinkTransforms"); BATCHED_SIM_ASSERT(updateForPhysics || updateForRender); // esp::gfx::replay::Keyframe debugKeyframe; - int numLinks = robot_->artObj->getNumLinks(); + int numLinks = robots_.robot_->artObj->getNumLinks(); int numNodes = numLinks + 1; - int numEnvs = config_->numEnvs; - int numPosVars = robot_->numPosVars; + int numEnvs = config_.numEnvs; + int numPosVars = robots_.robot_->numPosVars; if (updateForPhysics) { - 
areCollisionResultsValid_ = false; + robots_.areCollisionResultsValid_ = false; } - auto* mb = robot_->artObj->btMultiBody_.get(); + auto* mb = robots_.robot_->artObj->btMultiBody_.get(); int posCount = 0; - const float* yaws = &rollouts_->yaws_[currRolloutSubstep * numEnvs]; + const float* yaws = &robots_.rollouts_->yaws_[currRolloutSubstep * numEnvs]; const Mn::Vector2* positions = - &rollouts_->positions_[currRolloutSubstep * numEnvs]; + &robots_.rollouts_->positions_[currRolloutSubstep * numEnvs]; const float* jointPositions = - &rollouts_->jointPositions_[currRolloutSubstep * numEnvs * numPosVars]; + &robots_.rollouts_->jointPositions_[currRolloutSubstep * numEnvs * numPosVars]; Mn::Matrix4* rootTransforms = - &rollouts_->rootTransforms_[currRolloutSubstep * numEnvs]; + &robots_.rollouts_->rootTransforms_[currRolloutSubstep * numEnvs]; - for (int b = 0; b < config_->numEnvs; b++) { + for (int b = 0; b < config_.numEnvs; b++) { + + if (!includeResettingEnvs && isEnvResetting(b)) { + posCount += numPosVars; + continue; + } - auto& robotInstance = robotInstances_[b]; + auto& robotInstance = robots_.robotInstances_[b]; // perf todo: simplify this rootTransforms[b] = Mn::Matrix4::translation( @@ -250,11 +255,11 @@ void RobotInstanceSet::updateLinkTransforms(int currRolloutSubstep, bool updateF } } - mb->forwardKinematics(scratch_q_, scratch_m_); + mb->forwardKinematics(robots_.scratch_q_, robots_.scratch_m_); - auto& env = (*envs_)[b]; + auto& env = bpsWrapper_->envs_[b]; int baseInstanceIndex = b * numNodes; - const int baseSphereIndex = b * robot_->numCollisionSpheres_; + const int baseSphereIndex = b * robots_.robot_->numCollisionSpheres_; // extract link transforms // todo: update base node @@ -263,15 +268,15 @@ void RobotInstanceSet::updateLinkTransforms(int currRolloutSubstep, bool updateF const auto nodeIndex = i + 1; // 0 is base const auto instanceIndex = baseInstanceIndex + nodeIndex; - int instanceId = nodeInstanceIds_[instanceIndex]; + int instanceId = 
robots_.nodeInstanceIds_[instanceIndex]; if (instanceId == -1) { // todo: avoid every calculating this link transform continue; } if (!updateForRender) { - if (robot_->collisionSpheresByNode_[nodeIndex].empty() - && robot_->gripperLink_ != i) { + if (robots_.robot_->collisionSpheresByNode_[nodeIndex].empty() + && robots_.robot_->gripperLink_ != i) { continue; } } @@ -281,31 +286,31 @@ void RobotInstanceSet::updateLinkTransforms(int currRolloutSubstep, bool updateF : mb->getLink(i).m_cachedWorldTransform; Mn::Matrix4 mat = toMagnumMatrix4(btTrans); - auto tmp = robot_->nodeTransformFixups[nodeIndex]; + auto tmp = robots_.robot_->nodeTransformFixups[nodeIndex]; // auto vec = tmp[3]; // const float scale = (float)b / (numEnvs_ - 1); // tmp[3] = Mn::Vector4(vec.xyz() * scale, 1.f); mat = mat * tmp; - if (robot_->gripperLink_ == i) { + if (robots_.robot_->gripperLink_ == i) { robotInstance.cachedGripperLinkMat_ = mat; } if (updateForPhysics) { // perf todo: loop through collision spheres (and look up link id), instead of this sparse way here // compute collision sphere transforms - for (const auto& localSphereIdx : robot_->collisionSpheresByNode_[nodeIndex]) { - const auto& sphere = safeVectorGet(robot_->collisionSpheres_, localSphereIdx); - collisionSphereWorldOrigins_[baseSphereIndex + localSphereIdx] = + for (const auto& localSphereIdx : robots_.robot_->collisionSpheresByNode_[nodeIndex]) { + const auto& sphere = safeVectorGet(robots_.robot_->collisionSpheres_, localSphereIdx); + robots_.collisionSphereWorldOrigins_[baseSphereIndex + localSphereIdx] = mat.transformPoint(sphere.origin); } } if (updateForRender) { - BATCHED_SIM_ASSERT(instanceIndex < nodeNewTransforms_.size()); - nodeNewTransforms_[instanceIndex] = toGlmMat4x3(mat); + BATCHED_SIM_ASSERT(instanceIndex < robots_.nodeNewTransforms_.size()); + robots_.nodeNewTransforms_[instanceIndex] = toGlmMat4x3(mat); - if (robot_->cameraAttachNode_ == nodeIndex) { + if (robots_.robot_->cameraAttachNode_ == nodeIndex) { 
robotInstance.cameraAttachNodeTransform_ = mat; } } @@ -341,11 +346,9 @@ void RobotInstanceSet::updateLinkTransforms(int currRolloutSubstep, bool updateF void BatchedSimulator::updatePythonEnvironmentState() { - const auto currEpisodeStep = currRolloutSubstep_ / config_.numSubsteps; - const int numEnvs = config_.numEnvs; - const Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_, currRolloutSubstep_ * numEnvs); - const float* yaws = &safeVectorGet(rollouts_.yaws_, currRolloutSubstep_ * numEnvs); + const Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_, currStorageStep_ * numEnvs); + const float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); for (int b = 0; b < config_.numEnvs; b++) { @@ -353,9 +356,7 @@ void BatchedSimulator::updatePythonEnvironmentState() { auto& envState = safeVectorGet(pythonEnvStates_, b); envState.ee_pos = robotInstance.cachedGripperLinkMat_.translation(); - envState.did_finish_episode_and_reset = (currEpisodeStep == 0); - envState.finished_episode_success = false; // temp hardcoded - envState.episode_step_idx = currEpisodeStep; + // envState.episode_step_idx updated incrementally // todo: do logical or over all substeps envState.did_collide = robots_.collisionResults_[b]; // envState.obj_positions // this is updated incrementally @@ -365,6 +366,7 @@ void BatchedSimulator::updatePythonEnvironmentState() { // perf todo: set this just once at episode reset const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + envState.episode_idx = episodeInstance.episodeIndex_; envState.target_obj_idx = episode.targetObjIndex_; envState.goal_pos = episode.targetObjGoalPos_; } @@ -423,10 +425,14 @@ void BatchedSimulator::updateGripping() { for (int b = 0; b < numEnvs; b++) { + if (isEnvResetting(b)) { + continue; + } + auto& env = bpsWrapper_->envs_[b]; auto& robotInstance = 
robots_.robotInstances_[b]; - if (shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { const auto& gripperMat = robotInstance.cachedGripperLinkMat_; auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); @@ -567,6 +573,10 @@ void BatchedSimulator::updateCollision() { for (int b = 0; b < numEnvs; b++) { + if (isEnvResetting(b)) { + continue; + } + const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); @@ -592,7 +602,7 @@ void BatchedSimulator::updateCollision() { // spheres in collision) if (thisSphereHit) { hit = true; - if (shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { sphereHits[baseSphereIndex + s] = thisSphereHit; } else { break; @@ -613,7 +623,7 @@ void BatchedSimulator::updateCollision() { // todo: proper debug-drawing of hits for held object spheres if (thisSphereHit) { hit = true; - if (shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { heldObjectHits[b] = thisSphereHit; } else { break; @@ -628,7 +638,11 @@ void BatchedSimulator::updateCollision() { // test against free objects for (int b = 0; b < numEnvs; b++) { - if (robots_.collisionResults_[b] && !shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (isEnvResetting(b)) { + continue; + } + + if (robots_.collisionResults_[b] && !shouldDrawDebugForEnv(config_, b, substep_)) { // already had a hit against column grid so don't test free objects continue; } @@ -650,7 +664,7 @@ void BatchedSimulator::updateCollision() { int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(spherePos, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; - if 
(shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { sphereHits[baseSphereIndex + s] = true; freeObjectHits[hitFreeObjectIndex] = true; } else { @@ -673,7 +687,7 @@ void BatchedSimulator::updateCollision() { int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(sphereWorldOrigin, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; - if (shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { heldObjectHits[b] = true; freeObjectHits[hitFreeObjectIndex] = true; } else { @@ -684,7 +698,7 @@ void BatchedSimulator::updateCollision() { } // render free objects to debug env, colored by collision result - if (shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { if (episodeInstance.colGrid_.isObstacleDisabled(freeObjectIndex)) { continue; @@ -718,8 +732,13 @@ void BatchedSimulator::updateCollision() { } for (int b = 0; b < numEnvs; b++) { + + if (isEnvResetting(b)) { + continue; + } + // render collision spheres for debug env, colored by collision result - if (shouldDrawDebugForEnv(config_, b, currRolloutSubstep_)) { + if (shouldDrawDebugForEnv(config_, b, substep_)) { const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const auto& robotInstance = robots_.robotInstances_[b]; @@ -755,8 +774,9 @@ void BatchedSimulator::updateCollision() { if (robots_.collisionResults_[b]) { // todo: more robust handling of this, maybe at episode-load time const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - ESP_CHECK(currRolloutSubstep_ > 1, "For episode " << episodeInstance.episodeIndex_ - << ", the robot is in collision after the first step of 
the episode. In your " + const auto& envState = safeVectorGet(pythonEnvStates_, b); + ESP_CHECK(envState.episode_step_idx > 0, "For episode " << episodeInstance.episodeIndex_ + << ", the robot is in collision on the first step of the episode. In your " << "episode set, revise agentStartPos/agentStartYaw or rearrange the scene."); recentStats_.numStepsInCollision_++; } @@ -773,6 +793,11 @@ void BatchedSimulator::postCollisionUpdate() { BATCHED_SIM_ASSERT(robots_.areCollisionResultsValid_); for (int b = 0; b < numEnvs; b++) { + + if (isEnvResetting(b)) { + continue; + } + if (robots_.collisionResults_[b]) { reverseActionsForEnvironment(b); } else { @@ -800,7 +825,8 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { // temp hack: we don't currently have bookeeping to know if a robot moved over // several substeps, so we assume it did here. perf todo: fix this bool didRobotMove = forceUpdate || - (!robots_.collisionResults_[b] || config_.numSubsteps > 1); + (!robots_.collisionResults_[b] || config_.numSubsteps > 1) || + isEnvResetting(b); // update robot links and camera if (didRobotMove) { @@ -1156,17 +1182,15 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { int batchNumActions = (actionDim_) * numEnvs; actions_.resize(batchNumActions, 0.f); + resets_.resize(numEnvs, -1); - ESP_CHECK(config_.maxEpisodeLength > 1, "BatchedSimulatorConfig::maxEpisodeLength must be > 1"); - // For an episode length of 2, we produce an observation immediately after resetting, - // then we take one step (config_.numSubsteps substeps), and we produce one more observation. 
- maxRolloutSubsteps_ = (config_.maxEpisodeLength - 1) * config_.numSubsteps + 1; + maxStorageSteps_ = 3; // todo: get rid of storage steps nonsense rollouts_ = - RolloutRecord(maxRolloutSubsteps_, numEnvs, robot_.numPosVars, numNodes); + RolloutRecord(maxStorageSteps_, numEnvs, robot_.numPosVars, numNodes); - currRolloutSubstep_ = + currStorageStep_ = -1; // trigger auto-reset on first call to autoResetOrStepPhysics - prevRolloutSubstep_ = -1; + prevStorageStep_ = -1; isOkToRender_ = false; isOkToStep_ = false; isRenderStarted_ = false; @@ -1197,7 +1221,7 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { void BatchedSimulator::close() { if (config_.doAsyncPhysicsStep) { if (physicsThread_.joinable()) { - waitAsyncStepPhysics(); + waitStepPhysicsOrReset(); signalKillPhysicsThread(); physicsThread_.join(); } @@ -1306,16 +1330,20 @@ void BatchedSimulator::resetEpisodeInstance(int b) { auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); auto& robotInstance = robots_.robotInstances_[b]; + BATCHED_SIM_ASSERT(isEnvResetting(b)); + ESP_CHECK(resets_[b] == episodeInstance.episodeIndex_, "resetEpisodeInstance: you can" + " only reset an environment to the same episode index (this is a temporary restriction)"); + const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); // init robot agent { const int numEnvs = bpsWrapper_->envs_.size(); const int numPosVars = robot_.numPosVars; - float* yaws = &safeVectorGet(rollouts_.yaws_, currRolloutSubstep_ * numEnvs); - Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_,currRolloutSubstep_ * numEnvs); + float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); + Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_,currStorageStep_ * numEnvs); float* jointPositions = - &safeVectorGet(rollouts_.jointPositions_,currRolloutSubstep_ * numEnvs * numPosVars); + 
&safeVectorGet(rollouts_.jointPositions_,currStorageStep_ * numEnvs * numPosVars); constexpr float groundY = 0.f; positions[b] = episode.agentStartPos_; @@ -1336,6 +1364,12 @@ void BatchedSimulator::resetEpisodeInstance(int b) { jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); } + // some cached robot state computed during a step + { + // assume robot is not in collision on reset + robots_.collisionResults_[b] = false; + } + for (int i = 0; i < episodeInstance.movedFreeObjectIndexes_.size(); i++) { int freeObjectIndex = episodeInstance.movedFreeObjectIndexes_[i]; @@ -1363,9 +1397,13 @@ void BatchedSimulator::resetEpisodeInstance(int b) { episodeInstance.colGrid_.getNumObstacleInstances()); auto& envState = safeVectorGet(pythonEnvStates_, b); - envState.obj_positions.resize(episode.numFreeObjectSpawns_); + envState.obj_positions.resize(episode.numFreeObjectSpawns_); + envState.episode_step_idx = 0; } +bool BatchedSimulator::isEnvResetting(int b) const { + return safeVectorGet(resets_, b) != -1; +} @@ -1474,13 +1512,17 @@ void BatchedSimulator::initEpisodeSet() { } } -void BatchedSimulator::setActions(std::vector&& actions) { +void BatchedSimulator::setActionsResets(std::vector&& actions, std::vector&& resets) { //ProfilingScope scope("BSim setActions"); ESP_CHECK(actions.size() == actions_.size(), - "BatchedSimulator::setActions: input dimension should be " + + "BatchedSimulator::setActionsResets: input dimension should be " + std::to_string(actions_.size()) + ", not " + std::to_string(actions.size())); + ESP_CHECK(resets.size() == resets_.size(), + "BatchedSimulator::setActionsResets: input dimension should be " + + std::to_string(resets_.size()) + ", not " + + std::to_string(resets.size())); const int numEnvs = config_.numEnvs; if (config_.forceRandomActions) { @@ -1491,6 +1533,8 @@ void BatchedSimulator::setActions(std::vector&& actions) { actions_ = std::move(actions); } + resets_ = std::move(resets); + if (config_.doPairedDebugEnvs) { 
// copy actions from non-debug env to its paired debug env // every other env, see isPairedDebugEnv @@ -1499,6 +1543,7 @@ void BatchedSimulator::setActions(std::vector&& actions) { for (int actionIdx = 0; actionIdx < actionDim_; actionIdx++) { actions_[b * actionDim_ + actionIdx] = actions_[(b - 1) * actionDim_ + actionIdx]; } + resets_[b] = resets_[b - 1]; } } } @@ -1512,12 +1557,17 @@ void BatchedSimulator::reset() { int numEnvs = bpsWrapper_->envs_.size(); - currRolloutSubstep_ = 0; - prevRolloutSubstep_ = -1; + currStorageStep_ = 0; + prevStorageStep_ = -1; + for (int b = 0; b < numEnvs; b++) { + // temp populate resets_ here to avoid confusion later + const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + safeVectorGet(resets_, b) = episodeInstance.episodeIndex_; + resetEpisodeInstance(b); } - robots_.updateLinkTransforms(currRolloutSubstep_, /*updateforPhysics*/ false, /*updateForRender*/ true); + updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ false, /*updateForRender*/ true, /*includeResettingEnvs*/true); updateRenderInstances(/*forceUpdate*/true); updatePythonEnvironmentState(); @@ -1527,6 +1577,24 @@ void BatchedSimulator::reset() { recentStats_.numEpisodes_ += numEnvs; } +void BatchedSimulator::resetHelper() { + + const int numEnvs = bpsWrapper_->envs_.size(); + + for (int b = 0; b < numEnvs; b++) { + + if (!isEnvResetting(b)) { + continue; + } + + resetEpisodeInstance(b); + + recentStats_.numEpisodes_++; + + // note link transforms and render instances are now outdated + } +} + #if 0 void BatchedSimulator::autoResetOrStepPhysics() { // ProfilingScope scope("BSim autoResetOrStepPhysics"); @@ -1535,7 +1603,7 @@ void BatchedSimulator::autoResetOrStepPhysics() { deleteDebugInstances(); - if (currRolloutSubstep_ == -1 || currRolloutSubstep_ == maxRolloutSubsteps_ - 1) { + if (currStorageStep_ == -1 || currStorageStep_ == maxStorageSteps_ - 1) { // all episodes are done; set done flag and reset reset(); } else 
{ @@ -1547,15 +1615,14 @@ void BatchedSimulator::autoResetOrStepPhysics() { -void BatchedSimulator::startAsyncStepPhysics(std::vector&& actions) { +void BatchedSimulator::startStepPhysicsOrReset(std::vector&& actions, std::vector&& resets) { ProfilingScope scope("start async physics"); BATCHED_SIM_ASSERT(!isPhysicsThreadActive()); BATCHED_SIM_ASSERT(config_.doAsyncPhysicsStep); - BATCHED_SIM_ASSERT(currRolloutSubstep_ != -1); - BATCHED_SIM_ASSERT(currRolloutSubstep_ < maxRolloutSubsteps_ - 1); + BATCHED_SIM_ASSERT(currStorageStep_ != -1); - setActions(std::move(actions)); + setActionsResets(std::move(actions), std::move(resets)); deleteDebugInstances(); @@ -1565,13 +1632,24 @@ void BatchedSimulator::startAsyncStepPhysics(std::vector&& actions) { void BatchedSimulator::stepPhysics() { ProfilingScope scope("step physics"); + const int numEnvs = bpsWrapper_->envs_.size(); BATCHED_SIM_ASSERT(config_.numSubsteps > 0); - for (int i = 0; i < config_.numSubsteps; i++) { + for (substep_ = 0; substep_ < config_.numSubsteps; substep_++) { substepPhysics(); } + substep_ = -1; + + for (int b = 0; b < numEnvs; b++) { + if (!isEnvResetting(b)) { + auto& envState = safeVectorGet(pythonEnvStates_, b); + envState.episode_step_idx++; + } + } + + resetHelper(); - robots_.updateLinkTransforms(currRolloutSubstep_, /*updateforPhysics*/ false, /*updateForRender*/ true); + updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ false, /*updateForRender*/ true, /*includeResettingEnvs*/true); } void BatchedSimulator::substepPhysics() { @@ -1585,9 +1663,8 @@ void BatchedSimulator::substepPhysics() { constexpr float maxAbsYawAngle = float(Mn::Rad(Mn::Deg(5.f))); constexpr float maxAbsJointAngle = float(Mn::Rad(Mn::Deg(5.f))); - BATCHED_SIM_ASSERT(currRolloutSubstep_ < maxRolloutSubsteps_ - 1); - prevRolloutSubstep_ = currRolloutSubstep_; - currRolloutSubstep_++; + prevStorageStep_ = currStorageStep_; + currStorageStep_ = (currStorageStep_ + 1) % maxStorageSteps_; int numEnvs = 
bpsWrapper_->envs_.size(); int numPosVars = robot_.numPosVars; @@ -1596,20 +1673,26 @@ void BatchedSimulator::substepPhysics() { int actionIndex = 0; - const float* prevYaws = &rollouts_.yaws_[prevRolloutSubstep_ * numEnvs]; - float* yaws = &rollouts_.yaws_[currRolloutSubstep_ * numEnvs]; + const float* prevYaws = &rollouts_.yaws_[prevStorageStep_ * numEnvs]; + float* yaws = &rollouts_.yaws_[currStorageStep_ * numEnvs]; const Mn::Vector2* prevPositions = - &rollouts_.positions_[prevRolloutSubstep_ * numEnvs]; - Mn::Vector2* positions = &rollouts_.positions_[currRolloutSubstep_ * numEnvs]; + &rollouts_.positions_[prevStorageStep_ * numEnvs]; + Mn::Vector2* positions = &rollouts_.positions_[currStorageStep_ * numEnvs]; Mn::Matrix4* rootTransforms = - &rollouts_.rootTransforms_[currRolloutSubstep_ * numEnvs]; + &rollouts_.rootTransforms_[currStorageStep_ * numEnvs]; const float* prevJointPositions = - &rollouts_.jointPositions_[prevRolloutSubstep_ * numEnvs * numPosVars]; + &rollouts_.jointPositions_[prevStorageStep_ * numEnvs * numPosVars]; float* jointPositions = - &rollouts_.jointPositions_[currRolloutSubstep_ * numEnvs * numPosVars]; + &rollouts_.jointPositions_[currStorageStep_ * numEnvs * numPosVars]; // stepping code for (int b = 0; b < numEnvs; b++) { + + if (isEnvResetting(b)) { + actionIndex += actionDim_; + continue; + } + auto& robotInstance = robots_.robotInstances_[b]; const float grabDropAction = actions_[actionIndex++]; @@ -1646,7 +1729,7 @@ void BatchedSimulator::substepPhysics() { } BATCHED_SIM_ASSERT(actionIndex == actions_.size()); - robots_.updateLinkTransforms(currRolloutSubstep_, /*updateforPhysics*/ true, /*updateForRender*/ false); + updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ true, /*updateForRender*/ false, /*includeResettingEnvs*/false); updateCollision(); @@ -1657,33 +1740,6 @@ void BatchedSimulator::substepPhysics() { // robots_.applyActionPenalties(actions_); } -void BatchedSimulator::reverseRobotMovementActions() { - - 
if (prevRolloutSubstep_ == -1) { - return; - } - - BATCHED_SIM_ASSERT(!config_.doAsyncPhysicsStep); - - int numEnvs = bpsWrapper_->envs_.size(); - - for (int b = 0; b < numEnvs; b++) { - - reverseActionsForEnvironment(b); - - auto& robotInstance = robots_.robotInstances_[b]; - robotInstance.doAttemptDrop_ = false; - robotInstance.doAttemptGrip_ = false; - } - - currRolloutSubstep_--; - prevRolloutSubstep_--; - - robots_.updateLinkTransforms(currRolloutSubstep_, /*updateforPhysics*/ false, /*updateForRender*/ true); - // updateGripping(); - updateRenderInstances(true); -} - #if 0 void BatchedSimulator::stepPhysicsWithReferenceActions() { @@ -1734,7 +1790,7 @@ void BatchedSimulator::stepPhysicsWithReferenceActions() { bool BatchedSimulator::isPhysicsThreadActive() const { return config_.doAsyncPhysicsStep && - (!isAsyncStepPhysicsFinished_ || signalStepPhysics_); + (!isStepPhysicsOrResetFinished_ || signalStepPhysics_); } void BatchedSimulator::startRender() { @@ -1746,7 +1802,7 @@ void BatchedSimulator::startRender() { isRenderStarted_ = true; } -void BatchedSimulator::waitForRender() { +void BatchedSimulator::waitRender() { ProfilingScope scope("wait for GPU render"); BATCHED_SIM_ASSERT(isRenderStarted_); bpsWrapper_->renderer_->waitForFrame(); @@ -1837,7 +1893,7 @@ void BatchedSimulator::signalStepPhysics() { std::lock_guard lck(physicsMutex_); signalStepPhysics_ = true; - isAsyncStepPhysicsFinished_ = false; + isStepPhysicsOrResetFinished_ = false; areRenderInstancesUpdated_ = false; physicsCondVar_.notify_one(); // notify_all? 
} @@ -1855,19 +1911,12 @@ const std::vector& BatchedSimulator::getEnvironmentState return pythonEnvStates_; } -void BatchedSimulator::waitAsyncStepPhysics() { - //ProfilingScope scope("BSim waitAsyncStepPhysics"); +void BatchedSimulator::waitStepPhysicsOrReset() { + //ProfilingScope scope("BSim waitStepPhysicsOrReset"); { std::unique_lock lck(physicsMutex_); - physicsCondVar_.wait(lck, [&]{ return isAsyncStepPhysicsFinished_; }); - } - - // perf todo: do this reset on the physics thread - if (currRolloutSubstep_ == maxRolloutSubsteps_ - 1) { - reset(); - } else { - updatePythonEnvironmentState(); + physicsCondVar_.wait(lck, [&]{ return isStepPhysicsOrResetFinished_; }); } } @@ -1894,10 +1943,12 @@ void BatchedSimulator::physicsThreadFunc(int startEnvIndex, int numEnvs) { updateRenderInstances(/*forceUpdate*/false); + updatePythonEnvironmentState(); + { // ProfilingScope scope("physicsThreadFunc notify after step"); std::lock_guard lck(physicsMutex_); - isAsyncStepPhysicsFinished_ = true; + isStepPhysicsOrResetFinished_ = true; signalStepPhysics_ = false; physicsCondVar_.notify_one(); // notify_all? } diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 863a3bfd29..37c4e1598e 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -36,7 +36,6 @@ struct BatchedSimulatorConfig { CameraSensorConfig sensor0; bool forceRandomActions = false; bool doAsyncPhysicsStep = false; - int maxEpisodeLength = 50; int numSubsteps = 1; // if enabled, for every odd env, we don't render any visuals except debug stuff, // e.g. collision visualization. We should keep even and odd envs in sync (e.g. same actions). 
@@ -49,16 +48,14 @@ struct BatchedSimulatorConfig { struct PythonEnvironmentState { - // prev episode result - bool did_finish_episode_and_reset = false; - bool finished_episode_success = false; // curr episode - int target_obj_idx; + int episode_idx = -1; + int episode_step_idx = -1; // will be zero if just reset + int target_obj_idx = -1; Magnum::Vector3 goal_pos; - int episode_step_idx; // robot state Magnum::Vector3 robot_position; - float robot_yaw; + float robot_yaw = 0.f; Magnum::Vector3 ee_pos; bool did_collide = false; // other env state @@ -104,6 +101,8 @@ struct Robot { Magnum::Matrix4 cameraAttachTransform_; }; +// This is misnamed and isn't actually used to store entire rollouts. It contains +// some env state. See also RobotInstance and RobotInstanceSet. struct RolloutRecord { RolloutRecord() = default; RolloutRecord(int numRolloutSubsteps, @@ -141,7 +140,6 @@ class RobotInstanceSet { std::vector* envs, RolloutRecord* rollouts); - void updateLinkTransforms(int currRolloutSubstep, bool updateforPhysics, bool updateForRender); void applyActionPenalties(const std::vector& actions); Robot* robot_ = nullptr; @@ -177,16 +175,13 @@ class BatchedSimulator { void close(); void startRender(); - void waitForRender(); + void waitRender(); // todo: thread-safe access to PythonEnvironmentState void reset(); const std::vector& getEnvironmentStates() const; - void startAsyncStepPhysics(std::vector&& actions); - void waitAsyncStepPhysics(); - - // for interactive debugging; works a bit like undo, but doesn't undo grabs/drops - void reverseRobotMovementActions(); + void startStepPhysicsOrReset(std::vector&& actions, std::vector&& resets); + void waitStepPhysicsOrReset(); bps3D::Renderer& getBpsRenderer(); @@ -227,9 +222,10 @@ class BatchedSimulator { int numFailedDrops_ = 0; }; - void setActions(std::vector&& actions); + void setActionsResets(std::vector&& actions, std::vector&& resets); void stepPhysics(); void substepPhysics(); + void updateLinkTransforms(int 
currRolloutSubstep, bool updateforPhysics, bool updateForRender, bool includeResettingEnvs); void updateCollision(); // for each robot, undo action if collision void postCollisionUpdate(); @@ -237,6 +233,7 @@ class BatchedSimulator { void updateRenderInstances(bool forceUpdate); void reverseActionsForEnvironment(int b); void updateGripping(); + void resetHelper(); void updatePythonEnvironmentState(); // uses episode spawn location @@ -251,6 +248,7 @@ class BatchedSimulator { // We don't yet support changing the episode for an env. You can only reset to the // same episode. void resetEpisodeInstance(int b); + bool isEnvResetting(int b) const; void physicsThreadFunc(int startEnvIndex, int numEnvs); void signalStepPhysics(); @@ -268,14 +266,16 @@ class BatchedSimulator { bool isRenderStarted_ = false; Robot robot_; RobotInstanceSet robots_; - int currRolloutSubstep_ = -1; - int prevRolloutSubstep_ = -1; + int currStorageStep_ = -1; + int prevStorageStep_ = -1; + int substep_ = -1; RolloutRecord rollouts_; std::unique_ptr legacySim_; std::unique_ptr bpsWrapper_; int actionDim_ = -1; std::vector actions_; - int maxRolloutSubsteps_ = -1; + std::vector resets_; // episode index, or -1 if not resetting + int maxStorageSteps_ = -1; std::vector pythonEnvStates_; EpisodeSet episodeSet_; @@ -291,7 +291,7 @@ class BatchedSimulator { std::condition_variable physicsCondVar_; bool signalStepPhysics_ = false; bool signalKillPhysicsThread_ = false; - bool isAsyncStepPhysicsFinished_ = true; + bool isStepPhysicsOrResetFinished_ = true; bool areRenderInstancesUpdated_ = false; ESP_SMART_POINTERS(BatchedSimulator) diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 3c15a39dc4..aa5bf73998 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -45,17 +45,15 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("force_random_actions", &BatchedSimulatorConfig::forceRandomActions, 
R"(Todo)") .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") - .def_readwrite("max_episode_length", &BatchedSimulatorConfig::maxEpisodeLength, R"(Todo)") .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); py::class_( m, "EnvironmentState") .def(py::init(&PythonEnvironmentState::create<>)) - .def_readwrite("did_finish_episode_and_reset", &PythonEnvironmentState::did_finish_episode_and_reset, R"(Todo)") - .def_readwrite("finished_episode_success", &PythonEnvironmentState::finished_episode_success, R"(Todo)") .def_readwrite("target_obj_idx", &PythonEnvironmentState::target_obj_idx, R"(Todo)") .def_readwrite("goal_pos", &PythonEnvironmentState::goal_pos, R"(Todo)") + .def_readwrite("episode_idx", &PythonEnvironmentState::episode_idx, R"(Todo)") .def_readwrite("episode_step_idx", &PythonEnvironmentState::episode_step_idx, R"(Todo)") .def_readwrite("robot_position", &PythonEnvironmentState::robot_position, R"(Todo)") .def_readwrite("robot_yaw", &PythonEnvironmentState::robot_yaw, R"(Todo)") @@ -70,11 +68,11 @@ void initBatchedSimBindings(py::module& m) { &BatchedSimulator::reset, R"(todo)") .def("get_environment_states", &BatchedSimulator::getEnvironmentStates, R"(todo)") - .def("start_async_step_physics", - &BatchedSimulator::startAsyncStepPhysics, R"(todo)") - .def("wait_async_step_physics", &BatchedSimulator::waitAsyncStepPhysics, R"(todo)") + .def("start_step_physics_or_reset", + &BatchedSimulator::startStepPhysicsOrReset, R"(todo)") + .def("wait_step_physics_or_reset", &BatchedSimulator::waitStepPhysicsOrReset, R"(todo)") .def("start_render", &BatchedSimulator::startRender, R"(todo)") - .def("wait_for_frame", &BatchedSimulator::waitForRender, R"(todo)") + .def("wait_render", 
&BatchedSimulator::waitRender, R"(todo)") .def("close", &BatchedSimulator::close, R"(todo)") // .def("reset", &BatchedSimulator::reset, R"(todo)") .def( diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index 95d7cedf0e..cfb709020d 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -207,7 +207,6 @@ TEST_F(BatchedSimulatorTest, basic) { .sensor0 = {.width = 768, .height = 768, .hfov = 70}, .forceRandomActions = forceRandomActions, .doAsyncPhysicsStep = true, - .maxEpisodeLength = 500, .numSubsteps = 1, .doPairedDebugEnvs = doPairedDebugEnvs, .doProceduralEpisodeSet = true, @@ -276,12 +275,16 @@ TEST_F(BatchedSimulatorTest, basic) { } } + std::vector resets(config.numEnvs, -1); + bool doAdvanceSim = false; bsim.reset(); + std::vector envStates; + if (doOverlapPhysics) { - bsim.startAsyncStepPhysics(std::vector(actions)); + bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); } int frameIdx = 0; @@ -291,21 +294,25 @@ TEST_F(BatchedSimulatorTest, basic) { while (true) { if (doOverlapPhysics) { - bsim.waitAsyncStepPhysics(); + bsim.waitStepPhysicsOrReset(); bsim.startRender(); + // make a copy of envStates + envStates = bsim.getEnvironmentStates(); if (doAdvanceSim) { - bsim.startAsyncStepPhysics(std::vector(actions)); + bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); doAdvanceSim = false; } - bsim.waitForRender(); + bsim.waitRender(); } else { if (doAdvanceSim) { - bsim.startAsyncStepPhysics(std::vector(actions)); - bsim.waitAsyncStepPhysics(); + bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); + bsim.waitStepPhysicsOrReset(); doAdvanceSim = false; } bsim.startRender(); - bsim.waitForRender(); + // make a copy of envStates + envStates = bsim.getEnvironmentStates(); + bsim.waitRender(); } uint8_t* base_color_ptr = bsim.getBpsRenderer().getColorPointer(); @@ -385,11 +392,6 @@ TEST_F(BatchedSimulatorTest, basic) { doTuneRobotCam = !doTuneRobotCam; } 
else if (key == 'r') { bsim.reloadSerializeCollection(); - } else if (key == 'u') { - if (frameIdx > 0) { - bsim.reverseRobotMovementActions(); - frameIdx -= 2; // undoing two keypresses (the last one, and this 'u') - } } for (int b = 0; b < config.numEnvs; b++) { @@ -456,11 +458,6 @@ TEST_F(BatchedSimulatorTest, basic) { doAdvanceSim = true; } else if (key == 't') { doStartAnimation = true; - } else if (key == 'u') { - if (frameIdx > 0) { - bsim.reverseRobotMovementActions(); - frameIdx -= 2; // undoing two keypresses (the last one, and this 'u') - } } else if (key == 'c') { doTuneRobotCam = !doTuneRobotCam; } else if (key == 'r') { @@ -497,6 +494,18 @@ TEST_F(BatchedSimulatorTest, basic) { } } + #if 0 + // temp end episode on collision + for (int b = 0; b < config.numEnvs; b++) { + int resetPeriodForEnv = b + 1; + if (envStates[b].did_collide) { + resets[b] = envStates[b].episode_idx; + } else { + resets[b] = -1; + } + } + #endif + #if 0 // test columnGrid collision { constexpr float sphereDist = 1.f; From 4c693f55822f15cf1f0128f1c2a4eade74ea1d42 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Thu, 31 Mar 2022 01:21:08 -0400 Subject: [PATCH 06/53] add robot_joint_positions to PythonEnvironmentState --- src/esp/batched_sim/BatchedSimulator.cpp | 14 +++++++++++++- src/esp/batched_sim/BatchedSimulator.h | 1 + src/esp/bindings/BatchedSimBindings.cpp | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index e973cce32c..e9551ab395 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -347,8 +347,11 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF void BatchedSimulator::updatePythonEnvironmentState() { const int numEnvs = config_.numEnvs; + const int numPosVars = robot_.numPosVars; const Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_, currStorageStep_ * numEnvs); const 
float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); + const float* jointPositions = + &safeVectorGet(rollouts_.jointPositions_, currStorageStep_ * numEnvs * numPosVars); for (int b = 0; b < config_.numEnvs; b++) { @@ -357,11 +360,17 @@ void BatchedSimulator::updatePythonEnvironmentState() { envState.ee_pos = robotInstance.cachedGripperLinkMat_.translation(); // envState.episode_step_idx updated incrementally - // todo: do logical or over all substeps + // todo: do logical "or" over all substeps envState.did_collide = robots_.collisionResults_[b]; // envState.obj_positions // this is updated incrementally envState.robot_position = Mn::Vector3(positions[b].x(), 0.f, positions[b].y()); envState.robot_yaw = yaws[b]; + envState.robot_joint_positions.resize(numPosVars); + int baseJointIndex = b * robot_.numPosVars; + for (int j = 0; j < numPosVars; j++) { + const auto& pos = jointPositions[baseJointIndex + j]; + safeVectorGet(envState.robot_joint_positions, j) = pos; + } // perf todo: set this just once at episode reset const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); @@ -1688,6 +1697,9 @@ void BatchedSimulator::substepPhysics() { // stepping code for (int b = 0; b < numEnvs; b++) { + // perf todo: if collision occurs on a substep, don't try stepping again for the + // remaining substeps (not until the action changes on the next step) + if (isEnvResetting(b)) { actionIndex += actionDim_; continue; diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 37c4e1598e..164787e424 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -56,6 +56,7 @@ struct PythonEnvironmentState { // robot state Magnum::Vector3 robot_position; float robot_yaw = 0.f; + std::vector robot_joint_positions; Magnum::Vector3 ee_pos; bool did_collide = false; // other env state diff --git a/src/esp/bindings/BatchedSimBindings.cpp 
b/src/esp/bindings/BatchedSimBindings.cpp index aa5bf73998..ae34b1ff2c 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -57,6 +57,7 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("episode_step_idx", &PythonEnvironmentState::episode_step_idx, R"(Todo)") .def_readwrite("robot_position", &PythonEnvironmentState::robot_position, R"(Todo)") .def_readwrite("robot_yaw", &PythonEnvironmentState::robot_yaw, R"(Todo)") + .def_readwrite("robot_joint_positions", &PythonEnvironmentState::robot_joint_positions, R"(Todo)") .def_readwrite("ee_pos", &PythonEnvironmentState::ee_pos, R"(Todo)") .def_readwrite("did_collide", &PythonEnvironmentState::did_collide, R"(Todo)") .def_readwrite("obj_positions", &PythonEnvironmentState::obj_positions, R"(Todo)"); From b1baff8d93faa32db12c3982dd2d27d606b68e12 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Thu, 31 Mar 2022 02:00:10 -0400 Subject: [PATCH 07/53] add EpisodeSet::allEpisodesAABB_ --- src/esp/batched_sim/EpisodeSet.cpp | 68 +++++++++++++++++------------- src/esp/batched_sim/EpisodeSet.h | 5 ++- 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/esp/batched_sim/EpisodeSet.cpp b/src/esp/batched_sim/EpisodeSet.cpp index 567b785283..d26c023ee3 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -20,20 +20,11 @@ namespace batched_sim { namespace { -void addStageFixedObject(EpisodeSet& set, const std::string& name, - const BpsSceneMapping& sceneMapping, const serialize::Collection& collection) { +void addStageFixedObject(EpisodeSet& set, const std::string& name) { FixedObject fixedObj; fixedObj.name_ = name; - fixedObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(name); - - std::string columnGridFilepathBase = "../data/columngrids/" + name + "_stage_only"; - fixedObj.columnGridSet_.load(columnGridFilepathBase); - - ESP_CHECK(fixedObj.columnGridSet_.getSphereRadii() == 
collection.collisionRadiusWorkingSet, - "ColumnGridSet " << name << " with radii " << fixedObj.columnGridSet_.getSphereRadii() - << " doesn't match collection collision radius working set " - << collection.collisionRadiusWorkingSet); + fixedObj.needsPostLoadFixup_ = true; set.fixedObjects_.push_back(std::move(fixedObj)); } @@ -45,21 +36,11 @@ float getOriginBoundingSphereRadiusSquaredForAABB(const Magnum::Range3D& aabb) { return maxCorner.dot(); } -void addFreeObject(EpisodeSet& set, const std::string& name, const BpsSceneMapping& sceneMapping) { +void addFreeObject(EpisodeSet& set, const std::string& name) { FreeObject freeObj; freeObj.name_ = name; - freeObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(name); - -#if 0 - freeObj.aabb_ = aabb; - // freeObj.boundingSphereRadiusSq_ = getOriginBoundingSphereRadiusSquaredForAABB(aabb); - - // add one collision sphere at base of aabb - constexpr float sphereRadius = 0.1f; // temp - freeObj.collisionSphereLocalOrigins_.push_back( - {aabb.center().x(), aabb.center().y(), aabb.min().z() + sphereRadius}); -#endif + freeObj.needsPostLoadFixup_ = true; // all YCB objects needs this to be upright const auto baseRot = Mn::Quaternion::rotation(Mn::Deg(-90), Mn::Vector3(1.f, 0.f, 0.f)); @@ -190,8 +171,6 @@ EpisodeSet generateBenchmarkEpisodeSet(int numEpisodes, EpisodeSet set; - set.maxFreeObjects_ = 0; - std::vector replicaCadBakedStages = { "Baked_sc0_staging_00", "Baked_sc0_staging_01", @@ -209,20 +188,24 @@ EpisodeSet generateBenchmarkEpisodeSet(int numEpisodes, }; for (const auto& stageName : replicaCadBakedStages) { - addStageFixedObject(set, stageName, sceneMapping, collection); + addStageFixedObject(set, stageName); } for (const auto& serFreeObject : collection.freeObjects) { - addFreeObject(set, serFreeObject.name, sceneMapping); + addFreeObject(set, serFreeObject.name); } - updateFromSerializeCollection(set, collection); + // sloppy: call postLoadFixup before adding episodes; this means that + // 
set.maxFreeObjects_ gets computed incorrectly in here (but it will get computed + // correctly, incrementally, in addEpisode). + postLoadFixup(set, sceneMapping, collection); // distribute stages across episodes for (int i = 0; i < numEpisodes; i++) { int stageIndex = i * set.fixedObjects_.size() / numEpisodes; addEpisode(set, collection, stageIndex, random); } + BATCHED_SIM_ASSERT(set.maxFreeObjects_ > 0); return set; } @@ -360,6 +343,8 @@ void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const s freeObj.needsPostLoadFixup_ = false; } + set.allEpisodesAABB_ = Mn::Range3D(Mn::Math::ZeroInit); + bool isFirstFixedObj = true; for (auto& fixedObj : set.fixedObjects_) { fixedObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(fixedObj.name_); @@ -372,7 +357,30 @@ void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const s << " doesn't match collection collision radius working set " << collection.collisionRadiusWorkingSet); - fixedObj.needsPostLoadFixup_ = false; + fixedObj.needsPostLoadFixup_ = false; + + // update set.allEpisodesAABB_ + { + const auto& columnGrid = fixedObj.columnGridSet_.getColumnGrid(0); + constexpr float dummyMinY = -1e7f; + constexpr float dummyMaxY = 1e7f; + Mn::Vector3 cgMin( + columnGrid.minX, + dummyMinY, + columnGrid.minZ); + Mn::Vector3 cgMax( + columnGrid.getMaxX(), + dummyMaxY, + columnGrid.getMaxZ()); + + if (isFirstFixedObj) { + set.allEpisodesAABB_ = Mn::Range3D(cgMin, cgMax); + isFirstFixedObj = false; + } else { + set.allEpisodesAABB_.min() = Mn::Math::min(set.allEpisodesAABB_.min(), cgMin); + set.allEpisodesAABB_.max() = Mn::Math::max(set.allEpisodesAABB_.max(), cgMax); + } + } } set.maxFreeObjects_ = 0; @@ -503,7 +511,7 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, esp::io::readMember(obj, "freeObjectSpawns", val.freeObjectSpawns_); esp::io::readMember(obj, "freeObjects", val.freeObjects_); - // for maxFreeObjects_ + // for maxFreeObjects_ and allEpisodesAABB_ 
val.needsPostLoadFixup_ = true; val.maxFreeObjects_ = -1; diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 41dbb2dd6b..18ae0333cd 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -51,7 +51,7 @@ class FreeObjectSpawn { public: int16_t freeObjIndex_; int16_t startRotationIndex_; - Mn::Vector3 startPos_; // discretize? + Magnum::Vector3 startPos_; // discretize? }; class Episode { @@ -60,7 +60,7 @@ class Episode { int16_t numFreeObjectSpawns_ = 0; int16_t targetObjIndex_ = -1; // 0..numFreeObjectSpawns - 1, see also freeObjectIndex int32_t firstFreeObjectSpawnIndex_ = -1; // index into EpisodeSet::freeObjectSpawns_ - Mn::Vector2 agentStartPos_; + Magnum::Vector2 agentStartPos_; float agentStartYaw_ = 0.f; // radians // task-specific @@ -74,6 +74,7 @@ class EpisodeSet { std::vector freeObjectSpawns_; // num episodes * num-spawns-per-episode, ~5,000,000 std::vector freeObjects_; // ~100, max 32K int maxFreeObjects_ = -1; + Magnum::Range3D allEpisodesAABB_; bool needsPostLoadFixup_ = true; static EpisodeSet loadFromFile(const std::string& filepath); From 4d1238cb4ff2e25e4b58c3288fd84efb3935e5c5 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Fri, 1 Apr 2022 01:54:32 -0400 Subject: [PATCH 08/53] allow resetting an env to an arbitrary episode; bugfix for physics thread deadlock --- src/esp/batched_sim/BatchedSimulator.cpp | 331 +++++++++++------- src/esp/batched_sim/BatchedSimulator.h | 19 +- .../batched_sim/CollisionBroadphaseGrid.cpp | 13 + src/esp/batched_sim/CollisionBroadphaseGrid.h | 2 + src/esp/batched_sim/EpisodeSet.h | 4 +- src/esp/bindings/BatchedSimBindings.cpp | 3 +- src/tests/BatchedSimTest.cpp | 23 +- src/tests/ColumnGridTest.cpp | 12 + 8 files changed, 258 insertions(+), 149 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index e9551ab395..b324b3466b 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ 
b/src/esp/batched_sim/BatchedSimulator.cpp @@ -176,6 +176,7 @@ RobotInstanceSet::RobotInstanceSet(Robot* robot, void BatchedSimulator::reverseActionsForEnvironment(int b) { BATCHED_SIM_ASSERT(prevStorageStep_ != -1); + BATCHED_SIM_ASSERT(!isEnvResetting(b)); const int numEnvs = config_.numEnvs; const int numPosVars = robot_.numPosVars; @@ -364,7 +365,7 @@ void BatchedSimulator::updatePythonEnvironmentState() { envState.did_collide = robots_.collisionResults_[b]; // envState.obj_positions // this is updated incrementally envState.robot_position = Mn::Vector3(positions[b].x(), 0.f, positions[b].y()); - envState.robot_yaw = yaws[b]; + //envState.robot_yaw = yaws[b]; envState.robot_joint_positions.resize(numPosVars); int baseJointIndex = b * robot_.numPosVars; for (int j = 0; j < numPosVars; j++) { @@ -916,8 +917,6 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { } } #endif - - areRenderInstancesUpdated_ = true; } @@ -1197,11 +1196,13 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { rollouts_ = RolloutRecord(maxStorageSteps_, numEnvs, robot_.numPosVars, numNodes); - currStorageStep_ = - -1; // trigger auto-reset on first call to autoResetOrStepPhysics + currStorageStep_ = 0; prevStorageStep_ = -1; + + initEpisodeInstances(); + isOkToRender_ = false; - isOkToStep_ = false; + isOkToStep_ = true; isRenderStarted_ = false; // default camera @@ -1215,15 +1216,6 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { if (config_.doAsyncPhysicsStep) { physicsThread_ = std::thread(&BatchedSimulator::physicsThreadFunc, this, 0, config_.numEnvs); - areRenderInstancesUpdated_ = false; - } - - if (config_.doPairedDebugEnvs) { - for (int b = 0; b < numEnvs; b++) { - if (shouldAddColumnGridDebugVisualsForEnv(config_, b)) { - debugRenderColumnGrids(b); - } - } } } @@ -1238,6 +1230,15 @@ void BatchedSimulator::close() { // todo: more close stuff? 
} +BatchedSimulator::~BatchedSimulator() { + close(); +} + + +int BatchedSimulator::getNumEpisodes() const { + return episodeSet_.episodes_.size(); +} + void BatchedSimulator::setCamera(const Mn::Vector3& camPos, const Mn::Quaternion& camRot) { @@ -1280,57 +1281,147 @@ bps3D::Environment& BatchedSimulator::getBpsEnvironment(int envIndex) { return bpsWrapper_->envs_[envIndex]; } -void BatchedSimulator::instantiateEpisode(int b, int episodeIndex) { +// one-time init for envs +void BatchedSimulator::initEpisodeInstances() { + + const int numEnvs = config_.numEnvs; + + // we expect all the episode instances to get default-constructed in here + BATCHED_SIM_ASSERT(episodeInstanceSet_.episodeInstanceByEnv_.empty()); + + episodeInstanceSet_.episodeInstanceByEnv_.resize(numEnvs); + + for (int b = 0; b < numEnvs; b++) { + auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + + constexpr int maxBytes = 1000 * 1024; + // this is tuned assuming a building-scale simulation with household-object-scale obstacles + constexpr float maxGridSpacing = 0.5f; + episodeInstance.colGrid_ = CollisionBroadphaseGrid(getMaxCollisionRadius(serializeCollection_), + episodeSet_.allEpisodesAABB_.min().x(), episodeSet_.allEpisodesAABB_.min().z(), + episodeSet_.allEpisodesAABB_.max().x(), episodeSet_.allEpisodesAABB_.max().z(), + maxBytes, maxGridSpacing); + } +} + + +void BatchedSimulator::clearEpisodeInstance(int b) { auto& env = getBpsEnvironment(b); auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - BATCHED_SIM_ASSERT(episodeInstance.episodeIndex_ == -1); - episodeInstance.episodeIndex_ = episodeIndex; + if (episodeInstance.episodeIndex_ == -1) { + return; // nothing to do + } + +#ifdef ENABLE_DEBUG_INSTANCES + for (auto& instanceId : episodeInstance.persistentDebugInstanceIds_) { + env.deleteInstance(instanceId); + } + episodeInstance.persistentDebugInstanceIds_.clear(); +#endif + + if (!disableFreeObjectVisualsForEnv(config_, 
b)) { + // Remove free object bps instances **in reverse order**. This is so bps3D will later + // allocate us new instance IDs (from its free list) in ascending order (see assert + // in spawnFreeObject). + const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + for (int freeObjectIndex = episode.numFreeObjectSpawns_ - 1; freeObjectIndex >= 0; freeObjectIndex--) { + int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + env.deleteInstance(instanceId); + } + } + + episodeInstance.firstFreeObjectInstanceId_ = -1; - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeIndex); - const auto& stageBlueprint = - safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex).instanceBlueprint_; if (!disableStageVisualsForEnv(config_, b)) { - episodeInstance.stageFixedObjectInstanceId_ = env.addInstance( - stageBlueprint.meshIdx_, stageBlueprint.mtrlIdx_, identityGlMat_); + // remove stage bps instance + env.deleteInstance(episodeInstance.stageFixedObjectInstanceId_); + } + + // remove all free objects from collision grid + episodeInstance.colGrid_.removeAllObstacles(); +} + +void BatchedSimulator::resetEpisodeInstance(int b) { + //ProfilingScope scope("BSim resetEpisodeInstance"); + + auto& env = getBpsEnvironment(b); + + clearEpisodeInstance(b); + + auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + BATCHED_SIM_ASSERT(episodeInstance.colGrid_.getNumObstacleInstances() == 0); + + BATCHED_SIM_ASSERT(isEnvResetting(b)); + + episodeInstance.episodeIndex_ = resets_[b]; + + const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + + // add stage + { + const auto& stageBlueprint = + safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex).instanceBlueprint_; + if (!disableStageVisualsForEnv(config_, b)) { + episodeInstance.stageFixedObjectInstanceId_ = env.addInstance( + stageBlueprint.meshIdx_, stageBlueprint.mtrlIdx_, 
identityGlMat_); + } } auto& envState = safeVectorGet(pythonEnvStates_, b); envState.obj_positions.resize(episode.numFreeObjectSpawns_); + for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { + spawnFreeObject(b, freeObjectIndex, /*reinsert*/false); + } + + // reset robot (note that robot's bps instances are not re-created here) { - const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); - const auto& columnGrid = stageFixedObject.columnGridSet_.getColumnGrid(0); + auto& robotInstance = robots_.robotInstances_[b]; - // todo: find extents for entire EpisodeSet, not just this specific columnGrid - constexpr int maxBytes = 1000 * 1024; - // this is tuned assuming a building-scale simulation with household-object-scale obstacles - constexpr float maxGridSpacing = 0.5f; - episodeInstance.colGrid_ = CollisionBroadphaseGrid(getMaxCollisionRadius(serializeCollection_), - columnGrid.minX, columnGrid.minZ, - columnGrid.getMaxX(), columnGrid.getMaxZ(), - maxBytes, maxGridSpacing); - } + const int numEnvs = bpsWrapper_->envs_.size(); + const int numPosVars = robot_.numPosVars; + float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); + Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_,currStorageStep_ * numEnvs); + float* jointPositions = + &safeVectorGet(rollouts_.jointPositions_,currStorageStep_ * numEnvs * numPosVars); - for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { + constexpr float groundY = 0.f; + positions[b] = episode.agentStartPos_; + yaws[b] = episode.agentStartYaw_; - int globalFreeObjectIndex = b * episodeSet_.maxFreeObjects_ + freeObjectIndex; + for (int j = 0; j < robot_.numPosVars; j++) { + auto& pos = jointPositions[b * robot_.numPosVars + j]; + pos = 0.f; + pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], + robot_.jointPositionLimits.second[j]); + } - const auto& 
freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); - const auto& blueprint = freeObject.instanceBlueprint_; - const auto& rotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); + // 7 shoulder, + is down + // 8 twist, + is twist to right + // 9 elbow, + is down + // 10 elbow twist, + is twst to right + // 11 wrist, + is down + jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); - spawnFreeObject(b, freeObjectIndex, /*reinsert*/false); + // assume robot is not in collision on reset + robots_.collisionResults_[b] = false; + + robotInstance.grippedFreeObjectIndex_ = -1; + robotInstance.doAttemptDrop_ = false; + robotInstance.doAttemptGrip_ = false; } - episodeInstance.debugNumColGridObstacleInstances_ = episodeInstance.colGrid_.getNumObstacleInstances(); + envState.episode_step_idx = 0; + if (shouldAddColumnGridDebugVisualsForEnv(config_, b)) { + debugRenderColumnGrids(b); + } } - +// incremental reset of same episode index to same env +#if 0 void BatchedSimulator::resetEpisodeInstance(int b) { //ProfilingScope scope("BSim resetEpisodeInstance"); @@ -1400,15 +1491,11 @@ void BatchedSimulator::resetEpisodeInstance(int b) { robotInstance.doAttemptDrop_ = false; robotInstance.doAttemptGrip_ = false; - // todo: ensure spawnFreeObject also updates bps instance - - BATCHED_SIM_ASSERT(episodeInstance.debugNumColGridObstacleInstances_ == - episodeInstance.colGrid_.getNumObstacleInstances()); - auto& envState = safeVectorGet(pythonEnvStates_, b); envState.obj_positions.resize(episode.numFreeObjectSpawns_); envState.episode_step_idx = 0; } +#endif bool BatchedSimulator::isEnvResetting(int b) const { return safeVectorGet(resets_, b) != -1; @@ -1437,9 +1524,10 @@ void BatchedSimulator::spawnFreeObject(int b, int freeObjectIndex, bool reinsert int instanceId 
= env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); // store the first free object's bps instanceId and assume the rest will be contiguous if (freeObjectIndex == 0) { + BATCHED_SIM_ASSERT(episodeInstance.firstFreeObjectInstanceId_ == -1); episodeInstance.firstFreeObjectInstanceId_ = instanceId; } - BATCHED_SIM_ASSERT(instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex)); + BATCHED_SIM_ASSERT(instanceId == getFreeObjectBpsInstanceId(b, freeObjectIndex)); } } @@ -1469,6 +1557,7 @@ void BatchedSimulator::removeFreeObjectFromCollisionGrid(int b, int freeObjectIn int BatchedSimulator::getFreeObjectBpsInstanceId(int b, int freeObjectIndex) const { auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + BATCHED_SIM_ASSERT(episodeInstance.firstFreeObjectInstanceId_ != -1); return episodeInstance.firstFreeObjectInstanceId_ + freeObjectIndex; } @@ -1500,8 +1589,8 @@ void BatchedSimulator::initEpisodeSet() { ESP_CHECK(config_.episodeSetFilepath.empty(), "For BatchedSimulatorConfig::doProceduralEpisodeSet==true, don't specify episodeSetFilepath"); - // generate exactly as many episodes as envs (this is not required) - episodeSet_ = generateBenchmarkEpisodeSet(numEnvs, sceneMapping_, serializeCollection_); + constexpr int numEpisodesToGenerate = 100; // arbitrary + episodeSet_ = generateBenchmarkEpisodeSet(numEpisodesToGenerate, sceneMapping_, serializeCollection_); episodeSet_.saveToFile("generated.episode_set.json"); } else { ESP_CHECK(!config_.episodeSetFilepath.empty(), @@ -1509,29 +1598,20 @@ void BatchedSimulator::initEpisodeSet() { episodeSet_ = EpisodeSet::loadFromFile(config_.episodeSetFilepath); postLoadFixup(episodeSet_, sceneMapping_, serializeCollection_); } - - episodeInstanceSet_.episodeInstanceByEnv_.resize(numEnvs); - for (int b = 0; b < numEnvs; b++) { - // temp: statically assigned episodes to envs - auto episodeIndex = b * episodeSet_.episodes_.size() / numEnvs; - if (config_.doPairedDebugEnvs && (b % 2 == 
1)) { - episodeIndex = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b - 1).episodeIndex_; - } - instantiateEpisode(b, episodeIndex); - } } void BatchedSimulator::setActionsResets(std::vector&& actions, std::vector&& resets) { //ProfilingScope scope("BSim setActions"); - ESP_CHECK(actions.size() == actions_.size(), - "BatchedSimulator::setActionsResets: input dimension should be " + - std::to_string(actions_.size()) + ", not " + - std::to_string(actions.size())); - ESP_CHECK(resets.size() == resets_.size(), - "BatchedSimulator::setActionsResets: input dimension should be " + - std::to_string(resets_.size()) + ", not " + - std::to_string(resets.size())); + // note we allow empty actions OR empty resets + ESP_CHECK(actions.empty() || actions.size() == actions_.size(), + "BatchedSimulator::setActionsResets: actions length should be " << + actions_.size() << ", not " << actions.size()); + ESP_CHECK(resets.empty() || resets.size() == resets_.size(), + "BatchedSimulator::setActionsResets: resets length should be " << + resets_.size() << ", not " << resets.size()); + ESP_CHECK(!actions.empty() || !resets.empty(), "BatchedSimulator::setActionsResets: " + << "at least one of actions or resets must be length " << actions_.size()); const int numEnvs = config_.numEnvs; if (config_.forceRandomActions) { @@ -1539,10 +1619,18 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto action = random_.uniform_float(-1.f, 1.f); } } else { - actions_ = std::move(actions); + if (!actions.empty()) { + actions_ = std::move(actions); + } else { + std::fill(actions_.begin(), actions_.end(), 0.f); + } } - resets_ = std::move(resets); + if (!resets.empty()) { + resets_ = std::move(resets); + } else { + std::fill(resets_.begin(), resets_.end(), -1); + } if (config_.doPairedDebugEnvs) { // copy actions from non-debug env to its paired debug env @@ -1557,35 +1645,20 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto } } -void 
BatchedSimulator::reset() { +void BatchedSimulator::reset(std::vector&& resets) { ProfilingScope scope("reset episodes"); // we shouldn't be in the middle of rendering or stepping physics ESP_CHECK(!isPhysicsThreadActive(), "Don't call reset during async physics step"); ESP_CHECK(!isRenderStarted_, "Don't call reset during async render"); - int numEnvs = bpsWrapper_->envs_.size(); - - currStorageStep_ = 0; - prevStorageStep_ = -1; - - for (int b = 0; b < numEnvs; b++) { - // temp populate resets_ here to avoid confusion later - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - safeVectorGet(resets_, b) = episodeInstance.episodeIndex_; - - resetEpisodeInstance(b); - } - updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ false, /*updateForRender*/ true, /*includeResettingEnvs*/true); - updateRenderInstances(/*forceUpdate*/true); - - updatePythonEnvironmentState(); + startStepPhysicsOrReset(std::vector(), std::move(resets)); + waitStepPhysicsOrReset(); isOkToRender_ = true; - isOkToStep_ = true; - recentStats_.numEpisodes_ += numEnvs; } +// called within step to reset whatever envs are requested to reset void BatchedSimulator::resetHelper() { const int numEnvs = bpsWrapper_->envs_.size(); @@ -1604,39 +1677,23 @@ void BatchedSimulator::resetHelper() { } } -#if 0 -void BatchedSimulator::autoResetOrStepPhysics() { - // ProfilingScope scope("BSim autoResetOrStepPhysics"); - - BATCHED_SIM_ASSERT(!config_.doAsyncPhysicsStep); - - deleteDebugInstances(); - - if (currStorageStep_ == -1 || currStorageStep_ == maxStorageSteps_ - 1) { - // all episodes are done; set done flag and reset - reset(); - } else { - stepPhysics(); - updateRenderInstances(/*forceUpdate*/false); - } -} -#endif - - void BatchedSimulator::startStepPhysicsOrReset(std::vector&& actions, std::vector&& resets) { ProfilingScope scope("start async physics"); BATCHED_SIM_ASSERT(!isPhysicsThreadActive()); - BATCHED_SIM_ASSERT(config_.doAsyncPhysicsStep); 
BATCHED_SIM_ASSERT(currStorageStep_ != -1); setActionsResets(std::move(actions), std::move(resets)); deleteDebugInstances(); - // send message to physicsThread_ - signalStepPhysics(); + if (config_.doAsyncPhysicsStep) { + // send message to physicsThread_ + signalStepPhysics(); + } else { + stepPhysics(); + } } void BatchedSimulator::stepPhysics() { @@ -1659,6 +1716,10 @@ void BatchedSimulator::stepPhysics() { resetHelper(); updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ false, /*updateForRender*/ true, /*includeResettingEnvs*/true); + + updateRenderInstances(/*forceUpdate*/false); + + updatePythonEnvironmentState(); } void BatchedSimulator::substepPhysics() { @@ -1903,15 +1964,18 @@ std::string BatchedSimulator::getRecentStatsAndReset() const { void BatchedSimulator::signalStepPhysics() { //ProfilingScope scope("BSim signalStepPhysics"); - std::lock_guard lck(physicsMutex_); - signalStepPhysics_ = true; + BATCHED_SIM_ASSERT(config_.doAsyncPhysicsStep); + BATCHED_SIM_ASSERT(isStepPhysicsOrResetFinished_); + BATCHED_SIM_ASSERT(!signalStepPhysics_); isStepPhysicsOrResetFinished_ = false; - areRenderInstancesUpdated_ = false; + + std::lock_guard lck(physicsSignalMutex_); + signalStepPhysics_ = true; physicsCondVar_.notify_one(); // notify_all? } void BatchedSimulator::signalKillPhysicsThread() { - std::lock_guard lck(physicsMutex_); + std::lock_guard lck(physicsSignalMutex_); signalKillPhysicsThread_ = true; physicsCondVar_.notify_one(); // notify_all? 
} @@ -1925,9 +1989,8 @@ const std::vector& BatchedSimulator::getEnvironmentState void BatchedSimulator::waitStepPhysicsOrReset() { //ProfilingScope scope("BSim waitStepPhysicsOrReset"); - - { - std::unique_lock lck(physicsMutex_); + if (config_.doAsyncPhysicsStep) { + std::unique_lock lck(physicsFinishMutex_); physicsCondVar_.wait(lck, [&]{ return isStepPhysicsOrResetFinished_; }); } } @@ -1936,44 +1999,43 @@ void BatchedSimulator::physicsThreadFunc(int startEnvIndex, int numEnvs) { ProfilingScope scope("physics background thread"); while (true) { + bool didSignalStepPhysics = false; + bool didSignalKillPhysicsThread = false; { ProfilingScope scope("wait for main thread"); - std::unique_lock lck(physicsMutex_); + std::unique_lock lck(physicsSignalMutex_); physicsCondVar_.wait(lck, [&]{ return signalStepPhysics_ || signalKillPhysicsThread_; }); + didSignalStepPhysics = signalStepPhysics_; + didSignalKillPhysicsThread = signalKillPhysicsThread_; + signalStepPhysics_ = false; + signalKillPhysicsThread_ = false; } - if (signalKillPhysicsThread_) { + if (didSignalKillPhysicsThread) { break; } BATCHED_SIM_ASSERT(startEnvIndex == 0 && numEnvs == config_.numEnvs); - // BATCHED_SIM_ASSERT(startEnvIndex >= 0 && startEnvIndex); - // BATCHED_SIM_ASSERT(numEnvs > 0); - // BATCHED_SIM_ASSERT(startEnvIndex + numEnvs <= config_.numEnvs); + BATCHED_SIM_ASSERT(!isStepPhysicsOrResetFinished_); stepPhysics(); - updateRenderInstances(/*forceUpdate*/false); - - updatePythonEnvironmentState(); - { // ProfilingScope scope("physicsThreadFunc notify after step"); - std::lock_guard lck(physicsMutex_); + std::lock_guard lck(physicsFinishMutex_); isStepPhysicsOrResetFinished_ = true; - signalStepPhysics_ = false; physicsCondVar_.notify_one(); // notify_all? 
} } } -void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxProgress) const { +void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxProgress) { #ifdef ENABLE_DEBUG_INSTANCES auto& env = safeVectorGet(bpsWrapper_->envs_, b); - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); const auto& source = stageFixedObject.columnGridSet_.getColumnGrid(0); @@ -2020,6 +2082,7 @@ void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxPro glm::mat4x3 glMat = toGlmMat4x3(localToBox); int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); + episodeInstance.persistentDebugInstanceIds_.push_back(instanceId); } } } diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 164787e424..a4b809e21f 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -55,7 +55,6 @@ struct PythonEnvironmentState { Magnum::Vector3 goal_pos; // robot state Magnum::Vector3 robot_position; - float robot_yaw = 0.f; std::vector robot_joint_positions; Magnum::Vector3 ee_pos; bool did_collide = false; @@ -173,13 +172,17 @@ class RobotInstanceSet { class BatchedSimulator { public: BatchedSimulator(const BatchedSimulatorConfig& config); + ~BatchedSimulator(); void close(); + + int getNumEpisodes() const; + void startRender(); void waitRender(); // todo: thread-safe access to PythonEnvironmentState - void reset(); + void reset(std::vector&& resets); const std::vector& getEnvironmentStates() const; void 
startStepPhysicsOrReset(std::vector&& actions, std::vector&& resets); void waitStepPhysicsOrReset(); @@ -207,7 +210,7 @@ class BatchedSimulator { std::string getRecentStatsAndReset() const; // todo: threadsafe - void debugRenderColumnGrids(int b, int minProgress=0, int maxProgress=-1) const; + void debugRenderColumnGrids(int b, int minProgress=0, int maxProgress=-1); void reloadSerializeCollection(); @@ -245,10 +248,9 @@ class BatchedSimulator { const Magnum::Vector3& pos, const Magnum::Quaternion& rotation); void initEpisodeSet(); - void instantiateEpisode(int b, int episodeIndex); - // We don't yet support changing the episode for an env. You can only reset to the - // same episode. - void resetEpisodeInstance(int b); + void initEpisodeInstances(); + void clearEpisodeInstance(int b); + void resetEpisodeInstance(int b); // uses resets_[b] bool isEnvResetting(int b) const; void physicsThreadFunc(int startEnvIndex, int numEnvs); @@ -288,7 +290,8 @@ class BatchedSimulator { esp::core::Random random_{0}; std::thread physicsThread_; - std::mutex physicsMutex_; + std::mutex physicsSignalMutex_; + std::mutex physicsFinishMutex_; std::condition_variable physicsCondVar_; bool signalStepPhysics_ = false; bool signalKillPhysicsThread_ = false; diff --git a/src/esp/batched_sim/CollisionBroadphaseGrid.cpp b/src/esp/batched_sim/CollisionBroadphaseGrid.cpp index 88de3b6648..9143297ca9 100644 --- a/src/esp/batched_sim/CollisionBroadphaseGrid.cpp +++ b/src/esp/batched_sim/CollisionBroadphaseGrid.cpp @@ -302,6 +302,19 @@ void CollisionBroadphaseGrid::insertRemoveObstacleHelper(int16_t obsIndex, bool } +void CollisionBroadphaseGrid::removeAllObstacles() { + + for (auto& grid : grids_) { + for (auto& cell : grid.cells) { + cell.numObstacles = 0; + // cell.obstacles array doesn't need to be touched + } + } + numObstacleInstances_ = 0; + obstacles_.clear(); +} + + int CollisionBroadphaseGrid::contactTest(const Magnum::Vector3& spherePos, float sphereRadius) const { 
BATCHED_SIM_ASSERT(sphereRadius <= maxSphereRadius_); diff --git a/src/esp/batched_sim/CollisionBroadphaseGrid.h b/src/esp/batched_sim/CollisionBroadphaseGrid.h index 1f7b7629d4..5138feb77e 100644 --- a/src/esp/batched_sim/CollisionBroadphaseGrid.h +++ b/src/esp/batched_sim/CollisionBroadphaseGrid.h @@ -79,6 +79,8 @@ class CollisionBroadphaseGrid { const Obstacle& getObstacle(int obsIndex) const; + void removeAllObstacles(); + //// private API exposed only for testing //// public: diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 18ae0333cd..1ff7478e39 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -87,12 +87,14 @@ class EpisodeInstance { int32_t stageFixedObjectInstanceId_ = -1; // free obj instance ids stored in freeObjectInstanceIds_ CollisionBroadphaseGrid colGrid_; - int debugNumColGridObstacleInstances_ = 0; // todo: more robust storage for moved free objects static constexpr int MAX_MOVED_FREE_OBJECTS = 6; // todo: better memory management std::vector movedFreeObjectIndexes_; int firstFreeObjectInstanceId_ = -1; + #ifndef NDEBUG // todo: hook up to a well-named preproc var + std::vector persistentDebugInstanceIds_; + #endif }; class EpisodeInstanceSet { diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index ae34b1ff2c..b8277689ab 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -56,7 +56,6 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("episode_idx", &PythonEnvironmentState::episode_idx, R"(Todo)") .def_readwrite("episode_step_idx", &PythonEnvironmentState::episode_step_idx, R"(Todo)") .def_readwrite("robot_position", &PythonEnvironmentState::robot_position, R"(Todo)") - .def_readwrite("robot_yaw", &PythonEnvironmentState::robot_yaw, R"(Todo)") .def_readwrite("robot_joint_positions", &PythonEnvironmentState::robot_joint_positions, R"(Todo)") .def_readwrite("ee_pos", 
&PythonEnvironmentState::ee_pos, R"(Todo)") .def_readwrite("did_collide", &PythonEnvironmentState::did_collide, R"(Todo)") @@ -65,6 +64,8 @@ void initBatchedSimBindings(py::module& m) { py::class_(m, "BatchedSimulator") .def(py::init(&BatchedSimulator::create)) + .def("get_num_episodes", + &BatchedSimulator::getNumEpisodes, R"(todo)") .def("reset", &BatchedSimulator::reset, R"(todo)") .def("get_environment_states", diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index cfb709020d..0679a411f5 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -201,12 +201,12 @@ TEST_F(BatchedSimulatorTest, basic) { BatchedSimulatorConfig config{ // for video: 2048 x 1024, fov 80 - .numEnvs = 2, .gpuId = 0, + .numEnvs = 16, .gpuId = 0, .includeDepth = includeDepth, .includeColor = includeColor, .sensor0 = {.width = 768, .height = 768, .hfov = 70}, .forceRandomActions = forceRandomActions, - .doAsyncPhysicsStep = true, + .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, .doPairedDebugEnvs = doPairedDebugEnvs, .doProceduralEpisodeSet = true, @@ -275,11 +275,24 @@ TEST_F(BatchedSimulatorTest, basic) { } } + int nextEpisode = 0; + const int numEpisodes = bsim.getNumEpisodes(); + auto getNextEpisode = [&]() { + auto retVal = nextEpisode; + nextEpisode = (nextEpisode + 1) % numEpisodes; + return retVal; + }; + std::vector resets(config.numEnvs, -1); + for (int b = 0; b < config.numEnvs; b++) { + resets[b] = getNextEpisode(); + } bool doAdvanceSim = false; - bsim.reset(); + bsim.reset(std::vector(resets)); + + std::fill(resets.begin(), resets.end(), -1); std::vector envStates; @@ -494,12 +507,12 @@ TEST_F(BatchedSimulatorTest, basic) { } } - #if 0 + #if 1 // temp end episode on collision for (int b = 0; b < config.numEnvs; b++) { int resetPeriodForEnv = b + 1; if (envStates[b].did_collide) { - resets[b] = envStates[b].episode_idx; + resets[b] = getNextEpisode(); } else { resets[b] = -1; } diff --git a/src/tests/ColumnGridTest.cpp 
b/src/tests/ColumnGridTest.cpp index 74225afece..f7eb6042ae 100644 --- a/src/tests/ColumnGridTest.cpp +++ b/src/tests/ColumnGridTest.cpp @@ -427,6 +427,18 @@ void ColumnGridTest::testCollisionBroadphaseGrid() { CORRADE_COMPARE(cgrid2.contactTest({0.f, 1.f, 0.25f - eps}, sphereRadius), 1); // hit box 1 CORRADE_COMPARE(cgrid2.contactTest({0.f, 1.f, 0.25f + eps}, sphereRadius), -1); + + cgrid2.removeAllObstacles(); + CORRADE_COMPARE(cgrid2.contactTest({0.f, 1.f, 0.25f - eps}, sphereRadius), -1); + CORRADE_COMPARE(cgrid2.getNumObstacleInstances(), 0); + + // insert again and remove again + boxId0 = cgrid2.insertObstacle({0.f, 0.f, 0.f}, Mn::Quaternion(Mn::Math::IdentityInit), &aabb); + CORRADE_COMPARE(boxId0, 0); + boxId1 = cgrid2.insertObstacle({0.f, 1.f, 0.f}, Mn::Quaternion(Mn::Math::IdentityInit), &aabb); + CORRADE_COMPARE(boxId1, 1); + cgrid2.removeAllObstacles(); + CORRADE_COMPARE(cgrid2.getNumObstacleInstances(), 0); } { From 692d6571b65a6fec96af052c15e10a594ff0644d Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Fri, 1 Apr 2022 20:27:48 -0400 Subject: [PATCH 09/53] * added lots of state to PythonEnvironmentState * switch episode JSON rotations from Mat3x3 to Quat * force zero action for first step of episode (on the assumption that the action was computed from a stale observation) * more robust error-handling for episode JSON loading --- src/esp/batched_sim/BatchedSimulator.cpp | 121 ++++++++++++++++++----- src/esp/batched_sim/BatchedSimulator.h | 23 ++++- src/esp/batched_sim/BpsSceneMapping.cpp | 9 +- src/esp/batched_sim/EpisodeSet.cpp | 32 +++++- src/esp/batched_sim/EpisodeSet.h | 7 +- src/esp/bindings/BatchedSimBindings.cpp | 19 +++- src/tests/BatchedSimTest.cpp | 5 +- 7 files changed, 173 insertions(+), 43 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index b324b3466b..e696ea44a8 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -52,6 
+52,16 @@ Mn::Matrix4 toMagnumMatrix4(const btTransform& btTrans) { return tmp2; } +Mn::Quaternion yawToRotation(float yawRadians) { + constexpr Mn::Vector3 upAxis(0.f, 1.f, 0.f); + return Mn::Quaternion::rotation(Mn::Rad(yawRadians), upAxis); +} + +Mn::Vector3 groundPositionToVector3(const Mn::Vector2& src) { + constexpr float groundY = 0.f; + return Mn::Vector3(src.x(), groundY, src.y()); +} + std::string getMeshNameFromURDFVisualFilepath(const std::string& filepath) { // "../meshes/base_link.dae" => "base_link" auto index0 = filepath.rfind('/'); @@ -359,26 +369,38 @@ void BatchedSimulator::updatePythonEnvironmentState() { const auto& robotInstance = safeVectorGet(robots_.robotInstances_, b); auto& envState = safeVectorGet(pythonEnvStates_, b); - envState.ee_pos = robotInstance.cachedGripperLinkMat_.translation(); - // envState.episode_step_idx updated incrementally - // todo: do logical "or" over all substeps - envState.did_collide = robots_.collisionResults_[b]; - // envState.obj_positions // this is updated incrementally - envState.robot_position = Mn::Vector3(positions[b].x(), 0.f, positions[b].y()); - //envState.robot_yaw = yaws[b]; +/* + // robot state + Magnum::Vector3 robot_pos; + Magnum::Quaternion robot_rotation; + std::vector robot_joint_positions; + Magnum::Vector3 ee_pos; + Magnum::Quaternion ee_rotation; + bool did_collide = false; + + // other env state + std::vector obj_positions; + std::vector obj_rotations; + */ + + envState.robot_pos = Mn::Vector3(positions[b].x(), 0.f, positions[b].y()); + envState.robot_rotation = yawToRotation(yaws[b]); envState.robot_joint_positions.resize(numPosVars); int baseJointIndex = b * robot_.numPosVars; for (int j = 0; j < numPosVars; j++) { const auto& pos = jointPositions[baseJointIndex + j]; safeVectorGet(envState.robot_joint_positions, j) = pos; } - - // perf todo: set this just once at episode reset - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& 
episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - envState.episode_idx = episodeInstance.episodeIndex_; - envState.target_obj_idx = episode.targetObjIndex_; - envState.goal_pos = episode.targetObjGoalPos_; + envState.ee_pos = robotInstance.cachedGripperLinkMat_.translation(); + envState.ee_rotation = Mn::Quaternion::fromMatrix( + robotInstance.cachedGripperLinkMat_.rotation()); + // todo: do logical "or" over all substeps + envState.did_collide = robots_.collisionResults_[b]; + // envState.obj_positions // this is updated incrementally + // envState.obj_rotations // this is updated incrementally + envState.held_obj_idx = robotInstance.grippedFreeObjectIndex_; + // did_grasp updated incrementally + // did_drop updated incrementally } #if 0 // reference code to visualize pythonEnvironmentState @@ -441,6 +463,11 @@ void BatchedSimulator::updateGripping() { auto& env = bpsWrapper_->envs_[b]; auto& robotInstance = robots_.robotInstances_[b]; + auto& envState = safeVectorGet(pythonEnvStates_, b); + + // this is wrong for the case of multiple substeps + envState.did_drop = false; + envState.did_grasp = false; if (shouldDrawDebugForEnv(config_, b, substep_)) { @@ -505,6 +532,8 @@ episodeInstance.movedFreeObjectIndexes_.push_back(grippedFreeObjectIndex); recentStats_.numGrips_++; + + envState.did_grasp = true; } recentStats_.numGripAttempts_++; @@ -555,6 +584,8 @@ robotInstance.grippedFreeObjectIndex_ = -1; robotInstance.doAttemptDrop_ = false; + envState.did_drop = true; + recentStats_.numDrops_++; } } @@ -879,6 +910,7 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = mat.translation(); + safeVectorGet(envState.obj_rotations, freeObjectIndex) = Mn::Quaternion::fromMatrix(mat.rotation()); } } } @@ -939,7 +971,7 @@ Mn::Matrix4 
BatchedSimulator::getHeldObjectTransform(int b) const { const auto& rotation = safeVectorGet(freeObject.startRotations_, startRotationIndex); Mn::Matrix4 linkToGripper = Mn::Matrix4::translation(robot_.gripperQueryOffset_); - Mn::Matrix4 toOrientedObject = Mn::Matrix4::from(rotation, Mn::Vector3(Mn::Math::ZeroInit)); + Mn::Matrix4 toOrientedObject = Mn::Matrix4::from(rotation.toMatrix(), Mn::Vector3(Mn::Math::ZeroInit)); // todo: offset to a few centimeters from the edge, instead of the center Mn::Matrix4 toObjectCenter = Mn::Matrix4::translation(-freeObject.aabb_.center()); @@ -1370,7 +1402,8 @@ void BatchedSimulator::resetEpisodeInstance(int b) { } auto& envState = safeVectorGet(pythonEnvStates_, b); - envState.obj_positions.resize(episode.numFreeObjectSpawns_); + envState.obj_positions.resize(episode.numFreeObjectSpawns_); + envState.obj_rotations.resize(episode.numFreeObjectSpawns_); for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { spawnFreeObject(b, freeObjectIndex, /*reinsert*/false); @@ -1387,7 +1420,6 @@ void BatchedSimulator::resetEpisodeInstance(int b) { float* jointPositions = &safeVectorGet(rollouts_.jointPositions_,currStorageStep_ * numEnvs * numPosVars); - constexpr float groundY = 0.f; positions[b] = episode.agentStartPos_; yaws[b] = episode.agentStartYaw_; @@ -1413,7 +1445,39 @@ void BatchedSimulator::resetEpisodeInstance(int b) { robotInstance.doAttemptGrip_ = false; } - envState.episode_step_idx = 0; + { + // // curr episode + // int episode_idx = -1; // 0..len(episodes)-1 + // int episode_step_idx = -1; // will be zero if this env was just reset + // int target_obj_idx = -1; // see obj_positions, obj_rotations + // // all positions/rotations are relative to the mesh, i.e. 
some arbitrary coordinate frame + // Magnum::Vector3 target_obj_start_pos; + // Magnum::Quaternion target_obj_start_rotation; + // Magnum::Vector3 robot_start_pos; + // Magnum::Quaternion robot_start_rotation; + // Magnum::Vector3 goal_pos; + // Magnum::Quaternion goal_rotation; + + envState.episode_idx = episodeInstance.episodeIndex_; + envState.episode_step_idx = 0; + envState.target_obj_idx = episode.targetObjIndex_; + envState.goal_pos = episode.targetObjGoalPos_; + envState.goal_rotation = episode.targetObjGoalRotation_; + envState.robot_start_pos = groundPositionToVector3(episode.agentStartPos_); + envState.robot_start_rotation = yawToRotation(episode.agentStartYaw_); + + const int freeObjectIndex = episode.targetObjIndex_; + const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& rotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); + + envState.target_obj_start_pos = freeObjectSpawn.startPos_; + envState.target_obj_start_rotation = rotation; + + envState.did_drop = false; + envState.did_grasp = false; + } if (shouldAddColumnGridDebugVisualsForEnv(config_, b)) { debugRenderColumnGrids(b); @@ -1518,7 +1582,7 @@ void BatchedSimulator::spawnFreeObject(int b, int freeObjectIndex, bool reinsert if (!reinsert) { // sloppy: this matrix gets created differently on episode reset Mn::Matrix4 mat = Mn::Matrix4::from( - rotation, freeObjectSpawn.startPos_); + rotation.toMatrix(), freeObjectSpawn.startPos_); glm::mat4x3 glMat = toGlmMat4x3(mat); if (!disableFreeObjectVisualsForEnv(config_, b)) { int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); @@ -1531,16 +1595,15 @@ void BatchedSimulator::spawnFreeObject(int b, int freeObjectIndex, bool reinsert } } - // temp sloppy convert to quat here - const auto rotationQuat = 
Mn::Quaternion::fromMatrix(rotation); if (!reinsert) { int16_t obsIndex = episodeInstance.colGrid_.insertObstacle( - freeObjectSpawn.startPos_, rotationQuat, &freeObject.aabb_); + freeObjectSpawn.startPos_, rotation, &freeObject.aabb_); BATCHED_SIM_ASSERT(obsIndex == freeObjectIndex); auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = freeObjectSpawn.startPos_; + safeVectorGet(envState.obj_rotations, freeObjectIndex) = rotation; } else { - reinsertFreeObject(b, freeObjectIndex, freeObjectSpawn.startPos_, rotationQuat); + reinsertFreeObject(b, freeObjectIndex, freeObjectSpawn.startPos_, rotation); } } @@ -1579,6 +1642,8 @@ void BatchedSimulator::reinsertFreeObject(int b, int freeObjectIndex, auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = pos; + safeVectorGet(envState.obj_rotations, freeObjectIndex) = rotation; + } void BatchedSimulator::initEpisodeSet() { @@ -1591,7 +1656,7 @@ void BatchedSimulator::initEpisodeSet() { constexpr int numEpisodesToGenerate = 100; // arbitrary episodeSet_ = generateBenchmarkEpisodeSet(numEpisodesToGenerate, sceneMapping_, serializeCollection_); - episodeSet_.saveToFile("generated.episode_set.json"); + episodeSet_.saveToFile("../data/generated.episode_set.json"); } else { ESP_CHECK(!config_.episodeSetFilepath.empty(), "For BatchedSimulatorConfig::doProceduralEpisodeSet==false, you must specify episodeSetFilepath"); @@ -1626,6 +1691,14 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto } } + for (int b = 0; b < numEnvs; b++) { + // force zero actions on first step of episode + const auto& envState = safeVectorGet(pythonEnvStates_, b); + if (envState.episode_step_idx == 0) { + std::fill(&actions_[b * actionDim_], &actions_[(b + 1) * actionDim_], 0.f); + } + } + if (!resets.empty()) { resets_ = std::move(resets); } else { diff --git a/src/esp/batched_sim/BatchedSimulator.h 
b/src/esp/batched_sim/BatchedSimulator.h index a4b809e21f..2d8d38111f 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -49,17 +49,32 @@ struct BatchedSimulatorConfig { struct PythonEnvironmentState { // curr episode - int episode_idx = -1; - int episode_step_idx = -1; // will be zero if just reset - int target_obj_idx = -1; + int episode_idx = -1; // 0..len(episodes)-1 + int episode_step_idx = -1; // will be zero if this env was just reset + int target_obj_idx = -1; // see obj_positions, obj_rotations + // all positions/rotations are relative to the mesh, i.e. some arbitrary coordinate frame + Magnum::Vector3 target_obj_start_pos; + Magnum::Quaternion target_obj_start_rotation; + Magnum::Vector3 robot_start_pos; + Magnum::Quaternion robot_start_rotation; Magnum::Vector3 goal_pos; + Magnum::Quaternion goal_rotation; + // robot state - Magnum::Vector3 robot_position; + Magnum::Vector3 robot_pos; + Magnum::Quaternion robot_rotation; std::vector robot_joint_positions; Magnum::Vector3 ee_pos; + Magnum::Quaternion ee_rotation; bool did_collide = false; + int held_obj_idx = -1; + bool did_grasp = false; + bool did_drop = false; + // other env state std::vector obj_positions; + std::vector obj_rotations; + ESP_SMART_POINTERS(PythonEnvironmentState); }; diff --git a/src/esp/batched_sim/BpsSceneMapping.cpp b/src/esp/batched_sim/BpsSceneMapping.cpp index 344bf5c59a..c582005e54 100644 --- a/src/esp/batched_sim/BpsSceneMapping.cpp +++ b/src/esp/batched_sim/BpsSceneMapping.cpp @@ -5,6 +5,7 @@ #include "BpsSceneMapping.h" #include "esp/io/json.h" +#include "esp/core/Check.h" namespace esp { namespace batched_sim { @@ -31,14 +32,20 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, BpsSceneMapping& x) { BpsSceneMapping::InstanceBlueprint BpsSceneMapping::findInstanceBlueprint( const std::string& nodeName) const { - for (const auto& meshMapping : meshMappings) { + ESP_CHECK(!nodeName.empty(), "findInstanceBlueprint: empty 
input name. Check your episode data."); + for (const auto& meshMapping : meshMappings) { if (meshMapping.name == nodeName) { constexpr float scale = 1.f; return InstanceBlueprint{meshMapping.meshIdx, meshMapping.mtrlIdx}; } } + ESP_CHECK(false, "findInstanceBlueprint: no mapping found for render asset " + << nodeName << ". For every free object and stage, we expect to find a " + << "corresponding render asset with the same name. See meshMappings names in your " + << ".bps.mapping.json file for valid render asset names."); + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); } diff --git a/src/esp/batched_sim/EpisodeSet.cpp b/src/esp/batched_sim/EpisodeSet.cpp index d26c023ee3..8d32d99ca1 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -49,7 +49,7 @@ void addFreeObject(EpisodeSet& set, const std::string& name) { for (int i = 0; i < numRotationsAboutUpAxis; i++) { const auto angle = Mn::Deg((float)i * 360.f / numRotationsAboutUpAxis); const auto rotAboutUpAxis = Mn::Quaternion::rotation(angle, Mn::Vector3(0.f, 1.f, 0.f)); - freeObj.startRotations_.push_back((rotAboutUpAxis * baseRot).toMatrix()); + freeObj.startRotations_.push_back((rotAboutUpAxis * baseRot)); } set.freeObjects_.emplace_back(std::move(freeObj)); @@ -130,7 +130,7 @@ void addEpisode(EpisodeSet& set, const serialize::Collection& collection, int st const auto rotation = freeObject.startRotations_[spawn.startRotationIndex_]; Mn::Matrix4 mat = Mn::Matrix4::from( - rotation, randomPos); + rotation.toMatrix(), randomPos); if (placementHelper.place(mat, freeObject)) { if (!spawnRange.contains(mat.translation())) { @@ -140,6 +140,7 @@ void addEpisode(EpisodeSet& set, const serialize::Collection& collection, int st if (isGoalPositionAttempt) { episode.targetObjGoalPos_ = spawn.startPos_; + episode.targetObjGoalRotation_ = rotation; success = true; break; } else { @@ -147,7 +148,7 @@ void addEpisode(EpisodeSet& set, const serialize::Collection& collection, int st 
episode.numFreeObjectSpawns_++; // add to colGrid so future spawns don't intersect this one - colGrid.insertObstacle(spawn.startPos_, Mn::Quaternion::fromMatrix(rotation), &freeObject.aabb_); + colGrid.insertObstacle(spawn.startPos_, rotation, &freeObject.aabb_); } } } @@ -224,6 +225,12 @@ void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& auto& freeObject = *it; freeObject.aabb_ = Mn::Range3D(serFreeObject.collisionBox.min, serFreeObject.collisionBox.max); freeObject.heldRotationIndex_ = serFreeObject.heldRotationIndex; + ESP_CHECK(freeObject.heldRotationIndex_ >= 0 + && freeObject.heldRotationIndex_ < freeObject.startRotations_.size(), + "updateFromSerializeCollection: heldRotationIndex " << serFreeObject.heldRotationIndex + << " is out-of-range for FreeObject " + << freeObject.name_ << " with startRotations.size() == " << freeObject.startRotations_.size()); + freeObject.collisionSpheres_.clear(); std::vector generatedSpheres; const std::vector* serializeCollisionSpheres = nullptr; @@ -341,6 +348,9 @@ void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const s for (auto& freeObj : set.freeObjects_) { freeObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(freeObj.name_); freeObj.needsPostLoadFixup_ = false; + for (const auto& rot : freeObj.startRotations_) { + ESP_CHECK(rot.isNormalized(), "postLoadFixup: FreeObject " << freeObj.name_ << " rotation " << rot << " isn't normalized"); + } } set.allEpisodesAABB_ = Mn::Range3D(Mn::Math::ZeroInit); @@ -384,8 +394,19 @@ void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const s } set.maxFreeObjects_ = 0; - for (const auto& episode : set.episodes_) { + for (int i = 0; i < set.episodes_.size(); i++) { + const auto& episode = safeVectorGet(set.episodes_, i); set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); + + ESP_CHECK(episode.targetObjGoalRotation_.isNormalized(), + "postLoadFixup: episode " << 
i << " targetObjGoalRotation " + << episode.targetObjGoalRotation_ << " isn't normalized"); + ESP_CHECK(episode.numFreeObjectSpawns_ > 0, + "postLoadFixup: episode " << i << " has invalid numFreeObjectSpawns_=" << episode.numFreeObjectSpawns_); + ESP_CHECK(episode.stageFixedObjIndex >= 0 && episode.stageFixedObjIndex < set.fixedObjects_.size(), + "postLoadFixup: episode " << i << " has invalid stageFixedObjIndex=" << episode.stageFixedObjIndex); + ESP_CHECK(episode.targetObjIndex_ >= 0 && episode.targetObjIndex_ < episode.numFreeObjectSpawns_, + "postLoadFixup: episode " << i << " has invalid targetObjIndex_=" << episode.targetObjIndex_); } set.needsPostLoadFixup_ = false; @@ -400,6 +421,7 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, esp::io::readMember(obj, "name", val.name_); // esp::io::readMember(obj, "aabb", val.aabb_); esp::io::readMember(obj, "startRotations", val.startRotations_); + ESP_CHECK(!val.startRotations_.empty(), "FreeObject " << val.name_ << " has empty startRotations"); // esp::io::readMember(obj, "heldRotationIndex", val.heldRotationIndex_); // esp::io::readMember(obj, "collisionSpheres", val.collisionSpheres_); @@ -485,6 +507,7 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, esp::io::readMember(obj, "agentStartPos", val.agentStartPos_); esp::io::readMember(obj, "agentStartYaw", val.agentStartYaw_); esp::io::readMember(obj, "targetObjGoalPos", val.targetObjGoalPos_); + esp::io::readMember(obj, "targetObjGoalRotation", val.targetObjGoalRotation_); return true; } @@ -499,6 +522,7 @@ esp::io::JsonGenericValue toJsonValue(const Episode& x, esp::io::addMember(obj, "agentStartPos", x.agentStartPos_, allocator); esp::io::addMember(obj, "agentStartYaw", x.agentStartYaw_, allocator); esp::io::addMember(obj, "targetObjGoalPos", x.targetObjGoalPos_, allocator); + esp::io::addMember(obj, "targetObjGoalRotation", x.targetObjGoalRotation_, allocator); return obj; } diff --git a/src/esp/batched_sim/EpisodeSet.h 
b/src/esp/batched_sim/EpisodeSet.h index 1ff7478e39..3afc7e56d5 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -33,7 +33,7 @@ class FreeObject { std::string name_; BpsSceneMapping::InstanceBlueprint instanceBlueprint_; Magnum::Range3D aabb_; - std::vector startRotations_; + std::vector startRotations_; int heldRotationIndex_; // index into startRotations_ std::vector collisionSpheres_; bool needsPostLoadFixup_ = true; @@ -60,11 +60,12 @@ class Episode { int16_t numFreeObjectSpawns_ = 0; int16_t targetObjIndex_ = -1; // 0..numFreeObjectSpawns - 1, see also freeObjectIndex int32_t firstFreeObjectSpawnIndex_ = -1; // index into EpisodeSet::freeObjectSpawns_ - Magnum::Vector2 agentStartPos_; + Magnum::Vector2 agentStartPos_ = Magnum::Vector2(Magnum::Math::ZeroInit); float agentStartYaw_ = 0.f; // radians // task-specific - Magnum::Vector3 targetObjGoalPos_; + Magnum::Vector3 targetObjGoalPos_ = Magnum::Vector3(Magnum::Math::ZeroInit); + Magnum::Quaternion targetObjGoalRotation_ = Magnum::Quaternion(Magnum::Math::IdentityInit); }; class EpisodeSet { diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index b8277689ab..a0412b664c 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -51,16 +51,25 @@ void initBatchedSimBindings(py::module& m) { py::class_( m, "EnvironmentState") .def(py::init(&PythonEnvironmentState::create<>)) - .def_readwrite("target_obj_idx", &PythonEnvironmentState::target_obj_idx, R"(Todo)") - .def_readwrite("goal_pos", &PythonEnvironmentState::goal_pos, R"(Todo)") .def_readwrite("episode_idx", &PythonEnvironmentState::episode_idx, R"(Todo)") .def_readwrite("episode_step_idx", &PythonEnvironmentState::episode_step_idx, R"(Todo)") - .def_readwrite("robot_position", &PythonEnvironmentState::robot_position, R"(Todo)") + .def_readwrite("target_obj_idx", &PythonEnvironmentState::target_obj_idx, R"(Todo)") + 
.def_readwrite("target_obj_start_pos", &PythonEnvironmentState::target_obj_start_pos, R"(Todo)") + .def_readwrite("robot_start_pos", &PythonEnvironmentState::robot_start_pos, R"(Todo)") + .def_readwrite("robot_start_rotation", &PythonEnvironmentState::robot_start_rotation, R"(Todo)") + .def_readwrite("goal_pos", &PythonEnvironmentState::goal_pos, R"(Todo)") + .def_readwrite("goal_rotation", &PythonEnvironmentState::goal_rotation, R"(Todo)") + .def_readwrite("robot_pos", &PythonEnvironmentState::robot_pos, R"(Todo)") + .def_readwrite("robot_rotation", &PythonEnvironmentState::robot_rotation, R"(Todo)") .def_readwrite("robot_joint_positions", &PythonEnvironmentState::robot_joint_positions, R"(Todo)") .def_readwrite("ee_pos", &PythonEnvironmentState::ee_pos, R"(Todo)") + .def_readwrite("ee_rotation", &PythonEnvironmentState::ee_rotation, R"(Todo)") .def_readwrite("did_collide", &PythonEnvironmentState::did_collide, R"(Todo)") - .def_readwrite("obj_positions", &PythonEnvironmentState::obj_positions, R"(Todo)"); - + .def_readwrite("held_obj_idx", &PythonEnvironmentState::held_obj_idx, R"(Todo)") + .def_readwrite("did_grasp", &PythonEnvironmentState::did_grasp, R"(Todo)") + .def_readwrite("did_drop", &PythonEnvironmentState::did_drop, R"(Todo)") + .def_readwrite("obj_positions", &PythonEnvironmentState::obj_positions, R"(Todo)") + .def_readwrite("obj_rotations", &PythonEnvironmentState::obj_rotations, R"(Todo)"); py::class_(m, "BatchedSimulator") .def(py::init(&BatchedSimulator::create)) diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index 0679a411f5..ad228dec82 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -210,7 +210,8 @@ TEST_F(BatchedSimulatorTest, basic) { .numSubsteps = 1, .doPairedDebugEnvs = doPairedDebugEnvs, .doProceduralEpisodeSet = true, - //.episodeSetFilepath = "test.episode_set.json" + //.doProceduralEpisodeSet = false, + //.episodeSetFilepath = "generated.episode_set.json" }; BatchedSimulator 
bsim(config); @@ -260,7 +261,7 @@ TEST_F(BatchedSimulatorTest, basic) { if (doFreeCam || doTuneRobotCam) { std::cout << "Camera controls: press WASDQE/arrow keys to move/look, +/- to adjust speed, or ESC to quit." << std::endl; } else { - std::cout << "Robot controls: press WASD/arrow keys to move, E to grab/drop, +/- to adjust speed, or ESC to quit." << std::endl; + std::cout << "Robot controls: press WASD/arrow keys to move, G to grab/drop, +/- to adjust speed, or ESC to quit." << std::endl; } constexpr int actionDim = 18; From 7563eb4c38d4084b6b07d23bee6260a9e1a055d5 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Sun, 3 Apr 2022 09:03:48 -0400 Subject: [PATCH 10/53] add habitat_sim.build_type string attr --- habitat_sim/__init__.py | 1 + habitat_sim/bindings/__init__.py | 1 + src/esp/bindings/bindings.cpp | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/habitat_sim/__init__.py b/habitat_sim/__init__.py index da853b082e..5128bcc7a8 100644 --- a/habitat_sim/__init__.py +++ b/habitat_sim/__init__.py @@ -60,6 +60,7 @@ SceneNodeType, SimulatorConfiguration, built_with_bullet, + build_type, cuda_enabled, vhacd_enabled, ) diff --git a/habitat_sim/bindings/__init__.py b/habitat_sim/bindings/__init__.py index 2d3379f2da..2dfc69c71d 100644 --- a/habitat_sim/bindings/__init__.py +++ b/habitat_sim/bindings/__init__.py @@ -39,6 +39,7 @@ SimulatorConfiguration, VisualSensorSpec, built_with_bullet, + build_type, cuda_enabled, vhacd_enabled, ) diff --git a/src/esp/bindings/bindings.cpp b/src/esp/bindings/bindings.cpp index 38aac29a6b..958695fefb 100644 --- a/src/esp/bindings/bindings.cpp +++ b/src/esp/bindings/bindings.cpp @@ -93,6 +93,14 @@ PYBIND11_MODULE(habitat_sim_bindings, m) { false; #endif + // todo: base this on CMake build_type instead of NDEBUG + m.attr("build_type") = +#ifdef NDEBUG + "release"; +#else + "debug"; +#endif + /* This function pointer is used by ESP_CHECK(). 
If it's null, it std::abort()s, if not, it calls it to cause a Python AssertionError */ esp::core::throwInPython = [](const char* const message) { From e42f36b493d2184b40ea46cd24a071df40ec60e8 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Sun, 3 Apr 2022 11:46:33 -0400 Subject: [PATCH 11/53] add BatchedSimulatorConfig::actionMap --- src/esp/batched_sim/BatchedSimulator.cpp | 56 +++++++++++++++++------- src/esp/batched_sim/BatchedSimulator.h | 12 ++++- src/esp/bindings/BatchedSimBindings.cpp | 11 ++++- src/tests/BatchedSimTest.cpp | 26 ++++++++--- 4 files changed, 82 insertions(+), 23 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index e696ea44a8..209017edf2 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -116,6 +116,22 @@ constexpr bool disableStageVisualsForEnv(const BatchedSimulatorConfig&, int) { } #endif +void checkActionMap(const ActionMap& actionMap, int numRobotJointDegrees) { + ESP_CHECK(actionMap.numActions > 0, "checkActionMap: numActions must be > 0"); + ESP_CHECK(actionMap.baseMove >= 0 && actionMap.baseMove < actionMap.numActions, + "checkActionMap: invalid baseMove=" << actionMap.baseMove); + ESP_CHECK(actionMap.baseRotate >= 0 && actionMap.baseRotate < actionMap.numActions, + "checkActionMap: invalid baseRotate=" << actionMap.baseRotate); + for (const auto& pair : actionMap.actionJointDegreePairs) { + int actionIdx = pair.first; + int jointIdx = pair.second; + ESP_CHECK(actionIdx >= 0 && actionIdx < actionMap.numActions, + "checkActionMap: invalid actionToJoint pair.first=" << pair.first); + ESP_CHECK(jointIdx >= 0 && jointIdx < numRobotJointDegrees, + "checkActionMap: invalid actionToJoint pair.second=" << pair.second + << " for robot with " << numRobotJointDegrees << " joint degrees of freedom"); + } +} } // namespace @@ -312,8 +328,9 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF // 
compute collision sphere transforms for (const auto& localSphereIdx : robots_.robot_->collisionSpheresByNode_[nodeIndex]) { const auto& sphere = safeVectorGet(robots_.robot_->collisionSpheres_, localSphereIdx); - robots_.collisionSphereWorldOrigins_[baseSphereIndex + localSphereIdx] = - mat.transformPoint(sphere.origin); + auto& worldSphere = robots_.collisionSphereWorldOrigins_[baseSphereIndex + localSphereIdx]; + worldSphere = mat.transformPoint(sphere.origin); + BATCHED_SIM_ASSERT(!std::isnan(worldSphere.x())); } } @@ -1214,11 +1231,8 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { robots_ = RobotInstanceSet(&robot_, &config_, &bpsWrapper_->envs_, &rollouts_); - // see also python/rl/agent.py - const int numOtherActions = 1; // doAttemptGrip/doAttemptDrop - const int numJointDegrees = robot_.numPosVars; - const int numBaseDegrees = 2; // rotate and move-forward/back - actionDim_ = numOtherActions + numJointDegrees + numBaseDegrees; + checkActionMap(config_.actionMap, robot_.numPosVars); + actionDim_ = config_.actionMap.numActions; int batchNumActions = (actionDim_) * numEnvs; actions_.resize(batchNumActions, 0.f); @@ -1814,8 +1828,6 @@ void BatchedSimulator::substepPhysics() { auto& robots = robots_; - int actionIndex = 0; - const float* prevYaws = &rollouts_.yaws_[prevStorageStep_ * numEnvs]; float* yaws = &rollouts_.yaws_[currStorageStep_ * numEnvs]; const Mn::Vector2* prevPositions = @@ -1835,12 +1847,16 @@ void BatchedSimulator::substepPhysics() { // remaining substeps (not until the action changes on the next step) if (isEnvResetting(b)) { - actionIndex += actionDim_; continue; } + const int baseActionIndex = b * actionDim_; + + const float baseMoveAction = actions_[baseActionIndex + config_.actionMap.baseMove]; + const float baseRotateAction = actions_[baseActionIndex + config_.actionMap.baseRotate]; + const float grabDropAction = actions_[baseActionIndex + config_.actionMap.graspRelease]; + auto& robotInstance = 
robots_.robotInstances_[b]; - const float grabDropAction = actions_[actionIndex++]; // sticky grip behavior if (robotInstance.grippedFreeObjectIndex_ == -1) { @@ -1853,27 +1869,35 @@ void BatchedSimulator::substepPhysics() { robotInstance.doAttemptDrop_ = (grabDropAction < -stickyGrabDropThreshold); } - const float clampedBaseYawAction = Mn::Math::clamp(actions_[actionIndex++], -maxAbsYawAngle, maxAbsYawAngle); + const float clampedBaseYawAction = Mn::Math::clamp(baseRotateAction, -maxAbsYawAngle, maxAbsYawAngle); yaws[b] = prevYaws[b] + clampedBaseYawAction; // todo: wrap angle to 360 degrees - float clampedBaseMovementAction = Mn::Math::clamp(actions_[actionIndex++], minBaseMovement, maxBaseMovement); + float clampedBaseMovementAction = Mn::Math::clamp(baseMoveAction, minBaseMovement, maxBaseMovement); positions[b] = prevPositions[b] + Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) * clampedBaseMovementAction; + // sloppy: copy over all jointPositions, then process actionJointDegreePairs int baseJointIndex = b * robot_.numPosVars; for (int j = 0; j < robot_.numPosVars; j++) { auto& pos = jointPositions[baseJointIndex + j]; const auto& prevPos = prevJointPositions[baseJointIndex + j]; - const float clampedJointMovementAction = Mn::Math::clamp(actions_[actionIndex++], + pos = prevPos; + } + + for (const auto& pair : config_.actionMap.actionJointDegreePairs) { + int j = pair.second; + const float jointMovementAction = actions_[baseActionIndex + pair.first]; + BATCHED_SIM_ASSERT(j >= 0 && j < robot_.numPosVars); + auto& pos = jointPositions[baseJointIndex + j]; + const auto& prevPos = prevJointPositions[baseJointIndex + j]; + const float clampedJointMovementAction = Mn::Math::clamp(jointMovementAction, -maxAbsJointAngle, maxAbsJointAngle); pos = prevPos + clampedJointMovementAction; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], robot_.jointPositionLimits.second[j]); - } } - BATCHED_SIM_ASSERT(actionIndex 
== actions_.size()); updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ true, /*updateForRender*/ false, /*includeResettingEnvs*/false); diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 2d8d38111f..aa89951e1c 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -28,6 +28,16 @@ struct CameraSensorConfig { float hfov = -1.f; }; +struct ActionMap { + int numActions = -1; + int baseMove = -1; + int baseRotate = -1; + int graspRelease = -1; + std::vector> actionJointDegreePairs; + ESP_SMART_POINTERS(ActionMap); +}; + + struct BatchedSimulatorConfig { int numEnvs = -1; int gpuId = -1; @@ -43,6 +53,7 @@ struct BatchedSimulatorConfig { bool doProceduralEpisodeSet = true; // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) std::string episodeSetFilepath; + ActionMap actionMap; ESP_SMART_POINTERS(BatchedSimulatorConfig); }; @@ -190,7 +201,6 @@ class BatchedSimulator { ~BatchedSimulator(); void close(); - int getNumEpisodes() const; void startRender(); diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index a0412b664c..279a9d40d5 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -34,6 +34,14 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("height", &CameraSensorConfig::height, R"(Todo)") .def_readwrite("hfov", &CameraSensorConfig::hfov, R"(Todo)"); + py::class_(m, "ActionMap") + .def(py::init(&ActionMap::create<>)) + .def_readwrite("num_actions", &ActionMap::numActions, R"(Todo)") + .def_readwrite("base_move", &ActionMap::baseMove, R"(Todo)") + .def_readwrite("base_rotate", &ActionMap::baseRotate, R"(Todo)") + .def_readwrite("grasp_release", &ActionMap::graspRelease, R"(Todo)") + .def_readwrite("action_joint_degree_pairs", &ActionMap::actionJointDegreePairs, R"(Todo)"); + py::class_( m, "BatchedSimulatorConfig") 
.def(py::init(&BatchedSimulatorConfig::create<>)) @@ -46,7 +54,8 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") - .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); + .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)") + .def_readwrite("action_map", &BatchedSimulatorConfig::actionMap, R"(Todo)"); py::class_( m, "EnvironmentState") diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index ad228dec82..50dd4c922a 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -181,6 +181,21 @@ int key_press() { // not working: ¹ (251), num lock (-144), caps lock (-20), wi } } +ActionMap createActionMap() { + ActionMap map; + map.graspRelease = 0; + map.baseRotate = 1; + map.baseMove = 2; + const int numJointDegrees = 15; + for (int i = 0; i < numJointDegrees; i++) { + map.actionJointDegreePairs.push_back(std::make_pair(i + 3, i)); + } + + map.numActions = 18; + + return map; +} + } // namespace class BatchedSimulatorTest : public ::testing::Test {}; @@ -211,7 +226,8 @@ TEST_F(BatchedSimulatorTest, basic) { .doPairedDebugEnvs = doPairedDebugEnvs, .doProceduralEpisodeSet = true, //.doProceduralEpisodeSet = false, - //.episodeSetFilepath = "generated.episode_set.json" + //.episodeSetFilepath = "generated.episode_set.json", + .actionMap = createActionMap() }; BatchedSimulator bsim(config); @@ -264,7 +280,7 @@ TEST_F(BatchedSimulatorTest, basic) { std::cout << "Robot controls: press WASD/arrow keys to move, G to grab/drop, +/- to adjust speed, or ESC to quit." 
<< std::endl; } - constexpr int actionDim = 18; + const int actionDim = config.actionMap.numActions; std::vector actions(actionDim * config.numEnvs, 0.f); if (doFreeCam) { for (int b = 0; b < config.numEnvs; b++) { @@ -415,9 +431,9 @@ TEST_F(BatchedSimulatorTest, basic) { actionsForEnv[j] = 0.f; } - actionsForEnv[0] = doHold ? 1.f : -1.f; - actionsForEnv[1] = baseYaw * moveSpeed; - actionsForEnv[2] = baseForward * moveSpeed; + actionsForEnv[config.actionMap.graspRelease] = doHold ? 1.f : -1.f; + actionsForEnv[config.actionMap.baseRotate] = baseYaw * moveSpeed; + actionsForEnv[config.actionMap.baseMove] = baseForward * moveSpeed; // 2 is torso up/down // 7 shoulder, + is down From 6d793f6e7c8795d1decfe319010e4d74096a9c04 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 4 Apr 2022 05:55:30 -0400 Subject: [PATCH 12/53] hack to force compile errors for missing JSON serialization functions --- src/esp/io/JsonBuiltinTypes.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/esp/io/JsonBuiltinTypes.h b/src/esp/io/JsonBuiltinTypes.h index 0de0643dc5..33663a7005 100644 --- a/src/esp/io/JsonBuiltinTypes.h +++ b/src/esp/io/JsonBuiltinTypes.h @@ -39,6 +39,7 @@ void addMember(JsonGenericValue& value, template bool readMember(const JsonGenericValue& value, const char* name, T& x); +#if 0 // temp disable so we get compile errors /** * @brief Fallback implementation for fromJsonValue to produce a runtime error * for types that haven't implemented fromJsonValue. @@ -70,6 +71,7 @@ JsonGenericValue toJsonValue(const T&, JsonAllocator&) { << typeid(T).name() << "."; return JsonGenericValue(rapidjson::kObjectType); } +#endif // toJsonValue wrappers for the 7 rapidjson builtin types. A JsonGenericValue // can be directly constructed from the builtin types. 
From 875c5d08254f067f3c90a81f01355a9afac6f69c Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 4 Apr 2022 05:56:32 -0400 Subject: [PATCH 13/53] add JSON serialization for std::pair --- src/esp/io/JsonStlTypes.h | 17 +++++++++++++++++ src/tests/IOTest.cpp | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/src/esp/io/JsonStlTypes.h b/src/esp/io/JsonStlTypes.h index 50829cda83..0e3b705313 100644 --- a/src/esp/io/JsonStlTypes.h +++ b/src/esp/io/JsonStlTypes.h @@ -38,6 +38,23 @@ inline bool fromJsonValue(const JsonGenericValue& obj, std::string& val) { return false; } +template +inline JsonGenericValue toJsonValue(const std::pair& val, + JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "first", val.first, allocator); + esp::io::addMember(obj, "second", val.second, allocator); + return obj; +} + +template +inline bool fromJsonValue(const JsonGenericValue& obj, std::pair& val) { + bool success = true; + success &= readMember(obj, "first", val.first); + success &= readMember(obj, "second", val.second); + return success; +} + // For std::vector, we use rapidjson::kArrayType. For an empty vector, we // omit the member altogether rather than add an empty array. 
diff --git a/src/tests/IOTest.cpp b/src/tests/IOTest.cpp index d6e2055b57..2f4153d7bb 100644 --- a/src/tests/IOTest.cpp +++ b/src/tests/IOTest.cpp @@ -328,6 +328,12 @@ TEST(IOTest, JsonStlTypesTest) { EXPECT_TRUE(readMember(d, "s", s2)); EXPECT_EQ(s2, s); + std::pair pair(1.5f, "second"); + addMember(d, "pair", pair, allocator); + std::pair pair2; + EXPECT_TRUE(readMember(d, "pair", pair2)); + EXPECT_EQ(pair2, pair); + // test a vector of ints std::vector vec{3, 4, 5, 6}; addMember(d, "vec", vec, allocator); From ce0f2ab300c19e8703f27c68942bc51ce14576c9 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 4 Apr 2022 06:17:29 -0400 Subject: [PATCH 14/53] * replace python action_map with collection.json actionMap, including stepMin/stepMax * add collection.json startJointPositions * add resetting to BatchedSimTest --- src/esp/batched_sim/BatchedSimulator.cpp | 105 +++++++++++--------- src/esp/batched_sim/BatchedSimulator.h | 15 +-- src/esp/batched_sim/SerializeCollection.cpp | 27 +++++ src/esp/batched_sim/SerializeCollection.h | 23 ++++- src/esp/bindings/BatchedSimBindings.cpp | 13 +-- src/tests/BatchedSimTest.cpp | 88 +++++++--------- 6 files changed, 150 insertions(+), 121 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 209017edf2..b583f07d01 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -116,23 +116,6 @@ constexpr bool disableStageVisualsForEnv(const BatchedSimulatorConfig&, int) { } #endif -void checkActionMap(const ActionMap& actionMap, int numRobotJointDegrees) { - ESP_CHECK(actionMap.numActions > 0, "checkActionMap: numActions must be > 0"); - ESP_CHECK(actionMap.baseMove >= 0 && actionMap.baseMove < actionMap.numActions, - "checkActionMap: invalid baseMove=" << actionMap.baseMove); - ESP_CHECK(actionMap.baseRotate >= 0 && actionMap.baseRotate < actionMap.numActions, - "checkActionMap: invalid baseRotate=" << 
actionMap.baseRotate); - for (const auto& pair : actionMap.actionJointDegreePairs) { - int actionIdx = pair.first; - int jointIdx = pair.second; - ESP_CHECK(actionIdx >= 0 && actionIdx < actionMap.numActions, - "checkActionMap: invalid actionToJoint pair.first=" << pair.first); - ESP_CHECK(jointIdx >= 0 && jointIdx < numRobotJointDegrees, - "checkActionMap: invalid actionToJoint pair.second=" << pair.second - << " for robot with " << numRobotJointDegrees << " joint degrees of freedom"); - } -} } // namespace @@ -1057,11 +1040,34 @@ Robot::Robot(const serialize::Collection& serializeCollection, esp::sim::Simulat void Robot::updateFromSerializeCollection(const serialize::Collection& serializeCollection) { + ESP_CHECK(serializeCollection.robots.size() == 1, "updateFromSerializeCollection: expected 1 robot"); + const auto& serializeRobot = serializeCollection.robots.front(); ESP_CHECK(linkIndexByName_.count(serializeRobot.gripper.attachLinkName), - "gripper attach link " << serializeRobot.gripper.attachLinkName - << " from collection.json not found in robot URDF"); + "updateFromSerializeCollection: gripper attach link " << serializeRobot.gripper.attachLinkName + << " not found in robot URDF"); + + ESP_CHECK(serializeRobot.startJointPositions.size() == numPosVars, + "updateFromSerializeCollection: expected " << numPosVars << " joint positions"); + + const auto& serActionMap = serializeRobot.actionMap; + ESP_CHECK(serActionMap.numActions >= 3, "updateFromSerializeCollection: expected numActions >= 3"); + ESP_CHECK(serActionMap.graspRelease.thresholds.size() == 2, + "updateFromSerializeCollection: for graspRelease, expected 2 thresholds"); + ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && serActionMap.baseMove.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid baseMove actionIdx=" << serActionMap.baseMove.actionIdx); + ESP_CHECK(serActionMap.baseRotate.actionIdx >= 0 && serActionMap.baseRotate.actionIdx < serActionMap.numActions, + 
"updateFromSerializeCollection: invalid baseRotate actionIdx=" << serActionMap.baseRotate.actionIdx); + ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && serActionMap.baseMove.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid graspRelease actionIdx=" << serActionMap.graspRelease.actionIdx); + for (const auto& pair : serActionMap.joints) { + ESP_CHECK(pair.first >= 0 && pair.first < numPosVars, + "updateFromSerializeCollection: invalid actionMap joint index=" << pair.first + << " for robot with " << numPosVars << " degrees of freedom"); + ESP_CHECK(pair.second.actionIdx >= 0 && pair.second.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid joint actionIdx=" << pair.second.actionIdx); + } gripperLink_ = linkIndexByName_.at(serializeRobot.gripper.attachLinkName); gripperQueryOffset_ = serializeRobot.gripper.offset; @@ -1231,8 +1237,7 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { robots_ = RobotInstanceSet(&robot_, &config_, &bpsWrapper_->envs_, &rollouts_); - checkActionMap(config_.actionMap, robot_.numPosVars); - actionDim_ = config_.actionMap.numActions; + actionDim_ = getNumActions(); int batchNumActions = (actionDim_) * numEnvs; actions_.resize(batchNumActions, 0.f); @@ -1285,6 +1290,11 @@ int BatchedSimulator::getNumEpisodes() const { return episodeSet_.episodes_.size(); } +int BatchedSimulator::getNumActions() const { + BATCHED_SIM_ASSERT(!serializeCollection_.robots.empty()); + return serializeCollection_.robots[0].actionMap.numActions; +} + void BatchedSimulator::setCamera(const Mn::Vector3& camPos, const Mn::Quaternion& camRot) { @@ -1439,7 +1449,7 @@ void BatchedSimulator::resetEpisodeInstance(int b) { for (int j = 0; j < robot_.numPosVars; j++) { auto& pos = jointPositions[b * robot_.numPosVars + j]; - pos = 0.f; + pos = serializeCollection_.robots[0].startJointPositions[j]; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], 
robot_.jointPositionLimits.second[j]); } @@ -1449,7 +1459,7 @@ void BatchedSimulator::resetEpisodeInstance(int b) { // 9 elbow, + is down // 10 elbow twist, + is twst to right // 11 wrist, + is down - jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); + //jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); // assume robot is not in collision on reset robots_.collisionResults_[b] = false; @@ -1813,12 +1823,6 @@ void BatchedSimulator::substepPhysics() { ProfilingScope scope("substep"); BATCHED_SIM_ASSERT(isOkToStep_); - constexpr float stickyGrabDropThreshold = 0.85f; - constexpr float minBaseMovement = -0.05f; - constexpr float maxBaseMovement = 0.1f; - // beware not all joints are angular - constexpr float maxAbsYawAngle = float(Mn::Rad(Mn::Deg(5.f))); - constexpr float maxAbsJointAngle = float(Mn::Rad(Mn::Deg(5.f))); prevStorageStep_ = currStorageStep_; currStorageStep_ = (currStorageStep_ + 1) % maxStorageSteps_; @@ -1851,27 +1855,35 @@ void BatchedSimulator::substepPhysics() { } const int baseActionIndex = b * actionDim_; + const auto& actionMap = serializeCollection_.robots[0].actionMap; + + const auto& baseMoveSetup = actionMap.baseMove; + const auto& baseRotateSetup = actionMap.baseRotate; + const auto& graspReleaseSetup = actionMap.graspRelease; - const float baseMoveAction = actions_[baseActionIndex + config_.actionMap.baseMove]; - const float baseRotateAction = actions_[baseActionIndex + config_.actionMap.baseRotate]; - const float grabDropAction = actions_[baseActionIndex + config_.actionMap.graspRelease]; + const float baseMoveAction = actions_[baseActionIndex + baseMoveSetup.actionIdx]; + const float baseRotateAction = actions_[baseActionIndex + baseRotateSetup.actionIdx]; + const float graspReleaseAction = actions_[baseActionIndex + graspReleaseSetup.actionIdx]; auto& robotInstance = robots_.robotInstances_[b]; - // sticky grip behavior + // sticky grasp/release behavior + 
BATCHED_SIM_ASSERT(graspReleaseSetup.thresholds.size() == 2); + BATCHED_SIM_ASSERT(graspReleaseSetup.thresholds[0] <= graspReleaseSetup.thresholds[1]); if (robotInstance.grippedFreeObjectIndex_ == -1) { - if (robotInstance.doAttemptGrip_ && grabDropAction < -stickyGrabDropThreshold) { - robotInstance.doAttemptGrip_ = false; - } else if (!robotInstance.doAttemptGrip_ && grabDropAction > stickyGrabDropThreshold) { - robotInstance.doAttemptGrip_ = true; - } + // only a very large action value triggers a grasp + robotInstance.doAttemptGrip_ = graspReleaseAction >= graspReleaseSetup.thresholds[1]; + robotInstance.doAttemptDrop_ = false; } else { - robotInstance.doAttemptDrop_ = (grabDropAction < -stickyGrabDropThreshold); + // only a very small action value triggers a release + robotInstance.doAttemptGrip_ = false; + robotInstance.doAttemptDrop_ = graspReleaseAction < graspReleaseSetup.thresholds[0]; } - const float clampedBaseYawAction = Mn::Math::clamp(baseRotateAction, -maxAbsYawAngle, maxAbsYawAngle); + const float clampedBaseYawAction = Mn::Math::clamp(baseRotateAction, baseRotateSetup.stepMin, baseRotateSetup.stepMax); yaws[b] = prevYaws[b] + clampedBaseYawAction; // todo: wrap angle to 360 degrees - float clampedBaseMovementAction = Mn::Math::clamp(baseMoveAction, minBaseMovement, maxBaseMovement); + + float clampedBaseMovementAction = Mn::Math::clamp(baseMoveAction, baseMoveSetup.stepMin, baseMoveSetup.stepMax); positions[b] = prevPositions[b] + Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) @@ -1885,17 +1897,18 @@ void BatchedSimulator::substepPhysics() { pos = prevPos; } - for (const auto& pair : config_.actionMap.actionJointDegreePairs) { - int j = pair.second; - const float jointMovementAction = actions_[baseActionIndex + pair.first]; + for (const auto& pair : actionMap.joints) { + int j = pair.first; + const auto& actionSetup = pair.second; + const float jointMovementAction = actions_[baseActionIndex + 
actionSetup.actionIdx]; BATCHED_SIM_ASSERT(j >= 0 && j < robot_.numPosVars); auto& pos = jointPositions[baseJointIndex + j]; const auto& prevPos = prevJointPositions[baseJointIndex + j]; const float clampedJointMovementAction = Mn::Math::clamp(jointMovementAction, - -maxAbsJointAngle, maxAbsJointAngle); + actionSetup.stepMin, actionSetup.stepMax); pos = prevPos + clampedJointMovementAction; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], - robot_.jointPositionLimits.second[j]); + robot_.jointPositionLimits.second[j]); } } diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index aa89951e1c..033887a472 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -28,16 +28,6 @@ struct CameraSensorConfig { float hfov = -1.f; }; -struct ActionMap { - int numActions = -1; - int baseMove = -1; - int baseRotate = -1; - int graspRelease = -1; - std::vector> actionJointDegreePairs; - ESP_SMART_POINTERS(ActionMap); -}; - - struct BatchedSimulatorConfig { int numEnvs = -1; int gpuId = -1; @@ -53,7 +43,6 @@ struct BatchedSimulatorConfig { bool doProceduralEpisodeSet = true; // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) std::string episodeSetFilepath; - ActionMap actionMap; ESP_SMART_POINTERS(BatchedSimulatorConfig); }; @@ -202,6 +191,10 @@ class BatchedSimulator { void close(); int getNumEpisodes() const; + int getNumActions() const; + const serialize::Collection& getSerializeCollection() const { + return serializeCollection_; + } void startRender(); void waitRender(); diff --git a/src/esp/batched_sim/SerializeCollection.cpp b/src/esp/batched_sim/SerializeCollection.cpp index 7440972778..51d0aed811 100644 --- a/src/esp/batched_sim/SerializeCollection.cpp +++ b/src/esp/batched_sim/SerializeCollection.cpp @@ -65,11 +65,38 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, return true; } +bool fromJsonValue(const esp::io::JsonGenericValue& obj, 
+ ContinuousActionSetup& val) { + readMember(obj, "actionIdx", val.actionIdx); + readMember(obj, "stepMin", val.stepMin); + readMember(obj, "stepMax", val.stepMax); + return true; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + DiscreteActionSetup& val) { + readMember(obj, "actionIdx", val.actionIdx); + readMember(obj, "thresholds", val.thresholds); + return true; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + ActionMap& val) { + readMember(obj, "numActions", val.numActions); + readMember(obj, "baseMove", val.baseMove); + readMember(obj, "baseRotate", val.baseRotate); + readMember(obj, "graspRelease", val.graspRelease); + readMember(obj, "joints", val.joints); + return true; +} + bool fromJsonValue(const esp::io::JsonGenericValue& obj, Robot& val) { readMember(obj, "urdfFilepath", val.urdfFilepath); + readMember(obj, "startJointPositions", val.startJointPositions); readMember(obj, "gripper", val.gripper); readMember(obj, "links", val.links); + readMember(obj, "actionMap", val.actionMap); return true; } diff --git a/src/esp/batched_sim/SerializeCollection.h b/src/esp/batched_sim/SerializeCollection.h index 6d4d349d6e..a2bd3314cf 100644 --- a/src/esp/batched_sim/SerializeCollection.h +++ b/src/esp/batched_sim/SerializeCollection.h @@ -39,6 +39,25 @@ struct FreeObject { std::string generateCollisionSpheresTechnique = ""; // "uprightCylinder", "box" }; +struct ContinuousActionSetup { + int actionIdx = -1; + float stepMin = -1.f; + float stepMax = 1.f; +}; + +struct DiscreteActionSetup { + int actionIdx = -1; + std::vector thresholds; +}; + +struct ActionMap { + int numActions = -1; + ContinuousActionSetup baseMove; + ContinuousActionSetup baseRotate; + DiscreteActionSetup graspRelease; + std::vector> joints; +}; + struct RobotGripper { std::string attachLinkName; Magnum::Vector3 offset{Magnum::Math::ZeroInit}; @@ -52,8 +71,10 @@ struct RobotLink { struct Robot { std::string urdfFilepath; + std::vector startJointPositions; RobotGripper 
gripper; - std::vector links; + std::vector links; + ActionMap actionMap; }; struct Collection { diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 279a9d40d5..8f1f087e74 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -34,14 +34,6 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("height", &CameraSensorConfig::height, R"(Todo)") .def_readwrite("hfov", &CameraSensorConfig::hfov, R"(Todo)"); - py::class_(m, "ActionMap") - .def(py::init(&ActionMap::create<>)) - .def_readwrite("num_actions", &ActionMap::numActions, R"(Todo)") - .def_readwrite("base_move", &ActionMap::baseMove, R"(Todo)") - .def_readwrite("base_rotate", &ActionMap::baseRotate, R"(Todo)") - .def_readwrite("grasp_release", &ActionMap::graspRelease, R"(Todo)") - .def_readwrite("action_joint_degree_pairs", &ActionMap::actionJointDegreePairs, R"(Todo)"); - py::class_( m, "BatchedSimulatorConfig") .def(py::init(&BatchedSimulatorConfig::create<>)) @@ -54,8 +46,7 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") - .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)") - .def_readwrite("action_map", &BatchedSimulatorConfig::actionMap, R"(Todo)"); + .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); py::class_( m, "EnvironmentState") @@ -84,6 +75,8 @@ void initBatchedSimBindings(py::module& m) { .def(py::init(&BatchedSimulator::create)) .def("get_num_episodes", &BatchedSimulator::getNumEpisodes, R"(todo)") + .def("get_num_actions", + &BatchedSimulator::getNumActions, R"(todo)") .def("reset", &BatchedSimulator::reset, R"(todo)") 
.def("get_environment_states", diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index 50dd4c922a..76c421641f 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -181,20 +181,6 @@ int key_press() { // not working: ¹ (251), num lock (-144), caps lock (-20), wi } } -ActionMap createActionMap() { - ActionMap map; - map.graspRelease = 0; - map.baseRotate = 1; - map.baseMove = 2; - const int numJointDegrees = 15; - for (int i = 0; i < numJointDegrees; i++) { - map.actionJointDegreePairs.push_back(std::make_pair(i + 3, i)); - } - - map.numActions = 18; - - return map; -} } // namespace @@ -227,7 +213,6 @@ TEST_F(BatchedSimulatorTest, basic) { .doProceduralEpisodeSet = true, //.doProceduralEpisodeSet = false, //.episodeSetFilepath = "generated.episode_set.json", - .actionMap = createActionMap() }; BatchedSimulator bsim(config); @@ -280,17 +265,9 @@ TEST_F(BatchedSimulatorTest, basic) { std::cout << "Robot controls: press WASD/arrow keys to move, G to grab/drop, +/- to adjust speed, or ESC to quit." << std::endl; } - const int actionDim = config.actionMap.numActions; + const auto& actionMap = bsim.getSerializeCollection().robots[0].actionMap; + const int actionDim = actionMap.numActions; std::vector actions(actionDim * config.numEnvs, 0.f); - if (doFreeCam) { - for (int b = 0; b < config.numEnvs; b++) { - actions[b * actionDim + 0] = 1.f; // attempt grip - //actions[b * actionDim + 1] = (b % 2 == 0) ? 0.1f : -0.1f; // base rotate - actions[b * actionDim + 2] = 0.05f; // base forward - int jointForThisEnv = ((b / 2) % 15); - actions[b * actionDim + 3 + jointForThisEnv] = (b % 2 == 0) ? 
0.03f : -0.03f; - } - } int nextEpisode = 0; const int numEpisodes = bsim.getNumEpisodes(); @@ -313,10 +290,6 @@ TEST_F(BatchedSimulatorTest, basic) { std::vector envStates; - if (doOverlapPhysics) { - bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); - } - int frameIdx = 0; bool isAutoplay = false; int autoplayProgress = -1; @@ -324,7 +297,6 @@ TEST_F(BatchedSimulatorTest, basic) { while (true) { if (doOverlapPhysics) { - bsim.waitStepPhysicsOrReset(); bsim.startRender(); // make a copy of envStates envStates = bsim.getEnvironmentStates(); @@ -333,6 +305,7 @@ TEST_F(BatchedSimulatorTest, basic) { doAdvanceSim = false; } bsim.waitRender(); + bsim.waitStepPhysicsOrReset(); } else { if (doAdvanceSim) { bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); @@ -367,6 +340,7 @@ TEST_F(BatchedSimulatorTest, basic) { int key = 0; bool doStartAnimation = false; + bool doReloadAll = false; if (!isAutoplay) { key = key_press(); } @@ -398,30 +372,32 @@ TEST_F(BatchedSimulatorTest, basic) { targetLeftRight -= 1.f; } else if (key >= '1' && key <= '8') { int keyIdx = key - '1'; - jointPosIdx = 7 + keyIdx / 2; + jointPosIdx = 0 + keyIdx / 2; jointPlusMinus = (keyIdx % 2 == 1) ? 
-1.f : 1.f; } else if (key == 'w') { - baseForward += 1.f; + baseForward += actionMap.baseRotate.stepMax; } else if (key == 's') { - baseForward -= 1.f; + baseForward += actionMap.baseRotate.stepMin; } else if (key == 'a') { - baseYaw += float(Mn::Rad(Mn::Deg(15.f))); + baseYaw += actionMap.baseRotate.stepMax; } else if (key == 'd') { - baseYaw -= float(Mn::Rad(Mn::Deg(15.f))); + baseYaw += actionMap.baseRotate.stepMin; } else if (key == 'e') { baseUpDown = 0.1f; } else if (key == 'q') { baseUpDown = -0.1f; }else if (key == '=') { // + - moveSpeed *= 1.5f; + moveSpeed = Mn::Math::min(moveSpeed * 2.f, 1.f); } else if (key == '-') { - moveSpeed /= 1.5f; + moveSpeed = moveSpeed * 0.5f; } else if (key == 'g') { doHold = !doHold; } else if (key == 'c') { doTuneRobotCam = !doTuneRobotCam; - } else if (key == 'r') { + } else if (key == -116) { // F5 bsim.reloadSerializeCollection(); + } else if (key == 'r') { + doReloadAll = true; } for (int b = 0; b < config.numEnvs; b++) { @@ -431,9 +407,20 @@ TEST_F(BatchedSimulatorTest, basic) { actionsForEnv[j] = 0.f; } - actionsForEnv[config.actionMap.graspRelease] = doHold ? 1.f : -1.f; - actionsForEnv[config.actionMap.baseRotate] = baseYaw * moveSpeed; - actionsForEnv[config.actionMap.baseMove] = baseForward * moveSpeed; + constexpr float eps = 0.01; + actionsForEnv[actionMap.graspRelease.actionIdx] = doHold + ? actionMap.graspRelease.thresholds[1] + eps + : actionMap.graspRelease.thresholds[0] - eps; + actionsForEnv[actionMap.baseRotate.actionIdx] = baseYaw * moveSpeed; + actionsForEnv[actionMap.baseMove.actionIdx] = baseForward * moveSpeed; + + if (jointPosIdx != -1) { + BATCHED_SIM_ASSERT(jointPosIdx < actionMap.joints.size()); + const auto& jointActionSetup = actionMap.joints[jointPosIdx]; + actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus > 0.f + ? 
jointPlusMinus * jointActionSetup.second.stepMax * moveSpeed + : -jointPlusMinus * jointActionSetup.second.stepMin * moveSpeed; + } // 2 is torso up/down // 7 shoulder, + is down @@ -443,17 +430,12 @@ TEST_F(BatchedSimulatorTest, basic) { // 11 wrist, + is down // 12 wrist twise, + is right //actionsForEnv[3 + 8] = -targetLeftRight * rotSpeed; - actionsForEnv[3 + 2] = baseUpDown * moveSpeed; - actionsForEnv[3 + 11] = -targetUpDown * moveSpeed; - actionsForEnv[3 + 12] = -targetLeftRight * moveSpeed; - - if (jointPosIdx != -1) { - actionsForEnv[3 + jointPosIdx] = jointPlusMinus; - } - - // always have grippers open - actionsForEnv[3 + 13] = 1.f; - actionsForEnv[3 + 14] = 1.f; + // actionsForEnv[3 + 2] = baseUpDown * moveSpeed; + // actionsForEnv[3 + 11] = -targetUpDown * moveSpeed; + // actionsForEnv[3 + 12] = -targetLeftRight * moveSpeed; + // // always have grippers open + // actionsForEnv[3 + 13] = 1.f; + // actionsForEnv[3 + 14] = 1.f; } } else { @@ -528,7 +510,7 @@ TEST_F(BatchedSimulatorTest, basic) { // temp end episode on collision for (int b = 0; b < config.numEnvs; b++) { int resetPeriodForEnv = b + 1; - if (envStates[b].did_collide) { + if (envStates[b].did_collide || doReloadAll) { resets[b] = getNextEpisode(); } else { resets[b] = -1; From 1c8b08456428383621c88c31e2e771e49e77b3a7 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 4 Apr 2022 18:22:16 -0400 Subject: [PATCH 15/53] more robust grasping --- src/esp/batched_sim/BatchedSimulator.cpp | 64 +++++++++++++++++++++--- src/esp/batched_sim/EpisodeSet.h | 1 - src/tests/BatchedSimTest.cpp | 39 +++++++++------ 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index b583f07d01..21ad1d3db9 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -513,10 +513,7 @@ void BatchedSimulator::updateGripping() { } if (robotInstance.doAttemptGrip_) { - // sloppy: 
doing this gripping logic here since it's the only time we have access to - // the link transform as a Matrix4. - // todo: assert this is valid BATCHED_SIM_ASSERT(robotInstance.grippedFreeObjectIndex_ == -1); const auto& gripperMat = robotInstance.cachedGripperLinkMat_; auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); @@ -526,14 +523,63 @@ void BatchedSimulator::updateGripping() { int grippedFreeObjectIndex = episodeInstance.colGrid_.contactTest( gripperQueryWorldOrigin, gripperQueryRadius); if (grippedFreeObjectIndex != -1) { + + // store copy of obstacle in case we need to reinsert on failed grab + const auto obsCopy = episodeInstance.colGrid_.getObstacle(grippedFreeObjectIndex); + + // remove object before doing collision test + // perf todo: only do this after we pass the columnGridSet collision test below removeFreeObjectFromCollisionGrid(b, grippedFreeObjectIndex); robotInstance.grippedFreeObjectIndex_ = grippedFreeObjectIndex; - robotInstance.doAttemptGrip_ = false; - episodeInstance.movedFreeObjectIndexes_.push_back(grippedFreeObjectIndex); - recentStats_.numGrips_++; + // check if object will be collision-free in gripper + bool hit = false; + { + // sloppy: code copy-pasted from updateCollision() + + const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); + const auto& columnGridSet = stageFixedObject.columnGridSet_; + + // note we must assign this now so that getHeldObjectTransform works + robotInstance.grippedFreeObjectIndex_ = grippedFreeObjectIndex; + + ColumnGridSource::QueryCacheValue grippedObjectQueryCache = 0; + auto mat = getHeldObjectTransform(b); + const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + robotInstance.grippedFreeObjectIndex_); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, 
freeObjectSpawn.freeObjIndex_); + for (const auto& sphere : freeObject.collisionSpheres_) { + const auto& sphereLocalOrigin = sphere.origin; + auto sphereWorldOrigin = mat.transformPoint(sphereLocalOrigin); + bool thisSphereHit = columnGridSet.contactTest(sphere.radiusIdx, sphereWorldOrigin, &grippedObjectQueryCache); + // todo: proper debug-drawing of hits for held object spheres + if (thisSphereHit) { + hit = true; + break; + } + + const auto sphereRadius = getCollisionRadius(serializeCollection_, sphere.radiusIdx); + int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(sphereWorldOrigin, sphereRadius); + if (hitFreeObjectIndex != -1) { + hit = true; + break; + } + } + } + + if (!hit) { + recentStats_.numGrips_++; + envState.did_grasp = true; + } else { + // reinsert at old pose + reinsertFreeObject(b, grippedFreeObjectIndex, obsCopy.pos, + obsCopy.invRotation.invertedNormalized()); + robotInstance.grippedFreeObjectIndex_ = -1; // undo assignment + } + + robotInstance.doAttemptGrip_ = false; - envState.did_grasp = true; } recentStats_.numGripAttempts_++; @@ -2093,7 +2139,9 @@ void BatchedSimulator::signalKillPhysicsThread() { const std::vector& BatchedSimulator::getEnvironmentStates() const { ESP_CHECK(!isPhysicsThreadActive(), "Don't call getEnvironmentStates during async physics step"); - ESP_CHECK(isRenderStarted_, "For best runtime perf, call getEnvironmentStates *after* startRender"); +#ifdef NDEBUG + ESP_CHECK(isRenderStarted_, "For best runtime perf, call getEnvironmentStates *after* startRender"); +#endif return pythonEnvStates_; } diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 3afc7e56d5..2f26f9dbad 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -91,7 +91,6 @@ class EpisodeInstance { // todo: more robust storage for moved free objects static constexpr int MAX_MOVED_FREE_OBJECTS = 6; // todo: better memory management - std::vector movedFreeObjectIndexes_; int 
firstFreeObjectInstanceId_ = -1; #ifndef NDEBUG // todo: hook up to a well-named preproc var std::vector persistentDebugInstanceIds_; diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index 76c421641f..37ed94f14c 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -298,8 +298,6 @@ TEST_F(BatchedSimulatorTest, basic) { if (doOverlapPhysics) { bsim.startRender(); - // make a copy of envStates - envStates = bsim.getEnvironmentStates(); if (doAdvanceSim) { bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); doAdvanceSim = false; @@ -313,11 +311,12 @@ TEST_F(BatchedSimulatorTest, basic) { doAdvanceSim = false; } bsim.startRender(); - // make a copy of envStates - envStates = bsim.getEnvironmentStates(); bsim.waitRender(); } + // make a copy of envStates + envStates = bsim.getEnvironmentStates(); + uint8_t* base_color_ptr = bsim.getBpsRenderer().getColorPointer(); float* base_depth_ptr = bsim.getBpsRenderer().getDepthPointer(); @@ -351,7 +350,7 @@ TEST_F(BatchedSimulatorTest, basic) { if (!(doFreeCam || doTuneRobotCam)) { - static bool doHold = false; + bool doGrapRelease = false; float targetUpDown = 0.f; float targetLeftRight = 0.f; float baseYaw = 0.f; @@ -391,7 +390,7 @@ TEST_F(BatchedSimulatorTest, basic) { } else if (key == '-') { moveSpeed = moveSpeed * 0.5f; } else if (key == 'g') { - doHold = !doHold; + doGrapRelease = true; } else if (key == 'c') { doTuneRobotCam = !doTuneRobotCam; } else if (key == -116) { // F5 @@ -407,10 +406,18 @@ TEST_F(BatchedSimulatorTest, basic) { actionsForEnv[j] = 0.f; } + const auto& envState = envStates[b]; + constexpr float eps = 0.01; - actionsForEnv[actionMap.graspRelease.actionIdx] = doHold - ? actionMap.graspRelease.thresholds[1] + eps - : actionMap.graspRelease.thresholds[0] - eps; + if (doGrapRelease) { + bool isHolding = (envState.held_obj_idx != -1); + actionsForEnv[actionMap.graspRelease.actionIdx] = !isHolding + ? 
actionMap.graspRelease.thresholds[1] + eps + : actionMap.graspRelease.thresholds[0] - eps; + } else { + // do nothing + actionsForEnv[actionMap.graspRelease.actionIdx] = actionMap.graspRelease.thresholds[0] + eps; + } actionsForEnv[actionMap.baseRotate.actionIdx] = baseYaw * moveSpeed; actionsForEnv[actionMap.baseMove.actionIdx] = baseForward * moveSpeed; @@ -422,13 +429,17 @@ TEST_F(BatchedSimulatorTest, basic) { : -jointPlusMinus * jointActionSetup.second.stepMin * moveSpeed; } + // 0-1 I don't know these // 2 is torso up/down + // 3-6 I don't know these // 7 shoulder, + is down - // 8 twist, + is twist to right + // 8 shoulder twist, + is twist to right // 9 elbow, + is down // 10 elbow twist, + is twst to right // 11 wrist, + is down - // 12 wrist twise, + is right + // 12 wrist twist, + is right + // 13 gripper finger 1 + // 14 gripper finger 2 //actionsForEnv[3 + 8] = -targetLeftRight * rotSpeed; // actionsForEnv[3 + 2] = baseUpDown * moveSpeed; // actionsForEnv[3 + 11] = -targetUpDown * moveSpeed; @@ -507,10 +518,10 @@ TEST_F(BatchedSimulatorTest, basic) { } #if 1 - // temp end episode on collision for (int b = 0; b < config.numEnvs; b++) { - int resetPeriodForEnv = b + 1; - if (envStates[b].did_collide || doReloadAll) { + // end episode on collision? 
+ if (/*envStates[b].did_collide */ false + || doReloadAll) { resets[b] = getNextEpisode(); } else { resets[b] = -1; From 199d8bcd4b0dab14c2d2f20a326e5caf4972add1 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Tue, 5 Apr 2022 00:30:28 -0400 Subject: [PATCH 16/53] more robust object-dropping; added drop_height to python env state --- src/esp/batched_sim/BatchedSimulator.cpp | 47 +++++++++++++++--------- src/esp/batched_sim/BatchedSimulator.h | 2 + src/esp/batched_sim/PlacementHelper.cpp | 23 +++++++++--- src/esp/batched_sim/PlacementHelper.h | 2 +- src/esp/bindings/BatchedSimBindings.cpp | 1 + 5 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 21ad1d3db9..f95ce5af85 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -467,6 +467,7 @@ void BatchedSimulator::updateGripping() { // this is wrong for the case of multiple substeps envState.did_drop = false; + envState.drop_height = NAN; envState.did_grasp = false; if (shouldDrawDebugForEnv(config_, b, substep_)) { @@ -531,6 +532,7 @@ void BatchedSimulator::updateGripping() { // perf todo: only do this after we pass the columnGridSet collision test below removeFreeObjectFromCollisionGrid(b, grippedFreeObjectIndex); robotInstance.grippedFreeObjectIndex_ = grippedFreeObjectIndex; + robotInstance.grippedFreeObjectPreviousPos_ = Cr::Containers::NullOpt; // check if object will be collision-free in gripper bool hit = false; @@ -541,9 +543,6 @@ void BatchedSimulator::updateGripping() { const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); const auto& columnGridSet = stageFixedObject.columnGridSet_; - // note we must assign this now so that getHeldObjectTransform works - robotInstance.grippedFreeObjectIndex_ = grippedFreeObjectIndex; - ColumnGridSource::QueryCacheValue grippedObjectQueryCache = 0; auto mat = 
getHeldObjectTransform(b); const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, @@ -571,6 +570,7 @@ void BatchedSimulator::updateGripping() { if (!hit) { recentStats_.numGrips_++; envState.did_grasp = true; + robotInstance.grippedFreeObjectPreviousPos_ = obsCopy.pos; } else { // reinsert at old pose reinsertFreeObject(b, grippedFreeObjectIndex, obsCopy.pos, @@ -609,28 +609,38 @@ void BatchedSimulator::updateGripping() { episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); - constexpr int maxFailedPlacements = 3; + constexpr int maxFailedPlacements = 6; PlacementHelper placementHelper(columnGridSet, episodeInstance.colGrid_, serializeCollection_, random_, maxFailedPlacements); - bool success = placementHelper.place(heldObjMat, freeObject); - - if (success) { - const auto rotationQuat = Mn::Quaternion::fromMatrix(heldObjMat.rotation()); - reinsertFreeObject(b, freeObjectIndex, heldObjMat.translation(), rotationQuat); - } else { - if (!disableFreeObjectVisualsForEnv(config_, b)) { - // hack: remove object from scene visually - const auto glMat = toGlmMat4x3(Mn::Matrix4::translation({0.f, -10000.f, 0.f})); - int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); - env.updateInstanceTransform(instanceId, glMat); - } + BATCHED_SIM_ASSERT(robotInstance.grippedFreeObjectPreviousPos_); + const Mn::Vector3 fallbackPos = *robotInstance.grippedFreeObjectPreviousPos_; + // Provide a fallback pos so that the place() always succeeds. The fallback is + // the previous pos of this object (before grasping it). 
+ float dropY = heldObjMat.translation().y(); + bool success = placementHelper.place(heldObjMat, freeObject, &fallbackPos); + BATCHED_SIM_ASSERT(success); + + const auto rotationQuat = Mn::Quaternion::fromMatrix(heldObjMat.rotation()); + reinsertFreeObject(b, freeObjectIndex, heldObjMat.translation(), rotationQuat); + + // Reference code for handling a failed drop by removing object. This isn't a + // good solution because PythonEnvironmentState doesn't have a way to indicate + // that an object has been removed from the scene. + // { + // if (!disableFreeObjectVisualsForEnv(config_, b)) { + // // hack: remove object from scene visually + // const auto glMat = toGlmMat4x3(Mn::Matrix4::translation({0.f, -10000.f, 0.f})); + // int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + // env.updateInstanceTransform(instanceId, glMat); + // } + // recentStats_.numFailedDrops_++; + // } - recentStats_.numFailedDrops_++; - } robotInstance.grippedFreeObjectIndex_ = -1; robotInstance.doAttemptDrop_ = false; envState.did_drop = true; + envState.drop_height = dropY - heldObjMat.translation().y(); recentStats_.numDrops_++; } @@ -1546,6 +1556,7 @@ void BatchedSimulator::resetEpisodeInstance(int b) { envState.target_obj_start_rotation = rotation; envState.did_drop = false; + envState.drop_height = NAN; envState.did_grasp = false; } diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 033887a472..727251c63c 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -70,6 +70,7 @@ struct PythonEnvironmentState { int held_obj_idx = -1; bool did_grasp = false; bool did_drop = false; + float drop_height = NAN; // other env state std::vector obj_positions; @@ -143,6 +144,7 @@ class RobotInstance { // std::vector grippedObjCollisionSphereWorldOrigins_; Magnum::Matrix4 cachedGripperLinkMat_; // sloppy: also in newNodeTransforms at glMat int grippedFreeObjectIndex_ = -1; + 
Corrade::Containers::Optional grippedFreeObjectPreviousPos_; // glm::mat4 cameraNewInvTransform_; Magnum::Matrix4 cameraAttachNodeTransform_; }; diff --git a/src/esp/batched_sim/PlacementHelper.cpp b/src/esp/batched_sim/PlacementHelper.cpp index 9a6942012c..be628ea844 100644 --- a/src/esp/batched_sim/PlacementHelper.cpp +++ b/src/esp/batched_sim/PlacementHelper.cpp @@ -21,9 +21,11 @@ PlacementHelper::PlacementHelper(const ColumnGridSet& columnGridSet, , random_(random) , maxFailedPlacements_(maxFailedPlacements) { + BATCHED_SIM_ASSERT(maxFailedPlacements > 0); } -bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject) const { +bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject, + const Mn::Vector3* fallbackPos) const { BATCHED_SIM_ASSERT(freeObject.collisionSpheres_.size()); @@ -39,7 +41,7 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO // cast a single small sphere down to find a resting position candidate { // todo: find a sphere near the center of mass, at the base of the object in its - // current rotation. We can use its rotated box to find this. + // current (or resting) rotation. We can use its rotated box to find this. const float sphereRadius = 0.015f; // temp hack const auto sphereLocalOrigin = Mn::Vector3( freeObject.aabb_.center().x(), @@ -74,8 +76,14 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO break; } + // The intention of this test is to catch when an object has been dropped near + // a wall or other part of the static scene, such that it's partially + // interpenetrating. + // todo: consider vertical fixup here float castDownResult = columnGridSet_.castDownTest(sphere.radiusIdx, sphereWorldOrigin, &queryCache); - constexpr float pad = 0.01f; + // sloppy: this is needed because we currently drop tilted objects, and we + // don't use a sphere at the true base of the object in the earlier castDownTest. 
+ constexpr float pad = 0.05f; const float maxPenetrationDist = sphereRadius + pad; if (castDownResult <= -maxPenetrationDist) { didHitColumnGrid = true; @@ -90,7 +98,7 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO heldObjMat.translation().y() += minCastDownDist; // restore to drop height - if (hitFreeObjectIndex != -1) { + if (hitFreeObjectIndex != -1 && numFailedPlacements < maxFailedPlacements_ - 1) { const auto& hitObs = colGrid_.getObstacle(hitFreeObjectIndex); auto delta = heldObjMat.translation() - hitObs.pos; delta.y() = 0.f; @@ -102,7 +110,7 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO } numFailedPlacements++; - if (numFailedPlacements == maxFailedPlacements_) { + if (numFailedPlacements >= maxFailedPlacements_) { break; } @@ -119,6 +127,11 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO Mn::Math::cos(*hintAngle), 0.f, Mn::Math::sin(*hintAngle)) * bumpDist; } + if (!success && fallbackPos) { + heldObjMat.translation() = *fallbackPos; + success = true; + } + return success; } diff --git a/src/esp/batched_sim/PlacementHelper.h b/src/esp/batched_sim/PlacementHelper.h index 9d1aa92586..542f0df5a1 100644 --- a/src/esp/batched_sim/PlacementHelper.h +++ b/src/esp/batched_sim/PlacementHelper.h @@ -19,7 +19,7 @@ class PlacementHelper { PlacementHelper(const ColumnGridSet& columnGridSet, const CollisionBroadphaseGrid& colGrid, const serialize::Collection& collection, esp::core::Random& random, int maxFailedPlacements); - bool place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject) const; + bool place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject, const Mn::Vector3* fallbackPos=nullptr) const; private: const ColumnGridSet& columnGridSet_; diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 8f1f087e74..e87f89fcec 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ 
b/src/esp/bindings/BatchedSimBindings.cpp @@ -68,6 +68,7 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("held_obj_idx", &PythonEnvironmentState::held_obj_idx, R"(Todo)") .def_readwrite("did_grasp", &PythonEnvironmentState::did_grasp, R"(Todo)") .def_readwrite("did_drop", &PythonEnvironmentState::did_drop, R"(Todo)") + .def_readwrite("drop_height", &PythonEnvironmentState::drop_height, R"(Todo)") .def_readwrite("obj_positions", &PythonEnvironmentState::obj_positions, R"(Todo)") .def_readwrite("obj_rotations", &PythonEnvironmentState::obj_rotations, R"(Todo)"); From cfa53422ec9fcc0ded4dcae99f2dc5172f8a226b Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES <28320361+vincentpierre@users.noreply.github.com> Date: Tue, 5 Apr 2022 20:05:36 -0700 Subject: [PATCH 17/53] normalizing the actions so that -1 and 1 become the limits (#1719) Co-authored-by: vincentpierre --- src/esp/batched_sim/BatchedSimulator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index f95ce5af85..be19862fd6 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -1937,10 +1937,10 @@ void BatchedSimulator::substepPhysics() { robotInstance.doAttemptDrop_ = graspReleaseAction < graspReleaseSetup.thresholds[0]; } - const float clampedBaseYawAction = Mn::Math::clamp(baseRotateAction, baseRotateSetup.stepMin, baseRotateSetup.stepMax); + const float clampedBaseYawAction = Mn::Math::clamp(baseRotateAction * (baseRotateSetup.stepMax - baseRotateSetup.stepMin) + baseRotateSetup.stepMin, baseRotateSetup.stepMin, baseRotateSetup.stepMax); yaws[b] = prevYaws[b] + clampedBaseYawAction; // todo: wrap angle to 360 degrees - float clampedBaseMovementAction = Mn::Math::clamp(baseMoveAction, baseMoveSetup.stepMin, baseMoveSetup.stepMax); + float clampedBaseMovementAction = Mn::Math::clamp(baseMoveAction * (baseMoveSetup.stepMax - 
baseMoveSetup.stepMin) + baseMoveSetup.stepMin, baseMoveSetup.stepMin, baseMoveSetup.stepMax); positions[b] = prevPositions[b] + Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) @@ -1961,7 +1961,7 @@ void BatchedSimulator::substepPhysics() { BATCHED_SIM_ASSERT(j >= 0 && j < robot_.numPosVars); auto& pos = jointPositions[baseJointIndex + j]; const auto& prevPos = prevJointPositions[baseJointIndex + j]; - const float clampedJointMovementAction = Mn::Math::clamp(jointMovementAction, + const float clampedJointMovementAction = Mn::Math::clamp(jointMovementAction * (actionSetup.stepMax - actionSetup.stepMin) + actionSetup.stepMin, actionSetup.stepMin, actionSetup.stepMax); pos = prevPos + clampedJointMovementAction; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], From efe29e135e4e2c3a644ece588cfdd28de568834a Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Wed, 6 Apr 2022 14:18:29 -0400 Subject: [PATCH 18/53] update bps3D commit --- src/deps/bps3D | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/deps/bps3D b/src/deps/bps3D index 28480f91c2..ce7e28f76f 160000 --- a/src/deps/bps3D +++ b/src/deps/bps3D @@ -1 +1 @@ -Subproject commit 28480f91c2b91ddc64266e826e7ed96db46d4a51 +Subproject commit ce7e28f76f31f302f03903c09f99d899b575365e From d4425a8808fd689871eea457e2bfef00b5148ebf Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Wed, 6 Apr 2022 14:56:36 -0400 Subject: [PATCH 19/53] add setRobotCamera and setFreeCamera --- src/esp/batched_sim/BatchedSimulator.cpp | 126 +++++++++++++---------- src/esp/batched_sim/BatchedSimulator.h | 16 +-- src/esp/bindings/BatchedSimBindings.cpp | 7 +- src/tests/BatchedSimTest.cpp | 22 ++-- 4 files changed, 96 insertions(+), 75 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index be19862fd6..f69ceb0887 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ 
b/src/esp/batched_sim/BatchedSimulator.cpp @@ -319,11 +319,7 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF if (updateForRender) { BATCHED_SIM_ASSERT(instanceIndex < robots_.nodeNewTransforms_.size()); - robots_.nodeNewTransforms_[instanceIndex] = toGlmMat4x3(mat); - - if (robots_.robot_->cameraAttachNode_ == nodeIndex) { - robotInstance.cameraAttachNodeTransform_ = mat; - } + robots_.nodeNewTransforms_[instanceIndex] = mat; } #if 0 esp::gfx::replay::Transform absTransform{ @@ -941,17 +937,10 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { } if (!disableRobotVisualsForEnv(config_, b)) { - const auto& glMat = robots_.nodeNewTransforms_[instanceIndex]; + const auto glMat = toGlmMat4x3(safeVectorGet(robots_.nodeNewTransforms_, instanceIndex)); env.updateInstanceTransform(instanceId, glMat); } } - - // todo: cleaner handling of camera update that doesn't depend on attachment or collision - if (robot_.cameraAttachNode_ != -1) { - auto cameraTransform = robotInstance.cameraAttachNodeTransform_ * robot_.cameraAttachTransform_; - auto glCameraNewInvTransform = glm::inverse(toGlmMat4(cameraTransform)); - env.setCameraView(glCameraNewInvTransform); - } } // update gripped free object @@ -1241,11 +1230,14 @@ BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeCo const Mn::Vector3 camPos(Mn::Math::ZeroInit); const Mn::Quaternion camRot(Mn::Math::IdentityInit); glm::mat4 world_to_camera(glm::inverse(toGlmMat4(camPos, camRot))); + float aspectRatio = float(sensor0.width) / float(sensor0.height); for (int b = 0; b < numEnvs; b++) { glm::mat4 view = world_to_camera; - auto env = renderer_->makeEnvironment(scene_, view, sensor0.hfov, 0.f, 0.01, - 1000.f); + constexpr float near = 0.01; + constexpr float far = 1000.f; + constexpr float hfov = 60.f; // arbitrary; will be reset later + auto env = renderer_->makeEnvironment(scene_, view, hfov, aspectRatio, near, far); envs_.emplace_back(std::move(env)); 
} } @@ -1317,8 +1309,7 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { std::string cameraAttachLinkName = "torso_lift_link"; Mn::Vector3 camPos = {-0.536559, 1.16173, 0.568379}; Mn::Quaternion camRot = {{-0.26714, -0.541109, -0.186449}, 0.775289}; - const auto cameraMat = Mn::Matrix4::from(camRot.toMatrix(), camPos); - attachCameraToLink(cameraAttachLinkName, cameraMat); + setRobotCamera(cameraAttachLinkName, camPos, camRot, /*hfov*/60.f); } if (config_.doAsyncPhysicsStep) { @@ -1351,43 +1342,6 @@ int BatchedSimulator::getNumActions() const { return serializeCollection_.robots[0].actionMap.numActions; } - -void BatchedSimulator::setCamera(const Mn::Vector3& camPos, const Mn::Quaternion& camRot) { - - const int numEnvs = config_.numEnvs; - - robot_.cameraAttachNode_ = -1; // unattach camera - - glm::mat4 world_to_camera(glm::inverse(toGlmMat4(camPos, camRot))); - - for (int b = 0; b < numEnvs; b++) { - auto& env = bpsWrapper_->envs_[b]; - env.setCameraView(world_to_camera); - } -} - -void BatchedSimulator::attachCameraToLink(const std::string& linkName, const Magnum::Matrix4& mat) { - - const int numEnvs = config_.numEnvs; - - BATCHED_SIM_ASSERT(robot_.linkIndexByName_.count(linkName)); - - int linkIndex = robot_.linkIndexByName_[linkName]; - - robot_.cameraAttachNode_ = linkIndex + 1; - robot_.cameraAttachTransform_ = mat; - - // todo: threadsafe - for (int b = 0; b < numEnvs; b++) { - auto& env = bpsWrapper_->envs_[b]; - auto& robotInstance = robots_.robotInstances_[b]; - auto cameraTransform = robotInstance.cameraAttachNodeTransform_ * robot_.cameraAttachTransform_; - auto glCameraNewInvTransform = glm::inverse(toGlmMat4(cameraTransform)); - env.setCameraView(glCameraNewInvTransform); - } -} - - bps3D::Environment& BatchedSimulator::getBpsEnvironment(int envIndex) { BATCHED_SIM_ASSERT(envIndex < config_.numEnvs); return bpsWrapper_->envs_[envIndex]; @@ -2033,10 +1987,74 @@ bool BatchedSimulator::isPhysicsThreadActive() const { 
(!isStepPhysicsOrResetFinished_ || signalStepPhysics_); } + +void BatchedSimulator::setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov) { + + BATCHED_SIM_ASSERT(robot_.linkIndexByName_.count(linkName)); + int linkIndex = robot_.linkIndexByName_[linkName]; + robot_.cameraAttachNode_ = linkIndex + 1; + robot_.cameraAttachTransform_ = Mn::Matrix4::from(rotation.toMatrix(), pos); + cameraHfov_ = hfov; + + freeCameraTransform_ = Cr::Containers::NullOpt; +} + +void BatchedSimulator::setFreeCamera(const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov) { + robot_.cameraAttachNode_ = -1; + freeCameraTransform_ = Mn::Matrix4::from(rotation.toMatrix(), pos); + cameraHfov_ = hfov; +} + + +void BatchedSimulator::setBpsCameraHelper(int b, const glm::mat4& glCameraInvTransform, float hfov) { + float aspectRatio = float(config_.sensor0.width) / float(config_.sensor0.height); + BATCHED_SIM_ASSERT(hfov > 0.f && hfov < 180.f); + constexpr float near = 0.01; + constexpr float far = 1000.f; + auto& env = safeVectorGet(bpsWrapper_->envs_, b); + env.setCamera(glCameraInvTransform, hfov, aspectRatio, near, far); + +} + +void BatchedSimulator::updateBpsCameras() { + + if (robot_.cameraAttachNode_ == -1) { + + BATCHED_SIM_ASSERT(freeCameraTransform_); + glm::mat4 world_to_camera(glm::inverse(toGlmMat4(*freeCameraTransform_))); + + for (int b = 0; b < config_.numEnvs; b++) { + setBpsCameraHelper(b, world_to_camera, cameraHfov_); + } + } else { + + const int numLinks = robots_.robot_->artObj->getNumLinks(); + const int numNodes = numLinks + 1; + + const auto& cameraAttachTransform = robot_.cameraAttachTransform_; + const int cameraAttachNode = robot_.cameraAttachNode_; + + for (int b = 0; b < config_.numEnvs; b++) { + + const int baseInstanceIndex = b * numNodes; + const auto instanceIndex = baseInstanceIndex + cameraAttachNode; + const auto& cameraAttachNodeTransform = safeVectorGet(robots_.nodeNewTransforms_, 
instanceIndex); + + auto cameraTransform = cameraAttachNodeTransform * cameraAttachTransform; + auto glCameraNewInvTransform = glm::inverse(toGlmMat4(cameraTransform)); + + setBpsCameraHelper(b, glCameraNewInvTransform, cameraHfov_); + } + } +} + void BatchedSimulator::startRender() { BATCHED_SIM_ASSERT(!isPhysicsThreadActive()); ProfilingScope scope("start render"); BATCHED_SIM_ASSERT(isOkToRender_); + + updateBpsCameras(); + bpsWrapper_->renderer_->render(bpsWrapper_->envs_.data()); isOkToRender_ = false; isRenderStarted_ = true; diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 727251c63c..6b41924efb 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -25,7 +25,6 @@ namespace batched_sim { struct CameraSensorConfig { int width = -1; int height = -1; - float hfov = -1.f; }; struct BatchedSimulatorConfig { @@ -172,7 +171,8 @@ class RobotInstanceSet { // don't try to pack this with other structs anywhere because it's int8 std::vector collisionSphereQueryCaches_; // size = num robot instances * robot num instances - std::vector nodeNewTransforms_; + // perf todo: avoid wasted storage? store as 4x3 + std::vector nodeNewTransforms_; // size = num robot instances std::vector collisionResults_; bool areCollisionResultsValid_ = false; @@ -198,6 +198,9 @@ class BatchedSimulator { return serializeCollection_; } + void setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov); + void setFreeCamera(const Magnum::Vector3& pos, const Magnum::Quaternion& rotation, float hfov); + void startRender(); void waitRender(); @@ -209,11 +212,6 @@ class BatchedSimulator { bps3D::Renderer& getBpsRenderer(); - // For debugging. Sets camera for all envs. 
- // todo: threadsafe or guard - void setCamera(const Mn::Vector3& camPos, const Mn::Quaternion& camRot); - void attachCameraToLink(const std::string& linkName, const Magnum::Matrix4& mat); - bps3D::Environment& getBpsEnvironment(int envIndex); void deleteDebugInstances(); // probably call at start of frame render @@ -259,6 +257,8 @@ class BatchedSimulator { void updateGripping(); void resetHelper(); void updatePythonEnvironmentState(); + void updateBpsCameras(); + void setBpsCameraHelper(int b, const glm::mat4& glCameraInvTransform, float hfov); // uses episode spawn location void spawnFreeObject(int b, int freeObjectIndex, bool reinsert); @@ -300,6 +300,8 @@ class BatchedSimulator { std::vector resets_; // episode index, or -1 if not resetting int maxStorageSteps_ = -1; std::vector pythonEnvStates_; + Corrade::Containers::Optional freeCameraTransform_; + float cameraHfov_ = -1.f; EpisodeSet episodeSet_; EpisodeInstanceSet episodeInstanceSet_; diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index e87f89fcec..081159fa55 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -31,8 +31,7 @@ py::capsule getDepthMemory(BatchedSimulator& bsim, const uint32_t groupIdx) void initBatchedSimBindings(py::module& m) { py::class_(m, "CameraSensorConfig") .def_readwrite("width", &CameraSensorConfig::width, R"(Todo)") - .def_readwrite("height", &CameraSensorConfig::height, R"(Todo)") - .def_readwrite("hfov", &CameraSensorConfig::hfov, R"(Todo)"); + .def_readwrite("height", &CameraSensorConfig::height, R"(Todo)"); py::class_( m, "BatchedSimulatorConfig") @@ -78,6 +77,10 @@ void initBatchedSimBindings(py::module& m) { &BatchedSimulator::getNumEpisodes, R"(todo)") .def("get_num_actions", &BatchedSimulator::getNumActions, R"(todo)") + .def("set_robot_camera", + &BatchedSimulator::setRobotCamera, R"(todo)") + .def("set_free_camera", + &BatchedSimulator::setFreeCamera, R"(todo)") 
.def("reset", &BatchedSimulator::reset, R"(todo)") .def("get_environment_states", diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index 37ed94f14c..fb0b79c0ae 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -191,21 +191,21 @@ TEST_F(BatchedSimulatorTest, basic) { esp::logging::LoggingContext loggingContext; constexpr bool doOverlapPhysics = false; - constexpr bool doFreeCam = false; + constexpr bool doFreeCam = true; bool doTuneRobotCam = false; constexpr bool doSaveAllFramesForVideo = false; // see make_video_from_image_files.py constexpr bool doPairedDebugEnvs = true; constexpr bool includeDepth = false; constexpr bool includeColor = true; + constexpr float cameraHfov = 70.f; const bool forceRandomActions = doFreeCam || doTuneRobotCam; BatchedSimulatorConfig config{ - // for video: 2048 x 1024, fov 80 .numEnvs = 16, .gpuId = 0, .includeDepth = includeDepth, .includeColor = includeColor, - .sensor0 = {.width = 768, .height = 768, .hfov = 70}, + .sensor0 = {.width = 768, .height = 768}, .forceRandomActions = forceRandomActions, .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, @@ -230,20 +230,18 @@ TEST_F(BatchedSimulatorTest, basic) { cameraAttachLinkName = "torso_lift_link"; camPos = {-0.536559, 1.16173, 0.568379}; camRot = {{-0.26714, -0.541109, -0.186449}, 0.775289}; - - const auto cameraMat = Mn::Matrix4::from(camRot.toMatrix(), camPos); - bsim.attachCameraToLink(cameraAttachLinkName, cameraMat); + bsim.setRobotCamera(cameraAttachLinkName, camPos, camRot, cameraHfov); } else { - if (config.sensor0.hfov == 70.f) { + if (cameraHfov == 70.f) { camPos = {-1.61004, 1.5, 3.5455}; camRot = {{0, -0.529178, 0}, 0.848511}; - } else if (config.sensor0.hfov == 25.f) { + } else if (cameraHfov == 25.f) { camRot = {{-0.339537084, -0.484963357, -0.211753935}, 0.777615011}; camPos = {-4.61396408, 9.11192417, 4.26546907}; } else { CORRADE_INTERNAL_ASSERT_UNREACHABLE(); } - bsim.setCamera(camPos, camRot); + 
bsim.setFreeCamera(camPos, camRot, cameraHfov); } float moveSpeed = 1.f; @@ -488,11 +486,11 @@ TEST_F(BatchedSimulatorTest, basic) { } if (doFreeCam) { - bsim.setCamera(camPos, camRot); + bsim.setFreeCamera(camPos, camRot, cameraHfov); } else { const auto cameraMat = Mn::Matrix4::from(camRot.toMatrix(), camPos); ESP_DEBUG() << "camPos: " << camPos << ", camRot: " << camRot; - bsim.attachCameraToLink(cameraAttachLinkName, cameraMat); + bsim.setRobotCamera(cameraAttachLinkName, camPos, camRot, cameraHfov); } } @@ -510,7 +508,7 @@ TEST_F(BatchedSimulatorTest, basic) { // move camera slightly camPos.y() -= 0.01f; - bsim.setCamera(camPos, camRot); + bsim.setFreeCamera(camPos, camRot, cameraHfov); if (autoplayProgress >= animDuration) { isAutoplay = false; From 7c1467ad36d014c0f47dde79845f744bde44e172 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 11 Apr 2022 09:51:39 -0400 Subject: [PATCH 20/53] fix for default actions == 0.5; add PythonEnvironmentState::robot_joint_positions_normalized --- src/esp/batched_sim/BatchedSimulator.cpp | 31 +++++++++++++++++++++--- src/esp/batched_sim/BatchedSimulator.h | 1 + src/esp/bindings/BatchedSimBindings.cpp | 1 + src/tests/BatchedSimTest.cpp | 30 ++++++++++++++--------- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index f69ceb0887..afc879c4cf 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -382,10 +382,31 @@ void BatchedSimulator::updatePythonEnvironmentState() { envState.robot_pos = Mn::Vector3(positions[b].x(), 0.f, positions[b].y()); envState.robot_rotation = yawToRotation(yaws[b]); envState.robot_joint_positions.resize(numPosVars); + envState.robot_joint_positions_normalized.resize(numPosVars); int baseJointIndex = b * robot_.numPosVars; for (int j = 0; j < numPosVars; j++) { const auto& pos = jointPositions[baseJointIndex + j]; 
safeVectorGet(envState.robot_joint_positions, j) = pos; + + float normalizedPos = 0.f; + if (robot_.jointPositionLimits.first[j] == -INFINITY) { + // if limits are +/- infinity, we assume this is an angular joint. We normalize + // it by wrapping into range (-2*PI, 2*PI) radians. + BATCHED_SIM_ASSERT(robot_.jointPositionLimits.second[j] == INFINITY); + normalizedPos = pos; + while (normalizedPos > float(Mn::Rad(Mn::Deg(180.f)))) { + normalizedPos -= float(Mn::Rad(Mn::Deg(360.f))); + } + while (normalizedPos <= -float(Mn::Rad(Mn::Deg(180.f)))) { + normalizedPos += float(Mn::Rad(Mn::Deg(360.f))); + } + } else { + BATCHED_SIM_ASSERT(robot_.jointPositionLimits.second[j] != INFINITY); + BATCHED_SIM_ASSERT(robot_.jointPositionLimits.first[j] < robot_.jointPositionLimits.second[j]); + normalizedPos = (pos - robot_.jointPositionLimits.first[j]) + / (robot_.jointPositionLimits.second[j] - robot_.jointPositionLimits.first[j]); + } + safeVectorGet(envState.robot_joint_positions_normalized, j) = normalizedPos; } envState.ee_pos = robotInstance.cachedGripperLinkMat_.translation(); envState.ee_rotation = Mn::Quaternion::fromMatrix( @@ -1714,23 +1735,25 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto << "at least one of actions or resets must be length " << actions_.size()); const int numEnvs = config_.numEnvs; + constexpr float defaultAction = 0.5f; // actions are normalized 0..1 + if (config_.forceRandomActions) { for (auto& action : actions_) { - action = random_.uniform_float(-1.f, 1.f); + action = random_.uniform_float(0.f, 1.f); } } else { if (!actions.empty()) { actions_ = std::move(actions); } else { - std::fill(actions_.begin(), actions_.end(), 0.f); + std::fill(actions_.begin(), actions_.end(), 0.5f); } } for (int b = 0; b < numEnvs; b++) { - // force zero actions on first step of episode + // force default actions on first step of episode const auto& envState = safeVectorGet(pythonEnvStates_, b); if (envState.episode_step_idx == 0) { - 
std::fill(&actions_[b * actionDim_], &actions_[(b + 1) * actionDim_], 0.f); + std::fill(&actions_[b * actionDim_], &actions_[(b + 1) * actionDim_], defaultAction); } } diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 6b41924efb..6a05768d6d 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -63,6 +63,7 @@ struct PythonEnvironmentState { Magnum::Vector3 robot_pos; Magnum::Quaternion robot_rotation; std::vector robot_joint_positions; + std::vector robot_joint_positions_normalized; Magnum::Vector3 ee_pos; Magnum::Quaternion ee_rotation; bool did_collide = false; diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 081159fa55..34a2a6d607 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -61,6 +61,7 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("robot_pos", &PythonEnvironmentState::robot_pos, R"(Todo)") .def_readwrite("robot_rotation", &PythonEnvironmentState::robot_rotation, R"(Todo)") .def_readwrite("robot_joint_positions", &PythonEnvironmentState::robot_joint_positions, R"(Todo)") + .def_readwrite("robot_joint_positions_normalized", &PythonEnvironmentState::robot_joint_positions_normalized, R"(Todo)") .def_readwrite("ee_pos", &PythonEnvironmentState::ee_pos, R"(Todo)") .def_readwrite("ee_rotation", &PythonEnvironmentState::ee_rotation, R"(Todo)") .def_readwrite("did_collide", &PythonEnvironmentState::did_collide, R"(Todo)") diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index fb0b79c0ae..e9b8e87650 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -181,6 +181,10 @@ int key_press() { // not working: ¹ (251), num lock (-144), caps lock (-20), wi } } +float calcLerpFraction(float x, float src0, float src1) { + BATCHED_SIM_ASSERT(src0 < src1); + return (x - src0) / (src1 - src0); +} } // namespace @@ 
-191,7 +195,7 @@ TEST_F(BatchedSimulatorTest, basic) { esp::logging::LoggingContext loggingContext; constexpr bool doOverlapPhysics = false; - constexpr bool doFreeCam = true; + constexpr bool doFreeCam = false; bool doTuneRobotCam = false; constexpr bool doSaveAllFramesForVideo = false; // see make_video_from_image_files.py constexpr bool doPairedDebugEnvs = true; @@ -202,7 +206,7 @@ TEST_F(BatchedSimulatorTest, basic) { const bool forceRandomActions = doFreeCam || doTuneRobotCam; BatchedSimulatorConfig config{ - .numEnvs = 16, .gpuId = 0, + .numEnvs = 1, .gpuId = 0, .includeDepth = includeDepth, .includeColor = includeColor, .sensor0 = {.width = 768, .height = 768}, @@ -372,13 +376,13 @@ TEST_F(BatchedSimulatorTest, basic) { jointPosIdx = 0 + keyIdx / 2; jointPlusMinus = (keyIdx % 2 == 1) ? -1.f : 1.f; } else if (key == 'w') { - baseForward += actionMap.baseRotate.stepMax; + baseForward += 1.f; } else if (key == 's') { - baseForward += actionMap.baseRotate.stepMin; + baseForward += -1.f; } else if (key == 'a') { - baseYaw += actionMap.baseRotate.stepMax; + baseYaw += 1.f; } else if (key == 'd') { - baseYaw += actionMap.baseRotate.stepMin; + baseYaw += -1.f; } else if (key == 'e') { baseUpDown = 0.1f; } else if (key == 'q') { @@ -401,7 +405,7 @@ TEST_F(BatchedSimulatorTest, basic) { float* actionsForEnv = &actions[b * actionDim]; for (int j = 0; j < actionDim; j++) { - actionsForEnv[j] = 0.f; + actionsForEnv[j] = 0.5f; } const auto& envState = envStates[b]; @@ -416,16 +420,18 @@ TEST_F(BatchedSimulatorTest, basic) { // do nothing actionsForEnv[actionMap.graspRelease.actionIdx] = actionMap.graspRelease.thresholds[0] + eps; } - actionsForEnv[actionMap.baseRotate.actionIdx] = baseYaw * moveSpeed; - actionsForEnv[actionMap.baseMove.actionIdx] = baseForward * moveSpeed; + actionsForEnv[actionMap.baseRotate.actionIdx] = calcLerpFraction(baseYaw * moveSpeed, -1.f, 1.f); + actionsForEnv[actionMap.baseMove.actionIdx] = calcLerpFraction(baseForward * moveSpeed, -1.f, 1.f); 
if (jointPosIdx != -1) { BATCHED_SIM_ASSERT(jointPosIdx < actionMap.joints.size()); const auto& jointActionSetup = actionMap.joints[jointPosIdx]; - actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus > 0.f - ? jointPlusMinus * jointActionSetup.second.stepMax * moveSpeed - : -jointPlusMinus * jointActionSetup.second.stepMin * moveSpeed; + // actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus > 0.f + // ? jointPlusMinus * jointActionSetup.second.stepMax * moveSpeed + // : -jointPlusMinus * jointActionSetup.second.stepMin * moveSpeed; + actionsForEnv[jointActionSetup.second.actionIdx] = calcLerpFraction(jointPlusMinus * moveSpeed, -1.f, 1.f); } + // 0-1 I don't know these // 2 is torso up/down From 63a6698b315b1a5b45407526aeceffcba7dd2040 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 11 Apr 2022 10:29:35 -0400 Subject: [PATCH 21/53] add include_frame_number option for viz_utils.make_video --- habitat_sim/utils/viz_utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/habitat_sim/utils/viz_utils.py b/habitat_sim/utils/viz_utils.py index 2a5da3df8e..92c5e3451f 100644 --- a/habitat_sim/utils/viz_utils.py +++ b/habitat_sim/utils/viz_utils.py @@ -32,7 +32,7 @@ def is_notebook() -> bool: return True -def get_fast_video_writer(video_file: str, fps: int = 60): +def get_fast_video_writer(video_file: str, fps: int = 60, include_frame_number = False): if ( "google.colab" in sys.modules and os.path.splitext(video_file)[-1] == ".mp4" @@ -51,7 +51,11 @@ def get_fast_video_writer(video_file: str, fps: int = 60): ) else: # Use software encoding - writer = imageio.get_writer(video_file, fps=fps) + output_params = [ + "-vf", + "drawtext=fontfile=Arial.ttf: text='%{frame_num}': start_number=0: x=0: y=2: fontcolor=black: fontsize=10: box=1: boxcolor=white: boxborderw=1" + ] if include_frame_number else None + writer = imageio.get_writer(video_file, fps=fps, output_params=output_params) return writer @@ -210,6 
+214,7 @@ def make_video( overlay_settings: Optional[List[Dict[str, Any]]] = None, depth_clip: Optional[float] = 10.0, observation_to_image=observation_to_image, + include_frame_number = False, ): """Build a video from a passed observations array, with some images optionally overlayed. :param observations: List of observations from which the video should be constructed. @@ -233,7 +238,7 @@ def make_video( if not video_file.endswith(".mp4"): video_file = video_file + ".mp4" print("Encoding the video: %s " % video_file) - writer = get_fast_video_writer(video_file, fps=fps) + writer = get_fast_video_writer(video_file, fps=fps, include_frame_number=include_frame_number) observation_to_image = partial(observation_to_image, depth_clip=depth_clip) for ob in observations: From 8e7c0d61279897c3e7d84c655e4576b2dee10aec Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 11 Apr 2022 11:07:51 -0400 Subject: [PATCH 22/53] polish for viz_utils include_frame_number --- habitat_sim/utils/viz_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat_sim/utils/viz_utils.py b/habitat_sim/utils/viz_utils.py index 92c5e3451f..ec45d883d2 100644 --- a/habitat_sim/utils/viz_utils.py +++ b/habitat_sim/utils/viz_utils.py @@ -53,7 +53,7 @@ def get_fast_video_writer(video_file: str, fps: int = 60, include_frame_number = # Use software encoding output_params = [ "-vf", - "drawtext=fontfile=Arial.ttf: text='%{frame_num}': start_number=0: x=0: y=2: fontcolor=black: fontsize=10: box=1: boxcolor=white: boxborderw=1" + "drawtext=fontfile=Arial.ttf: text='%{frame_num}': start_number=0: x=0: y=1: fontcolor=black: fontsize=10: box=1: boxcolor=white: boxborderw=1" ] if include_frame_number else None writer = imageio.get_writer(video_file, fps=fps, output_params=output_params) return writer From dad773cd8b323425f7e78e47270369e70b9da9cb Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 11 Apr 2022 11:24:25 -0400 Subject: [PATCH 23/53] change actions range from 
0..1 to -1..1 --- src/esp/batched_sim/BatchedSimulator.cpp | 25 ++++++++++++++---------- src/tests/BatchedSimTest.cpp | 9 +++++---- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index afc879c4cf..c6b054b5d2 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -38,6 +38,12 @@ static constexpr glm::mat4x3 identityGlMat_ = glm::mat4x3( 0.f, 0.f, 1.f, 0.f, 0.f, 0.f); +float remapAction(float action, float stepMin, float stepMax) { + // map (-1..+1) to (stepMin..stepMax) and clamp + float clampedNormalizedAction = (Mn::Math::clamp(action, -1.f, 1.f) + 1.f) * 0.5f; + return Mn::Math::lerp(stepMin, stepMax, clampedNormalizedAction); +} + Mn::Vector3 toMagnumVector3(const btVector3& src) { return {src.x(), src.y(), src.z()}; } @@ -1735,17 +1741,17 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto << "at least one of actions or resets must be length " << actions_.size()); const int numEnvs = config_.numEnvs; - constexpr float defaultAction = 0.5f; // actions are normalized 0..1 + constexpr float defaultAction = 0.0f; // actions are normalized -1..1 if (config_.forceRandomActions) { for (auto& action : actions_) { - action = random_.uniform_float(0.f, 1.f); + action = random_.uniform_float(-1.f, 1.f); } } else { if (!actions.empty()) { actions_ = std::move(actions); } else { - std::fill(actions_.begin(), actions_.end(), 0.5f); + std::fill(actions_.begin(), actions_.end(), defaultAction); } } @@ -1914,14 +1920,14 @@ void BatchedSimulator::substepPhysics() { robotInstance.doAttemptDrop_ = graspReleaseAction < graspReleaseSetup.thresholds[0]; } - const float clampedBaseYawAction = Mn::Math::clamp(baseRotateAction * (baseRotateSetup.stepMax - baseRotateSetup.stepMin) + baseRotateSetup.stepMin, baseRotateSetup.stepMin, baseRotateSetup.stepMax); - yaws[b] = prevYaws[b] + clampedBaseYawAction; // todo: 
wrap angle to 360 degrees + const float remappedBaseYawAction = remapAction(baseRotateAction, baseRotateSetup.stepMin, baseRotateSetup.stepMax); + yaws[b] = prevYaws[b] + remappedBaseYawAction; - float clampedBaseMovementAction = Mn::Math::clamp(baseMoveAction * (baseMoveSetup.stepMax - baseMoveSetup.stepMin) + baseMoveSetup.stepMin, baseMoveSetup.stepMin, baseMoveSetup.stepMax); + float remappedBaseMovementAction = remapAction(baseMoveAction, baseMoveSetup.stepMin, baseMoveSetup.stepMax); positions[b] = prevPositions[b] + Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) - * clampedBaseMovementAction; + * remappedBaseMovementAction; // sloppy: copy over all jointPositions, then process actionJointDegreePairs int baseJointIndex = b * robot_.numPosVars; @@ -1938,9 +1944,8 @@ void BatchedSimulator::substepPhysics() { BATCHED_SIM_ASSERT(j >= 0 && j < robot_.numPosVars); auto& pos = jointPositions[baseJointIndex + j]; const auto& prevPos = prevJointPositions[baseJointIndex + j]; - const float clampedJointMovementAction = Mn::Math::clamp(jointMovementAction * (actionSetup.stepMax - actionSetup.stepMin) + actionSetup.stepMin, - actionSetup.stepMin, actionSetup.stepMax); - pos = prevPos + clampedJointMovementAction; + const float remappedJointMovementAction = remapAction(jointMovementAction, actionSetup.stepMin, actionSetup.stepMax); + pos = prevPos + remappedJointMovementAction; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], robot_.jointPositionLimits.second[j]); } diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index e9b8e87650..a47fb5ad20 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -404,8 +404,9 @@ TEST_F(BatchedSimulatorTest, basic) { for (int b = 0; b < config.numEnvs; b++) { float* actionsForEnv = &actions[b * actionDim]; + constexpr float defaultAction = 0.f; for (int j = 0; j < actionDim; j++) { - actionsForEnv[j] = 0.5f; + actionsForEnv[j] = 
defaultAction; } const auto& envState = envStates[b]; @@ -420,8 +421,8 @@ TEST_F(BatchedSimulatorTest, basic) { // do nothing actionsForEnv[actionMap.graspRelease.actionIdx] = actionMap.graspRelease.thresholds[0] + eps; } - actionsForEnv[actionMap.baseRotate.actionIdx] = calcLerpFraction(baseYaw * moveSpeed, -1.f, 1.f); - actionsForEnv[actionMap.baseMove.actionIdx] = calcLerpFraction(baseForward * moveSpeed, -1.f, 1.f); + actionsForEnv[actionMap.baseRotate.actionIdx] = baseYaw * moveSpeed; + actionsForEnv[actionMap.baseMove.actionIdx] = baseForward * moveSpeed; if (jointPosIdx != -1) { BATCHED_SIM_ASSERT(jointPosIdx < actionMap.joints.size()); @@ -429,7 +430,7 @@ TEST_F(BatchedSimulatorTest, basic) { // actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus > 0.f // ? jointPlusMinus * jointActionSetup.second.stepMax * moveSpeed // : -jointPlusMinus * jointActionSetup.second.stepMin * moveSpeed; - actionsForEnv[jointActionSetup.second.actionIdx] = calcLerpFraction(jointPlusMinus * moveSpeed, -1.f, 1.f); + actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus * moveSpeed; } From f8f6f35d6692d8d547ab5bb9d64a18fd04b4778a Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Tue, 12 Apr 2022 14:32:59 -0400 Subject: [PATCH 24/53] add debug RGB sensor; remove debugPairedEnv stuff --- src/esp/batched_sim/BatchedSimulator.cpp | 352 +++++++++++------------ src/esp/batched_sim/BatchedSimulator.h | 31 +- src/esp/bindings/BatchedSimBindings.cpp | 20 +- src/tests/BatchedSimTest.cpp | 73 +++-- 4 files changed, 253 insertions(+), 223 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index c6b054b5d2..b05a735846 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -81,47 +81,6 @@ std::string getMeshNameFromURDFVisualFilepath(const std::string& filepath) { return retval; } -#ifdef ENABLE_DEBUG_INSTANCES -bool isPairedDebugEnv(const 
BatchedSimulatorConfig& config, int b) { - return config.doPairedDebugEnvs && (b % 2 == 1); -} - -bool shouldDrawDebugForEnv(const BatchedSimulatorConfig& config, int b, int substep) { - return isPairedDebugEnv(config, b) && (substep % config.numSubsteps == 0); -} -bool shouldAddColumnGridDebugVisualsForEnv(const BatchedSimulatorConfig& config, int b) { - return isPairedDebugEnv(config, b); -} -bool disableRobotVisualsForEnv(const BatchedSimulatorConfig& config, int b) { - return isPairedDebugEnv(config, b); -} -bool disableFreeObjectVisualsForEnv(const BatchedSimulatorConfig& config, int b) { - return isPairedDebugEnv(config, b); -} -bool disableStageVisualsForEnv(const BatchedSimulatorConfig& config, int b) { - return isPairedDebugEnv(config, b); -} -#else -constexpr bool isPairedDebugEnv(const BatchedSimulatorConfig&, int) { - return false; -} -constexpr bool shouldDrawDebugForEnv(const BatchedSimulatorConfig&, int, int) { - return false; -} -constexpr bool shouldAddColumnGridDebugVisualsForEnv(const BatchedSimulatorConfig&, int) { - return false; -} -constexpr bool disableRobotVisualsForEnv(const BatchedSimulatorConfig&, int) { - return false; -} -constexpr bool disableFreeObjectVisualsForEnv(const BatchedSimulatorConfig&, int) { - return false; -} -constexpr bool disableStageVisualsForEnv(const BatchedSimulatorConfig&, int) { - return false; // false; -} -#endif - } // namespace @@ -163,13 +122,8 @@ RobotInstanceSet::RobotInstanceSet(Robot* robot, auto instanceBlueprint = robot_->sceneMapping_->findInstanceBlueprint(nodeName); - if (!disableRobotVisualsForEnv(*config_, b)) { - instanceId = env.addInstance(instanceBlueprint.meshIdx_, - instanceBlueprint.mtrlIdx_, identityGlMat_); - } else { - // sloppy; avoid leaving as -1 because that indicates no visual model attached to this link - instanceId = -2; - } + instanceId = env.addInstance(instanceBlueprint.meshIdx_, + instanceBlueprint.mtrlIdx_, identityGlMat_); } else { // these are camera links // sloppy: we 
should avoid having these items in the nodeInstanceIds_ @@ -222,7 +176,6 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF BATCHED_SIM_ASSERT(updateForPhysics || updateForRender); - // esp::gfx::replay::Keyframe debugKeyframe; int numLinks = robots_.robot_->artObj->getNumLinks(); int numNodes = numLinks + 1; int numEnvs = config_.numEnvs; @@ -493,7 +446,7 @@ void BatchedSimulator::updateGripping() { envState.drop_height = NAN; envState.did_grasp = false; - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { const auto& gripperMat = robotInstance.cachedGripperLinkMat_; auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); @@ -681,15 +634,14 @@ void BatchedSimulator::updateCollision() { robots_.areCollisionResultsValid_ = true; -#ifdef ENABLE_DEBUG_INSTANCES - std::vector sphereHits(robot_.numCollisionSpheres_ * numEnvs, false); - std::vector heldObjectHits(numEnvs, false); - std::vector freeObjectHits(episodeSet_.maxFreeObjects_, false); -#else std::vector sphereHits; std::vector heldObjectHits; std::vector freeObjectHits; -#endif + if (config_.numDebugEnvs > 0) { + sphereHits.resize(robot_.numCollisionSpheres_ * config_.numDebugEnvs, false); + heldObjectHits.resize(config_.numDebugEnvs, false); + freeObjectHits.resize(episodeSet_.maxFreeObjects_, false); + } for (int b = 0; b < numEnvs; b++) { @@ -715,18 +667,12 @@ void BatchedSimulator::updateCollision() { bool thisSphereHit = columnGridSet.contactTest(radiusIdx, spherePos, &queryCache); - // If pairedDebugEnv, we want to visualize all collision sphere hits, not just the - // first one. This is misleading debug visualization, since we actually early-out - // after the first hit. 
- // (my reasoning is that it will be confusing to casual viewers to see *any* green - // spheres in collision) if (thisSphereHit) { hit = true; - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { sphereHits[baseSphereIndex + s] = thisSphereHit; - } else { - break; } + break; } } @@ -743,11 +689,10 @@ void BatchedSimulator::updateCollision() { // todo: proper debug-drawing of hits for held object spheres if (thisSphereHit) { hit = true; - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { heldObjectHits[b] = thisSphereHit; - } else { - break; } + break; } } } @@ -762,7 +707,7 @@ void BatchedSimulator::updateCollision() { continue; } - if (robots_.collisionResults_[b] && !shouldDrawDebugForEnv(config_, b, substep_)) { + if (robots_.collisionResults_[b]) { // already had a hit against column grid so don't test free objects continue; } @@ -784,12 +729,11 @@ void BatchedSimulator::updateCollision() { int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(spherePos, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { sphereHits[baseSphereIndex + s] = true; freeObjectHits[hitFreeObjectIndex] = true; - } else { - break; } + break; } } @@ -807,18 +751,17 @@ void BatchedSimulator::updateCollision() { int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(sphereWorldOrigin, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { heldObjectHits[b] = true; freeObjectHits[hitFreeObjectIndex] = true; - } else { - break; } + break; } } } // render free objects to debug env, colored by collision result - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { if 
(episodeInstance.colGrid_.isObstacleDisabled(freeObjectIndex)) { continue; @@ -842,7 +785,6 @@ void BatchedSimulator::updateCollision() { addSphereDebugInstance("sphere_blue_wireframe", b, sphereWorldOrigin, sphereRadius); } #endif - freeObjectHits[freeObjectIndex] = false; // clear for next env } @@ -858,7 +800,7 @@ void BatchedSimulator::updateCollision() { } // render collision spheres for debug env, colored by collision result - if (shouldDrawDebugForEnv(config_, b, substep_)) { + if (b < config_.numDebugEnvs) { const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const auto& robotInstance = robots_.robotInstances_[b]; @@ -963,27 +905,23 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { continue; } - if (!disableRobotVisualsForEnv(config_, b)) { - const auto glMat = toGlmMat4x3(safeVectorGet(robots_.nodeNewTransforms_, instanceIndex)); - env.updateInstanceTransform(instanceId, glMat); - } + const auto glMat = toGlmMat4x3(safeVectorGet(robots_.nodeNewTransforms_, instanceIndex)); + env.updateInstanceTransform(instanceId, glMat); } } // update gripped free object if (didRobotMove && robotInstance.grippedFreeObjectIndex_ != -1) { - if (!disableFreeObjectVisualsForEnv(config_, b)) { - int freeObjectIndex = robotInstance.grippedFreeObjectIndex_; - auto mat = getHeldObjectTransform(b); - glm::mat4x3 glMat = toGlmMat4x3(mat); - int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); - env.updateInstanceTransform(instanceId, glMat); + int freeObjectIndex = robotInstance.grippedFreeObjectIndex_; + auto mat = getHeldObjectTransform(b); + glm::mat4x3 glMat = toGlmMat4x3(mat); + int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + env.updateInstanceTransform(instanceId, glMat); - auto& envState = safeVectorGet(pythonEnvStates_, b); - safeVectorGet(envState.obj_positions, freeObjectIndex) = 
mat.translation(); - safeVectorGet(envState.obj_rotations, freeObjectIndex) = Mn::Quaternion::fromMatrix(mat.rotation()); - } + auto& envState = safeVectorGet(pythonEnvStates_, b); + safeVectorGet(envState.obj_positions, freeObjectIndex) = mat.translation(); + safeVectorGet(envState.obj_rotations, freeObjectIndex) = Mn::Quaternion::fromMatrix(mat.rotation()); } } @@ -1232,10 +1170,9 @@ void Robot::updateFromSerializeCollection(const serialize::Collection& serialize } BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeColor, - const CameraSensorConfig& sensor0) { - glm::u32vec2 out_dim( - sensor0.width, - sensor0.height); // see also rollout_test.py, python/rl/agent.py + const CameraSensorConfig& sensor) { + ESP_CHECK(sensor.width > 0 && sensor.height > 0, "BpsWrapper: invalid sensor width=" + << sensor.width << " or height=" << sensor.height); BATCHED_SIM_ASSERT(gpuId != -1); bps3D::RenderMode mode {}; @@ -1247,7 +1184,7 @@ BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeCo } renderer_ = std::make_unique(bps3D::RenderConfig{ - gpuId, 1, uint32_t(numEnvs), out_dim.x, out_dim.y, false, mode}); + gpuId, 1, uint32_t(numEnvs), (unsigned int)sensor.width, (unsigned int)sensor.height, false, mode}); loader_ = std::make_unique(renderer_->makeLoader()); const std::string filepath = @@ -1257,7 +1194,7 @@ BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeCo const Mn::Vector3 camPos(Mn::Math::ZeroInit); const Mn::Quaternion camRot(Mn::Math::IdentityInit); glm::mat4 world_to_camera(glm::inverse(toGlmMat4(camPos, camRot))); - float aspectRatio = float(sensor0.width) / float(sensor0.height); + float aspectRatio = float(sensor.width) / float(sensor.height); for (int b = 0; b < numEnvs; b++) { glm::mat4 view = world_to_camera; @@ -1278,6 +1215,9 @@ BpsWrapper::~BpsWrapper() { } BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { + + ESP_CHECK(config.numDebugEnvs <= 
config.numEnvs, "BatchedSimulator: numDebugEnvs must be <= numEnvs"); + config_ = config; const int numEnvs = config_.numEnvs; @@ -1288,10 +1228,14 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { bpsWrapper_ = std::make_unique(config_.gpuId, config_.numEnvs, config_.includeDepth, config_.includeColor, config_.sensor0); + if (config_.numDebugEnvs > 0) { + debugBpsWrapper_ = std::make_unique(config_.gpuId, config_.numDebugEnvs, + /*includeDepth*/false, /*includeColor*/true, config_.debugSensor); + } -#ifdef ENABLE_DEBUG_INSTANCES - debugInstancesByEnv_.resize(config_.numEnvs); -#endif + if (config_.numDebugEnvs > 0) { + debugInstancesByEnv_.resize(config_.numDebugEnvs); + } pythonEnvStates_.resize(numEnvs); initEpisodeSet(); @@ -1336,7 +1280,7 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { std::string cameraAttachLinkName = "torso_lift_link"; Mn::Vector3 camPos = {-0.536559, 1.16173, 0.568379}; Mn::Quaternion camRot = {{-0.26714, -0.541109, -0.186449}, 0.775289}; - setRobotCamera(cameraAttachLinkName, camPos, camRot, /*hfov*/60.f); + setCamera("sensor0", camPos, camRot, /*hfov*/60.f, cameraAttachLinkName); } if (config_.doAsyncPhysicsStep) { @@ -1374,6 +1318,12 @@ bps3D::Environment& BatchedSimulator::getBpsEnvironment(int envIndex) { return bpsWrapper_->envs_[envIndex]; } +bps3D::Environment& BatchedSimulator::getDebugBpsEnvironment(int envIndex) { + BATCHED_SIM_ASSERT(config_.numDebugEnvs > 0); + BATCHED_SIM_ASSERT(envIndex < config_.numDebugEnvs); + return debugBpsWrapper_->envs_[envIndex]; +} + // one-time init for envs void BatchedSimulator::initEpisodeInstances() { @@ -1407,30 +1357,27 @@ void BatchedSimulator::clearEpisodeInstance(int b) { return; // nothing to do } -#ifdef ENABLE_DEBUG_INSTANCES - for (auto& instanceId : episodeInstance.persistentDebugInstanceIds_) { - env.deleteInstance(instanceId); - } - episodeInstance.persistentDebugInstanceIds_.clear(); -#endif - - if 
(!disableFreeObjectVisualsForEnv(config_, b)) { - // Remove free object bps instances **in reverse order**. This is so bps3D will later - // allocate us new instance IDs (from its free list) in ascending order (see assert - // in spawnFreeObject). - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - for (int freeObjectIndex = episode.numFreeObjectSpawns_ - 1; freeObjectIndex >= 0; freeObjectIndex--) { - int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + if (b < config_.numDebugEnvs) { + for (auto& instanceId : episodeInstance.persistentDebugInstanceIds_) { + auto& env = getDebugBpsEnvironment(b); env.deleteInstance(instanceId); } + episodeInstance.persistentDebugInstanceIds_.clear(); + } + + // Remove free object bps instances **in reverse order**. This is so bps3D will later + // allocate us new instance IDs (from its free list) in ascending order (see assert + // in spawnFreeObject). + const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + for (int freeObjectIndex = episode.numFreeObjectSpawns_ - 1; freeObjectIndex >= 0; freeObjectIndex--) { + int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + env.deleteInstance(instanceId); } episodeInstance.firstFreeObjectInstanceId_ = -1; - if (!disableStageVisualsForEnv(config_, b)) { - // remove stage bps instance - env.deleteInstance(episodeInstance.stageFixedObjectInstanceId_); - } + // remove stage bps instance + env.deleteInstance(episodeInstance.stageFixedObjectInstanceId_); // remove all free objects from collision grid episodeInstance.colGrid_.removeAllObstacles(); @@ -1438,6 +1385,8 @@ void BatchedSimulator::clearEpisodeInstance(int b) { void BatchedSimulator::resetEpisodeInstance(int b) { //ProfilingScope scope("BSim resetEpisodeInstance"); + ESP_CHECK(resets_[b] >= 0 && resets_[b] < getNumEpisodes(), "resetEpisodeInstance: episode_idx=" + << resets_[b] << " is invalid for getNumEpisodes()=" << 
getNumEpisodes()); auto& env = getBpsEnvironment(b); @@ -1456,10 +1405,8 @@ void BatchedSimulator::resetEpisodeInstance(int b) { { const auto& stageBlueprint = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex).instanceBlueprint_; - if (!disableStageVisualsForEnv(config_, b)) { - episodeInstance.stageFixedObjectInstanceId_ = env.addInstance( - stageBlueprint.meshIdx_, stageBlueprint.mtrlIdx_, identityGlMat_); - } + episodeInstance.stageFixedObjectInstanceId_ = env.addInstance( + stageBlueprint.meshIdx_, stageBlueprint.mtrlIdx_, identityGlMat_); } auto& envState = safeVectorGet(pythonEnvStates_, b); @@ -1541,7 +1488,7 @@ void BatchedSimulator::resetEpisodeInstance(int b) { envState.did_grasp = false; } - if (shouldAddColumnGridDebugVisualsForEnv(config_, b)) { + if (b < config_.numDebugEnvs) { debugRenderColumnGrids(b); } } @@ -1646,15 +1593,13 @@ void BatchedSimulator::spawnFreeObject(int b, int freeObjectIndex, bool reinsert Mn::Matrix4 mat = Mn::Matrix4::from( rotation.toMatrix(), freeObjectSpawn.startPos_); glm::mat4x3 glMat = toGlmMat4x3(mat); - if (!disableFreeObjectVisualsForEnv(config_, b)) { - int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); - // store the first free object's bps instanceId and assume the rest will be contiguous - if (freeObjectIndex == 0) { - BATCHED_SIM_ASSERT(episodeInstance.firstFreeObjectInstanceId_ == -1); - episodeInstance.firstFreeObjectInstanceId_ = instanceId; - } - BATCHED_SIM_ASSERT(instanceId == getFreeObjectBpsInstanceId(b, freeObjectIndex)); + int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); + // store the first free object's bps instanceId and assume the rest will be contiguous + if (freeObjectIndex == 0) { + BATCHED_SIM_ASSERT(episodeInstance.firstFreeObjectInstanceId_ == -1); + episodeInstance.firstFreeObjectInstanceId_ = instanceId; } + BATCHED_SIM_ASSERT(instanceId == getFreeObjectBpsInstanceId(b, freeObjectIndex)); } if (!reinsert) { @@ 
-1693,14 +1638,12 @@ void BatchedSimulator::reinsertFreeObject(int b, int freeObjectIndex, auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); episodeInstance.colGrid_.reinsertObstacle(freeObjectIndex, pos, rotation); - if (!disableFreeObjectVisualsForEnv(config_, b)) { - // sloppy quat to Matrix3x3 - Mn::Matrix4 mat = Mn::Matrix4::from( - rotation.toMatrix(), pos); - glm::mat4x3 glMat = toGlmMat4x3(mat); - int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); - env.updateInstanceTransform(instanceId, glMat); - } + // sloppy quat to Matrix3x3 + Mn::Matrix4 mat = Mn::Matrix4::from( + rotation.toMatrix(), pos); + glm::mat4x3 glMat = toGlmMat4x3(mat); + int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + env.updateInstanceTransform(instanceId, glMat); auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = pos; @@ -1768,18 +1711,6 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto } else { std::fill(resets_.begin(), resets_.end(), -1); } - - if (config_.doPairedDebugEnvs) { - // copy actions from non-debug env to its paired debug env - // every other env, see isPairedDebugEnv - for (int b = 1; b < numEnvs; b += 2) { - BATCHED_SIM_ASSERT(isPairedDebugEnv(config_, b)); - for (int actionIdx = 0; actionIdx < actionDim_; actionIdx++) { - actions_[b * actionDim_ + actionIdx] = actions_[(b - 1) * actionDim_ + actionIdx]; - } - resets_[b] = resets_[b - 1]; - } - } } void BatchedSimulator::reset(std::vector&& resets) { @@ -2016,6 +1947,40 @@ bool BatchedSimulator::isPhysicsThreadActive() const { } +void BatchedSimulator::enableDebugSensor(bool enable) { + BATCHED_SIM_ASSERT(!isRenderStarted_); + enableDebugSensor_ = enable; +} + +void BatchedSimulator::setCamera(const std::string& sensorName, const Mn::Vector3& pos, + const Mn::Quaternion& rotation, float hfov, const std::string& attachLinkName) { + + ESP_CHECK(sensorName == "sensor0" || 
sensorName == "debug", + "setCamera: sensorName must be \"sensor0\" or \"debug\""); + if (!config_.numDebugEnvs > 0) { + ESP_CHECK(sensorName != "debug", "setCamera: you must set BatchedSimulatorConfig::numDebugEnvs > 0 in order to set the debug camera"); + } + + int nodeIndex = -1; + if (!attachLinkName.empty()) { + ESP_CHECK(robot_.linkIndexByName_.count(attachLinkName), + "setCamera: invalid attachLinkName=" << attachLinkName + << ". Check your robot URDF for valid link names."); + int linkIndex = robot_.linkIndexByName_[attachLinkName]; + nodeIndex = linkIndex + 1; + } + + const bool isDebug = sensorName == "debug"; + Camera& cam = isDebug ? debugCam_ : mainCam_; + + cam = Camera{ + .attachNodeIndex = nodeIndex, + .transform = Mn::Matrix4::from(rotation.toMatrix(), pos), + .hfov = hfov + }; +}; + +#if 0 void BatchedSimulator::setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov) { BATCHED_SIM_ASSERT(robot_.linkIndexByName_.count(linkName)); @@ -2032,37 +1997,44 @@ void BatchedSimulator::setFreeCamera(const Mn::Vector3& pos, const Mn::Quaternio freeCameraTransform_ = Mn::Matrix4::from(rotation.toMatrix(), pos); cameraHfov_ = hfov; } +#endif -void BatchedSimulator::setBpsCameraHelper(int b, const glm::mat4& glCameraInvTransform, float hfov) { - float aspectRatio = float(config_.sensor0.width) / float(config_.sensor0.height); +void BatchedSimulator::setBpsCameraHelper(bool isDebug, int b, const glm::mat4& glCameraInvTransform, float hfov) { + + const CameraSensorConfig& sensor = isDebug ? config_.debugSensor : config_.sensor0; + float aspectRatio = float(sensor.width) / float(sensor.height); BATCHED_SIM_ASSERT(hfov > 0.f && hfov < 180.f); constexpr float near = 0.01; constexpr float far = 1000.f; - auto& env = safeVectorGet(bpsWrapper_->envs_, b); + auto& env = isDebug + ? 
getDebugBpsEnvironment(b) + : getBpsEnvironment(b); env.setCamera(glCameraInvTransform, hfov, aspectRatio, near, far); } -void BatchedSimulator::updateBpsCameras() { +void BatchedSimulator::updateBpsCameras(bool isDebug) { - if (robot_.cameraAttachNode_ == -1) { + const Camera& cam = isDebug ? debugCam_ : mainCam_; + const int numEnvs = isDebug ? config_.numDebugEnvs : config_.numEnvs; - BATCHED_SIM_ASSERT(freeCameraTransform_); - glm::mat4 world_to_camera(glm::inverse(toGlmMat4(*freeCameraTransform_))); + if (cam.attachNodeIndex == -1) { - for (int b = 0; b < config_.numEnvs; b++) { - setBpsCameraHelper(b, world_to_camera, cameraHfov_); + glm::mat4 world_to_camera(glm::inverse(toGlmMat4(cam.transform))); + + for (int b = 0; b < numEnvs; b++) { + setBpsCameraHelper(isDebug, b, world_to_camera, cam.hfov); } } else { const int numLinks = robots_.robot_->artObj->getNumLinks(); const int numNodes = numLinks + 1; - const auto& cameraAttachTransform = robot_.cameraAttachTransform_; - const int cameraAttachNode = robot_.cameraAttachNode_; + const auto& cameraAttachTransform = cam.transform; + const int cameraAttachNode = cam.attachNodeIndex; - for (int b = 0; b < config_.numEnvs; b++) { + for (int b = 0; b < numEnvs; b++) { const int baseInstanceIndex = b * numNodes; const auto instanceIndex = baseInstanceIndex + cameraAttachNode; @@ -2071,7 +2043,7 @@ void BatchedSimulator::updateBpsCameras() { auto cameraTransform = cameraAttachNodeTransform * cameraAttachTransform; auto glCameraNewInvTransform = glm::inverse(toGlmMat4(cameraTransform)); - setBpsCameraHelper(b, glCameraNewInvTransform, cameraHfov_); + setBpsCameraHelper(isDebug, b, glCameraNewInvTransform, cam.hfov); } } } @@ -2081,9 +2053,14 @@ void BatchedSimulator::startRender() { ProfilingScope scope("start render"); BATCHED_SIM_ASSERT(isOkToRender_); - updateBpsCameras(); - + updateBpsCameras(/*isDebug*/false); bpsWrapper_->renderer_->render(bpsWrapper_->envs_.data()); + + if (config_.numDebugEnvs > 0 && 
enableDebugSensor_) { + updateBpsCameras(/*isDebug*/true); + debugBpsWrapper_->renderer_->render(debugBpsWrapper_->envs_.data()); + } + isOkToRender_ = false; isRenderStarted_ = true; } @@ -2092,6 +2069,9 @@ void BatchedSimulator::waitRender() { ProfilingScope scope("wait for GPU render"); BATCHED_SIM_ASSERT(isRenderStarted_); bpsWrapper_->renderer_->waitForFrame(); + if (config_.numDebugEnvs > 0 && enableDebugSensor_) { + debugBpsWrapper_->renderer_->waitForFrame(); + } isRenderStarted_ = false; isOkToRender_ = true; } @@ -2101,6 +2081,11 @@ bps3D::Renderer& BatchedSimulator::getBpsRenderer() { return *bpsWrapper_->renderer_.get(); } +bps3D::Renderer& BatchedSimulator::getDebugBpsRenderer() { + BATCHED_SIM_ASSERT(config_.numDebugEnvs > 0); + BATCHED_SIM_ASSERT(debugBpsWrapper_->renderer_.get()); + return *debugBpsWrapper_->renderer_.get(); +} RolloutRecord::RolloutRecord(int numRolloutSubsteps, int numEnvs, @@ -2118,36 +2103,34 @@ RolloutRecord::RolloutRecord(int numRolloutSubsteps, } void BatchedSimulator::deleteDebugInstances() { -#ifdef ENABLE_DEBUG_INSTANCES - const int numEnvs = config_.numEnvs; - for (int b = 0; b < numEnvs; b++) { + if (config_.numDebugEnvs > 0) { + const int numEnvs = config_.numDebugEnvs; + for (int b = 0; b < numEnvs; b++) { - auto& env = bpsWrapper_->envs_[b]; - for (int instanceId : debugInstancesByEnv_[b]) { + auto& env = getDebugBpsEnvironment(b); + for (int instanceId : safeVectorGet(debugInstancesByEnv_, b)) { - env.deleteInstance(instanceId); - } + env.deleteInstance(instanceId); + } - debugInstancesByEnv_[b].clear(); + safeVectorGet(debugInstancesByEnv_, b).clear(); + } } -#endif } int BatchedSimulator::addDebugInstance(const std::string& name, int envIndex, const Magnum::Matrix4& transform, bool persistent) { -#ifdef ENABLE_DEBUG_INSTANCES + BATCHED_SIM_ASSERT(config_.numDebugEnvs > 0); + glm::mat4x3 glMat = toGlmMat4x3(transform); const auto& blueprint = sceneMapping_.findInstanceBlueprint(name); BATCHED_SIM_ASSERT(envIndex < 
config_.numEnvs); - auto& env = getBpsEnvironment(envIndex); + auto& env = getDebugBpsEnvironment(envIndex); int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); if (!persistent) { - debugInstancesByEnv_[envIndex].push_back(instanceId); + safeVectorGet(debugInstancesByEnv_, envIndex).push_back(instanceId); } return instanceId; -#else - return -1; -#endif } std::string BatchedSimulator::getRecentStatsAndReset() const { @@ -2246,9 +2229,9 @@ void BatchedSimulator::physicsThreadFunc(int startEnvIndex, int numEnvs) { void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxProgress) { -#ifdef ENABLE_DEBUG_INSTANCES + BATCHED_SIM_ASSERT(b < config_.numDebugEnvs); - auto& env = safeVectorGet(bpsWrapper_->envs_, b); + auto& env = getDebugBpsEnvironment(b); auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); @@ -2301,7 +2284,6 @@ void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxPro } } } -#endif } void BatchedSimulator::reloadSerializeCollection() { diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 6a05768d6d..ee00c00a8b 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -27,18 +27,25 @@ struct CameraSensorConfig { int height = -1; }; +struct Camera { + int attachNodeIndex = -1; // -1 indicates free camera (not parented to node) + Magnum::Matrix4 transform = Magnum::Matrix4(Magnum::Math::IdentityInit); + float hfov = -1.f; // degrees +}; + struct BatchedSimulatorConfig { int numEnvs = -1; int gpuId = -1; bool includeDepth = true; bool includeColor = true; CameraSensorConfig sensor0; + int numDebugEnvs = 0; + CameraSensorConfig debugSensor; bool forceRandomActions = false; bool 
doAsyncPhysicsStep = false; int numSubsteps = 1; // if enabled, for every odd env, we don't render any visuals except debug stuff, // e.g. collision visualization. We should keep even and odd envs in sync (e.g. same actions). - bool doPairedDebugEnvs = false; bool doProceduralEpisodeSet = true; // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) std::string episodeSetFilepath; @@ -113,8 +120,6 @@ struct Robot { float gripperQueryRadius_ = 0.f; std::unordered_map linkIndexByName_; - int cameraAttachNode_ = -1; // beware off-by-one with links - Magnum::Matrix4 cameraAttachTransform_; }; // This is misnamed and isn't actually used to store entire rollouts. It contains @@ -199,8 +204,12 @@ class BatchedSimulator { return serializeCollection_; } - void setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov); - void setFreeCamera(const Magnum::Vector3& pos, const Magnum::Quaternion& rotation, float hfov); + void enableDebugSensor(bool enable); + + // void setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov); + // void setFreeCamera(const Magnum::Vector3& pos, const Magnum::Quaternion& rotation, float hfov); + void setCamera(const std::string& sensorName, const Mn::Vector3& pos, + const Mn::Quaternion& rotation, float hfov, const std::string& attachLinkName); void startRender(); void waitRender(); @@ -212,8 +221,10 @@ class BatchedSimulator { void waitStepPhysicsOrReset(); bps3D::Renderer& getBpsRenderer(); + bps3D::Renderer& getDebugBpsRenderer(); bps3D::Environment& getBpsEnvironment(int envIndex); + bps3D::Environment& getDebugBpsEnvironment(int envIndex); void deleteDebugInstances(); // probably call at start of frame render // probably add these every frame ("immediate mode", not persistent) @@ -258,8 +269,8 @@ class BatchedSimulator { void updateGripping(); void resetHelper(); void updatePythonEnvironmentState(); - void 
updateBpsCameras(); - void setBpsCameraHelper(int b, const glm::mat4& glCameraInvTransform, float hfov); + void updateBpsCameras(bool isDebug); + void setBpsCameraHelper(bool isDebug, int b, const glm::mat4& glCameraInvTransform, float hfov); // uses episode spawn location void spawnFreeObject(int b, int freeObjectIndex, bool reinsert); @@ -285,6 +296,7 @@ class BatchedSimulator { BatchedSimulatorConfig config_; serialize::Collection serializeCollection_; + bool enableDebugSensor_ = false; bool isOkToRender_ = false; bool isOkToStep_ = false; bool isRenderStarted_ = false; @@ -296,13 +308,14 @@ class BatchedSimulator { RolloutRecord rollouts_; std::unique_ptr legacySim_; std::unique_ptr bpsWrapper_; + std::unique_ptr debugBpsWrapper_; int actionDim_ = -1; std::vector actions_; std::vector resets_; // episode index, or -1 if not resetting int maxStorageSteps_ = -1; std::vector pythonEnvStates_; - Corrade::Containers::Optional freeCameraTransform_; - float cameraHfov_ = -1.f; + Camera mainCam_; + Camera debugCam_; EpisodeSet episodeSet_; EpisodeInstanceSet episodeInstanceSet_; diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 34a2a6d607..7d11d50d64 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -21,6 +21,10 @@ py::capsule getColorMemory(BatchedSimulator& bsim, const uint32_t groupIdx) { return py::capsule(bsim.getBpsRenderer().getColorPointer(groupIdx)); } +py::capsule getDebugColorMemory(BatchedSimulator& bsim, const uint32_t groupIdx) { + return py::capsule(bsim.getDebugBpsRenderer().getColorPointer(groupIdx)); +} + py::capsule getDepthMemory(BatchedSimulator& bsim, const uint32_t groupIdx) { return py::capsule(bsim.getBpsRenderer().getDepthPointer(groupIdx)); @@ -41,6 +45,8 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("include_depth", &BatchedSimulatorConfig::includeDepth, R"(Todo)") .def_readwrite("include_color", 
&BatchedSimulatorConfig::includeColor, R"(Todo)") .def_readwrite("sensor0", &BatchedSimulatorConfig::sensor0, R"(Todo)") + .def_readwrite("num_debug_envs", &BatchedSimulatorConfig::numDebugEnvs, R"(Todo)") + .def_readwrite("debug_sensor", &BatchedSimulatorConfig::debugSensor, R"(Todo)") .def_readwrite("force_random_actions", &BatchedSimulatorConfig::forceRandomActions, R"(Todo)") .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") @@ -78,10 +84,10 @@ void initBatchedSimBindings(py::module& m) { &BatchedSimulator::getNumEpisodes, R"(todo)") .def("get_num_actions", &BatchedSimulator::getNumActions, R"(todo)") - .def("set_robot_camera", - &BatchedSimulator::setRobotCamera, R"(todo)") - .def("set_free_camera", - &BatchedSimulator::setFreeCamera, R"(todo)") + .def("set_camera", + &BatchedSimulator::setCamera, R"(todo)") + .def("enable_debug_sensor", + &BatchedSimulator::enableDebugSensor, R"(todo)") .def("reset", &BatchedSimulator::reset, R"(todo)") .def("get_environment_states", @@ -105,6 +111,12 @@ void initBatchedSimBindings(py::module& m) { return getDepthMemory(self, groupIdx); }, R"(todo)") + .def( + "debug_rgba", + [](BatchedSimulator& self, const uint32_t groupIdx) { + return getDebugColorMemory(self, groupIdx); + }, + R"(todo)") .def("get_recent_stats_and_reset", &BatchedSimulator::getRecentStatsAndReset, R"(todo)"); } diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index a47fb5ad20..d637c4b428 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -198,7 +198,7 @@ TEST_F(BatchedSimulatorTest, basic) { constexpr bool doFreeCam = false; bool doTuneRobotCam = false; constexpr bool doSaveAllFramesForVideo = false; // see make_video_from_image_files.py - constexpr bool doPairedDebugEnvs = true; + constexpr bool includeDebugSensor = true; constexpr bool includeDepth = false; constexpr 
bool includeColor = true; constexpr float cameraHfov = 70.f; @@ -206,14 +206,15 @@ TEST_F(BatchedSimulatorTest, basic) { const bool forceRandomActions = doFreeCam || doTuneRobotCam; BatchedSimulatorConfig config{ - .numEnvs = 1, .gpuId = 0, + .numEnvs = 2, .gpuId = 0, .includeDepth = includeDepth, .includeColor = includeColor, .sensor0 = {.width = 768, .height = 768}, + .numDebugEnvs = 1, + .debugSensor {.width = 512, .height = 512}, .forceRandomActions = forceRandomActions, .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, - .doPairedDebugEnvs = doPairedDebugEnvs, .doProceduralEpisodeSet = true, //.doProceduralEpisodeSet = false, //.episodeSetFilepath = "generated.episode_set.json", @@ -224,6 +225,8 @@ TEST_F(BatchedSimulatorTest, basic) { Mn::Quaternion camRot; std::string cameraAttachLinkName; + bsim.enableDebugSensor(true); + if (!doFreeCam) { // over-the-shoulder cam // cameraAttachLinkName = "torso_lift_link"; @@ -234,7 +237,8 @@ TEST_F(BatchedSimulatorTest, basic) { cameraAttachLinkName = "torso_lift_link"; camPos = {-0.536559, 1.16173, 0.568379}; camRot = {{-0.26714, -0.541109, -0.186449}, 0.775289}; - bsim.setRobotCamera(cameraAttachLinkName, camPos, camRot, cameraHfov); + bsim.setCamera("sensor0", camPos, camRot, cameraHfov, cameraAttachLinkName); + bsim.setCamera("debug", camPos, camRot, cameraHfov, cameraAttachLinkName); } else { if (cameraHfov == 70.f) { camPos = {-1.61004, 1.5, 3.5455}; @@ -245,7 +249,8 @@ TEST_F(BatchedSimulatorTest, basic) { } else { CORRADE_INTERNAL_ASSERT_UNREACHABLE(); } - bsim.setFreeCamera(camPos, camRot, cameraHfov); + bsim.setCamera("sensor0", camPos, camRot, cameraHfov, ""); + bsim.setCamera("debug", camPos, camRot, cameraHfov, ""); } float moveSpeed = 1.f; @@ -319,24 +324,39 @@ TEST_F(BatchedSimulatorTest, basic) { // make a copy of envStates envStates = bsim.getEnvironmentStates(); - uint8_t* base_color_ptr = bsim.getBpsRenderer().getColorPointer(); - float* base_depth_ptr = bsim.getBpsRenderer().getDepthPointer(); 
- - // temp hack copied from BpsWrapper internals - glm::u32vec2 out_dim(config.sensor0.width, config.sensor0.height); + for (bool isDebug : {false, true}) { + + if (isDebug && !includeDebugSensor) { + continue; + } - for (int b = 0; b < config.numEnvs; b++) { - std::stringstream ss; - ss << "./env" << b << "_frame" - << std::setfill('0') << std::setw(4) << frameIdx << ".bmp"; - - saveFrame(doSaveAllFramesForVideo ? ss.str().c_str() : nullptr, - ("./latest_env" + std::to_string(b) + ".bmp").c_str(), - base_color_ptr + b * out_dim.x * out_dim.y * 4, out_dim.x, - out_dim.y, 4); - // saveFrame(("./out_depth_" + std::to_string(b) + ".bmp").c_str(), - // base_depth_ptr + b * out_dim.x * out_dim.y, out_dim.x, out_dim.y, - // 1); + uint8_t* base_color_ptr = isDebug + ? bsim.getDebugBpsRenderer().getColorPointer() + : bsim.getBpsRenderer().getColorPointer(); + // float* base_depth_ptr = isDebug + // ? nullptr + // : bsim.getBpsRenderer().getDepthPointer(); + glm::u32vec2 out_dim = isDebug + ? glm::u32vec2(config.debugSensor.width, config.debugSensor.height) + : glm::u32vec2(config.sensor0.width, config.sensor0.height); + + const int numEnvs = isDebug ? config.numDebugEnvs : config.numEnvs; + + for (int b = 0; b < numEnvs; b++) { + std::stringstream ss; + ss << (isDebug ? "./debugenv" : "./env") + << b << "_frame" + << std::setfill('0') << std::setw(4) << frameIdx << ".bmp"; + + saveFrame(doSaveAllFramesForVideo ? ss.str().c_str() : nullptr, + ((isDebug ? 
"./latest_debugenv" : "./latest_env") + + std::to_string(b) + ".bmp").c_str(), + base_color_ptr + b * out_dim.x * out_dim.y * 4, out_dim.x, + out_dim.y, 4); + // saveFrame(("./out_depth_" + std::to_string(b) + ".bmp").c_str(), + // base_depth_ptr + b * out_dim.x * out_dim.y, out_dim.x, out_dim.y, + // 1); + } } int key = 0; @@ -493,11 +513,13 @@ TEST_F(BatchedSimulatorTest, basic) { } if (doFreeCam) { - bsim.setFreeCamera(camPos, camRot, cameraHfov); + bsim.setCamera("sensor0", camPos, camRot, cameraHfov, ""); + bsim.setCamera("debug", camPos, camRot, cameraHfov, ""); } else { const auto cameraMat = Mn::Matrix4::from(camRot.toMatrix(), camPos); ESP_DEBUG() << "camPos: " << camPos << ", camRot: " << camRot; - bsim.setRobotCamera(cameraAttachLinkName, camPos, camRot, cameraHfov); + bsim.setCamera("sensor0", camPos, camRot, cameraHfov, cameraAttachLinkName); + bsim.setCamera("debug", camPos, camRot, cameraHfov, cameraAttachLinkName); } } @@ -515,7 +537,8 @@ TEST_F(BatchedSimulatorTest, basic) { // move camera slightly camPos.y() -= 0.01f; - bsim.setFreeCamera(camPos, camRot, cameraHfov); + bsim.setCamera("sensor0", camPos, camRot, cameraHfov, ""); + bsim.setCamera("debug", camPos, camRot, cameraHfov, ""); if (autoplayProgress >= animDuration) { isAutoplay = false; From b5438e33e9e85dc5ceca68387819967e0284d3c7 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Tue, 12 Apr 2022 18:13:49 -0400 Subject: [PATCH 25/53] release compile fix; add num_procedural_episodes config param --- src/esp/batched_sim/BatchedSimulator.cpp | 3 +-- src/esp/batched_sim/BatchedSimulator.h | 1 + src/esp/batched_sim/EpisodeSet.h | 2 -- src/esp/bindings/BatchedSimBindings.cpp | 1 + 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index b05a735846..fe6c02aa74 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -1659,8 +1659,7 @@ void 
BatchedSimulator::initEpisodeSet() { ESP_CHECK(config_.episodeSetFilepath.empty(), "For BatchedSimulatorConfig::doProceduralEpisodeSet==true, don't specify episodeSetFilepath"); - constexpr int numEpisodesToGenerate = 100; // arbitrary - episodeSet_ = generateBenchmarkEpisodeSet(numEpisodesToGenerate, sceneMapping_, serializeCollection_); + episodeSet_ = generateBenchmarkEpisodeSet(config_.numProceduralEpisodes, sceneMapping_, serializeCollection_); episodeSet_.saveToFile("../data/generated.episode_set.json"); } else { ESP_CHECK(!config_.episodeSetFilepath.empty(), diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index ee00c00a8b..970e94caf4 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -47,6 +47,7 @@ struct BatchedSimulatorConfig { // if enabled, for every odd env, we don't render any visuals except debug stuff, // e.g. collision visualization. We should keep even and odd envs in sync (e.g. same actions). 
bool doProceduralEpisodeSet = true; + int numProceduralEpisodes = 100; // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) std::string episodeSetFilepath; ESP_SMART_POINTERS(BatchedSimulatorConfig); diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 2f26f9dbad..c2858aea40 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -92,9 +92,7 @@ class EpisodeInstance { static constexpr int MAX_MOVED_FREE_OBJECTS = 6; // todo: better memory management int firstFreeObjectInstanceId_ = -1; - #ifndef NDEBUG // todo: hook up to a well-named preproc var std::vector persistentDebugInstanceIds_; - #endif }; class EpisodeInstanceSet { diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 7d11d50d64..54d8713def 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -51,6 +51,7 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") + .def_readwrite("num_procedural_episodes", &BatchedSimulatorConfig::numProceduralEpisodes, R"(Todo)") .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); py::class_( From d234759f68a45e9b896aef379614e307ef5646fe Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Thu, 14 Apr 2022 09:44:29 -0400 Subject: [PATCH 26/53] added EpisodeGeneratorConfig; fix for CollisionBroadphaseGrid::contactTest; improve PlacementHelper::place; support for procedural robot placement --- src/esp/batched_sim/BatchedSimulator.cpp | 8 +- src/esp/batched_sim/BatchedSimulator.h | 8 +- src/esp/batched_sim/CMakeLists.txt | 2 + .../batched_sim/CollisionBroadphaseGrid.cpp | 8 +- 
src/esp/batched_sim/CollisionBroadphaseGrid.h | 2 +- src/esp/batched_sim/EpisodeGenerator.cpp | 296 +++++++++++++ src/esp/batched_sim/EpisodeGenerator.h | 32 ++ src/esp/batched_sim/EpisodeSet.cpp | 411 ++++++------------ src/esp/batched_sim/EpisodeSet.h | 8 +- src/esp/batched_sim/GlmUtils.cpp | 8 + src/esp/batched_sim/GlmUtils.h | 2 + src/esp/batched_sim/PlacementHelper.cpp | 55 ++- src/esp/batched_sim/SerializeCollection.cpp | 2 +- src/esp/bindings/BatchedSimBindings.cpp | 14 +- src/tests/BatchedSimTest.cpp | 18 +- 15 files changed, 549 insertions(+), 325 deletions(-) create mode 100644 src/esp/batched_sim/EpisodeGenerator.cpp create mode 100644 src/esp/batched_sim/EpisodeGenerator.h diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index fe6c02aa74..e7d7398abf 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -28,8 +28,6 @@ namespace batched_sim { namespace { -static const char* serializeCollectionFilepath_ = "../data/replicacad_composite.collection.json"; - static Mn::Vector3 INVALID_VEC3 = Mn::Vector3(NAN, NAN, NAN); static constexpr glm::mat4x3 identityGlMat_ = glm::mat4x3( @@ -1224,7 +1222,7 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { sceneMapping_ = BpsSceneMapping::loadFromFile( "../data/bps_data/replicacad_composite/replicacad_composite.bps.mapping.json"); - serializeCollection_ = serialize::Collection::loadFromFile(serializeCollectionFilepath_); + serializeCollection_ = serialize::Collection::loadFromFile(config_.collectionFilepath); bpsWrapper_ = std::make_unique(config_.gpuId, config_.numEnvs, config_.includeDepth, config_.includeColor, config_.sensor0); @@ -1659,7 +1657,7 @@ void BatchedSimulator::initEpisodeSet() { ESP_CHECK(config_.episodeSetFilepath.empty(), "For BatchedSimulatorConfig::doProceduralEpisodeSet==true, don't specify episodeSetFilepath"); - episodeSet_ = 
generateBenchmarkEpisodeSet(config_.numProceduralEpisodes, sceneMapping_, serializeCollection_); + episodeSet_ = generateBenchmarkEpisodeSet(config_.episodeGeneratorConfig, sceneMapping_, serializeCollection_); episodeSet_.saveToFile("../data/generated.episode_set.json"); } else { ESP_CHECK(!config_.episodeSetFilepath.empty(), @@ -2289,7 +2287,7 @@ void BatchedSimulator::reloadSerializeCollection() { int numEnvs = bpsWrapper_->envs_.size(); - serializeCollection_ = serialize::Collection::loadFromFile(serializeCollectionFilepath_); + serializeCollection_ = serialize::Collection::loadFromFile(config_.collectionFilepath); robot_.updateFromSerializeCollection(serializeCollection_); diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 970e94caf4..8896f32489 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -10,6 +10,7 @@ #include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/BpsSceneMapping.h" #include "esp/batched_sim/EpisodeSet.h" +#include "esp/batched_sim/EpisodeGenerator.h" #include "esp/batched_sim/SerializeCollection.h" #include "esp/core/random.h" @@ -44,12 +45,13 @@ struct BatchedSimulatorConfig { bool forceRandomActions = false; bool doAsyncPhysicsStep = false; int numSubsteps = 1; - // if enabled, for every odd env, we don't render any visuals except debug stuff, - // e.g. collision visualization. We should keep even and odd envs in sync (e.g. same actions). 
+ bool enableRobotCollision = true; + bool enableHeldObjectCollision = true; bool doProceduralEpisodeSet = true; - int numProceduralEpisodes = 100; + EpisodeGeneratorConfig episodeGeneratorConfig; // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) std::string episodeSetFilepath; + std::string collectionFilepath = "../data/replicacad_composite.collection.json"; ESP_SMART_POINTERS(BatchedSimulatorConfig); }; diff --git a/src/esp/batched_sim/CMakeLists.txt b/src/esp/batched_sim/CMakeLists.txt index d2916e851e..a7a65e8fc0 100644 --- a/src/esp/batched_sim/CMakeLists.txt +++ b/src/esp/batched_sim/CMakeLists.txt @@ -14,6 +14,8 @@ add_library( CollisionBroadphaseGrid.h ColumnGridBuilder.cpp ColumnGridBuilder.h + EpisodeGenerator.cpp + EpisodeGenerator.h EpisodeSet.cpp EpisodeSet.h GlmUtils.cpp diff --git a/src/esp/batched_sim/CollisionBroadphaseGrid.cpp b/src/esp/batched_sim/CollisionBroadphaseGrid.cpp index 9143297ca9..d3ab5ed8e0 100644 --- a/src/esp/batched_sim/CollisionBroadphaseGrid.cpp +++ b/src/esp/batched_sim/CollisionBroadphaseGrid.cpp @@ -73,13 +73,11 @@ std::pair CollisionBroadphaseGrid::findSphereGridCellIndex(float x, fl // use the max corner of the sphere to do lookup const float halfCellFloatX = (x + maxSphereRadius_ - minX_) * invGridSpacing_ * 2.f; if (halfCellFloatX < 0.f || halfCellFloatX >= (float)(dimX_ * 2)) { - BATCHED_SIM_ASSERT(false); return std::make_pair(-1, -1); } const float halfCellFloatZ = (z + maxSphereRadius_ - minZ_) * invGridSpacing_ * 2.f; if (halfCellFloatZ < 0.f || halfCellFloatZ >= (float)(dimZ_ * 2)) { - BATCHED_SIM_ASSERT(false); return std::make_pair(-1, -1); } @@ -319,8 +317,12 @@ int CollisionBroadphaseGrid::contactTest(const Magnum::Vector3& spherePos, float BATCHED_SIM_ASSERT(sphereRadius <= maxSphereRadius_); + const auto pair = findSphereGridCellIndex(spherePos.x(), spherePos.z()); + if (pair.first == -1) { + return -1; + } + const auto& cell = getCell(pair); const float sphereRadiusSq = sphereRadius 
* sphereRadius; - const auto& cell = getCell(findSphereGridCellIndex(spherePos.x(), spherePos.z())); for (int i = 0; i < cell.numObstacles; i++) { int16_t obsIndex = cell.obstacles[i]; diff --git a/src/esp/batched_sim/CollisionBroadphaseGrid.h b/src/esp/batched_sim/CollisionBroadphaseGrid.h index 5138feb77e..ad836468f9 100644 --- a/src/esp/batched_sim/CollisionBroadphaseGrid.h +++ b/src/esp/batched_sim/CollisionBroadphaseGrid.h @@ -84,7 +84,7 @@ class CollisionBroadphaseGrid { //// private API exposed only for testing //// public: - static constexpr int MAX_OBSTACLES_PER_CELL = 12; + static constexpr int MAX_OBSTACLES_PER_CELL = 16; struct Cell { diff --git a/src/esp/batched_sim/EpisodeGenerator.cpp b/src/esp/batched_sim/EpisodeGenerator.cpp new file mode 100644 index 0000000000..0dd8e2156f --- /dev/null +++ b/src/esp/batched_sim/EpisodeGenerator.cpp @@ -0,0 +1,296 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+
+#include "esp/batched_sim/EpisodeGenerator.h"
+#include "esp/batched_sim/PlacementHelper.h"
+
+#include "esp/core/random.h"
+#include "esp/core/Check.h"
+
+namespace Cr = Corrade;
+namespace Mn = Magnum;
+
+namespace esp {
+namespace batched_sim {
+
+namespace {
+
+void addStageFixedObject(EpisodeSet& set, const std::string& name) {
+
+  FixedObject fixedObj;
+  fixedObj.name_ = name;
+  fixedObj.needsPostLoadFixup_ = true;
+
+  set.fixedObjects_.push_back(std::move(fixedObj));
+}
+
+void addFreeObject(EpisodeSet& set, const std::string& name) {
+
+  FreeObject freeObj;
+  freeObj.name_ = name;
+  freeObj.needsPostLoadFixup_ = true;
+
+  // all YCB objects need this to be upright
+  const auto baseRot = Mn::Quaternion::rotation(Mn::Deg(-90), Mn::Vector3(1.f, 0.f, 0.f));
+
+  constexpr int numRotationsAboutUpAxis = 32;
+  for (int i = 0; i < numRotationsAboutUpAxis; i++) {
+    const auto angle = Mn::Deg((float)i * 360.f / numRotationsAboutUpAxis);
+    const auto rotAboutUpAxis = Mn::Quaternion::rotation(angle, Mn::Vector3(0.f, 1.f, 0.f));
+    freeObj.startRotations_.push_back((rotAboutUpAxis * baseRot));
+  }
+
+  set.freeObjects_.emplace_back(std::move(freeObj));
+}
+
+FreeObject createFreeObjectProxyForRobot(const serialize::Collection& collection) {
+
+#if 0
+  const auto& serLink = safeVectorGet(safeVectorGet(collection.robots, 0).links, 0);
+  ESP_CHECK(serLink.linkName == "base_link", "createFreeObjectProxyForRobot failed");
+
+  FreeObject freeObject;
+
+  freeObject.aabb_ = Mn::Range3D({0.f, 0.f, 0.f}, {0.f, 0.f, 0.f});
+
+  for (const auto& serSphere : serLink.collisionSpheres) {
+    int radiusIdx = getCollisionRadiusIndex(collection, serSphere.radius);
+    freeObject.collisionSpheres_.push_back({serSphere.origin, radiusIdx});
+
+    freeObject.aabb_.min() = min(freeObject.aabb_.min(),
+      serSphere.origin - Mn::Vector3(serSphere.radius, serSphere.radius, serSphere.radius));
+    freeObject.aabb_.max() = max(freeObject.aabb_.max(),
+      serSphere.origin + Mn::Vector3(serSphere.radius, 
serSphere.radius, serSphere.radius)); + } +#endif + + FreeObject freeObject; + + freeObject.startRotations_.push_back(Mn::Quaternion(Mn::Math::IdentityInit)); + + auto it = std::find_if(collection.freeObjects.begin(), collection.freeObjects.end(), + [](const auto& item) { return item.name == "robotProxy"; }); + ESP_CHECK(it != collection.freeObjects.end(), "createFreeObjectProxyForRobot failed"); + + updateFromSerializeCollection(freeObject, *it, collection); + BATCHED_SIM_ASSERT(!freeObject.collisionSpheres_.empty()); + + return freeObject; +} + +void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, + const serialize::Collection& collection, int stageFixedObjectIndex, + core::Random& random, const FreeObject& robotProxy) { + Episode episode; + episode.stageFixedObjIndex = stageFixedObjectIndex; + episode.firstFreeObjectSpawnIndex_ = set.freeObjectSpawns_.size(); + + if (config.useFixedRobotStartPos) { + episode.agentStartPos_ = Mn::Vector2(2.59f, 0.f); + episode.agentStartYaw_ = config.useFixedRobotStartYaw + ? 
-float(Mn::Rad(Mn::Deg(90.f))) + : random.uniform_float(-float(Mn::Rad(Mn::Deg(135.f))), -float(Mn::Rad(Mn::Deg(45.f)))); + } else { + // set to NAN for now and we'll find a robot start pos later in here + episode.agentStartPos_ = Mn::Vector2(NAN, NAN); + } + + // 1 spawn for target object, plus nontarget objects + constexpr int numTargetObjectSpawns = 1; + int numObjectSpawns = numTargetObjectSpawns + + random.uniform_int(config.minNontargetObjects, config.maxNontargetObjects + 1); + episode.numFreeObjectSpawns_ = 0; + + // good for area around staircase and living room + constexpr float allowedSnapDown = 0.05f; + Mn::Range3D objectSpawnRange({-1.f, 0.2f, -0.5f}, {4.f, 2.f, 3.f}); // above the floor + Mn::Range3D robotSpawnRange({0.f, 0.05f, 0.5f}, {3.f, 0.05f, 2.f}); // just above the floor + + // good for white bookshelf for stage 5 + // Mn::Range3D objectSpawnRange({0.33f, 0.15f, -0.4f}, {1.18f, 1.85f, -0.25f}); + + // exclusion range is for legacy useFixedRobotStartPos + const auto robotStartPos = Mn::Vector3(2.59, 0.f, 0.f); + const auto pad = Mn::Vector3(0.9f, 2.f, 0.9); + const auto exclusionRange = Mn::Range3D(robotStartPos - pad, robotStartPos + pad); + + const auto& stageFixedObject = safeVectorGet(set.fixedObjects_, episode.stageFixedObjIndex); + const auto& columnGrid = stageFixedObject.columnGridSet_.getColumnGrid(0); + // perf todo: re-use this across entire set (have extents for set) + // todo: find extents for entire EpisodeSet, not just this specific columnGrid + constexpr int maxBytes = 1000 * 1024; + // this is tuned assuming a building-scale simulation with household-object-scale obstacles + constexpr float maxGridSpacing = 0.5f; + CollisionBroadphaseGrid colGrid = CollisionBroadphaseGrid(getMaxCollisionRadius(collection), + columnGrid.minX, columnGrid.minZ, + columnGrid.getMaxX(), columnGrid.getMaxZ(), + maxBytes, maxGridSpacing); + + constexpr int maxFailedPlacements = 3; + PlacementHelper placementHelper(stageFixedObject.columnGridSet_, + 
colGrid, collection, random, maxFailedPlacements);
+
+  episode.targetObjIndex_ = 0; // arbitrary
+  int numSpawnAttempts = 2000;
+  bool success = false;
+  for (int i = 0; i < numSpawnAttempts; i++) {
+
+    // find a robot spawn after finding all object spawns
+    bool isRobotPosAttempt = (episode.numFreeObjectSpawns_ == numObjectSpawns)
+      && std::isnan(episode.agentStartPos_.x());
+
+    // find a goal position spawn after finding robot spawn and all object spawns
+    bool isGoalPositionAttempt = (episode.numFreeObjectSpawns_ == numObjectSpawns)
+      && !isRobotPosAttempt;
+
+    const bool useExclusionRange = !isRobotPosAttempt && config.useFixedRobotStartPos;
+
+    const auto& spawnRange = isRobotPosAttempt ? robotSpawnRange : objectSpawnRange;
+
+    FreeObjectSpawn spawn;
+    const FreeObject* freeObjectPtr = nullptr;
+    if (isRobotPosAttempt) {
+      spawn.freeObjIndex_ = -1;
+      freeObjectPtr = &robotProxy;
+    } else {
+      // for the goal position, use the free object corresponding to targetObjIdx
+      spawn.freeObjIndex_ = isGoalPositionAttempt
+        ? 
set.freeObjectSpawns_[episode.targetObjIndex_].freeObjIndex_ + : random.uniform_int(0, set.freeObjects_.size()); + + freeObjectPtr = &safeVectorGet(set.freeObjects_, spawn.freeObjIndex_); + } + const auto& freeObject = *freeObjectPtr; + + spawn.startRotationIndex_ = random.uniform_int(0, freeObject.startRotations_.size()); + + Mn::Vector3 randomPos; + int numAttempts = 0; + while (true) { + numAttempts++; + randomPos = Mn::Vector3( + random.uniform_float(spawnRange.min().x(), spawnRange.max().x()), + random.uniform_float(spawnRange.min().y(), spawnRange.max().y()), + random.uniform_float(spawnRange.min().z(), spawnRange.max().z())); + + if (!useExclusionRange || !exclusionRange.contains(randomPos)) { + break; + } + BATCHED_SIM_ASSERT(numAttempts < 1000); + } + + const auto rotation = freeObject.startRotations_[spawn.startRotationIndex_]; + Mn::Matrix4 mat = Mn::Matrix4::from( + rotation.toMatrix(), randomPos); + + if (placementHelper.place(mat, freeObject)) { + auto adjustedSpawnRange = spawnRange; + adjustedSpawnRange.min().y() -= allowedSnapDown; + if (!adjustedSpawnRange.contains(mat.translation())) { + continue; + } + spawn.startPos_ = mat.translation(); + + if (isRobotPosAttempt) { + episode.agentStartPos_ = Mn::Vector2(spawn.startPos_.x(), spawn.startPos_.z()); + episode.agentStartYaw_ = config.useFixedRobotStartYaw + ? 
-float(Mn::Rad(Mn::Deg(90.f))) + : random.uniform_float(-float(Mn::Rad(Mn::Deg(180.f))), float(Mn::Rad(Mn::Deg(180.f)))); + } else if (isGoalPositionAttempt) { + episode.targetObjGoalPos_ = spawn.startPos_; + episode.targetObjGoalRotation_ = rotation; + success = true; + break; + } else { + set.freeObjectSpawns_.emplace_back(std::move(spawn)); + episode.numFreeObjectSpawns_++; + + // add to colGrid so future spawns don't intersect this one + colGrid.insertObstacle(spawn.startPos_, rotation, &freeObject.aabb_); + } + } + } + constexpr int numGoalPositions = 1; + ESP_CHECK(success, "episode-generation failed; couldn't find " + << (numObjectSpawns + numGoalPositions) << " collision-free spawn locations"); + + set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); + + set.episodes_.emplace_back(std::move(episode)); +} + +} + +EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, + const BpsSceneMapping& sceneMapping, + const serialize::Collection& collection) { + + int numEpisodes = config.numEpisodes; + + core::Random random(config.seed); + + EpisodeSet set; + + std::vector selectedReplicaCadBakedStages = { + "Baked_sc0_staging_00", + "Baked_sc0_staging_01", + "Baked_sc0_staging_02", + "Baked_sc0_staging_03", + "Baked_sc0_staging_04", + "Baked_sc0_staging_05", + "Baked_sc0_staging_06", + "Baked_sc0_staging_07", + "Baked_sc0_staging_08", + "Baked_sc0_staging_09", + "Baked_sc0_staging_10", + "Baked_sc0_staging_11", + // "Baked_sc0_staging_12", + }; + ESP_CHECK(config.numStageVariations <= selectedReplicaCadBakedStages.size(), + "generateBenchmarkEpisodeSet: config.numStageVariations=" << config.numStageVariations + << " must be <= selectedReplicaCadBakedStages.size()=" << selectedReplicaCadBakedStages.size()); + + for (int i = 0; i < config.numStageVariations; i++) { + addStageFixedObject(set, selectedReplicaCadBakedStages[i]); + } + + std::vector selectedYCBObjects = { + //"024_bowl", + "003_cracker_box", 
+ "010_potted_meat_can", + "002_master_chef_can", + "004_sugar_box", + "005_tomato_soup_can", + //"009_gelatin_box", + //"008_pudding_box", + "007_tuna_fish_can", + }; + ESP_CHECK(config.numObjectVariations <= selectedYCBObjects.size(), + "generateBenchmarkEpisodeSet: config.numObjectVariations=" << config.numObjectVariations + << " must be <= selectedYCBObjects.size()=" << selectedYCBObjects.size()); + + for (int i = 0; i < config.numObjectVariations; i++) { + addFreeObject(set, selectedYCBObjects[i]); + } + + // sloppy: call postLoadFixup before adding episodes; this means that + // set.maxFreeObjects_ gets computed incorrectly in here (but it will get computed + // correctly, incrementally, in addEpisode). + postLoadFixup(set, sceneMapping, collection); + + const auto robotProxy = createFreeObjectProxyForRobot(collection); + + // distribute stages across episodes + for (int i = 0; i < numEpisodes; i++) { + int stageIndex = i * config.numStageVariations / numEpisodes; + addEpisode(config, set, collection, stageIndex, random, robotProxy); + } + BATCHED_SIM_ASSERT(set.maxFreeObjects_ > 0); + + return set; +} + + +} // namespace batched_sim +} // namespace esp diff --git a/src/esp/batched_sim/EpisodeGenerator.h b/src/esp/batched_sim/EpisodeGenerator.h new file mode 100644 index 0000000000..79c8b7e719 --- /dev/null +++ b/src/esp/batched_sim/EpisodeGenerator.h @@ -0,0 +1,32 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef ESP_BATCHEDSIM_EPISODEGENERATOR_H_ +#define ESP_BATCHEDSIM_EPISODEGENERATOR_H_ + +#include "esp/batched_sim/EpisodeSet.h" + +namespace esp { +namespace batched_sim { + +struct EpisodeGeneratorConfig { + int numEpisodes = 100; + int seed = 3; // this is 3 for legacy reason + int numStageVariations = 12; // see selectedReplicaCadBakedStages + int numObjectVariations = 6; // see selectedYCBObjects + int minNontargetObjects = 27; + int maxNontargetObjects = 32; + bool useFixedRobotStartPos = true; + bool useFixedRobotStartYaw = false; + float maxRobotReachXZ = -1.f; // unused + ESP_SMART_POINTERS(EpisodeGeneratorConfig); +}; + +EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, + const BpsSceneMapping& sceneMapping, const serialize::Collection& collection); + +} // namespace batched_sim +} // namespace esp + +#endif diff --git a/src/esp/batched_sim/EpisodeSet.cpp b/src/esp/batched_sim/EpisodeSet.cpp index 8d32d99ca1..59f58bdd14 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -4,9 +4,7 @@ #include "esp/batched_sim/EpisodeSet.h" #include "esp/batched_sim/BatchedSimAssert.h" -#include "esp/batched_sim/PlacementHelper.h" -#include "esp/core/random.h" #include "esp/core/Check.h" #include "esp/io/json.h" @@ -18,248 +16,64 @@ namespace Mn = Magnum; namespace esp { namespace batched_sim { -namespace { +void updateFromSerializeCollection(FreeObject& freeObject, const serialize::FreeObject& serFreeObject, const serialize::Collection& collection) { -void addStageFixedObject(EpisodeSet& set, const std::string& name) { + freeObject.aabb_ = Mn::Range3D(serFreeObject.collisionBox.min, serFreeObject.collisionBox.max); + freeObject.heldRotationIndex_ = serFreeObject.heldRotationIndex; + ESP_CHECK(freeObject.heldRotationIndex_ >= 0 + && freeObject.heldRotationIndex_ < freeObject.startRotations_.size(), + "updateFromSerializeCollection: heldRotationIndex " << serFreeObject.heldRotationIndex + << " is 
out-of-range for FreeObject " + << freeObject.name_ << " with startRotations.size() == " << freeObject.startRotations_.size()); - FixedObject fixedObj; - fixedObj.name_ = name; - fixedObj.needsPostLoadFixup_ = true; + freeObject.collisionSpheres_.clear(); + std::vector generatedSpheres; + const std::vector* serializeCollisionSpheres = nullptr; - set.fixedObjects_.push_back(std::move(fixedObj)); -} - -// radius of sphere at origin that bounds this AABB -float getOriginBoundingSphereRadiusSquaredForAABB(const Magnum::Range3D& aabb) { - auto absMin = Mn::Math::abs(aabb.min()); - Mn::Vector3 maxCorner = Mn::Math::max(absMin, aabb.max()); - return maxCorner.dot(); -} - -void addFreeObject(EpisodeSet& set, const std::string& name) { - - FreeObject freeObj; - freeObj.name_ = name; - freeObj.needsPostLoadFixup_ = true; - - // all YCB objects needs this to be upright - const auto baseRot = Mn::Quaternion::rotation(Mn::Deg(-90), Mn::Vector3(1.f, 0.f, 0.f)); - - constexpr int numRotationsAboutUpAxis = 32; - for (int i = 0; i < numRotationsAboutUpAxis; i++) { - const auto angle = Mn::Deg((float)i * 360.f / numRotationsAboutUpAxis); - const auto rotAboutUpAxis = Mn::Quaternion::rotation(angle, Mn::Vector3(0.f, 1.f, 0.f)); - freeObj.startRotations_.push_back((rotAboutUpAxis * baseRot)); - } - - set.freeObjects_.emplace_back(std::move(freeObj)); -} - -void addEpisode(EpisodeSet& set, const serialize::Collection& collection, int stageFixedObjectIndex, core::Random& random) { - Episode episode; - episode.stageFixedObjIndex = stageFixedObjectIndex; - episode.firstFreeObjectSpawnIndex_ = set.freeObjectSpawns_.size(); - - // place robot agent - episode.agentStartPos_ = Mn::Vector2(2.59f, 0.f); - episode.agentStartYaw_ = random.uniform_float(-float(Mn::Rad(Mn::Deg(135.f))), -float(Mn::Rad(Mn::Deg(45.f)))); - - // keep object count close to 28 (from Hab 2.0 benchmark), but include variation - int targetNumSpawns = random.uniform_int(28, 33); - episode.numFreeObjectSpawns_ = 0; - - // 
good for area around staircase and living room - Mn::Range3D spawnRange({-1.f, 0.15f, -0.5f}, {4.f, 2.f, 3.f}); - - // good for white bookshelf for stage 5 - // Mn::Range3D spawnRange({0.33f, 0.15f, -0.4f}, {1.18f, 1.85f, -0.25f}); - - const auto robotStartPos = Mn::Vector3(2.59, 0.f, 0.f); - const auto pad = Mn::Vector3(0.9f, 2.f, 0.9); - const auto exclusionRange = Mn::Range3D(robotStartPos - pad, robotStartPos + pad); - - const auto& stageFixedObject = safeVectorGet(set.fixedObjects_, episode.stageFixedObjIndex); - const auto& columnGrid = stageFixedObject.columnGridSet_.getColumnGrid(0); - // perf todo: re-use this across entire set (have extents for set) - // todo: find extents for entire EpisodeSet, not just this specific columnGrid - constexpr int maxBytes = 1000 * 1024; - // this is tuned assuming a building-scale simulation with household-object-scale obstacles - constexpr float maxGridSpacing = 0.5f; - CollisionBroadphaseGrid colGrid = CollisionBroadphaseGrid(getMaxCollisionRadius(collection), - columnGrid.minX, columnGrid.minZ, - columnGrid.getMaxX(), columnGrid.getMaxZ(), - maxBytes, maxGridSpacing); - - constexpr int maxFailedPlacements = 3; - PlacementHelper placementHelper(stageFixedObject.columnGridSet_, - colGrid, collection, random, maxFailedPlacements); - - std::array selectedFreeObjectIndices = {1, 2, 3, 4, 5, 8}; - - episode.targetObjIndex_ = 0; // arbitrary - int numSpawnAttempts = 2000; - bool success = false; - for (int i = 0; i < numSpawnAttempts; i++) { - - // After finding all object spawns, do a search for the target object's goal - // position. This logic is mostly identical to finding an object spawn. - bool isGoalPositionAttempt = (episode.numFreeObjectSpawns_ == targetNumSpawns); - - FreeObjectSpawn spawn; - // for the goal position, use the free object correspnding to targetObjIdx - spawn.freeObjIndex_ = isGoalPositionAttempt - ? 
set.freeObjectSpawns_[episode.targetObjIndex_].freeObjIndex_ - : selectedFreeObjectIndices[random.uniform_int(0, selectedFreeObjectIndices.size())]; - const auto& freeObject = safeVectorGet(set.freeObjects_, spawn.freeObjIndex_); - spawn.startRotationIndex_ = random.uniform_int(0, freeObject.startRotations_.size()); - - Mn::Vector3 randomPos; - int numAttempts = 0; - while (true) { - numAttempts++; - randomPos = Mn::Vector3( - random.uniform_float(spawnRange.min().x(), spawnRange.max().x()), - random.uniform_float(spawnRange.min().y(), spawnRange.max().y()), - random.uniform_float(spawnRange.min().z(), spawnRange.max().z())); - - if (!exclusionRange.contains(randomPos)) { - break; - } - BATCHED_SIM_ASSERT(numAttempts < 1000); - } - - const auto rotation = freeObject.startRotations_[spawn.startRotationIndex_]; - Mn::Matrix4 mat = Mn::Matrix4::from( - rotation.toMatrix(), randomPos); - - if (placementHelper.place(mat, freeObject)) { - if (!spawnRange.contains(mat.translation())) { - continue; - } - spawn.startPos_ = mat.translation(); - - if (isGoalPositionAttempt) { - episode.targetObjGoalPos_ = spawn.startPos_; - episode.targetObjGoalRotation_ = rotation; - success = true; - break; - } else { - set.freeObjectSpawns_.emplace_back(std::move(spawn)); - episode.numFreeObjectSpawns_++; - - // add to colGrid so future spawns don't intersect this one - colGrid.insertObstacle(spawn.startPos_, rotation, &freeObject.aabb_); - } - } - } - constexpr int numGoalPositions = 1; - ESP_CHECK(success, "episode-generation failed; couldn't find " - << (targetNumSpawns + numGoalPositions) << " collision-free spawn locations"); - - set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); - - set.episodes_.emplace_back(std::move(episode)); -} - -} - -EpisodeSet generateBenchmarkEpisodeSet(int numEpisodes, - const BpsSceneMapping& sceneMapping, - const serialize::Collection& collection) { - - // 5 is hand-picked for a demo - core::Random 
random(/*seed*/3); - - EpisodeSet set; - - std::vector replicaCadBakedStages = { - "Baked_sc0_staging_00", - "Baked_sc0_staging_01", - "Baked_sc0_staging_02", - "Baked_sc0_staging_03", - "Baked_sc0_staging_04", - "Baked_sc0_staging_05", - "Baked_sc0_staging_06", - "Baked_sc0_staging_07", - "Baked_sc0_staging_08", - "Baked_sc0_staging_09", - "Baked_sc0_staging_10", - "Baked_sc0_staging_11", - // "Baked_sc0_staging_12", - }; - - for (const auto& stageName : replicaCadBakedStages) { - addStageFixedObject(set, stageName); - } - - for (const auto& serFreeObject : collection.freeObjects) { - addFreeObject(set, serFreeObject.name); - } - - // sloppy: call postLoadFixup before adding episodes; this means that - // set.maxFreeObjects_ gets computed incorrectly in here (but it will get computed - // correctly, incrementally, in addEpisode). - postLoadFixup(set, sceneMapping, collection); - - // distribute stages across episodes - for (int i = 0; i < numEpisodes; i++) { - int stageIndex = i * set.fixedObjects_.size() / numEpisodes; - addEpisode(set, collection, stageIndex, random); - } - BATCHED_SIM_ASSERT(set.maxFreeObjects_ > 0); - - return set; -} - - -void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& collection) { + if (!serFreeObject.generateCollisionSpheresTechnique.empty()) { - for (const auto& serFreeObject : collection.freeObjects) { - - auto it = std::find_if(set.freeObjects_.begin(), set.freeObjects_.end(), - [&serFreeObject](const auto& item) { return item.name_ == serFreeObject.name; }); - ESP_CHECK(it != set.freeObjects_.end(), "collection free object with name " << - serFreeObject.name << " not found in EpisodeSet. 
If you hit this error during " - "hot-reloading, try restarting the simulator."); - - auto& freeObject = *it; - freeObject.aabb_ = Mn::Range3D(serFreeObject.collisionBox.min, serFreeObject.collisionBox.max); - freeObject.heldRotationIndex_ = serFreeObject.heldRotationIndex; - ESP_CHECK(freeObject.heldRotationIndex_ >= 0 - && freeObject.heldRotationIndex_ < freeObject.startRotations_.size(), - "updateFromSerializeCollection: heldRotationIndex " << serFreeObject.heldRotationIndex - << " is out-of-range for FreeObject " - << freeObject.name_ << " with startRotations.size() == " << freeObject.startRotations_.size()); - - freeObject.collisionSpheres_.clear(); - std::vector generatedSpheres; - const std::vector* serializeCollisionSpheres = nullptr; - - if (!serFreeObject.generateCollisionSpheresTechnique.empty()) { + auto& spheres = generatedSpheres; + serializeCollisionSpheres = &generatedSpheres; - auto& spheres = generatedSpheres; - serializeCollisionSpheres = &generatedSpheres; + float smallRadius = 0.015f; + float mediumRadius = 0.05f; + float largeRadius = 0.12f; - float smallRadius = 0.015f; - float mediumRadius = 0.05f; + const auto& aabb = freeObject.aabb_; + Mn::Vector3 aabbCenter = aabb.center(); - const auto& aabb = freeObject.aabb_; - Mn::Vector3 aabbCenter = aabb.center(); + if (serFreeObject.generateCollisionSpheresTechnique == "box") { - if (serFreeObject.generateCollisionSpheresTechnique == "box") { + // small and medium spheres at each corner + // consolidate duplicates at the end - // small and medium spheres at each corner - // consolidate duplicates at the end + // insert larger spheres first, so that de-duplication (later) leaves larger spheres + for (float r : {largeRadius, mediumRadius, smallRadius}) { + if (aabb.size().length() < r * 2.f) { + // object is too small for even one sphere of this radius + continue; + } + if (aabb.sizeZ() < r * 2.f) { + continue; + } - // insert larger spheres first, so that de-duplication (later) leaves larger 
spheres - for (float r : {mediumRadius, smallRadius}) { - if (aabb.size().length() < r * 2.f) { - // object is too small for even one sphere of this radius - continue; - } - if (aabb.sizeZ() < r * 2.f) { - continue; + if (r == largeRadius) { + + int numX = int(aabb.sizeX() / (largeRadius * 2.f)) + 1; + int numY = int(aabb.sizeY() / (largeRadius * 2.f)) + 1; + int numZ = int(aabb.sizeZ() / (largeRadius * 2.f)) + 1; + Mn::Vector3 origin; + for (int ix = 0; ix < numX; ix++) { + origin.x() = Mn::Math::lerp(aabb.min().x() + largeRadius, aabb.max().x() - largeRadius, float(ix) / (numX - 1)); + for (int iy = 0; iy < numY; iy++) { + origin.y() = Mn::Math::lerp(aabb.min().y() + largeRadius, aabb.max().y() - largeRadius, float(iy) / (numY - 1)); + for (int iz = 0; iz < numZ; iz++) { + origin.z() = Mn::Math::lerp(aabb.min().z() + largeRadius, aabb.max().z() - largeRadius, float(iz) / (numZ - 1)); + spheres.push_back({origin, r}); + } + } } + } else { spheres.push_back({aabb.backBottomLeft(), r}); spheres.push_back({aabb.backBottomRight(), r}); spheres.push_back({aabb.backTopLeft(), r}); @@ -269,77 +83,94 @@ void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& spheres.push_back({aabb.frontTopLeft(), r}); spheres.push_back({aabb.frontTopRight(), r}); } + } - } else if (serFreeObject.generateCollisionSpheresTechnique == "uprightCylinder") { + } else if (serFreeObject.generateCollisionSpheresTechnique == "uprightCylinder") { - // insert larger spheres first, so that de-duplication (later) leaves larger spheres - for (float r : {mediumRadius, smallRadius}) { - if (aabb.size().length() < r * 2.f) { - // object is too small for even one sphere of this radius - continue; - } + // insert larger spheres first, so that de-duplication (later) leaves larger spheres + for (float r : {largeRadius, mediumRadius, smallRadius}) { + if (aabb.size().length() < r * 2.f) { + // object is too small for even one sphere of this radius + continue; + } - for (float z : 
{aabb.min().z(), aabb.max().z()}) { - for (int xyDim = 0; xyDim < 2; xyDim++) { - int otherXyDim = xyDim == 0 ? 1 : 0; - Mn::Vector3 pMin; - pMin[xyDim] = aabb.min()[xyDim]; - pMin[otherXyDim] = aabb.center()[otherXyDim]; - pMin.z() = z; - - Mn::Vector3 pMax; - pMax[xyDim] = aabb.max()[xyDim]; - pMax[otherXyDim] = aabb.center()[otherXyDim]; - pMax.z() = z; - - spheres.push_back({pMin, r}); - spheres.push_back({pMax, r}); - } + for (float z : {aabb.min().z(), aabb.max().z()}) { + for (int xyDim = 0; xyDim < 2; xyDim++) { + int otherXyDim = xyDim == 0 ? 1 : 0; + Mn::Vector3 pMin; + pMin[xyDim] = aabb.min()[xyDim]; + pMin[otherXyDim] = aabb.center()[otherXyDim]; + pMin.z() = z; + + Mn::Vector3 pMax; + pMax[xyDim] = aabb.max()[xyDim]; + pMax[otherXyDim] = aabb.center()[otherXyDim]; + pMax.z() = z; + + spheres.push_back({pMin, r}); + spheres.push_back({pMax, r}); } } - - } else { - ESP_CHECK(false, "free object generateCollisionSpheresTechnique \"" - << serFreeObject.generateCollisionSpheresTechnique << "\" not recognized. " - "Valid values are empty-string, \"box\", and \"uprightCylinder\""); } - // clamp to fit inside box extents, but don't move sphere center past center of box (to other side) - for (auto& sphere : spheres) { - Mn::Vector3 clampedOrigin; - for (int dim = 0; dim < 3; dim++) { - clampedOrigin[dim] = sphere.origin[dim] < aabbCenter[dim] - ? Mn::Math::clamp(sphere.origin[dim], - Mn::Math::min(aabb.min()[dim] + sphere.radius, aabbCenter[dim]), aabbCenter[dim]) - : Mn::Math::clamp(sphere.origin[dim], - aabbCenter[dim], Mn::Math::max(aabb.max()[dim] - sphere.radius, aabbCenter[dim])); - } - sphere.origin = clampedOrigin; + } else { + ESP_CHECK(false, "free object generateCollisionSpheresTechnique \"" + << serFreeObject.generateCollisionSpheresTechnique << "\" not recognized. 
" + "Valid values are empty-string, \"box\", and \"uprightCylinder\""); + } + + // clamp to fit inside box extents, but don't move sphere center past center of box (to other side) + for (auto& sphere : spheres) { + Mn::Vector3 clampedOrigin; + for (int dim = 0; dim < 3; dim++) { + clampedOrigin[dim] = sphere.origin[dim] < aabbCenter[dim] + ? Mn::Math::clamp(sphere.origin[dim], + Mn::Math::min(aabb.min()[dim] + sphere.radius, aabbCenter[dim]), aabbCenter[dim]) + : Mn::Math::clamp(sphere.origin[dim], + aabbCenter[dim], Mn::Math::max(aabb.max()[dim] - sphere.radius, aabbCenter[dim])); } + sphere.origin = clampedOrigin; + } - // remove duplicates - for (int i = spheres.size() - 1; i >= 0; i--) { - bool foundDup = false; - for (int j = 0; j < i; j++) { - if (spheres[i].origin == spheres[j].origin) { - auto it = spheres.begin() + i; - spheres.erase(spheres.begin() + i); - break; - } + // remove duplicates + for (int i = spheres.size() - 1; i >= 0; i--) { + bool foundDup = false; + for (int j = 0; j < i; j++) { + if (spheres[i].origin == spheres[j].origin) { + auto it = spheres.begin() + i; + spheres.erase(spheres.begin() + i); + break; } } - - BATCHED_SIM_ASSERT(!spheres.empty()); - } else { - ESP_CHECK(!serFreeObject.collisionSpheres.empty(), "no collision spheres for free object " - << serFreeObject.name << " and generateCollisionSpheresFromBox==false"); - serializeCollisionSpheres = &serFreeObject.collisionSpheres; } - for (const auto& serSphere : *serializeCollisionSpheres) { - int radiusIdx = getCollisionRadiusIndex(collection, serSphere.radius); - freeObject.collisionSpheres_.push_back({serSphere.origin, radiusIdx}); + BATCHED_SIM_ASSERT(!spheres.empty()); + } else { + ESP_CHECK(!serFreeObject.collisionSpheres.empty(), "no collision spheres for free object " + << serFreeObject.name << " and generateCollisionSpheresFromBox==false"); + serializeCollisionSpheres = &serFreeObject.collisionSpheres; + } + + for (const auto& serSphere : *serializeCollisionSpheres) { + int 
radiusIdx = getCollisionRadiusIndex(collection, serSphere.radius); + freeObject.collisionSpheres_.push_back({serSphere.origin, radiusIdx}); + } + +} + +void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& collection) { + + for (const auto& serFreeObject : collection.freeObjects) { + + auto it = std::find_if(set.freeObjects_.begin(), set.freeObjects_.end(), + [&serFreeObject](const auto& item) { return item.name_ == serFreeObject.name; }); + if (it == set.freeObjects_.end()) { + // collection may have info for free objects not used in the current EpisodeSet + continue; } + + auto& freeObject = *it; + updateFromSerializeCollection(freeObject, serFreeObject, collection); } } diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index c2858aea40..934e0201ff 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -2,8 +2,8 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree. 
-#ifndef ESP_BATCHEDSIM_EPISODE_H_ -#define ESP_BATCHEDSIM_EPISODE_H_ +#ifndef ESP_BATCHEDSIM_EPISODESET_H_ +#define ESP_BATCHEDSIM_EPISODESET_H_ #include "esp/batched_sim/BatchedSimAssert.h" #include "esp/batched_sim/ColumnGrid.h" @@ -102,9 +102,7 @@ class EpisodeInstanceSet { void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& collection); -// todo: move to separate file -EpisodeSet generateBenchmarkEpisodeSet(int numEpisodes, - const BpsSceneMapping& sceneMapping, const serialize::Collection& collection); +void updateFromSerializeCollection(FreeObject& freeObject, const serialize::FreeObject& serFreeObject, const serialize::Collection& collection); void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const serialize::Collection& collection); diff --git a/src/esp/batched_sim/GlmUtils.cpp b/src/esp/batched_sim/GlmUtils.cpp index 99c34dec68..2ceacd7862 100644 --- a/src/esp/batched_sim/GlmUtils.cpp +++ b/src/esp/batched_sim/GlmUtils.cpp @@ -77,6 +77,14 @@ Magnum::Vector3 inverseTransformPoint(const glm::mat4x3& glMat, const Magnum::Ve return result; } +Magnum::Vector3 getRangeCorner(const Magnum::Range3D& range, int cornerIdx) { + + return Mn::Vector3( + (cornerIdx % 2) ? range.min().x() : range.max().x(), + ((cornerIdx / 2) % 2) ? range.min().y() : range.max().y(), + ((cornerIdx / 4) % 2) ? 
range.min().z() : range.max().z() + ); +} bool sphereBoxContactTest(const Magnum::Vector3& sphereOrigin, float sphereRadiusSq, const Magnum::Range3D& aabb) { diff --git a/src/esp/batched_sim/GlmUtils.h b/src/esp/batched_sim/GlmUtils.h index b734384c64..e9d5be4ef4 100644 --- a/src/esp/batched_sim/GlmUtils.h +++ b/src/esp/batched_sim/GlmUtils.h @@ -27,6 +27,8 @@ Magnum::Vector3 getMagnumTranslation(const glm::mat4x3& glMat); Magnum::Vector3 inverseTransformPoint(const glm::mat4x3& glMat, const Magnum::Vector3& pos); +Magnum::Vector3 getRangeCorner(const Magnum::Range3D& range, int cornerIdx); + bool sphereBoxContactTest(const Magnum::Vector3& sphereOrigin, float sphereRadiusSq, const Magnum::Range3D& aabb); template diff --git a/src/esp/batched_sim/PlacementHelper.cpp b/src/esp/batched_sim/PlacementHelper.cpp index be628ea844..2969a2bd8f 100644 --- a/src/esp/batched_sim/PlacementHelper.cpp +++ b/src/esp/batched_sim/PlacementHelper.cpp @@ -4,6 +4,8 @@ #include "esp/batched_sim/PlacementHelper.h" #include "esp/batched_sim/BatchedSimAssert.h" +#include "esp/batched_sim/GlmUtils.h" + #include "esp/core/Check.h" namespace Mn = Magnum; @@ -33,36 +35,59 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO bool success = false; while (true) { - float minCastDownDist = std::numeric_limits::max(); Cr::Containers::Optional hintAngle; // todo: smarter caching for held object spheres ColumnGridSource::QueryCacheValue queryCache = 0; // cast a single small sphere down to find a resting position candidate + const float sphereRadius = 0.015f; // temp hack + const float radiusIdx = 0; // temp hack + + Mn::Vector3 querySphereWorldOrigin; { + auto centerWorld = heldObjMat.transformPoint(freeObject.aabb_.center()); + + Mn::Vector3 minCornerRotated; + constexpr int numBoxCorners = 8; + for (int i = 0; i < numBoxCorners; i++) { + Mn::Vector3 cornerLocal = getRangeCorner(freeObject.aabb_, i); + const auto cornerRotated = 
heldObjMat.transformVector(cornerLocal); + if (i == 0 || cornerRotated.y() < minCornerRotated.y()) { + minCornerRotated = cornerRotated; + } + } + + auto minCornerWorld = minCornerRotated + heldObjMat.translation(); + // cast down from center of object in XZ, at bottom of object (y of lowest corner), + // and adjust for sphere radius + querySphereWorldOrigin = Mn::Vector3( + centerWorld.x(), + minCornerWorld.y() + sphereRadius, + centerWorld.z()); + #if 0 // todo: find a sphere near the center of mass, at the base of the object in its // current (or resting) rotation. We can use its rotated box to find this. - const float sphereRadius = 0.015f; // temp hack const auto sphereLocalOrigin = Mn::Vector3( freeObject.aabb_.center().x(), freeObject.aabb_.center().y(), freeObject.aabb_.min().z() + sphereRadius); - float radiusIdx = 0; // temp hack auto sphereWorldOrigin = heldObjMat.transformPoint(sphereLocalOrigin); - - float castDownResult = columnGridSet_.castDownTest(radiusIdx, sphereWorldOrigin, &queryCache); - // there's no guarantee of collision-free due to rotation on drop - // todo: decide how to handle bad collisions - minCastDownDist = Mn::Math::min(minCastDownDist, castDownResult); + #endif } + + float queryCastDownResult = columnGridSet_.castDownTest(radiusIdx, querySphereWorldOrigin, &queryCache); + // there's no guarantee of collision-free due to rotation on drop + // todo: decide how to handle bad collisions + float querySurfaceY = querySphereWorldOrigin.y() - queryCastDownResult - sphereRadius; + constexpr float maxPenetrationDist = 0.2f; // sloppy; we should have a better way of knowing that castDownTest didn't hit anything constexpr float maxValidCastDownDist = 1000.f; - if (minCastDownDist <= -maxPenetrationDist || minCastDownDist > maxValidCastDownDist) { + if (queryCastDownResult <= -maxPenetrationDist || queryCastDownResult > maxValidCastDownDist) { // continue below, no hint } else { - heldObjMat.translation().y() -= minCastDownDist; + 
heldObjMat.translation().y() -= queryCastDownResult; int hitFreeObjectIndex = -1; bool didHitColumnGrid = false; @@ -80,12 +105,14 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO // a wall or other part of the static scene, such that it's partially // interpenetrating. // todo: consider vertical fixup here - float castDownResult = columnGridSet_.castDownTest(sphere.radiusIdx, sphereWorldOrigin, &queryCache); + float thisCastDownResult = columnGridSet_.castDownTest(sphere.radiusIdx, sphereWorldOrigin, &queryCache); + + float thisSurfaceY = sphereWorldOrigin.y() - thisCastDownResult - sphereRadius; // sloppy: this is needed because we currently drop tilted objects, and we // don't use a sphere at the true base of the object in the earlier castDownTest. constexpr float pad = 0.05f; - const float maxPenetrationDist = sphereRadius + pad; - if (castDownResult <= -maxPenetrationDist) { + const float maxSurfaceY = querySurfaceY + sphereRadius + pad; + if (thisSurfaceY > maxSurfaceY) { didHitColumnGrid = true; break; } @@ -96,7 +123,7 @@ bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeO break; } - heldObjMat.translation().y() += minCastDownDist; // restore to drop height + heldObjMat.translation().y() += queryCastDownResult; // restore to drop height if (hitFreeObjectIndex != -1 && numFailedPlacements < maxFailedPlacements_ - 1) { const auto& hitObs = colGrid_.getObstacle(hitFreeObjectIndex); diff --git a/src/esp/batched_sim/SerializeCollection.cpp b/src/esp/batched_sim/SerializeCollection.cpp index 51d0aed811..dd4d23b7bc 100644 --- a/src/esp/batched_sim/SerializeCollection.cpp +++ b/src/esp/batched_sim/SerializeCollection.cpp @@ -111,7 +111,7 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, Collection Collection::loadFromFile(const std::string& filepath) { Collection collection; - ESP_CHECK(Cr::Utility::Directory::exists(filepath), "couldn't find collection file " << filepath); + 
ESP_CHECK(Cr::Utility::Directory::exists(filepath), "couldn't find collection file {" << filepath << "}"); auto newDoc = esp::io::parseJsonFile(filepath); esp::io::readMember(newDoc, "collection", collection); return collection; diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 54d8713def..f5bb30be2d 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -37,6 +37,18 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("width", &CameraSensorConfig::width, R"(Todo)") .def_readwrite("height", &CameraSensorConfig::height, R"(Todo)"); + py::class_( + m, "EpisodeGeneratorConfig") + .def(py::init(&EpisodeGeneratorConfig::create<>)) + .def_readwrite("numEpisodes", &EpisodeGeneratorConfig::numEpisodes, R"(Todo)") + .def_readwrite("seed", &EpisodeGeneratorConfig::seed, R"(Todo)") + .def_readwrite("num_stage_variations", &EpisodeGeneratorConfig::numStageVariations, R"(Todo)") + .def_readwrite("num_object_variations", &EpisodeGeneratorConfig::numObjectVariations, R"(Todo)") + .def_readwrite("min_nontarget_objects", &EpisodeGeneratorConfig::minNontargetObjects, R"(Todo)") + .def_readwrite("max_nontarget_objects", &EpisodeGeneratorConfig::maxNontargetObjects, R"(Todo)") + .def_readwrite("used_fixed_robot_start_pos", &EpisodeGeneratorConfig::useFixedRobotStartPos, R"(Todo)") + .def_readwrite("use_fixed_robot_start_yaw", &EpisodeGeneratorConfig::useFixedRobotStartYaw, R"(Todo)"); + py::class_( m, "BatchedSimulatorConfig") .def(py::init(&BatchedSimulatorConfig::create<>)) @@ -51,7 +63,7 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") - .def_readwrite("num_procedural_episodes", 
&BatchedSimulatorConfig::numProceduralEpisodes, R"(Todo)") + .def_readwrite("episode_generator_config", &BatchedSimulatorConfig::episodeGeneratorConfig, R"(Todo)") .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); py::class_( diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index d637c4b428..a19107ea6b 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -205,19 +205,33 @@ TEST_F(BatchedSimulatorTest, basic) { const bool forceRandomActions = doFreeCam || doTuneRobotCam; + EpisodeGeneratorConfig generatorConfig{ + .numEpisodes = 100, + .seed = 4, + .numStageVariations = 12, + .numObjectVariations = 6, + .minNontargetObjects = 27, + .maxNontargetObjects = 32, + .useFixedRobotStartPos = false, + .useFixedRobotStartYaw = false + }; + + BatchedSimulatorConfig config{ - .numEnvs = 2, .gpuId = 0, + .numEnvs = 1, .gpuId = 0, .includeDepth = includeDepth, .includeColor = includeColor, .sensor0 = {.width = 768, .height = 768}, .numDebugEnvs = 1, - .debugSensor {.width = 512, .height = 512}, + .debugSensor {.width = 768, .height = 768}, .forceRandomActions = forceRandomActions, .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, .doProceduralEpisodeSet = true, + .episodeGeneratorConfig = generatorConfig, //.doProceduralEpisodeSet = false, //.episodeSetFilepath = "generated.episode_set.json", + .collectionFilepath = "../data/replicacad_composite.collection.json" }; BatchedSimulator bsim(config); From 060c764e24a10d540fa7beddaf905a49b311ae6a Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 18 Apr 2022 09:20:28 -0400 Subject: [PATCH 27/53] json serialize support for std::array --- src/esp/io/JsonStlTypes.h | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/esp/io/JsonStlTypes.h b/src/esp/io/JsonStlTypes.h index 0e3b705313..27d889558f 100644 --- a/src/esp/io/JsonStlTypes.h +++ b/src/esp/io/JsonStlTypes.h @@ -98,6 +98,50 @@ 
bool readMember(const JsonGenericValue& value, return true; } +// std::array works similarly to vector +template +void addMember(JsonGenericValue& value, + rapidjson::GenericStringRef name, + const std::array& vec, + JsonAllocator& allocator) { + if (!vec.empty()) { + addMember(value, name, toJsonArrayHelper(vec.data(), vec.size(), allocator), + allocator); + } +} + +template +bool readMember(const JsonGenericValue& value, + const char* tag, + std::array& vec) { + JsonGenericValue::ConstMemberIterator itr = value.FindMember(tag); + if (itr != value.MemberEnd()) { + const JsonGenericValue& arr = itr->value; + if (!arr.IsArray()) { + ESP_ERROR() << "JSON tag" << tag << "is not an array"; + return false; + } + if (arr.Size() != vec.size()) { + ESP_ERROR() << "wrong array in JSON tag" + << tag; + return false; + } + for (size_t i = 0; i < arr.Size(); i++) { + const auto& itemObj = arr[i]; + T item; + if (!fromJsonValue(itemObj, item)) { + ESP_ERROR() << "Failed to parse array element" << i << "in JSON tag" + << tag; + return false; + } + vec[i] = std::move(item); + } + return true; + } + // fail on missing elem for std::array + return false; +} + /** * @brief Specialization to handle reading a JSON object into an * std::map. 
Check passed json doc for existence of From 39114b78d84f88a8f35f3805e7308b0caaa01240 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 18 Apr 2022 09:25:51 -0400 Subject: [PATCH 28/53] added EpisodeGeneratorConfig::use_fixed_robot_joint_start_positions and support for random joint start positions --- src/esp/batched_sim/BatchedSimulator.cpp | 43 +++++++-- src/esp/batched_sim/EpisodeGenerator.cpp | 112 ++++++++++++++++++++--- src/esp/batched_sim/EpisodeGenerator.h | 2 +- src/esp/batched_sim/EpisodeSet.cpp | 3 +- src/esp/batched_sim/EpisodeSet.h | 2 + src/esp/batched_sim/GlmUtils.cpp | 5 + src/esp/batched_sim/GlmUtils.h | 1 + src/esp/bindings/BatchedSimBindings.cpp | 3 +- src/tests/BatchedSimTest.cpp | 8 +- 9 files changed, 153 insertions(+), 26 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index e7d7398abf..8267966528 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -56,11 +56,6 @@ Mn::Matrix4 toMagnumMatrix4(const btTransform& btTrans) { return tmp2; } -Mn::Quaternion yawToRotation(float yawRadians) { - constexpr Mn::Vector3 upAxis(0.f, 1.f, 0.f); - return Mn::Quaternion::rotation(Mn::Rad(yawRadians), upAxis); -} - Mn::Vector3 groundPositionToVector3(const Mn::Vector2& src) { constexpr float groundY = 0.f; return Mn::Vector3(src.x(), groundY, src.y()); @@ -957,6 +952,30 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { } } #endif + + #if 0 + // temp debug + if (config_.numDebugEnvs > 0) { + + const float* yaws = &rollouts_.yaws_[currStorageStep_ * numEnvs]; + const Mn::Vector2* positions = &rollouts_.positions_[currStorageStep_ * numEnvs]; + + const auto& collection = serializeCollection_; + auto it = std::find_if(collection.freeObjects.begin(), collection.freeObjects.end(), + [](const auto& item) { return item.name == "robotProxy"; }); + BATCHED_SIM_ASSERT(it != collection.freeObjects.end()); + const auto& proxyFreeObj = 
*it; + const auto& aabb = Magnum::Range3D(proxyFreeObj.collisionBox.min, proxyFreeObj.collisionBox.max); + + for (int b = 0; b < config_.numDebugEnvs; b++) { + + addBoxDebugInstance("cube_pink_wireframe", b, + Mn::Vector3(positions[b].x(), 0.f, positions[b].y()), + yawToRotation(yaws[b]), + aabb); + } + } + #endif } @@ -1429,9 +1448,21 @@ void BatchedSimulator::resetEpisodeInstance(int b) { positions[b] = episode.agentStartPos_; yaws[b] = episode.agentStartYaw_; + const auto& serRobot = safeVectorGet(serializeCollection_.robots, 0); + for (int j = 0; j < robot_.numPosVars; j++) { + auto& pos = jointPositions[b * robot_.numPosVars + j]; + pos = serRobot.startJointPositions[j]; + } + + for (int i = 0; i < serRobot.actionMap.joints.size(); i++) { + const auto& pair = serRobot.actionMap.joints[i]; + int j = pair.first; + auto& pos = jointPositions[b * robot_.numPosVars + j]; + pos = safeVectorGet(episode.robotStartJointPositions_, i); + } + for (int j = 0; j < robot_.numPosVars; j++) { auto& pos = jointPositions[b * robot_.numPosVars + j]; - pos = serializeCollection_.robots[0].startJointPositions[j]; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], robot_.jointPositionLimits.second[j]); } diff --git a/src/esp/batched_sim/EpisodeGenerator.cpp b/src/esp/batched_sim/EpisodeGenerator.cpp index 0dd8e2156f..0c18f8eb65 100644 --- a/src/esp/batched_sim/EpisodeGenerator.cpp +++ b/src/esp/batched_sim/EpisodeGenerator.cpp @@ -4,6 +4,7 @@ #include "esp/batched_sim/EpisodeGenerator.h" #include "esp/batched_sim/PlacementHelper.h" +#include "esp/batched_sim/GlmUtils.h" #include "esp/core/random.h" #include "esp/core/Check.h" @@ -79,13 +80,90 @@ FreeObject createFreeObjectProxyForRobot(const serialize::Collection& collection return freeObject; } +void setFetchJointStartPositions(const EpisodeGeneratorConfig& config, Episode& episode, const serialize::Collection& collection, + core::Random& random) { + + const auto& serRobot = safeVectorGet(collection.robots, 0); + 
ESP_CHECK(serRobot.actionMap.joints.size() <= Episode::MAX_JOINT_POSITIONS, + "setJointStartPositions: collection.json robot actionMap.joints.size()=" << serRobot.actionMap.joints.size() + << " must be <= Episode::MAX_JOINT_POSITIONS=" << Episode::MAX_JOINT_POSITIONS); + if (config.useFixedRobotJointStartPositions) { + + for (int i = 0; i < serRobot.actionMap.joints.size(); i++) { + const auto& pair = serRobot.actionMap.joints[i]; + int jointIdx = pair.first; + const auto& setup = pair.second; + episode.robotStartJointPositions_[i] = serRobot.startJointPositions[jointIdx]; + } + + } else { + + constexpr float refAngle = float(Mn::Rad(Mn::Deg(180.f))); + // for joints 6-12 + constexpr std::array jointsRangeMin = { + -1.6056, + -1.22099996, + -refAngle, + -2.25099993, + -refAngle, + -2.16000009, + -refAngle + }; + + constexpr std::array jointsRangeMax = { + 1.6056, + 1.51800001, + refAngle, + 2.25099993, + refAngle, + 2.16000009, + refAngle + }; + + for (int i = 0; i < serRobot.actionMap.joints.size(); i++) { + const auto& pair = serRobot.actionMap.joints[i]; + int jointIdx = pair.first; + const auto& setup = pair.second; + auto& pos = episode.robotStartJointPositions_[i]; + + // hacky logic to ensure EE doesn't extend too far from base (where it + // might be in collision with the environment). + if (jointIdx == 9 || jointIdx == 11) { + constexpr float refAngle = (float)Mn::Rad(Mn::Deg(90.f)); + constexpr float pad = (float)Mn::Rad(Mn::Deg(20.f)); + + const float rangeMin = safeVectorGet(jointsRangeMin, i); + const float rangeMax = safeVectorGet(jointsRangeMax, i); + // near min or max + pos = (random.uniform_uint() % 2) + ? 
random.uniform_float(rangeMin, rangeMin + pad) + : random.uniform_float(rangeMax - pad, rangeMax); + } else { + // random angle over entire range + pos = random.uniform_float(safeVectorGet(jointsRangeMin, i), safeVectorGet(jointsRangeMax, i)); + } + } + + + } + + +} + + + void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, const serialize::Collection& collection, int stageFixedObjectIndex, - core::Random& random, const FreeObject& robotProxy) { + core::Random& random, core::Random& random2, const FreeObject& robotProxy) { Episode episode; episode.stageFixedObjIndex = stageFixedObjectIndex; episode.firstFreeObjectSpawnIndex_ = set.freeObjectSpawns_.size(); + // Use a separate rand generator for joint start positions. This is so that toggling + // random vs fixed joint start positions doesn't affect the rest of episode + // generation. + setFetchJointStartPositions(config, episode, collection, random2); + if (config.useFixedRobotStartPos) { episode.agentStartPos_ = Mn::Vector2(2.59f, 0.f); episode.agentStartYaw_ = config.useFixedRobotStartYaw @@ -104,8 +182,8 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, // good for area around staircase and living room constexpr float allowedSnapDown = 0.05f; - Mn::Range3D objectSpawnRange({-1.f, 0.2f, -0.5f}, {4.f, 2.f, 3.f}); // above the floor - Mn::Range3D robotSpawnRange({0.f, 0.05f, 0.5f}, {3.f, 0.05f, 2.f}); // just above the floor + Mn::Range3D objectSpawnRange({-2.4f, 0.2f, -1.f}, {4.3f, 2.f, 4.f}); // above the floor + Mn::Range3D robotSpawnRange({1.f, 0.05f, -0.5f}, {3.3f, 0.05f, 7.0f}); // just above the floor // good for white bookshelf for stage 5 // Mn::Range3D objectSpawnRange({0.33f, 0.15f, -0.4f}, {1.18f, 1.85f, -0.25f}); @@ -127,12 +205,12 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, columnGrid.getMaxX(), columnGrid.getMaxZ(), maxBytes, maxGridSpacing); - constexpr int maxFailedPlacements = 3; + constexpr int maxFailedPlacements = 1; 
PlacementHelper placementHelper(stageFixedObject.columnGridSet_, colGrid, collection, random, maxFailedPlacements); episode.targetObjIndex_ = 0; // arbitrary - int numSpawnAttempts = 2000; + int numSpawnAttempts = 4000; bool success = false; for (int i = 0; i < numSpawnAttempts; i++) { @@ -150,9 +228,18 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, FreeObjectSpawn spawn; const FreeObject* freeObjectPtr = nullptr; + Mn::Quaternion rotation; + float robotYaw = 0.f; if (isRobotPosAttempt) { spawn.freeObjIndex_ = -1; freeObjectPtr = &robotProxy; + + robotYaw = config.useFixedRobotStartYaw + ? -float(Mn::Rad(Mn::Deg(90.f))) + : random.uniform_float(-float(Mn::Rad(Mn::Deg(180.f))), float(Mn::Rad(Mn::Deg(180.f)))); + + rotation = yawToRotation(robotYaw); + } else { // for the goal position, use the free object correspnding to targetObjIdx spawn.freeObjIndex_ = isGoalPositionAttempt @@ -160,11 +247,11 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, : random.uniform_int(0, set.freeObjects_.size()); freeObjectPtr = &safeVectorGet(set.freeObjects_, spawn.freeObjIndex_); + spawn.startRotationIndex_ = random.uniform_int(0, freeObjectPtr->startRotations_.size()); + rotation = freeObjectPtr->startRotations_[spawn.startRotationIndex_]; } const auto& freeObject = *freeObjectPtr; - spawn.startRotationIndex_ = random.uniform_int(0, freeObject.startRotations_.size()); - Mn::Vector3 randomPos; int numAttempts = 0; while (true) { @@ -180,7 +267,6 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, BATCHED_SIM_ASSERT(numAttempts < 1000); } - const auto rotation = freeObject.startRotations_[spawn.startRotationIndex_]; Mn::Matrix4 mat = Mn::Matrix4::from( rotation.toMatrix(), randomPos); @@ -194,9 +280,7 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, if (isRobotPosAttempt) { episode.agentStartPos_ = Mn::Vector2(spawn.startPos_.x(), spawn.startPos_.z()); - episode.agentStartYaw_ = 
config.useFixedRobotStartYaw - ? -float(Mn::Rad(Mn::Deg(90.f))) - : random.uniform_float(-float(Mn::Rad(Mn::Deg(180.f))), float(Mn::Rad(Mn::Deg(180.f)))); + episode.agentStartYaw_ = robotYaw; } else if (isGoalPositionAttempt) { episode.targetObjGoalPos_ = spawn.startPos_; episode.targetObjGoalRotation_ = rotation; @@ -213,7 +297,8 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, } constexpr int numGoalPositions = 1; ESP_CHECK(success, "episode-generation failed; couldn't find " - << (numObjectSpawns + numGoalPositions) << " collision-free spawn locations"); + << (numObjectSpawns + numGoalPositions) << " collision-free spawn locations with " + << "stageFixedObjectIndex=" << stageFixedObjectIndex); set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); @@ -229,6 +314,7 @@ EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, int numEpisodes = config.numEpisodes; core::Random random(config.seed); + core::Random random2(config.seed); EpisodeSet set; @@ -284,7 +370,7 @@ EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, // distribute stages across episodes for (int i = 0; i < numEpisodes; i++) { int stageIndex = i * config.numStageVariations / numEpisodes; - addEpisode(config, set, collection, stageIndex, random, robotProxy); + addEpisode(config, set, collection, stageIndex, random, random2, robotProxy); } BATCHED_SIM_ASSERT(set.maxFreeObjects_ > 0); diff --git a/src/esp/batched_sim/EpisodeGenerator.h b/src/esp/batched_sim/EpisodeGenerator.h index 79c8b7e719..ee038512ea 100644 --- a/src/esp/batched_sim/EpisodeGenerator.h +++ b/src/esp/batched_sim/EpisodeGenerator.h @@ -19,7 +19,7 @@ struct EpisodeGeneratorConfig { int maxNontargetObjects = 32; bool useFixedRobotStartPos = true; bool useFixedRobotStartYaw = false; - float maxRobotReachXZ = -1.f; // unused + bool useFixedRobotJointStartPositions = true; ESP_SMART_POINTERS(EpisodeGeneratorConfig); }; diff 
--git a/src/esp/batched_sim/EpisodeSet.cpp b/src/esp/batched_sim/EpisodeSet.cpp index 59f58bdd14..58891940b2 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -337,9 +337,9 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, esp::io::readMember(obj, "firstFreeObjectSpawnIndex", val.firstFreeObjectSpawnIndex_); esp::io::readMember(obj, "agentStartPos", val.agentStartPos_); esp::io::readMember(obj, "agentStartYaw", val.agentStartYaw_); + esp::io::readMember(obj, "robotStartJointPositions", val.robotStartJointPositions_); esp::io::readMember(obj, "targetObjGoalPos", val.targetObjGoalPos_); esp::io::readMember(obj, "targetObjGoalRotation", val.targetObjGoalRotation_); - return true; } @@ -352,6 +352,7 @@ esp::io::JsonGenericValue toJsonValue(const Episode& x, esp::io::addMember(obj, "firstFreeObjectSpawnIndex", x.firstFreeObjectSpawnIndex_, allocator); esp::io::addMember(obj, "agentStartPos", x.agentStartPos_, allocator); esp::io::addMember(obj, "agentStartYaw", x.agentStartYaw_, allocator); + esp::io::addMember(obj, "robotStartJointPositions", x.robotStartJointPositions_, allocator); esp::io::addMember(obj, "targetObjGoalPos", x.targetObjGoalPos_, allocator); esp::io::addMember(obj, "targetObjGoalRotation", x.targetObjGoalRotation_, allocator); diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 934e0201ff..20688f6baf 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -62,6 +62,8 @@ class Episode { int32_t firstFreeObjectSpawnIndex_ = -1; // index into EpisodeSet::freeObjectSpawns_ Magnum::Vector2 agentStartPos_ = Magnum::Vector2(Magnum::Math::ZeroInit); float agentStartYaw_ = 0.f; // radians + static constexpr int MAX_JOINT_POSITIONS = 7; + std::array robotStartJointPositions_; // task-specific Magnum::Vector3 targetObjGoalPos_ = Magnum::Vector3(Magnum::Math::ZeroInit); diff --git a/src/esp/batched_sim/GlmUtils.cpp 
b/src/esp/batched_sim/GlmUtils.cpp index 2ceacd7862..0e902b23c4 100644 --- a/src/esp/batched_sim/GlmUtils.cpp +++ b/src/esp/batched_sim/GlmUtils.cpp @@ -174,6 +174,11 @@ bool batchSphereOrientedBoxContactTest(const glm::mat4x3** orientedBoxTransforms return resultBits; } +Mn::Quaternion yawToRotation(float yawRadians) { + constexpr Mn::Vector3 upAxis(0.f, 1.f, 0.f); + return Mn::Quaternion::rotation(Mn::Rad(yawRadians), upAxis); +} + template bool batchSphereOrientedBoxContactTest<64, true>(const glm::mat4x3** orientedBoxTransforms, const Magnum::Vector3** positions, float sphereRadiusSq, const Magnum::Range3D** boxRanges, int numTests); diff --git a/src/esp/batched_sim/GlmUtils.h b/src/esp/batched_sim/GlmUtils.h index e9d5be4ef4..14ca026e05 100644 --- a/src/esp/batched_sim/GlmUtils.h +++ b/src/esp/batched_sim/GlmUtils.h @@ -36,6 +36,7 @@ bool batchSphereOrientedBoxContactTest(const glm::mat4x3** orientedBoxTransforms const Magnum::Vector3** positions, float sphereRadiusSq, const Magnum::Range3D** boxRanges, int numTests); +Magnum::Quaternion yawToRotation(float yawRadians); /* diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index f5bb30be2d..2a0adc2cb2 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -47,7 +47,8 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("min_nontarget_objects", &EpisodeGeneratorConfig::minNontargetObjects, R"(Todo)") .def_readwrite("max_nontarget_objects", &EpisodeGeneratorConfig::maxNontargetObjects, R"(Todo)") .def_readwrite("used_fixed_robot_start_pos", &EpisodeGeneratorConfig::useFixedRobotStartPos, R"(Todo)") - .def_readwrite("use_fixed_robot_start_yaw", &EpisodeGeneratorConfig::useFixedRobotStartYaw, R"(Todo)"); + .def_readwrite("use_fixed_robot_start_yaw", &EpisodeGeneratorConfig::useFixedRobotStartYaw, R"(Todo)") + .def_readwrite("use_fixed_robot_joint_start_positions", 
&EpisodeGeneratorConfig::useFixedRobotJointStartPositions, R"(Todo)"); py::class_( m, "BatchedSimulatorConfig") diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index a19107ea6b..404f5dca12 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -207,13 +207,14 @@ TEST_F(BatchedSimulatorTest, basic) { EpisodeGeneratorConfig generatorConfig{ .numEpisodes = 100, - .seed = 4, + .seed = 0, .numStageVariations = 12, .numObjectVariations = 6, .minNontargetObjects = 27, .maxNontargetObjects = 32, .useFixedRobotStartPos = false, - .useFixedRobotStartYaw = false + .useFixedRobotStartYaw = false, + .useFixedRobotJointStartPositions = false }; @@ -228,9 +229,8 @@ TEST_F(BatchedSimulatorTest, basic) { .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, .doProceduralEpisodeSet = true, + //.episodeSetFilepath = "../data/generated.episode_set.json", .episodeGeneratorConfig = generatorConfig, - //.doProceduralEpisodeSet = false, - //.episodeSetFilepath = "generated.episode_set.json", .collectionFilepath = "../data/replicacad_composite.collection.json" }; BatchedSimulator bsim(config); From 4309a55d99ccfc233c68183984261f81737de1de Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Wed, 20 Apr 2022 06:41:20 -0400 Subject: [PATCH 29/53] add BatchedSimulatorConfig.enable_robot_collision and enable_held_object_collision --- src/esp/batched_sim/BatchedSimulator.cpp | 76 +++++++----------------- src/esp/batched_sim/BatchedSimulator.h | 2 + src/esp/batched_sim/PlacementHelper.cpp | 2 - src/esp/bindings/BatchedSimBindings.cpp | 2 + src/tests/BatchedSimTest.cpp | 8 ++- 5 files changed, 30 insertions(+), 60 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 8267966528..7cc2e71737 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -1127,61 +1127,6 @@ void Robot::updateFromSerializeCollection(const serialize::Collection& 
serialize numCollisionSpheres++; } } -#if 0 - std::vector armLinks = { - "shoulder_lift_link", - "shoulder_pan_link", - "upperarm_roll_link", - "elbow_flex_link", - "forearm_roll_link", - "wrist_flex_link", - "wrist_roll_link", - //"gripper_link" - }; - - constexpr int numArmLinkSpheres = 4; - constexpr float armLinkLength = 0.15f; - - std::vector bodyLinks = { - "torso_lift_link", - "base_link" - }; - - constexpr int numBodyLinkSpheres = 16; - constexpr float bodyRingRadius = 0.28f; - constexpr float bodyRingLocalHeight = 0.6f; - constexpr float bodyOffsetX = -0.2f; - - for (int i = -1; i < numLinks; i++) { - const auto nodeIndex = i + 1; // 0 is base - const auto& link = artObj->getLink(i); // -1 gets base link - - // temp hard-coded spheres - if (std::find(armLinks.begin(), armLinks.end(), link.linkName) != armLinks.end()) { - for (int j = 0; j < numArmLinkSpheres; j++) { - float offset = (float)j / numArmLinkSpheres * armLinkLength; - collisionSphereLocalOriginsByNode_[nodeIndex].push_back({offset, 0.f, 0.f}); - numCollisionSpheres++; - } - } - - else if (std::find(bodyLinks.begin(), bodyLinks.end(), link.linkName) != bodyLinks.end()) { - // add spheres in a circle - for (int j = 0; j < numBodyLinkSpheres; j++) { - Mn::Deg angle = (float)j / numBodyLinkSpheres * Mn::Deg(360); - collisionSphereLocalOriginsByNode_[nodeIndex].push_back( - Mn::Vector3(Mn::Math::sin(angle) + bodyOffsetX, bodyRingLocalHeight, Mn::Math::cos(angle)) * bodyRingRadius); - numCollisionSpheres++; - } - } - - if (link.linkName == "gripper_link") { - gripperLink_ = i; - gripperQueryOffset_ = Mn::Vector3(0.3f, 0.f, 0.f); - gripperQueryRadius_ = 0.1f; - } - } -#endif numCollisionSpheres_ = numCollisionSpheres; } @@ -1268,6 +1213,8 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { robot_ = Robot(serializeCollection_, legacySim_.get(), &sceneMapping_); + checkDisableRobotAndFreeObjectsCollision(); + int numLinks = robot_.artObj->getNumLinks(); int numNodes = numLinks 
+ 1; // include base @@ -2327,6 +2274,25 @@ void BatchedSimulator::reloadSerializeCollection() { updateFromSerializeCollection(episodeSet_, serializeCollection_); + checkDisableRobotAndFreeObjectsCollision(); +} + +void BatchedSimulator::checkDisableRobotAndFreeObjectsCollision() { + + if (!config_.enableRobotCollision) { + for (auto& nodeSpheres : robot_.collisionSpheresByNode_) { + nodeSpheres.clear(); + } + robot_.collisionSpheres_.clear(); + robot_.numCollisionSpheres_ = 0; + } + + if (!config_.enableHeldObjectCollision) { + for (auto& freeObject : episodeSet_.freeObjects_) { + freeObject.collisionSpheres_.clear(); + } + } + } diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 8896f32489..364462d51a 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -297,6 +297,8 @@ class BatchedSimulator { bool isPhysicsThreadActive() const; + void checkDisableRobotAndFreeObjectsCollision(); + BatchedSimulatorConfig config_; serialize::Collection serializeCollection_; bool enableDebugSensor_ = false; diff --git a/src/esp/batched_sim/PlacementHelper.cpp b/src/esp/batched_sim/PlacementHelper.cpp index 2969a2bd8f..2a19b17656 100644 --- a/src/esp/batched_sim/PlacementHelper.cpp +++ b/src/esp/batched_sim/PlacementHelper.cpp @@ -29,8 +29,6 @@ PlacementHelper::PlacementHelper(const ColumnGridSet& columnGridSet, bool PlacementHelper::place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject, const Mn::Vector3* fallbackPos) const { - BATCHED_SIM_ASSERT(freeObject.collisionSpheres_.size()); - int numFailedPlacements = 0; bool success = false; diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 2a0adc2cb2..2ebc6c91bc 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -63,6 +63,8 @@ void initBatchedSimBindings(py::module& m) { .def_readwrite("force_random_actions", 
&BatchedSimulatorConfig::forceRandomActions, R"(Todo)") .def_readwrite("do_async_physics_step", &BatchedSimulatorConfig::doAsyncPhysicsStep, R"(Todo)") .def_readwrite("num_physics_substeps", &BatchedSimulatorConfig::numSubsteps, R"(Todo)") + .def_readwrite("enable_robot_collision", &BatchedSimulatorConfig::enableRobotCollision, R"(Todo)") + .def_readwrite("enable_held_object_collision", &BatchedSimulatorConfig::enableHeldObjectCollision, R"(Todo)") .def_readwrite("do_procedural_episode_set", &BatchedSimulatorConfig::doProceduralEpisodeSet, R"(Todo)") .def_readwrite("episode_generator_config", &BatchedSimulatorConfig::episodeGeneratorConfig, R"(Todo)") .def_readwrite("episode_set_filepath", &BatchedSimulatorConfig::episodeSetFilepath, R"(Todo)"); diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index 404f5dca12..b0d71e774e 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -212,9 +212,9 @@ TEST_F(BatchedSimulatorTest, basic) { .numObjectVariations = 6, .minNontargetObjects = 27, .maxNontargetObjects = 32, - .useFixedRobotStartPos = false, - .useFixedRobotStartYaw = false, - .useFixedRobotJointStartPositions = false + .useFixedRobotStartPos = true, + .useFixedRobotStartYaw = true, + .useFixedRobotJointStartPositions = true }; @@ -228,6 +228,8 @@ TEST_F(BatchedSimulatorTest, basic) { .forceRandomActions = forceRandomActions, .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, + .enableRobotCollision = true, + .enableHeldObjectCollision = true, .doProceduralEpisodeSet = true, //.episodeSetFilepath = "../data/generated.episode_set.json", .episodeGeneratorConfig = generatorConfig, From ee6219f4df2fdba26b69a18fed85b5dcded46d61 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Wed, 20 Apr 2022 07:07:16 -0400 Subject: [PATCH 30/53] add debug rendering for object start and goal poses --- src/esp/batched_sim/BatchedSimulator.cpp | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git 
a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 7cc2e71737..3982cbd615 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -918,6 +918,41 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { } } + for (int b = 0; b < config_.numDebugEnvs; b++) { + if (isEnvResetting(b)) { + continue; + } + + const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const int freeObjectIndex = episode.targetObjIndex_; + const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& startRotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); + + constexpr float pad = 0.05; + addBoxDebugInstance("cube_pink_wireframe", b, + freeObjectSpawn.startPos_, + startRotation, + freeObject.aabb_, pad); + + addSphereDebugInstance( + "sphere_pink_wireframe", + b, freeObjectSpawn.startPos_, /*radius*/0.05f); + + addBoxDebugInstance("cube_blue_wireframe", b, + episode.targetObjGoalPos_, + episode.targetObjGoalRotation_, + freeObject.aabb_); + + addSphereDebugInstance( + "sphere_blue_wireframe", + b, episode.targetObjGoalPos_, /*radius*/0.05f); + + } + + // add debug ground lines #if 0 BATCHED_SIM_ASSERT(!config_.doPairedDebugEnv); From 903096d7c6118c09fd0e7ae65ba75e289dafeb71 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Thu, 21 Apr 2022 09:34:10 -0400 Subject: [PATCH 31/53] add get_spherical_coordinates --- src/esp/batched_sim/GlmUtils.cpp | 22 ++++++++++++++++++++++ src/esp/batched_sim/GlmUtils.h | 2 ++ src/esp/bindings/BatchedSimBindings.cpp | 4 ++++ 3 files changed, 28 insertions(+) diff --git a/src/esp/batched_sim/GlmUtils.cpp 
b/src/esp/batched_sim/GlmUtils.cpp index 0e902b23c4..2bf6a2e3cf 100644 --- a/src/esp/batched_sim/GlmUtils.cpp +++ b/src/esp/batched_sim/GlmUtils.cpp @@ -179,6 +179,28 @@ Mn::Quaternion yawToRotation(float yawRadians) { return Mn::Quaternion::rotation(Mn::Rad(yawRadians), upAxis); } + +// see also batched_env.py _get_spherical_coordinates_ref +Mn::Vector3 getSphericalCoordinates( + const Mn::Vector3& sourcePos, Mn::Vector3& goalPos, const Mn::Quaternion& sourceRotation) { + + const auto directionVec = goalPos - sourcePos; + // todo: check if we need inverse of sourceRotation + const auto directionVecAgent = sourceRotation.inverted().transformVector(directionVec); + + const auto phi = float(std::atan2(directionVecAgent.x(), -directionVecAgent.z())); + float length = directionVec.length(); + float theta; + constexpr float eps = 1e-6; + if (length > eps) { + theta = float(Mn::Math::acos(directionVecAgent.y() / length)); + } else { + theta = 0.f; + } + float rho = length; + return {rho, theta, phi}; +} + template bool batchSphereOrientedBoxContactTest<64, true>(const glm::mat4x3** orientedBoxTransforms, const Magnum::Vector3** positions, float sphereRadiusSq, const Magnum::Range3D** boxRanges, int numTests); diff --git a/src/esp/batched_sim/GlmUtils.h b/src/esp/batched_sim/GlmUtils.h index 14ca026e05..ce4e794a33 100644 --- a/src/esp/batched_sim/GlmUtils.h +++ b/src/esp/batched_sim/GlmUtils.h @@ -38,6 +38,8 @@ bool batchSphereOrientedBoxContactTest(const glm::mat4x3** orientedBoxTransforms Magnum::Quaternion yawToRotation(float yawRadians); +Magnum::Vector3 getSphericalCoordinates( + const Magnum::Vector3& sourcePos, Magnum::Vector3& goalPos, const Magnum::Quaternion& sourceRotation); /* */ diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 2ebc6c91bc..7391031671 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -8,6 +8,7 @@ #include #include 
"esp/batched_sim/BatchedSimulator.h" +#include "esp/batched_sim/GlmUtils.h" namespace py = pybind11; using py::literals::operator""_a; @@ -33,6 +34,9 @@ py::capsule getDepthMemory(BatchedSimulator& bsim, const uint32_t groupIdx) } // namespace void initBatchedSimBindings(py::module& m) { + + m.def("get_spherical_coordinates", &getSphericalCoordinates); + py::class_(m, "CameraSensorConfig") .def_readwrite("width", &CameraSensorConfig::width, R"(Todo)") .def_readwrite("height", &CameraSensorConfig::height, R"(Todo)"); From c109bd0c32149a9e2846f0502dd655e0efcab42f Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Fri, 29 Apr 2022 05:40:09 -0400 Subject: [PATCH 32/53] fix for getSphericalCoordinates NaNs --- src/esp/batched_sim/GlmUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esp/batched_sim/GlmUtils.cpp b/src/esp/batched_sim/GlmUtils.cpp index 2bf6a2e3cf..c5323ca6a2 100644 --- a/src/esp/batched_sim/GlmUtils.cpp +++ b/src/esp/batched_sim/GlmUtils.cpp @@ -193,7 +193,7 @@ Mn::Vector3 getSphericalCoordinates( float theta; constexpr float eps = 1e-6; if (length > eps) { - theta = float(Mn::Math::acos(directionVecAgent.y() / length)); + theta = float(Mn::Math::acos(Mn::Math::clamp(directionVecAgent.y() / length, -1.f, 1.f))); } else { theta = 0.f; } From 46cda12fa7f474210cce71f29659bb32c72f2535 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Fri, 13 May 2022 11:58:30 -0400 Subject: [PATCH 33/53] add static scenes support; formatting cleanup --- src/esp/batched_sim/BatchedSimulator.cpp | 1209 ++++++++++++---------- src/esp/batched_sim/BatchedSimulator.h | 142 ++- src/esp/batched_sim/BpsSceneMapping.cpp | 24 +- src/esp/batched_sim/ColumnGrid.cpp | 80 +- src/esp/batched_sim/EpisodeGenerator.cpp | 394 ++++--- src/esp/batched_sim/EpisodeGenerator.h | 15 +- src/esp/batched_sim/EpisodeSet.cpp | 366 ++++--- src/esp/batched_sim/EpisodeSet.h | 84 +- src/tests/BatchedSimTest.cpp | 445 ++++---- src/utils/viewer/viewer.cpp | 78 +- 10 files 
changed, 1703 insertions(+), 1134 deletions(-) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 3982cbd615..63e22181d7 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -3,8 +3,8 @@ // LICENSE file in the root directory of this source tree. #include "BatchedSimulator.h" -#include "esp/batched_sim/GlmUtils.h" #include "esp/batched_sim/BatchedSimAssert.h" +#include "esp/batched_sim/GlmUtils.h" #include "esp/batched_sim/PlacementHelper.h" #include "esp/batched_sim/ProfilingScope.h" @@ -13,9 +13,9 @@ // #include +#include #include #include -#include #include @@ -30,15 +30,13 @@ namespace { static Mn::Vector3 INVALID_VEC3 = Mn::Vector3(NAN, NAN, NAN); -static constexpr glm::mat4x3 identityGlMat_ = glm::mat4x3( - 1.f, 0.f, 0.f, - 0.f, 1.f, 0.f, - 0.f, 0.f, 1.f, - 0.f, 0.f, 0.f); +static constexpr glm::mat4x3 identityGlMat_ = + glm::mat4x3(1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f); float remapAction(float action, float stepMin, float stepMax) { // map (-1..+1) to (stepMin..stepMax) and clamp - float clampedNormalizedAction = (Mn::Math::clamp(action, -1.f, 1.f) + 1.f) * 0.5f; + float clampedNormalizedAction = + (Mn::Math::clamp(action, -1.f, 1.f) + 1.f) * 0.5f; return Mn::Math::lerp(stepMin, stepMax, clampedNormalizedAction); } @@ -76,7 +74,6 @@ std::string getMeshNameFromURDFVisualFilepath(const std::string& filepath) { } // namespace - RobotInstanceSet::RobotInstanceSet(Robot* robot, const BatchedSimulatorConfig* config, std::vector* envs, @@ -115,8 +112,9 @@ RobotInstanceSet::RobotInstanceSet(Robot* robot, auto instanceBlueprint = robot_->sceneMapping_->findInstanceBlueprint(nodeName); - instanceId = env.addInstance(instanceBlueprint.meshIdx_, - instanceBlueprint.mtrlIdx_, identityGlMat_); + instanceId = + env.addInstance(instanceBlueprint.meshIdx_, + instanceBlueprint.mtrlIdx_, identityGlMat_); } else { // these are camera links // sloppy: we 
should avoid having these items in the nodeInstanceIds_ @@ -164,11 +162,14 @@ void BatchedSimulator::reverseActionsForEnvironment(int b) { } } -void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateForPhysics, bool updateForRender, bool includeResettingEnvs) { - //ProfilingScope scope("BSim updateLinkTransforms"); +void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, + bool updateForPhysics, + bool updateForRender, + bool includeResettingEnvs) { + // ProfilingScope scope("BSim updateLinkTransforms"); BATCHED_SIM_ASSERT(updateForPhysics || updateForRender); - + int numLinks = robots_.robot_->artObj->getNumLinks(); int numNodes = numLinks + 1; int numEnvs = config_.numEnvs; @@ -185,12 +186,12 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF const Mn::Vector2* positions = &robots_.rollouts_->positions_[currRolloutSubstep * numEnvs]; const float* jointPositions = - &robots_.rollouts_->jointPositions_[currRolloutSubstep * numEnvs * numPosVars]; + &robots_.rollouts_ + ->jointPositions_[currRolloutSubstep * numEnvs * numPosVars]; Mn::Matrix4* rootTransforms = &robots_.rollouts_->rootTransforms_[currRolloutSubstep * numEnvs]; for (int b = 0; b < config_.numEnvs; b++) { - if (!includeResettingEnvs && isEnvResetting(b)) { posCount += numPosVars; continue; @@ -237,8 +238,8 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF } if (!updateForRender) { - if (robots_.robot_->collisionSpheresByNode_[nodeIndex].empty() - && robots_.robot_->gripperLink_ != i) { + if (robots_.robot_->collisionSpheresByNode_[nodeIndex].empty() && + robots_.robot_->gripperLink_ != i) { continue; } } @@ -259,11 +260,15 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF } if (updateForPhysics) { - // perf todo: loop through collision spheres (and look up link id), instead of this sparse way here - // compute collision sphere transforms - for (const auto& 
localSphereIdx : robots_.robot_->collisionSpheresByNode_[nodeIndex]) { - const auto& sphere = safeVectorGet(robots_.robot_->collisionSpheres_, localSphereIdx); - auto& worldSphere = robots_.collisionSphereWorldOrigins_[baseSphereIndex + localSphereIdx]; + // perf todo: loop through collision spheres (and look up link id), + // instead of this sparse way here compute collision sphere transforms + for (const auto& localSphereIdx : + robots_.robot_->collisionSpheresByNode_[nodeIndex]) { + const auto& sphere = safeVectorGet( + robots_.robot_->collisionSpheres_, localSphereIdx); + auto& worldSphere = + robots_.collisionSphereWorldOrigins_[baseSphereIndex + + localSphereIdx]; worldSphere = mat.transformPoint(sphere.origin); BATCHED_SIM_ASSERT(!std::isnan(worldSphere.x())); } @@ -299,37 +304,35 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, bool updateF } } #endif - } - void BatchedSimulator::updatePythonEnvironmentState() { - const int numEnvs = config_.numEnvs; const int numPosVars = robot_.numPosVars; - const Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_, currStorageStep_ * numEnvs); - const float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); - const float* jointPositions = - &safeVectorGet(rollouts_.jointPositions_, currStorageStep_ * numEnvs * numPosVars); + const Mn::Vector2* positions = + &safeVectorGet(rollouts_.positions_, currStorageStep_ * numEnvs); + const float* yaws = + &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); + const float* jointPositions = &safeVectorGet( + rollouts_.jointPositions_, currStorageStep_ * numEnvs * numPosVars); for (int b = 0; b < config_.numEnvs; b++) { - const auto& robotInstance = safeVectorGet(robots_.robotInstances_, b); auto& envState = safeVectorGet(pythonEnvStates_, b); -/* - // robot state - Magnum::Vector3 robot_pos; - Magnum::Quaternion robot_rotation; - std::vector robot_joint_positions; - Magnum::Vector3 ee_pos; - Magnum::Quaternion ee_rotation; - 
bool did_collide = false; + /* + // robot state + Magnum::Vector3 robot_pos; + Magnum::Quaternion robot_rotation; + std::vector robot_joint_positions; + Magnum::Vector3 ee_pos; + Magnum::Quaternion ee_rotation; + bool did_collide = false; - // other env state - std::vector obj_positions; - std::vector obj_rotations; - */ + // other env state + std::vector obj_positions; + std::vector obj_rotations; + */ envState.robot_pos = Mn::Vector3(positions[b].x(), 0.f, positions[b].y()); envState.robot_rotation = yawToRotation(yaws[b]); @@ -342,8 +345,8 @@ void BatchedSimulator::updatePythonEnvironmentState() { float normalizedPos = 0.f; if (robot_.jointPositionLimits.first[j] == -INFINITY) { - // if limits are +/- infinity, we assume this is an angular joint. We normalize - // it by wrapping into range (-2*PI, 2*PI) radians. + // if limits are +/- infinity, we assume this is an angular joint. We + // normalize it by wrapping into range (-2*PI, 2*PI) radians. BATCHED_SIM_ASSERT(robot_.jointPositionLimits.second[j] == INFINITY); normalizedPos = pos; while (normalizedPos > float(Mn::Rad(Mn::Deg(180.f)))) { @@ -354,15 +357,18 @@ void BatchedSimulator::updatePythonEnvironmentState() { } } else { BATCHED_SIM_ASSERT(robot_.jointPositionLimits.second[j] != INFINITY); - BATCHED_SIM_ASSERT(robot_.jointPositionLimits.first[j] < robot_.jointPositionLimits.second[j]); - normalizedPos = (pos - robot_.jointPositionLimits.first[j]) - / (robot_.jointPositionLimits.second[j] - robot_.jointPositionLimits.first[j]); + BATCHED_SIM_ASSERT(robot_.jointPositionLimits.first[j] < + robot_.jointPositionLimits.second[j]); + normalizedPos = (pos - robot_.jointPositionLimits.first[j]) / + (robot_.jointPositionLimits.second[j] - + robot_.jointPositionLimits.first[j]); } - safeVectorGet(envState.robot_joint_positions_normalized, j) = normalizedPos; + safeVectorGet(envState.robot_joint_positions_normalized, j) = + normalizedPos; } envState.ee_pos = robotInstance.cachedGripperLinkMat_.translation(); 
envState.ee_rotation = Mn::Quaternion::fromMatrix( - robotInstance.cachedGripperLinkMat_.rotation()); + robotInstance.cachedGripperLinkMat_.rotation()); // todo: do logical "or" over all substeps envState.did_collide = robots_.collisionResults_[b]; // envState.obj_positions // this is updated incrementally @@ -372,11 +378,11 @@ void BatchedSimulator::updatePythonEnvironmentState() { // did_drop updated incrementally } -#if 0 // reference code to visualize pythonEnvironmentState +#if 0 // reference code to visualize pythonEnvironmentState #ifdef ENABLE_DEBUG_INSTANCES const auto& aabb = Magnum::Range3D(-Mn::Vector3(0.05, 0.05, 0.05), Mn::Vector3(0.05, 0.05, 0.05)); for (int b = 0; b < config_.numEnvs; b++) { - + const auto& envState = safeVectorGet(pythonEnvStates_, b); // robot addBoxDebugInstance( @@ -399,33 +405,38 @@ void BatchedSimulator::updatePythonEnvironmentState() { #endif } -void BatchedSimulator::addSphereDebugInstance(const std::string& name, int b, const Magnum::Vector3& spherePos, float radius) { - auto mat = Mn::Matrix4::translation(spherePos) - * Mn::Matrix4::scaling(Mn::Vector3(radius, radius, radius)); +void BatchedSimulator::addSphereDebugInstance(const std::string& name, + int b, + const Magnum::Vector3& spherePos, + float radius) { + auto mat = Mn::Matrix4::translation(spherePos) * + Mn::Matrix4::scaling(Mn::Vector3(radius, radius, radius)); addDebugInstance(name, b, mat); } -void BatchedSimulator::addBoxDebugInstance(const std::string& name, int b, const Magnum::Vector3& pos, - const Magnum::Quaternion& rotation, const Magnum::Range3D& aabb, float pad, bool showBackfaces) { - +void BatchedSimulator::addBoxDebugInstance(const std::string& name, + int b, + const Magnum::Vector3& pos, + const Magnum::Quaternion& rotation, + const Magnum::Range3D& aabb, + float pad, + bool showBackfaces) { auto adjustedAabb = aabb.padded({pad, pad, pad}); Mn::Matrix4 mat = Mn::Matrix4::from(rotation.toMatrix(), pos); - Mn::Matrix4 localToBox = 
Mn::Matrix4::translation(adjustedAabb.center()) - * Mn::Matrix4::scaling(adjustedAabb.size() * 0.5 * (showBackfaces ? -1.f : 1.f)); + Mn::Matrix4 localToBox = Mn::Matrix4::translation(adjustedAabb.center()) * + Mn::Matrix4::scaling(adjustedAabb.size() * 0.5 * + (showBackfaces ? -1.f : 1.f)); auto adjustedMat = mat * localToBox; addDebugInstance(name, b, adjustedMat); } - - void BatchedSimulator::updateGripping() { - //ProfilingScope scope("BSim updateGripping"); + // ProfilingScope scope("BSim updateGripping"); const int numEnvs = config_.numEnvs; for (int b = 0; b < numEnvs; b++) { - if (isEnvResetting(b)) { continue; } @@ -440,65 +451,64 @@ void BatchedSimulator::updateGripping() { envState.did_grasp = false; if (b < config_.numDebugEnvs) { - const auto& gripperMat = robotInstance.cachedGripperLinkMat_; - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto gripperQueryWorldOrigin = gripperMat.transformPoint( - robot_.gripperQueryOffset_); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto gripperQueryWorldOrigin = + gripperMat.transformPoint(robot_.gripperQueryOffset_); const auto gripperQueryRadius = robot_.gripperQueryRadius_; - + // draw preview of grip attempt if (robotInstance.grippedFreeObjectIndex_ == -1) { - int grippedFreeObjectIndex = episodeInstance.colGrid_.contactTest( - gripperQueryWorldOrigin, gripperQueryRadius); - + gripperQueryWorldOrigin, gripperQueryRadius); + // highlight the object that could be gripped if we attempted if (grippedFreeObjectIndex != -1) { - const auto& obs = episodeInstance.colGrid_.getObstacle(grippedFreeObjectIndex); - addBoxDebugInstance( - "cube_blue_wireframe", - b, obs.pos, obs.invRotation.invertedNormalized(), *obs.aabb, 0.01); + const auto& obs = + episodeInstance.colGrid_.getObstacle(grippedFreeObjectIndex); + addBoxDebugInstance("cube_blue_wireframe", b, obs.pos, + obs.invRotation.invertedNormalized(), *obs.aabb, + 
0.01); } // show query sphere - addSphereDebugInstance( - "sphere_blue_wireframe", - b, gripperQueryWorldOrigin, gripperQueryRadius); + addSphereDebugInstance("sphere_blue_wireframe", b, + gripperQueryWorldOrigin, gripperQueryRadius); } // draw line down from gripper query or held object constexpr float w = 0.005f; - addBoxDebugInstance( - "cube_blue", - b, gripperQueryWorldOrigin, Mn::Quaternion(Mn::Math::IdentityInit), - Mn::Range3D({-w, -1.f, -w}, {w, -0.04, w})); + addBoxDebugInstance("cube_blue", b, gripperQueryWorldOrigin, + Mn::Quaternion(Mn::Math::IdentityInit), + Mn::Range3D({-w, -1.f, -w}, {w, -0.04, w})); } - // don't attempt grip if there was a collision (there's currently a bug where - // cachedGripperLinkMat_ is wrong) - // sloppy: not clear if this field is up-to-date or one-frame-stale + // don't attempt grip if there was a collision (there's currently a bug + // where cachedGripperLinkMat_ is wrong) sloppy: not clear if this field is + // up-to-date or one-frame-stale if (robots_.collisionResults_[b]) { continue; } if (robotInstance.doAttemptGrip_) { - BATCHED_SIM_ASSERT(robotInstance.grippedFreeObjectIndex_ == -1); const auto& gripperMat = robotInstance.cachedGripperLinkMat_; - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto gripperQueryWorldOrigin = gripperMat.transformPoint( - robot_.gripperQueryOffset_); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto gripperQueryWorldOrigin = + gripperMat.transformPoint(robot_.gripperQueryOffset_); const auto gripperQueryRadius = robot_.gripperQueryRadius_; int grippedFreeObjectIndex = episodeInstance.colGrid_.contactTest( - gripperQueryWorldOrigin, gripperQueryRadius); + gripperQueryWorldOrigin, gripperQueryRadius); if (grippedFreeObjectIndex != -1) { - // store copy of obstacle in case we need to reinsert on failed grab - const auto obsCopy = episodeInstance.colGrid_.getObstacle(grippedFreeObjectIndex); + 
const auto obsCopy = + episodeInstance.colGrid_.getObstacle(grippedFreeObjectIndex); // remove object before doing collision test - // perf todo: only do this after we pass the columnGridSet collision test below + // perf todo: only do this after we pass the columnGridSet collision + // test below removeFreeObjectFromCollisionGrid(b, grippedFreeObjectIndex); robotInstance.grippedFreeObjectIndex_ = grippedFreeObjectIndex; robotInstance.grippedFreeObjectPreviousPos_ = Cr::Containers::NullOpt; @@ -508,27 +518,35 @@ void BatchedSimulator::updateGripping() { { // sloppy: code copy-pasted from updateCollision() - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); - const auto& columnGridSet = stageFixedObject.columnGridSet_; + const auto& episode = safeVectorGet(episodeSet_.episodes_, + episodeInstance.episodeIndex_); + const auto& staticScene = safeVectorGet(episodeSet_.staticScenes_, + episode.staticSceneIndex_); + const auto& columnGridSet = staticScene.columnGridSet_; ColumnGridSource::QueryCacheValue grippedObjectQueryCache = 0; auto mat = getHeldObjectTransform(b); - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + robotInstance.grippedFreeObjectIndex_); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + + robotInstance.grippedFreeObjectIndex_); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, + freeObjectSpawn.freeObjIndex_); for (const auto& sphere : freeObject.collisionSpheres_) { const auto& sphereLocalOrigin = sphere.origin; auto sphereWorldOrigin = mat.transformPoint(sphereLocalOrigin); - bool thisSphereHit = columnGridSet.contactTest(sphere.radiusIdx, sphereWorldOrigin, 
&grippedObjectQueryCache); + bool thisSphereHit = columnGridSet.contactTest( + sphere.radiusIdx, sphereWorldOrigin, &grippedObjectQueryCache); // todo: proper debug-drawing of hits for held object spheres if (thisSphereHit) { hit = true; break; } - const auto sphereRadius = getCollisionRadius(serializeCollection_, sphere.radiusIdx); - int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(sphereWorldOrigin, sphereRadius); + const auto sphereRadius = + getCollisionRadius(serializeCollection_, sphere.radiusIdx); + int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest( + sphereWorldOrigin, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; break; @@ -543,12 +561,11 @@ void BatchedSimulator::updateGripping() { } else { // reinsert at old pose reinsertFreeObject(b, grippedFreeObjectIndex, obsCopy.pos, - obsCopy.invRotation.invertedNormalized()); + obsCopy.invRotation.invertedNormalized()); robotInstance.grippedFreeObjectIndex_ = -1; // undo assignment } robotInstance.doAttemptGrip_ = false; - } recentStats_.numGripAttempts_++; @@ -561,46 +578,57 @@ void BatchedSimulator::updateGripping() { //// find nearest up axis from list stored in freeObject //// rotate to match target up axis - // search for xy, starting at dropPos (reuse this for episode generation/settling) + // search for xy, starting at dropPos (reuse this for episode + // generation/settling) //// snap to stage (use all spheres?) 
//// compute collision spheres, test collision against free objects // add to col grid // search for historical robot pose that is coll free int freeObjectIndex = robotInstance.grippedFreeObjectIndex_; - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); - const auto& columnGridSet = stageFixedObject.columnGridSet_; + const auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& staticScene = + safeVectorGet(episodeSet_.staticScenes_, episode.staticSceneIndex_); + const auto& columnGridSet = staticScene.columnGridSet_; auto heldObjMat = getHeldObjectTransform(b); - - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, + freeObjectSpawn.freeObjIndex_); constexpr int maxFailedPlacements = 6; - PlacementHelper placementHelper(columnGridSet, episodeInstance.colGrid_, - serializeCollection_, random_, maxFailedPlacements); + PlacementHelper placementHelper(columnGridSet, episodeInstance.colGrid_, + serializeCollection_, random_, + maxFailedPlacements); BATCHED_SIM_ASSERT(robotInstance.grippedFreeObjectPreviousPos_); - const Mn::Vector3 fallbackPos = *robotInstance.grippedFreeObjectPreviousPos_; - // Provide a fallback pos so that the place() always succeeds. 
The fallback is - // the previous pos of this object (before grasping it). + const Mn::Vector3 fallbackPos = + *robotInstance.grippedFreeObjectPreviousPos_; + // Provide a fallback pos so that the place() always succeeds. The + // fallback is the previous pos of this object (before grasping it). float dropY = heldObjMat.translation().y(); - bool success = placementHelper.place(heldObjMat, freeObject, &fallbackPos); + bool success = + placementHelper.place(heldObjMat, freeObject, &fallbackPos); BATCHED_SIM_ASSERT(success); - const auto rotationQuat = Mn::Quaternion::fromMatrix(heldObjMat.rotation()); - reinsertFreeObject(b, freeObjectIndex, heldObjMat.translation(), rotationQuat); + const auto rotationQuat = + Mn::Quaternion::fromMatrix(heldObjMat.rotation()); + reinsertFreeObject(b, freeObjectIndex, heldObjMat.translation(), + rotationQuat); - // Reference code for handling a failed drop by removing object. This isn't a - // good solution because PythonEnvironmentState doesn't have a way to indicate - // that an object has been removed from the scene. + // Reference code for handling a failed drop by removing object. This + // isn't a good solution because PythonEnvironmentState doesn't have a way + // to indicate that an object has been removed from the scene. 
// { // if (!disableFreeObjectVisualsForEnv(config_, b)) { // // hack: remove object from scene visually - // const auto glMat = toGlmMat4x3(Mn::Matrix4::translation({0.f, -10000.f, 0.f})); - // int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); - // env.updateInstanceTransform(instanceId, glMat); + // const auto glMat = toGlmMat4x3(Mn::Matrix4::translation({0.f, + // -10000.f, 0.f})); int instanceId = getFreeObjectBpsInstanceId(b, + // freeObjectIndex); env.updateInstanceTransform(instanceId, glMat); // } // recentStats_.numFailedDrops_++; // } @@ -616,9 +644,8 @@ void BatchedSimulator::updateGripping() { } } - void BatchedSimulator::updateCollision() { - //ProfilingScope scope("BSim updateCollision"); + // ProfilingScope scope("BSim updateCollision"); const int numEnvs = config_.numEnvs; @@ -631,34 +658,40 @@ void BatchedSimulator::updateCollision() { std::vector heldObjectHits; std::vector freeObjectHits; if (config_.numDebugEnvs > 0) { - sphereHits.resize(robot_.numCollisionSpheres_ * config_.numDebugEnvs, false); + sphereHits.resize(robot_.numCollisionSpheres_ * config_.numDebugEnvs, + false); heldObjectHits.resize(config_.numDebugEnvs, false); freeObjectHits.resize(episodeSet_.maxFreeObjects_, false); } for (int b = 0; b < numEnvs; b++) { - if (isEnvResetting(b)) { continue; } - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); - const auto& columnGridSet = stageFixedObject.columnGridSet_; + const auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& staticScene = + safeVectorGet(episodeSet_.staticScenes_, episode.staticSceneIndex_); + const auto& columnGridSet = 
staticScene.columnGridSet_; const int baseSphereIndex = b * robot_.numCollisionSpheres_; const auto& robotInstance = robots_.robotInstances_[b]; bool hit = false; - // perf todo: if there was a hit last frame, cache that sphere and test it first here + // perf todo: if there was a hit last frame, cache that sphere and test it + // first here for (int s = 0; s < robot_.numCollisionSpheres_; s++) { const int sphereIndex = baseSphereIndex + s; - // perf todo: reconsider query cache usage; maybe one per link or one per robot + // perf todo: reconsider query cache usage; maybe one per link or one per + // robot auto& queryCache = robots_.collisionSphereQueryCaches_[sphereIndex]; const auto& spherePos = robots_.collisionSphereWorldOrigins_[sphereIndex]; const int radiusIdx = robot_.collisionSpheres_[s].radiusIdx; - bool thisSphereHit = columnGridSet.contactTest(radiusIdx, spherePos, &queryCache); + bool thisSphereHit = + columnGridSet.contactTest(radiusIdx, spherePos, &queryCache); if (thisSphereHit) { hit = true; @@ -672,30 +705,33 @@ void BatchedSimulator::updateCollision() { if (!hit && robotInstance.grippedFreeObjectIndex_ != -1) { ColumnGridSource::QueryCacheValue grippedObjectQueryCache = 0; auto mat = getHeldObjectTransform(b); - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + robotInstance.grippedFreeObjectIndex_); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + + robotInstance.grippedFreeObjectIndex_); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, + freeObjectSpawn.freeObjIndex_); for (const auto& sphere : freeObject.collisionSpheres_) { const auto& sphereLocalOrigin = sphere.origin; auto sphereWorldOrigin = mat.transformPoint(sphereLocalOrigin); - bool thisSphereHit = 
columnGridSet.contactTest(sphere.radiusIdx, sphereWorldOrigin, &grippedObjectQueryCache); + bool thisSphereHit = columnGridSet.contactTest( + sphere.radiusIdx, sphereWorldOrigin, &grippedObjectQueryCache); // todo: proper debug-drawing of hits for held object spheres if (thisSphereHit) { hit = true; - if (b < config_.numDebugEnvs) { + if (b < config_.numDebugEnvs) { heldObjectHits[b] = thisSphereHit; } break; } } - } + } robots_.collisionResults_[b] = hit; } // test against free objects for (int b = 0; b < numEnvs; b++) { - if (isEnvResetting(b)) { continue; } @@ -705,21 +741,28 @@ void BatchedSimulator::updateCollision() { continue; } - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const int baseSphereIndex = b * robot_.numCollisionSpheres_; bool hit = false; auto& env = bpsWrapper_->envs_[b]; const auto& robotInstance = robots_.robotInstances_[b]; - // perf todo: if there was a hit last frame, cache that sphere and test it first here + // perf todo: if there was a hit last frame, cache that sphere and test it + // first here for (int s = 0; s < robot_.numCollisionSpheres_; s++) { const int sphereIndex = baseSphereIndex + s; - const auto& spherePos = safeVectorGet(robots_.collisionSphereWorldOrigins_, sphereIndex); - const int radiusIdx = safeVectorGet(robot_.collisionSpheres_, s).radiusIdx; - const auto sphereRadius = getCollisionRadius(serializeCollection_, radiusIdx); - - int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(spherePos, sphereRadius); + const auto& spherePos = + safeVectorGet(robots_.collisionSphereWorldOrigins_, sphereIndex); + const int radiusIdx = + safeVectorGet(robot_.collisionSpheres_, s).radiusIdx; + const 
auto sphereRadius = + getCollisionRadius(serializeCollection_, radiusIdx); + + int hitFreeObjectIndex = + episodeInstance.colGrid_.contactTest(spherePos, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; if (b < config_.numDebugEnvs) { @@ -733,15 +776,20 @@ void BatchedSimulator::updateCollision() { if (!hit && robotInstance.grippedFreeObjectIndex_ != -1) { int grippedObjectQueryCache = 0; auto mat = getHeldObjectTransform(b); - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + robotInstance.grippedFreeObjectIndex_); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + + robotInstance.grippedFreeObjectIndex_); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, + freeObjectSpawn.freeObjIndex_); for (const auto& sphere : freeObject.collisionSpheres_) { const auto& sphereLocalOrigin = sphere.origin; auto sphereWorldOrigin = mat.transformPoint(sphereLocalOrigin); - const auto sphereRadius = getCollisionRadius(serializeCollection_, sphere.radiusIdx); + const auto sphereRadius = + getCollisionRadius(serializeCollection_, sphere.radiusIdx); - int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest(sphereWorldOrigin, sphereRadius); + int hitFreeObjectIndex = episodeInstance.colGrid_.contactTest( + sphereWorldOrigin, sphereRadius); if (hitFreeObjectIndex != -1) { hit = true; if (b < config_.numDebugEnvs) { @@ -749,24 +797,26 @@ void BatchedSimulator::updateCollision() { freeObjectHits[hitFreeObjectIndex] = true; } break; - } - } - } + } + } + } // render free objects to debug env, colored by collision result if (b < config_.numDebugEnvs) { - for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { + for (int freeObjectIndex = 0; + freeObjectIndex < episode.numFreeObjectSpawns_; 
freeObjectIndex++) { if (episodeInstance.colGrid_.isObstacleDisabled(freeObjectIndex)) { continue; } const auto& obs = episodeInstance.colGrid_.getObstacle(freeObjectIndex); addBoxDebugInstance( - freeObjectHits[freeObjectIndex] ? "cube_pink_wireframe" : "cube_orange_wireframe", - b, obs.pos, obs.invRotation.invertedNormalized(), *obs.aabb); + freeObjectHits[freeObjectIndex] ? "cube_pink_wireframe" + : "cube_orange_wireframe", + b, obs.pos, obs.invRotation.invertedNormalized(), *obs.aabb); - #if 0 +#if 0 // render collision spheres - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, + const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); Mn::Matrix4 mat = Mn::Matrix4::from( @@ -777,62 +827,77 @@ void BatchedSimulator::updateCollision() { auto sphereWorldOrigin = mat.transformPoint(sphereLocalOrigin); addSphereDebugInstance("sphere_blue_wireframe", b, sphereWorldOrigin, sphereRadius); } - #endif +#endif - freeObjectHits[freeObjectIndex] = false; // clear for next env + freeObjectHits[freeObjectIndex] = false; // clear for next env } - } + } robots_.collisionResults_[b] = robots_.collisionResults_[b] || hit; } for (int b = 0; b < numEnvs; b++) { - if (isEnvResetting(b)) { continue; } // render collision spheres for debug env, colored by collision result if (b < config_.numDebugEnvs) { - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const auto& robotInstance = robots_.robotInstances_[b]; const int baseSphereIndex = b * robot_.numCollisionSpheres_; for 
(int s = 0; s < robot_.numCollisionSpheres_; s++) { const int sphereIndex = baseSphereIndex + s; - const auto& spherePos = safeVectorGet(robots_.collisionSphereWorldOrigins_, sphereIndex); - const int radiusIdx = safeVectorGet(robot_.collisionSpheres_, s).radiusIdx; - const auto sphereRadius = getCollisionRadius(serializeCollection_, radiusIdx); - addSphereDebugInstance(sphereHits[baseSphereIndex + s] ? "sphere_pink_wireframe" : "sphere_green_wireframe", - b, spherePos, sphereRadius); - sphereHits[baseSphereIndex + s] = false; // clear for next env + const auto& spherePos = + safeVectorGet(robots_.collisionSphereWorldOrigins_, sphereIndex); + const int radiusIdx = + safeVectorGet(robot_.collisionSpheres_, s).radiusIdx; + const auto sphereRadius = + getCollisionRadius(serializeCollection_, radiusIdx); + addSphereDebugInstance(sphereHits[baseSphereIndex + s] + ? "sphere_pink_wireframe" + : "sphere_green_wireframe", + b, spherePos, sphereRadius); + sphereHits[baseSphereIndex + s] = false; // clear for next env } if (robotInstance.grippedFreeObjectIndex_ != -1) { int grippedObjectQueryCache = 0; auto mat = getHeldObjectTransform(b); - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + robotInstance.grippedFreeObjectIndex_); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + + robotInstance.grippedFreeObjectIndex_); + const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, + freeObjectSpawn.freeObjIndex_); for (const auto& sphere : freeObject.collisionSpheres_) { const auto& sphereLocalOrigin = sphere.origin; auto sphereWorldOrigin = mat.transformPoint(sphereLocalOrigin); - const auto sphereRadius = getCollisionRadius(serializeCollection_, sphere.radiusIdx); - addSphereDebugInstance( - heldObjectHits[b] ? 
"sphere_pink_wireframe" : "sphere_blue_wireframe", - b, sphereWorldOrigin, sphereRadius); + const auto sphereRadius = + getCollisionRadius(serializeCollection_, sphere.radiusIdx); + addSphereDebugInstance(heldObjectHits[b] ? "sphere_pink_wireframe" + : "sphere_blue_wireframe", + b, sphereWorldOrigin, sphereRadius); } } } if (robots_.collisionResults_[b]) { // todo: more robust handling of this, maybe at episode-load time - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); const auto& envState = safeVectorGet(pythonEnvStates_, b); - ESP_CHECK(envState.episode_step_idx > 0, "For episode " << episodeInstance.episodeIndex_ - << ", the robot is in collision on the first step of the episode. In your " - << "episode set, revise agentStartPos/agentStartYaw or rearrange the scene."); + ESP_CHECK(envState.episode_step_idx > 0, + "For episode " + << episodeInstance.episodeIndex_ + << ", the robot is in collision on the first step of the " + "episode. 
In your " + << "episode set, revise agentStartPos/agentStartYaw or " + "rearrange the scene."); recentStats_.numStepsInCollision_++; } } @@ -841,14 +906,13 @@ void BatchedSimulator::updateCollision() { } void BatchedSimulator::postCollisionUpdate() { - //ProfilingScope scope("BSim postCollisionUpdate"); + // ProfilingScope scope("BSim postCollisionUpdate"); const int numEnvs = config_.numEnvs; BATCHED_SIM_ASSERT(robots_.areCollisionResultsValid_); for (int b = 0; b < numEnvs; b++) { - if (isEnvResetting(b)) { continue; } @@ -862,7 +926,7 @@ void BatchedSimulator::postCollisionUpdate() { } void BatchedSimulator::updateRenderInstances(bool forceUpdate) { - //ProfilingScope scope("BSim updateRenderInstances"); + // ProfilingScope scope("BSim updateRenderInstances"); const int numEnvs = config_.numEnvs; int numLinks = robot_.artObj->getNumLinks(); @@ -873,15 +937,15 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { } for (int b = 0; b < numEnvs; b++) { - auto& robotInstance = robots_.robotInstances_[b]; auto& env = bpsWrapper_->envs_[b]; - // temp hack: we don't currently have bookeeping to know if a robot moved over - // several substeps, so we assume it did here. perf todo: fix this - bool didRobotMove = forceUpdate || - (!robots_.collisionResults_[b] || config_.numSubsteps > 1) || - isEnvResetting(b); + // temp hack: we don't currently have bookeeping to know if a robot moved + // over several substeps, so we assume it did here. 
perf todo: fix this + bool didRobotMove = + forceUpdate || + (!robots_.collisionResults_[b] || config_.numSubsteps > 1) || + isEnvResetting(b); // update robot links and camera if (didRobotMove) { @@ -898,23 +962,25 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { continue; } - const auto glMat = toGlmMat4x3(safeVectorGet(robots_.nodeNewTransforms_, instanceIndex)); + const auto glMat = toGlmMat4x3( + safeVectorGet(robots_.nodeNewTransforms_, instanceIndex)); env.updateInstanceTransform(instanceId, glMat); } } // update gripped free object if (didRobotMove && robotInstance.grippedFreeObjectIndex_ != -1) { - int freeObjectIndex = robotInstance.grippedFreeObjectIndex_; auto mat = getHeldObjectTransform(b); - glm::mat4x3 glMat = toGlmMat4x3(mat); + glm::mat4x3 glMat = toGlmMat4x3(mat); int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); env.updateInstanceTransform(instanceId, glMat); auto& envState = safeVectorGet(pythonEnvStates_, b); - safeVectorGet(envState.obj_positions, freeObjectIndex) = mat.translation(); - safeVectorGet(envState.obj_rotations, freeObjectIndex) = Mn::Quaternion::fromMatrix(mat.rotation()); + safeVectorGet(envState.obj_positions, freeObjectIndex) = + mat.translation(); + safeVectorGet(envState.obj_rotations, freeObjectIndex) = + Mn::Quaternion::fromMatrix(mat.rotation()); } } @@ -922,39 +988,36 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { if (isEnvResetting(b)) { continue; } - - const auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + + const auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const int freeObjectIndex = episode.targetObjIndex_; - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - 
episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); - const auto& startRotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = + safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& startRotation = safeVectorGet( + freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); constexpr float pad = 0.05; - addBoxDebugInstance("cube_pink_wireframe", b, - freeObjectSpawn.startPos_, - startRotation, - freeObject.aabb_, pad); - - addSphereDebugInstance( - "sphere_pink_wireframe", - b, freeObjectSpawn.startPos_, /*radius*/0.05f); - - addBoxDebugInstance("cube_blue_wireframe", b, - episode.targetObjGoalPos_, - episode.targetObjGoalRotation_, - freeObject.aabb_); - - addSphereDebugInstance( - "sphere_blue_wireframe", - b, episode.targetObjGoalPos_, /*radius*/0.05f); - - } - - - // add debug ground lines - #if 0 + addBoxDebugInstance("cube_pink_wireframe", b, freeObjectSpawn.startPos_, + startRotation, freeObject.aabb_, pad); + + addSphereDebugInstance("sphere_pink_wireframe", b, + freeObjectSpawn.startPos_, /*radius*/ 0.05f); + + addBoxDebugInstance("cube_blue_wireframe", b, episode.targetObjGoalPos_, + episode.targetObjGoalRotation_, freeObject.aabb_); + + addSphereDebugInstance("sphere_blue_wireframe", b, + episode.targetObjGoalPos_, /*radius*/ 0.05f); + } + +// add debug ground lines +#if 0 BATCHED_SIM_ASSERT(!config_.doPairedDebugEnv); { for (int b = 0; b < numEnvs; b++) { @@ -969,7 +1032,7 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { int globalFreeObjectIndex = b * episodeSet_.maxFreeObjects_ + freeObjectIndex; - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, + const auto& 
freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); @@ -986,9 +1049,9 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { } } } - #endif +#endif - #if 0 +#if 0 // temp debug if (config_.numDebugEnvs > 0) { @@ -1004,47 +1067,55 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { for (int b = 0; b < config_.numDebugEnvs; b++) { - addBoxDebugInstance("cube_pink_wireframe", b, + addBoxDebugInstance("cube_pink_wireframe", b, Mn::Vector3(positions[b].x(), 0.f, positions[b].y()), yawToRotation(yaws[b]), aabb); } } - #endif +#endif } - Mn::Matrix4 BatchedSimulator::getHeldObjectTransform(int b) const { - auto& robotInstance = robots_.robotInstances_[b]; BATCHED_SIM_ASSERT(robotInstance.grippedFreeObjectIndex_ != -1); - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const int freeObjectIndex = robotInstance.grippedFreeObjectIndex_; - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = + safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); // todo: better decide how to orient gripped object const int startRotationIndex = freeObject.heldRotationIndex_; - const auto& rotation = safeVectorGet(freeObject.startRotations_, 
startRotationIndex); + const auto& rotation = + safeVectorGet(freeObject.startRotations_, startRotationIndex); - Mn::Matrix4 linkToGripper = Mn::Matrix4::translation(robot_.gripperQueryOffset_); - Mn::Matrix4 toOrientedObject = Mn::Matrix4::from(rotation.toMatrix(), Mn::Vector3(Mn::Math::ZeroInit)); + Mn::Matrix4 linkToGripper = + Mn::Matrix4::translation(robot_.gripperQueryOffset_); + Mn::Matrix4 toOrientedObject = + Mn::Matrix4::from(rotation.toMatrix(), Mn::Vector3(Mn::Math::ZeroInit)); // todo: offset to a few centimeters from the edge, instead of the center - Mn::Matrix4 toObjectCenter = Mn::Matrix4::translation(-freeObject.aabb_.center()); + Mn::Matrix4 toObjectCenter = + Mn::Matrix4::translation(-freeObject.aabb_.center()); - auto mat = robotInstance.cachedGripperLinkMat_ * linkToGripper * toOrientedObject * toObjectCenter; + auto mat = robotInstance.cachedGripperLinkMat_ * linkToGripper * + toOrientedObject * toObjectCenter; return mat; } - -Robot::Robot(const serialize::Collection& serializeCollection, esp::sim::Simulator* sim, BpsSceneMapping* sceneMapping) { - - ESP_CHECK(!serializeCollection.robots.empty(), "no robot found in collection.json"); +Robot::Robot(const serialize::Collection& serializeCollection, + esp::sim::Simulator* sim, + BpsSceneMapping* sceneMapping) { + ESP_CHECK(!serializeCollection.robots.empty(), + "no robot found in collection.json"); const serialize::Robot& serializeRobot = serializeCollection.robots.front(); const std::string filepath = serializeRobot.urdfFilepath; @@ -1075,7 +1146,7 @@ Robot::Robot(const serialize::Collection& serializeCollection, esp::sim::Simulat collisionSpheresByNode_.resize(numNodes); for (int i = -1; i < numLinks; i++) { - const auto nodeIndex = i + 1; // 0 is base + const auto nodeIndex = i + 1; // 0 is base BATCHED_SIM_ASSERT(i == -1 || i == linkIds[i]); const auto& link = artObj->getLink(i); // -1 gets base link linkIndexByName_[link.linkName] = i; @@ -1100,35 +1171,49 @@ Robot::Robot(const 
serialize::Collection& serializeCollection, esp::sim::Simulat updateFromSerializeCollection(serializeCollection); } -void Robot::updateFromSerializeCollection(const serialize::Collection& serializeCollection) { - - ESP_CHECK(serializeCollection.robots.size() == 1, "updateFromSerializeCollection: expected 1 robot"); +void Robot::updateFromSerializeCollection( + const serialize::Collection& serializeCollection) { + ESP_CHECK(serializeCollection.robots.size() == 1, + "updateFromSerializeCollection: expected 1 robot"); const auto& serializeRobot = serializeCollection.robots.front(); - ESP_CHECK(linkIndexByName_.count(serializeRobot.gripper.attachLinkName), - "updateFromSerializeCollection: gripper attach link " << serializeRobot.gripper.attachLinkName - << " not found in robot URDF"); + ESP_CHECK(linkIndexByName_.count(serializeRobot.gripper.attachLinkName), + "updateFromSerializeCollection: gripper attach link " + << serializeRobot.gripper.attachLinkName + << " not found in robot URDF"); ESP_CHECK(serializeRobot.startJointPositions.size() == numPosVars, - "updateFromSerializeCollection: expected " << numPosVars << " joint positions"); + "updateFromSerializeCollection: expected " << numPosVars + << " joint positions"); const auto& serActionMap = serializeRobot.actionMap; - ESP_CHECK(serActionMap.numActions >= 3, "updateFromSerializeCollection: expected numActions >= 3"); - ESP_CHECK(serActionMap.graspRelease.thresholds.size() == 2, - "updateFromSerializeCollection: for graspRelease, expected 2 thresholds"); - ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && serActionMap.baseMove.actionIdx < serActionMap.numActions, - "updateFromSerializeCollection: invalid baseMove actionIdx=" << serActionMap.baseMove.actionIdx); - ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && serActionMap.baseMove.actionIdx < serActionMap.numActions, - "updateFromSerializeCollection: invalid baseRotate actionIdx=" << serActionMap.baseRotate.actionIdx); - ESP_CHECK(serActionMap.baseMove.actionIdx 
>= 0 && serActionMap.baseMove.actionIdx < serActionMap.numActions, - "updateFromSerializeCollection: invalid graspRelease actionIdx=" << serActionMap.graspRelease.actionIdx); + ESP_CHECK(serActionMap.numActions >= 3, + "updateFromSerializeCollection: expected numActions >= 3"); + ESP_CHECK( + serActionMap.graspRelease.thresholds.size() == 2, + "updateFromSerializeCollection: for graspRelease, expected 2 thresholds"); + ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && + serActionMap.baseMove.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid baseMove actionIdx=" + << serActionMap.baseMove.actionIdx); + ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && + serActionMap.baseMove.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid baseRotate actionIdx=" + << serActionMap.baseRotate.actionIdx); + ESP_CHECK(serActionMap.baseMove.actionIdx >= 0 && + serActionMap.baseMove.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid graspRelease actionIdx=" + << serActionMap.graspRelease.actionIdx); for (const auto& pair : serActionMap.joints) { ESP_CHECK(pair.first >= 0 && pair.first < numPosVars, - "updateFromSerializeCollection: invalid actionMap joint index=" << pair.first - << " for robot with " << numPosVars << " degrees of freedom"); - ESP_CHECK(pair.second.actionIdx >= 0 && pair.second.actionIdx < serActionMap.numActions, - "updateFromSerializeCollection: invalid joint actionIdx=" << pair.second.actionIdx); + "updateFromSerializeCollection: invalid actionMap joint index=" + << pair.first << " for robot with " << numPosVars + << " degrees of freedom"); + ESP_CHECK(pair.second.actionIdx >= 0 && + pair.second.actionIdx < serActionMap.numActions, + "updateFromSerializeCollection: invalid joint actionIdx=" + << pair.second.actionIdx); } gripperLink_ = linkIndexByName_.at(serializeRobot.gripper.attachLinkName); @@ -1147,9 +1232,9 @@ void Robot::updateFromSerializeCollection(const 
serialize::Collection& serialize collisionSpheres_.clear(); for (const auto& serLink : serializeRobot.links) { - - ESP_CHECK(linkIndexByName_.count(serLink.linkName), "link " << serLink.linkName - << " from collection.json not found in robot URDF"); + ESP_CHECK(linkIndexByName_.count(serLink.linkName), + "link " << serLink.linkName + << " from collection.json not found in robot URDF"); int linkIndex = linkIndexByName_[serLink.linkName]; int nodeIndex = linkIndex + 1; @@ -1157,7 +1242,8 @@ void Robot::updateFromSerializeCollection(const serialize::Collection& serialize for (const auto& serSphere : serLink.collisionSpheres) { nodeSpheres.push_back(collisionSpheres_.size()); - int radiusIdx = getCollisionRadiusIndex(serializeCollection, serSphere.radius); + int radiusIdx = + getCollisionRadiusIndex(serializeCollection, serSphere.radius); collisionSpheres_.push_back({serSphere.origin, radiusIdx}); numCollisionSpheres++; } @@ -1166,27 +1252,31 @@ void Robot::updateFromSerializeCollection(const serialize::Collection& serialize numCollisionSpheres_ = numCollisionSpheres; } -BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeColor, - const CameraSensorConfig& sensor) { - ESP_CHECK(sensor.width > 0 && sensor.height > 0, "BpsWrapper: invalid sensor width=" - << sensor.width << " or height=" << sensor.height); +BpsWrapper::BpsWrapper(int gpuId, + int numEnvs, + bool includeDepth, + bool includeColor, + const CameraSensorConfig& sensor, + const std::string& sceneFilepath) { + ESP_CHECK(sensor.width > 0 && sensor.height > 0, + "BpsWrapper: invalid sensor width=" << sensor.width << " or height=" + << sensor.height); BATCHED_SIM_ASSERT(gpuId != -1); - bps3D::RenderMode mode {}; + bps3D::RenderMode mode{}; if (includeDepth) { - mode |= bps3D::RenderMode::Depth; + mode |= bps3D::RenderMode::Depth; } if (includeColor) { - mode |= bps3D::RenderMode::UnlitRGB; + mode |= bps3D::RenderMode::UnlitRGB; } renderer_ = std::make_unique(bps3D::RenderConfig{ - gpuId, 
1, uint32_t(numEnvs), (unsigned int)sensor.width, (unsigned int)sensor.height, false, mode}); + gpuId, 1, uint32_t(numEnvs), (unsigned int)sensor.width, + (unsigned int)sensor.height, false, mode}); loader_ = std::make_unique(renderer_->makeLoader()); - const std::string filepath = - "../data/bps_data/replicacad_composite/replicacad_composite.bps"; - scene_ = loader_->loadScene(filepath); + scene_ = loader_->loadScene(sceneFilepath); const Mn::Vector3 camPos(Mn::Math::ZeroInit); const Mn::Quaternion camRot(Mn::Math::IdentityInit); @@ -1197,8 +1287,9 @@ BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeCo glm::mat4 view = world_to_camera; constexpr float near = 0.01; constexpr float far = 1000.f; - constexpr float hfov = 60.f; // arbitrary; will be reset later - auto env = renderer_->makeEnvironment(scene_, view, hfov, aspectRatio, near, far); + constexpr float hfov = 60.f; // arbitrary; will be reset later + auto env = + renderer_->makeEnvironment(scene_, view, hfov, aspectRatio, near, far); envs_.emplace_back(std::move(env)); } } @@ -1212,22 +1303,29 @@ BpsWrapper::~BpsWrapper() { } BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { - - ESP_CHECK(config.numDebugEnvs <= config.numEnvs, "BatchedSimulator: numDebugEnvs must be <= numEnvs"); + ESP_CHECK(config.numDebugEnvs <= config.numEnvs, + "BatchedSimulator: numDebugEnvs must be <= numEnvs"); config_ = config; const int numEnvs = config_.numEnvs; - sceneMapping_ = BpsSceneMapping::loadFromFile( - "../data/bps_data/replicacad_composite/replicacad_composite.bps.mapping.json"); + const std::string sceneMappingFilepath = + config_.renderAssetCompositeFilepath + ".mapping.json"; + sceneMapping_ = BpsSceneMapping::loadFromFile(sceneMappingFilepath); - serializeCollection_ = serialize::Collection::loadFromFile(config_.collectionFilepath); + serializeCollection_ = + serialize::Collection::loadFromFile(config_.collectionFilepath); - bpsWrapper_ = 
std::make_unique(config_.gpuId, config_.numEnvs, - config_.includeDepth, config_.includeColor, config_.sensor0); + bpsWrapper_ = std::make_unique( + config_.gpuId, config_.numEnvs, config_.includeDepth, + config_.includeColor, config_.sensor0, + config_.renderAssetCompositeFilepath); if (config_.numDebugEnvs > 0) { - debugBpsWrapper_ = std::make_unique(config_.gpuId, config_.numDebugEnvs, - /*includeDepth*/false, /*includeColor*/true, config_.debugSensor); + // perf todo: separate renderAssetsComposite for debug models + debugBpsWrapper_ = std::make_unique( + config_.gpuId, config_.numDebugEnvs, + /*includeDepth*/ false, /*includeColor*/ true, config_.debugSensor, + config_.renderAssetCompositeFilepath); } if (config_.numDebugEnvs > 0) { @@ -1253,15 +1351,16 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { int numLinks = robot_.artObj->getNumLinks(); int numNodes = numLinks + 1; // include base - robots_ = RobotInstanceSet(&robot_, &config_, &bpsWrapper_->envs_, &rollouts_); + robots_ = + RobotInstanceSet(&robot_, &config_, &bpsWrapper_->envs_, &rollouts_); actionDim_ = getNumActions(); - int batchNumActions = (actionDim_) * numEnvs; + int batchNumActions = (actionDim_)*numEnvs; actions_.resize(batchNumActions, 0.f); resets_.resize(numEnvs, -1); - maxStorageSteps_ = 3; // todo: get rid of storage steps nonsense + maxStorageSteps_ = 3; // todo: get rid of storage steps nonsense rollouts_ = RolloutRecord(maxStorageSteps_, numEnvs, robot_.numPosVars, numNodes); @@ -1279,11 +1378,12 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { std::string cameraAttachLinkName = "torso_lift_link"; Mn::Vector3 camPos = {-0.536559, 1.16173, 0.568379}; Mn::Quaternion camRot = {{-0.26714, -0.541109, -0.186449}, 0.775289}; - setCamera("sensor0", camPos, camRot, /*hfov*/60.f, cameraAttachLinkName); + setCamera("sensor0", camPos, camRot, /*hfov*/ 60.f, cameraAttachLinkName); } if (config_.doAsyncPhysicsStep) { - physicsThread_ = 
std::thread(&BatchedSimulator::physicsThreadFunc, this, 0, config_.numEnvs); + physicsThread_ = std::thread(&BatchedSimulator::physicsThreadFunc, this, 0, + config_.numEnvs); } } @@ -1302,7 +1402,6 @@ BatchedSimulator::~BatchedSimulator() { close(); } - int BatchedSimulator::getNumEpisodes() const { return episodeSet_.episodes_.size(); } @@ -1325,7 +1424,6 @@ bps3D::Environment& BatchedSimulator::getDebugBpsEnvironment(int envIndex) { // one-time init for envs void BatchedSimulator::initEpisodeInstances() { - const int numEnvs = config_.numEnvs; // we expect all the episode instances to get default-constructed in here @@ -1334,26 +1432,29 @@ void BatchedSimulator::initEpisodeInstances() { episodeInstanceSet_.episodeInstanceByEnv_.resize(numEnvs); for (int b = 0; b < numEnvs; b++) { - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); constexpr int maxBytes = 1000 * 1024; - // this is tuned assuming a building-scale simulation with household-object-scale obstacles + // this is tuned assuming a building-scale simulation with + // household-object-scale obstacles constexpr float maxGridSpacing = 0.5f; - episodeInstance.colGrid_ = CollisionBroadphaseGrid(getMaxCollisionRadius(serializeCollection_), - episodeSet_.allEpisodesAABB_.min().x(), episodeSet_.allEpisodesAABB_.min().z(), - episodeSet_.allEpisodesAABB_.max().x(), episodeSet_.allEpisodesAABB_.max().z(), - maxBytes, maxGridSpacing); + episodeInstance.colGrid_ = CollisionBroadphaseGrid( + getMaxCollisionRadius(serializeCollection_), + episodeSet_.allEpisodesAABB_.min().x(), + episodeSet_.allEpisodesAABB_.min().z(), + episodeSet_.allEpisodesAABB_.max().x(), + episodeSet_.allEpisodesAABB_.max().z(), maxBytes, maxGridSpacing); } } - void BatchedSimulator::clearEpisodeInstance(int b) { - auto& env = getBpsEnvironment(b); - auto& episodeInstance = 
safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); if (episodeInstance.episodeIndex_ == -1) { - return; // nothing to do + return; // nothing to do } if (b < config_.numDebugEnvs) { @@ -1364,56 +1465,70 @@ void BatchedSimulator::clearEpisodeInstance(int b) { episodeInstance.persistentDebugInstanceIds_.clear(); } - // Remove free object bps instances **in reverse order**. This is so bps3D will later - // allocate us new instance IDs (from its free list) in ascending order (see assert - // in spawnFreeObject). - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - for (int freeObjectIndex = episode.numFreeObjectSpawns_ - 1; freeObjectIndex >= 0; freeObjectIndex--) { + // Remove free object bps instances **in reverse order**. This is so bps3D + // will later allocate us new instance IDs (from its free list) in ascending + // order (see assert in spawnFreeObject). 
+ const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + for (int freeObjectIndex = episode.numFreeObjectSpawns_ - 1; + freeObjectIndex >= 0; freeObjectIndex--) { int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); env.deleteInstance(instanceId); } - + episodeInstance.firstFreeObjectInstanceId_ = -1; - // remove stage bps instance - env.deleteInstance(episodeInstance.stageFixedObjectInstanceId_); - + // remove staticScene bps instances + for (const auto id : episodeInstance.staticSceneInstanceIds_) { + env.deleteInstance(id); + } + episodeInstance.staticSceneInstanceIds_.clear(); + // remove all free objects from collision grid episodeInstance.colGrid_.removeAllObstacles(); } void BatchedSimulator::resetEpisodeInstance(int b) { - //ProfilingScope scope("BSim resetEpisodeInstance"); - ESP_CHECK(resets_[b] >= 0 && resets_[b] < getNumEpisodes(), "resetEpisodeInstance: episode_idx=" - << resets_[b] << " is invalid for getNumEpisodes()=" << getNumEpisodes()); + // ProfilingScope scope("BSim resetEpisodeInstance"); + ESP_CHECK(resets_[b] >= 0 && resets_[b] < getNumEpisodes(), + "resetEpisodeInstance: episode_idx=" + << resets_[b] + << " is invalid for getNumEpisodes()=" << getNumEpisodes()); auto& env = getBpsEnvironment(b); clearEpisodeInstance(b); - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); BATCHED_SIM_ASSERT(episodeInstance.colGrid_.getNumObstacleInstances() == 0); BATCHED_SIM_ASSERT(isEnvResetting(b)); episodeInstance.episodeIndex_ = resets_[b]; - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - // add stage - { - const auto& stageBlueprint = - safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex).instanceBlueprint_; - 
episodeInstance.stageFixedObjectInstanceId_ = env.addInstance( - stageBlueprint.meshIdx_, stageBlueprint.mtrlIdx_, identityGlMat_); + // add static scene render asset instances + const auto& staticScene = + safeVectorGet(episodeSet_.staticScenes_, episode.staticSceneIndex_); + for (const auto& instance : staticScene.renderAssetInstances_) { + const auto& renderAsset = + safeVectorGet(episodeSet_.renderAssets_, instance.renderAssetIndex_); + const auto& blueprint = renderAsset.instanceBlueprint_; + + episodeInstance.staticSceneInstanceIds_.push_back(env.addInstance( + blueprint.meshIdx_, blueprint.mtrlIdx_, instance.glMat_)); } auto& envState = safeVectorGet(pythonEnvStates_, b); - envState.obj_positions.resize(episode.numFreeObjectSpawns_); - envState.obj_rotations.resize(episode.numFreeObjectSpawns_); + envState.obj_positions.resize(episode.numFreeObjectSpawns_); + envState.obj_rotations.resize(episode.numFreeObjectSpawns_); - for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; freeObjectIndex++) { - spawnFreeObject(b, freeObjectIndex, /*reinsert*/false); + for (int freeObjectIndex = 0; freeObjectIndex < episode.numFreeObjectSpawns_; + freeObjectIndex++) { + spawnFreeObject(b, freeObjectIndex, /*reinsert*/ false); } // reset robot (note that robot's bps instances are not re-created here) @@ -1423,9 +1538,10 @@ void BatchedSimulator::resetEpisodeInstance(int b) { const int numEnvs = bpsWrapper_->envs_.size(); const int numPosVars = robot_.numPosVars; float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); - Mn::Vector2* positions = &safeVectorGet(rollouts_.positions_,currStorageStep_ * numEnvs); - float* jointPositions = - &safeVectorGet(rollouts_.jointPositions_,currStorageStep_ * numEnvs * numPosVars); + Mn::Vector2* positions = + &safeVectorGet(rollouts_.positions_, currStorageStep_ * numEnvs); + float* jointPositions = &safeVectorGet( + rollouts_.jointPositions_, currStorageStep_ * numEnvs * numPosVars); positions[b] 
= episode.agentStartPos_; yaws[b] = episode.agentStartYaw_; @@ -1454,7 +1570,8 @@ void BatchedSimulator::resetEpisodeInstance(int b) { // 9 elbow, + is down // 10 elbow twist, + is twst to right // 11 wrist, + is down - //jointPositions[b * robot_.numPosVars + 9] = float(Mn::Rad(Mn::Deg(90.f))); + // jointPositions[b * robot_.numPosVars + 9] = + // float(Mn::Rad(Mn::Deg(90.f))); // assume robot is not in collision on reset robots_.collisionResults_[b] = false; @@ -1469,10 +1586,9 @@ void BatchedSimulator::resetEpisodeInstance(int b) { // int episode_idx = -1; // 0..len(episodes)-1 // int episode_step_idx = -1; // will be zero if this env was just reset // int target_obj_idx = -1; // see obj_positions, obj_rotations - // // all positions/rotations are relative to the mesh, i.e. some arbitrary coordinate frame - // Magnum::Vector3 target_obj_start_pos; - // Magnum::Quaternion target_obj_start_rotation; - // Magnum::Vector3 robot_start_pos; + // // all positions/rotations are relative to the mesh, i.e. 
some arbitrary + // coordinate frame Magnum::Vector3 target_obj_start_pos; Magnum::Quaternion + // target_obj_start_rotation; Magnum::Vector3 robot_start_pos; // Magnum::Quaternion robot_start_rotation; // Magnum::Vector3 goal_pos; // Magnum::Quaternion goal_rotation; @@ -1486,10 +1602,13 @@ void BatchedSimulator::resetEpisodeInstance(int b) { envState.robot_start_rotation = yawToRotation(episode.agentStartYaw_); const int freeObjectIndex = episode.targetObjIndex_; - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); - const auto& rotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = + safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + const auto& rotation = safeVectorGet(freeObject.startRotations_, + freeObjectSpawn.startRotationIndex_); envState.target_obj_start_pos = freeObjectSpawn.startPos_; envState.target_obj_start_rotation = rotation; @@ -1559,7 +1678,7 @@ void BatchedSimulator::resetEpisodeInstance(int b) { int freeObjectIndex = episodeInstance.movedFreeObjectIndexes_[i]; // disabled objects including currently-held and removed-from-scene if (!episodeInstance.colGrid_.isObstacleDisabled(freeObjectIndex)) { - removeFreeObjectFromCollisionGrid(b, freeObjectIndex); + removeFreeObjectFromCollisionGrid(b, freeObjectIndex); } // reinsert at spawn location spawnFreeObject(b, freeObjectIndex, /*reinsert*/true); @@ -1577,7 +1696,7 @@ void BatchedSimulator::resetEpisodeInstance(int b) { auto& envState = safeVectorGet(pythonEnvStates_, b); envState.obj_positions.resize(episode.numFreeObjectSpawns_); - envState.episode_step_idx = 0; + envState.episode_step_idx = 0; } 
#endif @@ -1585,49 +1704,62 @@ bool BatchedSimulator::isEnvResetting(int b) const { return safeVectorGet(resets_, b) != -1; } - - -void BatchedSimulator::spawnFreeObject(int b, int freeObjectIndex, bool reinsert) { - +void BatchedSimulator::spawnFreeObject(int b, + int freeObjectIndex, + bool reinsert) { auto& env = getBpsEnvironment(b); - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - const auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - const auto& freeObjectSpawn = safeVectorGet(episodeSet_.freeObjectSpawns_, - episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); - const auto& freeObject = safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); - const auto& blueprint = freeObject.instanceBlueprint_; - const auto& rotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + const auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& freeObjectSpawn = + safeVectorGet(episodeSet_.freeObjectSpawns_, + episode.firstFreeObjectSpawnIndex_ + freeObjectIndex); + const auto& freeObject = + safeVectorGet(episodeSet_.freeObjects_, freeObjectSpawn.freeObjIndex_); + // perf todo: store instanceBlueprint_ directly in freeObject to avoid extra + // indirection + const auto& renderAsset = + safeVectorGet(episodeSet_.renderAssets_, freeObject.renderAssetIndex_); + const auto& blueprint = renderAsset.instanceBlueprint_; + const auto& rotation = safeVectorGet(freeObject.startRotations_, + freeObjectSpawn.startRotationIndex_); // create bps instance if (!reinsert) { // sloppy: this matrix gets created differently on episode reset - Mn::Matrix4 mat = Mn::Matrix4::from( - rotation.toMatrix(), freeObjectSpawn.startPos_); - glm::mat4x3 glMat = toGlmMat4x3(mat); - int instanceId = env.addInstance(blueprint.meshIdx_, 
blueprint.mtrlIdx_, glMat); - // store the first free object's bps instanceId and assume the rest will be contiguous + Mn::Matrix4 mat = + Mn::Matrix4::from(rotation.toMatrix(), freeObjectSpawn.startPos_); + glm::mat4x3 glMat = toGlmMat4x3(mat); + int instanceId = + env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); + // store the first free object's bps instanceId and assume the rest will be + // contiguous if (freeObjectIndex == 0) { BATCHED_SIM_ASSERT(episodeInstance.firstFreeObjectInstanceId_ == -1); episodeInstance.firstFreeObjectInstanceId_ = instanceId; } - BATCHED_SIM_ASSERT(instanceId == getFreeObjectBpsInstanceId(b, freeObjectIndex)); + BATCHED_SIM_ASSERT(instanceId == + getFreeObjectBpsInstanceId(b, freeObjectIndex)); } if (!reinsert) { int16_t obsIndex = episodeInstance.colGrid_.insertObstacle( - freeObjectSpawn.startPos_, rotation, &freeObject.aabb_); + freeObjectSpawn.startPos_, rotation, &freeObject.aabb_); BATCHED_SIM_ASSERT(obsIndex == freeObjectIndex); auto& envState = safeVectorGet(pythonEnvStates_, b); - safeVectorGet(envState.obj_positions, freeObjectIndex) = freeObjectSpawn.startPos_; + safeVectorGet(envState.obj_positions, freeObjectIndex) = + freeObjectSpawn.startPos_; safeVectorGet(envState.obj_rotations, freeObjectIndex) = rotation; } else { reinsertFreeObject(b, freeObjectIndex, freeObjectSpawn.startPos_, rotation); } } -void BatchedSimulator::removeFreeObjectFromCollisionGrid(int b, int freeObjectIndex) { - - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); +void BatchedSimulator::removeFreeObjectFromCollisionGrid(int b, + int freeObjectIndex) { + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); episodeInstance.colGrid_.disableObstacle(freeObjectIndex); // perf todo: remove this @@ -1635,66 +1767,72 @@ void BatchedSimulator::removeFreeObjectFromCollisionGrid(int b, int freeObjectIn safeVectorGet(envState.obj_positions, freeObjectIndex) = INVALID_VEC3; 
} -int BatchedSimulator::getFreeObjectBpsInstanceId(int b, int freeObjectIndex) const { - - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); +int BatchedSimulator::getFreeObjectBpsInstanceId(int b, + int freeObjectIndex) const { + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); BATCHED_SIM_ASSERT(episodeInstance.firstFreeObjectInstanceId_ != -1); return episodeInstance.firstFreeObjectInstanceId_ + freeObjectIndex; } -void BatchedSimulator::reinsertFreeObject(int b, int freeObjectIndex, - const Magnum::Vector3& pos, const Magnum::Quaternion& rotation) { - +void BatchedSimulator::reinsertFreeObject(int b, + int freeObjectIndex, + const Magnum::Vector3& pos, + const Magnum::Quaternion& rotation) { auto& env = getBpsEnvironment(b); - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); episodeInstance.colGrid_.reinsertObstacle(freeObjectIndex, pos, rotation); // sloppy quat to Matrix3x3 - Mn::Matrix4 mat = Mn::Matrix4::from( - rotation.toMatrix(), pos); - glm::mat4x3 glMat = toGlmMat4x3(mat); + Mn::Matrix4 mat = Mn::Matrix4::from(rotation.toMatrix(), pos); + glm::mat4x3 glMat = toGlmMat4x3(mat); int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); env.updateInstanceTransform(instanceId, glMat); auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = pos; safeVectorGet(envState.obj_rotations, freeObjectIndex) = rotation; - } void BatchedSimulator::initEpisodeSet() { - const int numEnvs = config_.numEnvs; if (config_.doProceduralEpisodeSet) { - ESP_CHECK(config_.episodeSetFilepath.empty(), - "For BatchedSimulatorConfig::doProceduralEpisodeSet==true, don't specify episodeSetFilepath"); + ESP_CHECK(config_.episodeSetFilepath.empty(), + "For BatchedSimulatorConfig::doProceduralEpisodeSet==true, don't " + "specify 
episodeSetFilepath"); - episodeSet_ = generateBenchmarkEpisodeSet(config_.episodeGeneratorConfig, sceneMapping_, serializeCollection_); + episodeSet_ = generateBenchmarkEpisodeSet( + config_.episodeGeneratorConfig, sceneMapping_, serializeCollection_); episodeSet_.saveToFile("../data/generated.episode_set.json"); } else { - ESP_CHECK(!config_.episodeSetFilepath.empty(), - "For BatchedSimulatorConfig::doProceduralEpisodeSet==false, you must specify episodeSetFilepath"); + ESP_CHECK(!config_.episodeSetFilepath.empty(), + "For BatchedSimulatorConfig::doProceduralEpisodeSet==false, you " + "must specify episodeSetFilepath"); episodeSet_ = EpisodeSet::loadFromFile(config_.episodeSetFilepath); postLoadFixup(episodeSet_, sceneMapping_, serializeCollection_); } } -void BatchedSimulator::setActionsResets(std::vector&& actions, std::vector&& resets) { - //ProfilingScope scope("BSim setActions"); +void BatchedSimulator::setActionsResets(std::vector&& actions, + std::vector&& resets) { + // ProfilingScope scope("BSim setActions"); // note we allow empty actions OR empty resets ESP_CHECK(actions.empty() || actions.size() == actions_.size(), - "BatchedSimulator::setActionsResets: actions length should be " << - actions_.size() << ", not " << actions.size()); + "BatchedSimulator::setActionsResets: actions length should be " + << actions_.size() << ", not " << actions.size()); ESP_CHECK(resets.empty() || resets.size() == resets_.size(), - "BatchedSimulator::setActionsResets: resets length should be " << - resets_.size() << ", not " << resets.size()); - ESP_CHECK(!actions.empty() || !resets.empty(), "BatchedSimulator::setActionsResets: " - << "at least one of actions or resets must be length " << actions_.size()); + "BatchedSimulator::setActionsResets: resets length should be " + << resets_.size() << ", not " << resets.size()); + ESP_CHECK(!actions.empty() || !resets.empty(), + "BatchedSimulator::setActionsResets: " + << "at least one of actions or resets must be length " + << 
actions_.size()); const int numEnvs = config_.numEnvs; - constexpr float defaultAction = 0.0f; // actions are normalized -1..1 + constexpr float defaultAction = 0.0f; // actions are normalized -1..1 if (config_.forceRandomActions) { for (auto& action : actions_) { @@ -1712,7 +1850,8 @@ void BatchedSimulator::setActionsResets(std::vector&& actions, std::vecto // force default actions on first step of episode const auto& envState = safeVectorGet(pythonEnvStates_, b); if (envState.episode_step_idx == 0) { - std::fill(&actions_[b * actionDim_], &actions_[(b + 1) * actionDim_], defaultAction); + std::fill(&actions_[b * actionDim_], &actions_[(b + 1) * actionDim_], + defaultAction); } } @@ -1727,7 +1866,8 @@ void BatchedSimulator::reset(std::vector&& resets) { ProfilingScope scope("reset episodes"); // we shouldn't be in the middle of rendering or stepping physics - ESP_CHECK(!isPhysicsThreadActive(), "Don't call reset during async physics step"); + ESP_CHECK(!isPhysicsThreadActive(), + "Don't call reset during async physics step"); ESP_CHECK(!isRenderStarted_, "Don't call reset during async render"); startStepPhysicsOrReset(std::vector(), std::move(resets)); @@ -1738,11 +1878,9 @@ void BatchedSimulator::reset(std::vector&& resets) { // called within step to reset whatever envs are requested to reset void BatchedSimulator::resetHelper() { - const int numEnvs = bpsWrapper_->envs_.size(); for (int b = 0; b < numEnvs; b++) { - if (!isEnvResetting(b)) { continue; } @@ -1755,8 +1893,8 @@ void BatchedSimulator::resetHelper() { } } - -void BatchedSimulator::startStepPhysicsOrReset(std::vector&& actions, std::vector&& resets) { +void BatchedSimulator::startStepPhysicsOrReset(std::vector&& actions, + std::vector&& resets) { ProfilingScope scope("start async physics"); BATCHED_SIM_ASSERT(!isPhysicsThreadActive()); @@ -1793,9 +1931,10 @@ void BatchedSimulator::stepPhysics() { resetHelper(); - updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ false, /*updateForRender*/ 
true, /*includeResettingEnvs*/true); + updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ false, + /*updateForRender*/ true, /*includeResettingEnvs*/ true); - updateRenderInstances(/*forceUpdate*/false); + updateRenderInstances(/*forceUpdate*/ false); updatePythonEnvironmentState(); } @@ -1804,7 +1943,6 @@ void BatchedSimulator::substepPhysics() { ProfilingScope scope("substep"); BATCHED_SIM_ASSERT(isOkToStep_); - prevStorageStep_ = currStorageStep_; currStorageStep_ = (currStorageStep_ + 1) % maxStorageSteps_; @@ -1827,9 +1965,8 @@ void BatchedSimulator::substepPhysics() { // stepping code for (int b = 0; b < numEnvs; b++) { - - // perf todo: if collision occurs on a substep, don't try stepping again for the - // remaining substeps (not until the action changes on the next step) + // perf todo: if collision occurs on a substep, don't try stepping again for + // the remaining substeps (not until the action changes on the next step) if (isEnvResetting(b)) { continue; @@ -1842,33 +1979,41 @@ void BatchedSimulator::substepPhysics() { const auto& baseRotateSetup = actionMap.baseRotate; const auto& graspReleaseSetup = actionMap.graspRelease; - const float baseMoveAction = actions_[baseActionIndex + baseMoveSetup.actionIdx]; - const float baseRotateAction = actions_[baseActionIndex + baseRotateSetup.actionIdx]; - const float graspReleaseAction = actions_[baseActionIndex + graspReleaseSetup.actionIdx]; + const float baseMoveAction = + actions_[baseActionIndex + baseMoveSetup.actionIdx]; + const float baseRotateAction = + actions_[baseActionIndex + baseRotateSetup.actionIdx]; + const float graspReleaseAction = + actions_[baseActionIndex + graspReleaseSetup.actionIdx]; auto& robotInstance = robots_.robotInstances_[b]; // sticky grasp/release behavior BATCHED_SIM_ASSERT(graspReleaseSetup.thresholds.size() == 2); - BATCHED_SIM_ASSERT(graspReleaseSetup.thresholds[0] <= graspReleaseSetup.thresholds[1]); + BATCHED_SIM_ASSERT(graspReleaseSetup.thresholds[0] <= + 
graspReleaseSetup.thresholds[1]); if (robotInstance.grippedFreeObjectIndex_ == -1) { // only a very large action value triggers a grasp - robotInstance.doAttemptGrip_ = graspReleaseAction >= graspReleaseSetup.thresholds[1]; + robotInstance.doAttemptGrip_ = + graspReleaseAction >= graspReleaseSetup.thresholds[1]; robotInstance.doAttemptDrop_ = false; } else { // only a very small action value triggers a release robotInstance.doAttemptGrip_ = false; - robotInstance.doAttemptDrop_ = graspReleaseAction < graspReleaseSetup.thresholds[0]; + robotInstance.doAttemptDrop_ = + graspReleaseAction < graspReleaseSetup.thresholds[0]; } - const float remappedBaseYawAction = remapAction(baseRotateAction, baseRotateSetup.stepMin, baseRotateSetup.stepMax); + const float remappedBaseYawAction = remapAction( + baseRotateAction, baseRotateSetup.stepMin, baseRotateSetup.stepMax); yaws[b] = prevYaws[b] + remappedBaseYawAction; - float remappedBaseMovementAction = remapAction(baseMoveAction, baseMoveSetup.stepMin, baseMoveSetup.stepMax); + float remappedBaseMovementAction = remapAction( + baseMoveAction, baseMoveSetup.stepMin, baseMoveSetup.stepMax); positions[b] = - prevPositions[b] + - Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) - * remappedBaseMovementAction; + prevPositions[b] + Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), + -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) * + remappedBaseMovementAction; // sloppy: copy over all jointPositions, then process actionJointDegreePairs int baseJointIndex = b * robot_.numPosVars; @@ -1881,18 +2026,22 @@ void BatchedSimulator::substepPhysics() { for (const auto& pair : actionMap.joints) { int j = pair.first; const auto& actionSetup = pair.second; - const float jointMovementAction = actions_[baseActionIndex + actionSetup.actionIdx]; + const float jointMovementAction = + actions_[baseActionIndex + actionSetup.actionIdx]; BATCHED_SIM_ASSERT(j >= 0 && j < robot_.numPosVars); auto& pos = 
jointPositions[baseJointIndex + j]; const auto& prevPos = prevJointPositions[baseJointIndex + j]; - const float remappedJointMovementAction = remapAction(jointMovementAction, actionSetup.stepMin, actionSetup.stepMax); + const float remappedJointMovementAction = remapAction( + jointMovementAction, actionSetup.stepMin, actionSetup.stepMax); pos = prevPos + remappedJointMovementAction; pos = Mn::Math::clamp(pos, robot_.jointPositionLimits.first[j], - robot_.jointPositionLimits.second[j]); + robot_.jointPositionLimits.second[j]); } } - updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ true, /*updateForRender*/ false, /*includeResettingEnvs*/false); + updateLinkTransforms(currStorageStep_, /*updateforPhysics*/ true, + /*updateForRender*/ false, + /*includeResettingEnvs*/ false); updateCollision(); @@ -1903,7 +2052,6 @@ void BatchedSimulator::substepPhysics() { // robots_.applyActionPenalties(actions_); } - #if 0 void BatchedSimulator::stepPhysicsWithReferenceActions() { int numEnvs = bpsWrapper_->envs_.size(); @@ -1953,29 +2101,33 @@ void BatchedSimulator::stepPhysicsWithReferenceActions() { bool BatchedSimulator::isPhysicsThreadActive() const { return config_.doAsyncPhysicsStep && - (!isStepPhysicsOrResetFinished_ || signalStepPhysics_); + (!isStepPhysicsOrResetFinished_ || signalStepPhysics_); } - void BatchedSimulator::enableDebugSensor(bool enable) { BATCHED_SIM_ASSERT(!isRenderStarted_); enableDebugSensor_ = enable; } -void BatchedSimulator::setCamera(const std::string& sensorName, const Mn::Vector3& pos, - const Mn::Quaternion& rotation, float hfov, const std::string& attachLinkName) { - - ESP_CHECK(sensorName == "sensor0" || sensorName == "debug", - "setCamera: sensorName must be \"sensor0\" or \"debug\""); +void BatchedSimulator::setCamera(const std::string& sensorName, + const Mn::Vector3& pos, + const Mn::Quaternion& rotation, + float hfov, + const std::string& attachLinkName) { + ESP_CHECK(sensorName == "sensor0" || sensorName == "debug", + 
"setCamera: sensorName must be \"sensor0\" or \"debug\""); if (!config_.numDebugEnvs > 0) { - ESP_CHECK(sensorName != "debug", "setCamera: you must set BatchedSimulatorConfig::numDebugEnvs > 0 in order to set the debug camera"); + ESP_CHECK(sensorName != "debug", + "setCamera: you must set BatchedSimulatorConfig::numDebugEnvs > " + "0 in order to set the debug camera"); } int nodeIndex = -1; if (!attachLinkName.empty()) { - ESP_CHECK(robot_.linkIndexByName_.count(attachLinkName), - "setCamera: invalid attachLinkName=" << attachLinkName - << ". Check your robot URDF for valid link names."); + ESP_CHECK(robot_.linkIndexByName_.count(attachLinkName), + "setCamera: invalid attachLinkName=" + << attachLinkName + << ". Check your robot URDF for valid link names."); int linkIndex = robot_.linkIndexByName_[attachLinkName]; nodeIndex = linkIndex + 1; } @@ -1983,11 +2135,9 @@ void BatchedSimulator::setCamera(const std::string& sensorName, const Mn::Vector const bool isDebug = sensorName == "debug"; Camera& cam = isDebug ? debugCam_ : mainCam_; - cam = Camera{ - .attachNodeIndex = nodeIndex, - .transform = Mn::Matrix4::from(rotation.toMatrix(), pos), - .hfov = hfov - }; + cam = Camera{.attachNodeIndex = nodeIndex, + .transform = Mn::Matrix4::from(rotation.toMatrix(), pos), + .hfov = hfov}; }; #if 0 @@ -2009,35 +2159,31 @@ void BatchedSimulator::setFreeCamera(const Mn::Vector3& pos, const Mn::Quaternio } #endif - -void BatchedSimulator::setBpsCameraHelper(bool isDebug, int b, const glm::mat4& glCameraInvTransform, float hfov) { - - const CameraSensorConfig& sensor = isDebug ? config_.debugSensor : config_.sensor0; +void BatchedSimulator::setBpsCameraHelper(bool isDebug, + int b, + const glm::mat4& glCameraInvTransform, + float hfov) { + const CameraSensorConfig& sensor = + isDebug ? 
config_.debugSensor : config_.sensor0; float aspectRatio = float(sensor.width) / float(sensor.height); BATCHED_SIM_ASSERT(hfov > 0.f && hfov < 180.f); constexpr float near = 0.01; constexpr float far = 1000.f; - auto& env = isDebug - ? getDebugBpsEnvironment(b) - : getBpsEnvironment(b); + auto& env = isDebug ? getDebugBpsEnvironment(b) : getBpsEnvironment(b); env.setCamera(glCameraInvTransform, hfov, aspectRatio, near, far); - } void BatchedSimulator::updateBpsCameras(bool isDebug) { - const Camera& cam = isDebug ? debugCam_ : mainCam_; const int numEnvs = isDebug ? config_.numDebugEnvs : config_.numEnvs; if (cam.attachNodeIndex == -1) { - glm::mat4 world_to_camera(glm::inverse(toGlmMat4(cam.transform))); for (int b = 0; b < numEnvs; b++) { setBpsCameraHelper(isDebug, b, world_to_camera, cam.hfov); } } else { - const int numLinks = robots_.robot_->artObj->getNumLinks(); const int numNodes = numLinks + 1; @@ -2045,10 +2191,10 @@ void BatchedSimulator::updateBpsCameras(bool isDebug) { const int cameraAttachNode = cam.attachNodeIndex; for (int b = 0; b < numEnvs; b++) { - const int baseInstanceIndex = b * numNodes; const auto instanceIndex = baseInstanceIndex + cameraAttachNode; - const auto& cameraAttachNodeTransform = safeVectorGet(robots_.nodeNewTransforms_, instanceIndex); + const auto& cameraAttachNodeTransform = + safeVectorGet(robots_.nodeNewTransforms_, instanceIndex); auto cameraTransform = cameraAttachNodeTransform * cameraAttachTransform; auto glCameraNewInvTransform = glm::inverse(toGlmMat4(cameraTransform)); @@ -2063,11 +2209,11 @@ void BatchedSimulator::startRender() { ProfilingScope scope("start render"); BATCHED_SIM_ASSERT(isOkToRender_); - updateBpsCameras(/*isDebug*/false); + updateBpsCameras(/*isDebug*/ false); bpsWrapper_->renderer_->render(bpsWrapper_->envs_.data()); if (config_.numDebugEnvs > 0 && enableDebugSensor_) { - updateBpsCameras(/*isDebug*/true); + updateBpsCameras(/*isDebug*/ true); 
debugBpsWrapper_->renderer_->render(debugBpsWrapper_->envs_.data()); } @@ -2116,10 +2262,8 @@ void BatchedSimulator::deleteDebugInstances() { if (config_.numDebugEnvs > 0) { const int numEnvs = config_.numDebugEnvs; for (int b = 0; b < numEnvs; b++) { - auto& env = getDebugBpsEnvironment(b); for (int instanceId : safeVectorGet(debugInstancesByEnv_, b)) { - env.deleteInstance(instanceId); } @@ -2128,15 +2272,18 @@ void BatchedSimulator::deleteDebugInstances() { } } -int BatchedSimulator::addDebugInstance(const std::string& name, int envIndex, - const Magnum::Matrix4& transform, bool persistent) { +int BatchedSimulator::addDebugInstance(const std::string& name, + int envIndex, + const Magnum::Matrix4& transform, + bool persistent) { BATCHED_SIM_ASSERT(config_.numDebugEnvs > 0); - + glm::mat4x3 glMat = toGlmMat4x3(transform); const auto& blueprint = sceneMapping_.findInstanceBlueprint(name); BATCHED_SIM_ASSERT(envIndex < config_.numEnvs); auto& env = getDebugBpsEnvironment(envIndex); - int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); + int instanceId = + env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); if (!persistent) { safeVectorGet(debugInstancesByEnv_, envIndex).push_back(instanceId); } @@ -2149,27 +2296,37 @@ std::string BatchedSimulator::getRecentStatsAndReset() const { } else if (recentStats_.numEpisodes_ == 0) { return "no recent episodes"; } - float collisionFraction = recentStats_.numSteps_ > 0 ? (float)recentStats_.numStepsInCollision_ / recentStats_.numSteps_ : -1.f; - float gripAttemptsPerEpisode = (float)recentStats_.numGripAttempts_ / recentStats_.numEpisodes_; - float gripsPerEpisode = (float)recentStats_.numGrips_ / recentStats_.numEpisodes_; - float dropsPerEpisode = (float)recentStats_.numDrops_ / recentStats_.numEpisodes_; - float failedDropsPerEpisode = (float)recentStats_.numFailedDrops_ / recentStats_.numEpisodes_; + float collisionFraction = + recentStats_.numSteps_ > 0 + ? 
(float)recentStats_.numStepsInCollision_ / recentStats_.numSteps_ + : -1.f; + float gripAttemptsPerEpisode = + (float)recentStats_.numGripAttempts_ / recentStats_.numEpisodes_; + float gripsPerEpisode = + (float)recentStats_.numGrips_ / recentStats_.numEpisodes_; + float dropsPerEpisode = + (float)recentStats_.numDrops_ / recentStats_.numEpisodes_; + float failedDropsPerEpisode = + (float)recentStats_.numFailedDrops_ / recentStats_.numEpisodes_; recentStats_ = StatRecord(); std::stringstream ss; - ss << "collisionFraction " << std::setprecision(5) << collisionFraction << ", " - << "gripAttemptsPerEpisode " << std::setprecision(5) << gripAttemptsPerEpisode << ", " - << "gripsPerEpisode " << std::setprecision(5) << gripsPerEpisode << ", " - << "dropsPerEpisode " << std::setprecision(5) << dropsPerEpisode << ", " - << "failedDropsPerEpisode " << std::setprecision(5) << failedDropsPerEpisode; + ss << "collisionFraction " << std::setprecision(5) << collisionFraction + << ", " + << "gripAttemptsPerEpisode " << std::setprecision(5) + << gripAttemptsPerEpisode << ", " + << "gripsPerEpisode " << std::setprecision(5) << gripsPerEpisode << ", " + << "dropsPerEpisode " << std::setprecision(5) << dropsPerEpisode << ", " + << "failedDropsPerEpisode " << std::setprecision(5) + << failedDropsPerEpisode; return ss.str(); } void BatchedSimulator::signalStepPhysics() { - //ProfilingScope scope("BSim signalStepPhysics"); - + // ProfilingScope scope("BSim signalStepPhysics"); + BATCHED_SIM_ASSERT(config_.doAsyncPhysicsStep); BATCHED_SIM_ASSERT(isStepPhysicsOrResetFinished_); BATCHED_SIM_ASSERT(!signalStepPhysics_); @@ -2177,29 +2334,32 @@ void BatchedSimulator::signalStepPhysics() { std::lock_guard lck(physicsSignalMutex_); signalStepPhysics_ = true; - physicsCondVar_.notify_one(); // notify_all? + physicsCondVar_.notify_one(); // notify_all? 
} void BatchedSimulator::signalKillPhysicsThread() { std::lock_guard lck(physicsSignalMutex_); signalKillPhysicsThread_ = true; - physicsCondVar_.notify_one(); // notify_all? + physicsCondVar_.notify_one(); // notify_all? } - -const std::vector& BatchedSimulator::getEnvironmentStates() const { - ESP_CHECK(!isPhysicsThreadActive(), "Don't call getEnvironmentStates during async physics step"); +const std::vector& +BatchedSimulator::getEnvironmentStates() const { + ESP_CHECK(!isPhysicsThreadActive(), + "Don't call getEnvironmentStates during async physics step"); #ifdef NDEBUG - ESP_CHECK(isRenderStarted_, "For best runtime perf, call getEnvironmentStates *after* startRender"); + ESP_CHECK( + isRenderStarted_, + "For best runtime perf, call getEnvironmentStates *after* startRender"); #endif - return pythonEnvStates_; + return pythonEnvStates_; } void BatchedSimulator::waitStepPhysicsOrReset() { - //ProfilingScope scope("BSim waitStepPhysicsOrReset"); + // ProfilingScope scope("BSim waitStepPhysicsOrReset"); if (config_.doAsyncPhysicsStep) { std::unique_lock lck(physicsFinishMutex_); - physicsCondVar_.wait(lck, [&]{ return isStepPhysicsOrResetFinished_; }); + physicsCondVar_.wait(lck, [&] { return isStepPhysicsOrResetFinished_; }); } } @@ -2212,7 +2372,8 @@ void BatchedSimulator::physicsThreadFunc(int startEnvIndex, int numEnvs) { { ProfilingScope scope("wait for main thread"); std::unique_lock lck(physicsSignalMutex_); - physicsCondVar_.wait(lck, [&]{ return signalStepPhysics_ || signalKillPhysicsThread_; }); + physicsCondVar_.wait( + lck, [&] { return signalStepPhysics_ || signalKillPhysicsThread_; }); didSignalStepPhysics = signalStepPhysics_; didSignalKillPhysicsThread = signalKillPhysicsThread_; signalStepPhysics_ = false; @@ -2232,22 +2393,27 @@ void BatchedSimulator::physicsThreadFunc(int startEnvIndex, int numEnvs) { // ProfilingScope scope("physicsThreadFunc notify after step"); std::lock_guard lck(physicsFinishMutex_); isStepPhysicsOrResetFinished_ = true; - 
physicsCondVar_.notify_one(); // notify_all? + physicsCondVar_.notify_one(); // notify_all? } } } - -void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxProgress) { +void BatchedSimulator::debugRenderColumnGrids(int b, + int minProgress, + int maxProgress) { BATCHED_SIM_ASSERT(b < config_.numDebugEnvs); auto& env = getDebugBpsEnvironment(b); - auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); - auto& episode = safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); - const auto& stageFixedObject = safeVectorGet(episodeSet_.fixedObjects_, episode.stageFixedObjIndex); - const auto& source = stageFixedObject.columnGridSet_.getColumnGrid(0); + auto& episodeInstance = + safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); + auto& episode = + safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); + const auto& staticScene = + safeVectorGet(episodeSet_.staticScenes_, episode.staticSceneIndex_); + const auto& source = staticScene.columnGridSet_.getColumnGrid(0); - const auto& blueprint = sceneMapping_.findInstanceBlueprint("cube_gray_shaded"); + const auto& blueprint = + sceneMapping_.findInstanceBlueprint("cube_gray_shaded"); // note off by one const float maxOccludedY = 3.f; @@ -2255,10 +2421,9 @@ void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxPro // temp for (int cellZ = 0; cellZ < source.dimZ; cellZ++) { for (int cellX = 0; cellX < source.dimX; cellX++) { - int progress = cellX + cellZ; - if (progress < minProgress || - (maxProgress != -1 && progress >= maxProgress)) { + if (progress < minProgress || + (maxProgress != -1 && progress >= maxProgress)) { continue; } @@ -2271,25 +2436,22 @@ void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxPro continue; } float occludedMinY = col0.freeMaxY; - float occludedMaxY = (col1.freeMinY == source.INVALID_Y) - ? 
maxOccludedY - : col1.freeMinY; + float occludedMaxY = + (col1.freeMinY == source.INVALID_Y) ? maxOccludedY : col1.freeMinY; Mn::Range3D aabb( - Mn::Vector3( - source.minX + cellX * source.gridSpacing, - occludedMinY, - source.minZ + cellZ * source.gridSpacing), - Mn::Vector3( - source.minX + (cellX + 1) * source.gridSpacing, - occludedMaxY, - source.minZ + (cellZ + 1) * source.gridSpacing)); + Mn::Vector3(source.minX + cellX * source.gridSpacing, occludedMinY, + source.minZ + cellZ * source.gridSpacing), + Mn::Vector3(source.minX + (cellX + 1) * source.gridSpacing, + occludedMaxY, + source.minZ + (cellZ + 1) * source.gridSpacing)); - Mn::Matrix4 localToBox = Mn::Matrix4::translation(aabb.center()) - * Mn::Matrix4::scaling(aabb.size() * 0.5); + Mn::Matrix4 localToBox = Mn::Matrix4::translation(aabb.center()) * + Mn::Matrix4::scaling(aabb.size() * 0.5); glm::mat4x3 glMat = toGlmMat4x3(localToBox); - int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); + int instanceId = + env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); episodeInstance.persistentDebugInstanceIds_.push_back(instanceId); } } @@ -2297,15 +2459,17 @@ void BatchedSimulator::debugRenderColumnGrids(int b, int minProgress, int maxPro } void BatchedSimulator::reloadSerializeCollection() { - int numEnvs = bpsWrapper_->envs_.size(); - serializeCollection_ = serialize::Collection::loadFromFile(config_.collectionFilepath); + serializeCollection_ = + serialize::Collection::loadFromFile(config_.collectionFilepath); robot_.updateFromSerializeCollection(serializeCollection_); - robots_.collisionSphereWorldOrigins_.resize(robot_.numCollisionSpheres_ * numEnvs); - robots_.collisionSphereQueryCaches_.resize(robot_.numCollisionSpheres_ * numEnvs, 0); + robots_.collisionSphereWorldOrigins_.resize(robot_.numCollisionSpheres_ * + numEnvs); + robots_.collisionSphereQueryCaches_.resize( + robot_.numCollisionSpheres_ * numEnvs, 0); updateFromSerializeCollection(episodeSet_, 
serializeCollection_); @@ -2313,7 +2477,6 @@ void BatchedSimulator::reloadSerializeCollection() { } void BatchedSimulator::checkDisableRobotAndFreeObjectsCollision() { - if (!config_.enableRobotCollision) { for (auto& nodeSpheres : robot_.collisionSpheresByNode_) { nodeSpheres.clear(); @@ -2327,9 +2490,7 @@ void BatchedSimulator::checkDisableRobotAndFreeObjectsCollision() { freeObject.collisionSpheres_.clear(); } } - } - } // namespace batched_sim } // namespace esp diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 364462d51a..9e88db16bd 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -5,19 +5,19 @@ #ifndef ESP_BATCHEDSIM_BATCHED_SIMULATOR_H_ #define ESP_BATCHEDSIM_BATCHED_SIMULATOR_H_ -#include "esp/physics/bullet/BulletArticulatedObject.h" -#include "esp/sim/Simulator.h" -#include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/BpsSceneMapping.h" -#include "esp/batched_sim/EpisodeSet.h" +#include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/EpisodeGenerator.h" +#include "esp/batched_sim/EpisodeSet.h" #include "esp/batched_sim/SerializeCollection.h" #include "esp/core/random.h" +#include "esp/physics/bullet/BulletArticulatedObject.h" +#include "esp/sim/Simulator.h" #include -#include #include +#include #include namespace esp { @@ -29,9 +29,9 @@ struct CameraSensorConfig { }; struct Camera { - int attachNodeIndex = -1; // -1 indicates free camera (not parented to node) + int attachNodeIndex = -1; // -1 indicates free camera (not parented to node) Magnum::Matrix4 transform = Magnum::Matrix4(Magnum::Math::IdentityInit); - float hfov = -1.f; // degrees + float hfov = -1.f; // degrees }; struct BatchedSimulatorConfig { @@ -51,17 +51,20 @@ struct BatchedSimulatorConfig { EpisodeGeneratorConfig episodeGeneratorConfig; // only set this for doProceduralEpisodeSet==false (it is otherwise ignored) std::string episodeSetFilepath; - std::string 
collectionFilepath = "../data/replicacad_composite.collection.json"; + std::string collectionFilepath = + "../data/replicacad_composite.collection.json"; + std::string renderAssetCompositeFilepath = + "../data/bps_data/composite/composite.bps"; ESP_SMART_POINTERS(BatchedSimulatorConfig); }; - struct PythonEnvironmentState { // curr episode - int episode_idx = -1; // 0..len(episodes)-1 - int episode_step_idx = -1; // will be zero if this env was just reset - int target_obj_idx = -1; // see obj_positions, obj_rotations - // all positions/rotations are relative to the mesh, i.e. some arbitrary coordinate frame + int episode_idx = -1; // 0..len(episodes)-1 + int episode_step_idx = -1; // will be zero if this env was just reset + int target_obj_idx = -1; // see obj_positions, obj_rotations + // all positions/rotations are relative to the mesh, i.e. some arbitrary + // coordinate frame Magnum::Vector3 target_obj_start_pos; Magnum::Quaternion target_obj_start_rotation; Magnum::Vector3 robot_start_pos; @@ -90,7 +93,12 @@ struct PythonEnvironmentState { }; struct BpsWrapper { - BpsWrapper(int gpuId, int numEnvs, bool includeDepth, bool includeColor, const CameraSensorConfig& sensor0); + BpsWrapper(int gpuId, + int numEnvs, + bool includeDepth, + bool includeColor, + const CameraSensorConfig& sensor0, + const std::string& sceneFilepath); ~BpsWrapper(); std::shared_ptr scene_; @@ -100,10 +108,13 @@ struct BpsWrapper { }; struct Robot { - Robot(const serialize::Collection& serializeCollection, esp::sim::Simulator* sim, BpsSceneMapping* sceneMapping); + Robot(const serialize::Collection& serializeCollection, + esp::sim::Simulator* sim, + BpsSceneMapping* sceneMapping); Robot() = default; - void updateFromSerializeCollection(const serialize::Collection& serializeCollection); + void updateFromSerializeCollection( + const serialize::Collection& serializeCollection); int numPosVars = -1; int numInstances_ = -1; @@ -119,14 +130,15 @@ struct Robot { std::vector collisionSpheres_; int 
gripperLink_ = -1; - Magnum::Vector3 gripperQueryOffset_; /// let's also use this as the position for the gripped obj + Magnum::Vector3 gripperQueryOffset_; /// let's also use this as the position + /// for the gripped obj float gripperQueryRadius_ = 0.f; std::unordered_map linkIndexByName_; }; -// This is misnamed and isn't actually used to store entire rollouts. It contains -// some env state. See also RobotInstance and RobotInstanceSet. +// This is misnamed and isn't actually used to store entire rollouts. It +// contains some env state. See also RobotInstance and RobotInstanceSet. struct RolloutRecord { RolloutRecord() = default; RolloutRecord(int numRolloutSubsteps, @@ -138,8 +150,9 @@ struct RolloutRecord { std::vector jointPositions_; // [numRolloutSubsteps * numEnvs * numJointVars] std::vector yaws_; // [numRolloutSubsteps * numEnvs] - std::vector positions_; // [numRolloutSubsteps * numEnvs] - std::vector rootTransforms_; // [numRolloutSubsteps * numEnvs] + std::vector positions_; // [numRolloutSubsteps * numEnvs] + std::vector + rootTransforms_; // [numRolloutSubsteps * numEnvs] std::vector nodeTransforms_; // [numRolloutSubsteps * numEnvs * numNodes] }; @@ -150,7 +163,8 @@ class RobotInstance { bool doAttemptGrip_ = false; bool doAttemptDrop_ = false; // std::vector grippedObjCollisionSphereWorldOrigins_; - Magnum::Matrix4 cachedGripperLinkMat_; // sloppy: also in newNodeTransforms at glMat + Magnum::Matrix4 + cachedGripperLinkMat_; // sloppy: also in newNodeTransforms at glMat int grippedFreeObjectIndex_ = -1; Corrade::Containers::Optional grippedFreeObjectPreviousPos_; // glm::mat4 cameraNewInvTransform_; @@ -175,10 +189,11 @@ class RobotInstanceSet { // size = num robot instances * robot num nodes (num nodes = num links + 1) std::vector nodeInstanceIds_; // perf todo: try bullet aligned object array - // size = num robot instances * robot num collision spheres + // size = num robot instances * robot num collision spheres std::vector 
collisionSphereWorldOrigins_; // don't try to pack this with other structs anywhere because it's int8 - std::vector collisionSphereQueryCaches_; + std::vector + collisionSphereQueryCaches_; // size = num robot instances * robot num instances // perf todo: avoid wasted storage? store as 4x3 std::vector nodeNewTransforms_; @@ -194,7 +209,6 @@ class RobotInstanceSet { std::vector robotInstances_; }; - class BatchedSimulator { public: BatchedSimulator(const BatchedSimulatorConfig& config); @@ -208,11 +222,15 @@ class BatchedSimulator { } void enableDebugSensor(bool enable); - - // void setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, const Mn::Quaternion& rotation, float hfov); - // void setFreeCamera(const Magnum::Vector3& pos, const Magnum::Quaternion& rotation, float hfov); - void setCamera(const std::string& sensorName, const Mn::Vector3& pos, - const Mn::Quaternion& rotation, float hfov, const std::string& attachLinkName); + + // void setRobotCamera(const std::string& linkName, const Mn::Vector3& pos, + // const Mn::Quaternion& rotation, float hfov); void setFreeCamera(const + // Magnum::Vector3& pos, const Magnum::Quaternion& rotation, float hfov); + void setCamera(const std::string& sensorName, + const Mn::Vector3& pos, + const Mn::Quaternion& rotation, + float hfov, + const std::string& attachLinkName); void startRender(); void waitRender(); @@ -220,7 +238,8 @@ class BatchedSimulator { // todo: thread-safe access to PythonEnvironmentState void reset(std::vector&& resets); const std::vector& getEnvironmentStates() const; - void startStepPhysicsOrReset(std::vector&& actions, std::vector&& resets); + void startStepPhysicsOrReset(std::vector&& actions, + std::vector&& resets); void waitStepPhysicsOrReset(); bps3D::Renderer& getBpsRenderer(); @@ -229,26 +248,34 @@ class BatchedSimulator { bps3D::Environment& getBpsEnvironment(int envIndex); bps3D::Environment& getDebugBpsEnvironment(int envIndex); - void deleteDebugInstances(); // probably call at 
start of frame render + void deleteDebugInstances(); // probably call at start of frame render // probably add these every frame ("immediate mode", not persistent) // beware: probably not threadsafe - int addDebugInstance(const std::string& name, int envIndex, - const Magnum::Matrix4& transform=Magnum::Matrix4(Mn::Math::IdentityInit), bool persistent=false); - - void addSphereDebugInstance(const std::string& name, int b, - const Magnum::Vector3& spherePos, float radius); - void addBoxDebugInstance(const std::string& name, int b, - const Magnum::Vector3& pos, const Magnum::Quaternion& rotation, - const Magnum::Range3D& aabb, float pad=0.f, bool showBackfaces=false); - - std::string getRecentStatsAndReset() const; // todo: threadsafe - - void debugRenderColumnGrids(int b, int minProgress=0, int maxProgress=-1); + int addDebugInstance(const std::string& name, + int envIndex, + const Magnum::Matrix4& transform = + Magnum::Matrix4(Mn::Math::IdentityInit), + bool persistent = false); + + void addSphereDebugInstance(const std::string& name, + int b, + const Magnum::Vector3& spherePos, + float radius); + void addBoxDebugInstance(const std::string& name, + int b, + const Magnum::Vector3& pos, + const Magnum::Quaternion& rotation, + const Magnum::Range3D& aabb, + float pad = 0.f, + bool showBackfaces = false); + + std::string getRecentStatsAndReset() const; // todo: threadsafe + + void debugRenderColumnGrids(int b, int minProgress = 0, int maxProgress = -1); void reloadSerializeCollection(); private: - struct StatRecord { int numSteps_ = 0; int numEpisodes_ = 0; @@ -259,10 +286,14 @@ class BatchedSimulator { int numFailedDrops_ = 0; }; - void setActionsResets(std::vector&& actions, std::vector&& resets); + void setActionsResets(std::vector&& actions, + std::vector&& resets); void stepPhysics(); void substepPhysics(); - void updateLinkTransforms(int currRolloutSubstep, bool updateforPhysics, bool updateForRender, bool includeResettingEnvs); + void updateLinkTransforms(int 
currRolloutSubstep, + bool updateforPhysics, + bool updateForRender, + bool includeResettingEnvs); void updateCollision(); // for each robot, undo action if collision void postCollisionUpdate(); @@ -273,19 +304,24 @@ class BatchedSimulator { void resetHelper(); void updatePythonEnvironmentState(); void updateBpsCameras(bool isDebug); - void setBpsCameraHelper(bool isDebug, int b, const glm::mat4& glCameraInvTransform, float hfov); + void setBpsCameraHelper(bool isDebug, + int b, + const glm::mat4& glCameraInvTransform, + float hfov); // uses episode spawn location void spawnFreeObject(int b, int freeObjectIndex, bool reinsert); // remember to update your bps instance after calling this! void removeFreeObjectFromCollisionGrid(int b, int freeObjectIndex); - void reinsertFreeObject(int b, int freeObjectIndex, - const Magnum::Vector3& pos, const Magnum::Quaternion& rotation); + void reinsertFreeObject(int b, + int freeObjectIndex, + const Magnum::Vector3& pos, + const Magnum::Quaternion& rotation); void initEpisodeSet(); void initEpisodeInstances(); void clearEpisodeInstance(int b); - void resetEpisodeInstance(int b); // uses resets_[b] + void resetEpisodeInstance(int b); // uses resets_[b] bool isEnvResetting(int b) const; void physicsThreadFunc(int startEnvIndex, int numEnvs); @@ -316,7 +352,7 @@ class BatchedSimulator { std::unique_ptr debugBpsWrapper_; int actionDim_ = -1; std::vector actions_; - std::vector resets_; // episode index, or -1 if not resetting + std::vector resets_; // episode index, or -1 if not resetting int maxStorageSteps_ = -1; std::vector pythonEnvStates_; Camera mainCam_; @@ -333,7 +369,7 @@ class BatchedSimulator { std::thread physicsThread_; std::mutex physicsSignalMutex_; std::mutex physicsFinishMutex_; - std::condition_variable physicsCondVar_; + std::condition_variable physicsCondVar_; bool signalStepPhysics_ = false; bool signalKillPhysicsThread_ = false; bool isStepPhysicsOrResetFinished_ = true; diff --git 
a/src/esp/batched_sim/BpsSceneMapping.cpp b/src/esp/batched_sim/BpsSceneMapping.cpp index c582005e54..6f4162957d 100644 --- a/src/esp/batched_sim/BpsSceneMapping.cpp +++ b/src/esp/batched_sim/BpsSceneMapping.cpp @@ -4,20 +4,25 @@ #include "BpsSceneMapping.h" -#include "esp/io/json.h" #include "esp/core/Check.h" +#include "esp/io/json.h" + +#include namespace esp { namespace batched_sim { BpsSceneMapping BpsSceneMapping::loadFromFile(const std::string& filepath) { + ESP_CHECK(Cr::Utility::Directory::exists(filepath), + "couldn't find BpsSceneMapping file " << filepath); BpsSceneMapping mapping; auto newDoc = esp::io::parseJsonFile(filepath); esp::io::readMember(newDoc, "mapping", mapping); return mapping; } -bool fromJsonValue(const esp::io::JsonGenericValue& obj, BpsSceneMapping::MeshMapping& x) { +bool fromJsonValue(const esp::io::JsonGenericValue& obj, + BpsSceneMapping::MeshMapping& x) { esp::io::readMember(obj, "name", x.name); esp::io::readMember(obj, "meshIdx", x.meshIdx); esp::io::readMember(obj, "mtrlIdx", x.mtrlIdx); @@ -31,8 +36,9 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, BpsSceneMapping& x) { BpsSceneMapping::InstanceBlueprint BpsSceneMapping::findInstanceBlueprint( const std::string& nodeName) const { - - ESP_CHECK(!nodeName.empty(), "findInstanceBlueprint: empty input name. Check your episode data."); + ESP_CHECK( + !nodeName.empty(), + "findInstanceBlueprint: empty input name. Check your episode data."); for (const auto& meshMapping : meshMappings) { if (meshMapping.name == nodeName) { @@ -41,10 +47,12 @@ BpsSceneMapping::InstanceBlueprint BpsSceneMapping::findInstanceBlueprint( } } - ESP_CHECK(false, "findInstanceBlueprint: no mapping found for render asset " - << nodeName << ". For every free object and stage, we expect to find a " - << "corresponding render asset with the same name. 
See meshMappings names in your " - << ".bps.mapping.json file for valid render asset names."); + ESP_CHECK( + false, + "findInstanceBlueprint: no mapping found for render asset " + << nodeName + << " referenced in your episode set. See meshMappings names in your " + << ".bps.mapping.json file for valid render asset names."); CORRADE_INTERNAL_ASSERT_UNREACHABLE(); } diff --git a/src/esp/batched_sim/ColumnGrid.cpp b/src/esp/batched_sim/ColumnGrid.cpp index 909de78696..82b275443c 100644 --- a/src/esp/batched_sim/ColumnGrid.cpp +++ b/src/esp/batched_sim/ColumnGrid.cpp @@ -4,13 +4,13 @@ #include "ColumnGrid.h" #include "esp/batched_sim/BatchedSimAssert.h" -#include "esp/core/logging.h" #include "esp/core/Check.h" +#include "esp/core/logging.h" #include +#include #include #include -#include #include @@ -37,22 +37,23 @@ struct FileHeader { uint32_t numLayers = 0; }; -} - -bool ColumnGridSource::contactTest(const Mn::Vector3& pos, - ColumnGridSource::QueryCacheValue* queryCache) const { +} // namespace +bool ColumnGridSource::contactTest( + const Mn::Vector3& pos, + ColumnGridSource::QueryCacheValue* queryCache) const { return castDownTest(pos, queryCache) < 0.f; } -float ColumnGridSource::castDownTest(const Mn::Vector3& pos, - ColumnGridSource::QueryCacheValue* queryCache) const { - +float ColumnGridSource::castDownTest( + const Mn::Vector3& pos, + ColumnGridSource::QueryCacheValue* queryCache) const { // todo: think about this more (for objects vs interiors) constexpr float outsideVolumeRetVal = -INVALID_Y; const float cellFloatX = (pos.x() - minX) * invGridSpacing; - if (cellFloatX < 0.f || cellFloatX >= (float)dimX) { // perf todo: pre-cache float dimX + if (cellFloatX < 0.f || + cellFloatX >= (float)dimX) { // perf todo: pre-cache float dimX return outsideVolumeRetVal; } @@ -65,10 +66,11 @@ float ColumnGridSource::castDownTest(const Mn::Vector3& pos, float queryY = pos.y(); - // consider allowing a margin and then clamping, to account for numeric imprecision + // 
consider allowing a margin and then clamping, to account for numeric + // imprecision BATCHED_SIM_ASSERT(cellFloatX >= 0.f && cellFloatX < (float)dimX); BATCHED_SIM_ASSERT(cellFloatX < (float)MAX_INTEGER_MATH_COORD); - const int globalCellX = int(cellFloatX); // note truncation, not founding + const int globalCellX = int(cellFloatX); // note truncation, not founding BATCHED_SIM_ASSERT(globalCellX >= 0 && globalCellX <= MAX_INTEGER_MATH_COORD); BATCHED_SIM_ASSERT(cellFloatZ >= 0.f && cellFloatZ < (float)dimZ); @@ -84,8 +86,10 @@ float ColumnGridSource::castDownTest(const Mn::Vector3& pos, return outsideVolumeRetVal; } - const int localCellX = (globalCellX & globalToLocalCellMask) >> patch.localCellShift; - const int localCellZ = (globalCellZ & globalToLocalCellMask) >> patch.localCellShift; + const int localCellX = + (globalCellX & globalToLocalCellMask) >> patch.localCellShift; + const int localCellZ = + (globalCellZ & globalToLocalCellMask) >> patch.localCellShift; const int localCellIdx = getLocalCellIndex(localCellX, localCellZ); @@ -107,7 +111,7 @@ float ColumnGridSource::castDownTest(const Mn::Vector3& pos, const auto& col = getColumn(patch, localCellIdx, layerIndex); if (queryY < col.freeMinY) { *queryCache = layerIndex; - return queryY - col.freeMinY; // in between two free columns + return queryY - col.freeMinY; // in between two free columns } if (queryY <= col.freeMaxY) { *queryCache = layerIndex; @@ -124,7 +128,7 @@ float ColumnGridSource::castDownTest(const Mn::Vector3& pos, return outsideVolumeRetVal; } const auto& col = getColumn(patch, localCellIdx, layerIndex); - if (queryY > col.freeMaxY) { // in between two free columns + if (queryY > col.freeMaxY) { // in between two free columns *queryCache = layerIndex; BATCHED_SIM_ASSERT(queryY < prevColFreeMinY); return queryY - prevColFreeMinY; @@ -136,11 +140,9 @@ float ColumnGridSource::castDownTest(const Mn::Vector3& pos, prevColFreeMinY = col.freeMinY; } } - } void ColumnGridSource::load(const 
std::string& filepath) { - FILE* fp = fopen(filepath.c_str(), "rb"); if (!fp) { ESP_ERROR() << "unable to open file for loading at " << filepath; @@ -156,8 +158,8 @@ void ColumnGridSource::load(const std::string& filepath) { } if (header.version != CURRENT_FILE_VERSION) { fclose(fp); - ESP_ERROR() << "on-disk version is " << header.version - << " instead of current version " << CURRENT_FILE_VERSION; + ESP_ERROR() << "on-disk version is " << header.version + << " instead of current version " << CURRENT_FILE_VERSION; return; } @@ -176,7 +178,8 @@ void ColumnGridSource::load(const std::string& filepath) { ensureLayer(header.numLayers - 1); for (int i = 0; i < layers.size(); i++) { - size_t readLen = fread(layers[i].columns.data(), sizeof(Column), dimX * dimZ, fp); + size_t readLen = + fread(layers[i].columns.data(), sizeof(Column), dimX * dimZ, fp); if (readLen != dimX * dimZ) { ESP_ERROR() << "file read error for " << filepath; return; @@ -187,12 +190,8 @@ void ColumnGridSource::load(const std::string& filepath) { } void ColumnGridSource::save(const std::string& filepath) { - FILE* fp = fopen(filepath.c_str(), "wb"); - if (!fp) { - ESP_ERROR() << "unable to open file for saving at " << filepath; - return; - } + ESP_CHECK(fp, "unable to open file for saving at " << filepath); FileHeader header{}; header.dimX = dimX; @@ -213,11 +212,13 @@ void ColumnGridSource::save(const std::string& filepath) { } void ColumnGridSet::load(const std::string& filepathBase) { - int columnGridFilepathNumber = 0; while (true) { - // hacky naming convention for ReplicaCAD baked scenes which just contain a stage - std::string columnGridFilepath = filepathBase + std::to_string(columnGridFilepathNumber++) + ".columngrid"; + // hacky naming convention for ReplicaCAD baked scenes which just contain a + // stage + std::string columnGridFilepath = + filepathBase + "." 
+ std::to_string(columnGridFilepathNumber++) + + ".columngrid"; // sloppy: we should know what files we're loading ahead of time if (Cr::Utility::Directory::exists(columnGridFilepath)) { ColumnGridSource columnGrid; @@ -225,10 +226,13 @@ void ColumnGridSet::load(const std::string& filepathBase) { sphereRadii_.push_back(columnGrid.sphereRadius); columnGrids_.push_back(std::move(columnGrid)); } else { + if (columnGridFilepathNumber == 1) { + ESP_CHECK(!columnGrids_.empty(), + "couldn't find " << columnGridFilepath); + } break; } } - ESP_CHECK(!columnGrids_.empty(), "no .columngrid files found for " << filepathBase); } const ColumnGridSource& ColumnGridSet::getColumnGrid(int radiusIdx) const { @@ -236,20 +240,24 @@ const ColumnGridSource& ColumnGridSet::getColumnGrid(int radiusIdx) const { return source; } -bool ColumnGridSet::contactTest(int radiusIdx, const Magnum::Vector3& pos, - ColumnGridSource::QueryCacheValue* queryCache) const { +bool ColumnGridSet::contactTest( + int radiusIdx, + const Magnum::Vector3& pos, + ColumnGridSource::QueryCacheValue* queryCache) const { const auto& source = safeVectorGet(columnGrids_, radiusIdx); return source.contactTest(pos, queryCache); } // returns distance down to contact (or up to contact-free) -// positive indicates contact-free; negative indicates distance up to be contact-free -float ColumnGridSet::castDownTest(int radiusIdx, const Magnum::Vector3& pos, - ColumnGridSource::QueryCacheValue* queryCache) const { +// positive indicates contact-free; negative indicates distance up to be +// contact-free +float ColumnGridSet::castDownTest( + int radiusIdx, + const Magnum::Vector3& pos, + ColumnGridSource::QueryCacheValue* queryCache) const { const auto& source = safeVectorGet(columnGrids_, radiusIdx); return source.castDownTest(pos, queryCache); } - } // namespace batched_sim } // namespace esp diff --git a/src/esp/batched_sim/EpisodeGenerator.cpp b/src/esp/batched_sim/EpisodeGenerator.cpp index 0c18f8eb65..b094fd5805 100644 --- 
a/src/esp/batched_sim/EpisodeGenerator.cpp +++ b/src/esp/batched_sim/EpisodeGenerator.cpp @@ -3,50 +3,128 @@ // LICENSE file in the root directory of this source tree. #include "esp/batched_sim/EpisodeGenerator.h" -#include "esp/batched_sim/PlacementHelper.h" #include "esp/batched_sim/GlmUtils.h" +#include "esp/batched_sim/PlacementHelper.h" -#include "esp/core/random.h" #include "esp/core/Check.h" +#include "esp/core/random.h" +#include "esp/gfx/replay/Keyframe.h" +#include "esp/io/JsonAllTypes.h" +#include "esp/io/json.h" + +#include + +#include +#include +#include namespace Cr = Corrade; namespace Mn = Magnum; +using esp::gfx::replay::Keyframe; +using esp::gfx::replay::RenderAssetInstanceKey; + namespace esp { namespace batched_sim { namespace { -void addStageFixedObject(EpisodeSet& set, const std::string& name) { +Keyframe loadKeyframe(const std::string& filepath) { + ESP_CHECK(Corrade::Utility::Directory::exists(filepath), + "loadKeyframe: can't find replay file " << filepath); + + auto newDoc = esp::io::parseJsonFile(filepath); + std::vector keyframes; + esp::io::readMember(newDoc, "keyframes", keyframes); + ESP_CHECK(keyframes.size() == 1, "loadKeyframe: for replay file " + << filepath + << ", expected 1 keyframe but loaded " + << keyframes.size()); + return std::move(keyframes.front()); +} - FixedObject fixedObj; - fixedObj.name_ = name; - fixedObj.needsPostLoadFixup_ = true; +int findOrInsertRenderAsset(std::vector& renderAssets, + const std::string& filepathOrName) { + auto name = std::filesystem::path(filepathOrName).stem(); + auto it = std::find_if(renderAssets.begin(), renderAssets.end(), + [&](const auto& item) { return item.name_ == name; }); + if (it != renderAssets.end()) { + return it - renderAssets.begin(); + } else { + renderAssets.push_back(RenderAsset{.name_ = name}); + return renderAssets.size() - 1; + } +} - set.fixedObjects_.push_back(std::move(fixedObj)); +void addStaticScene(EpisodeSet& set, const std::string& name) { + StaticScene 
staticScene; + staticScene.name_ = name; + staticScene.columnGridSetName_ = name; + + std::string replayFilepath = "../data/replays/" + name + ".replay.json"; + auto keyframe = loadKeyframe(replayFilepath); + + ESP_CHECK(keyframe.stateUpdates.size() == keyframe.creations.size(), + "addStaticScene: keyframe.stateUpdates.size() != " + "keyframe.creations.size()"); + + for (int i = 0; i < keyframe.creations.size(); i++) { + const auto& creationPair = keyframe.creations[i]; + const auto& updatePair = keyframe.stateUpdates[i]; + ESP_CHECK(creationPair.first == i, + "unexpected instanceKey in " << replayFilepath); + ESP_CHECK(updatePair.first == i, + "unexpected instanceKey in " << replayFilepath); + auto key = creationPair.first; + + const auto& creationInfo = creationPair.second; + const auto& update = updatePair.second; + + ESP_CHECK(creationInfo.isRGBD(), + "addStaticScene: unexpected RenderAssetInstanceCreation isRGBD"); + int renderAssetIndex = + findOrInsertRenderAsset(set.renderAssets_, creationInfo.filepath); + + auto scale = + creationInfo.scale ? 
*creationInfo.scale : Mn::Vector3(1.f, 1.f, 1.f); + + Transform transform{.origin_ = update.absTransform.translation, + .rotation_ = update.absTransform.rotation, + .scale_ = scale}; + RenderAssetInstance instance{.renderAssetIndex_ = renderAssetIndex, + .transform_ = transform}; + staticScene.renderAssetInstances_.push_back(instance); + } + + staticScene.needsPostLoadFixup_ = true; + + set.staticScenes_.push_back(std::move(staticScene)); } void addFreeObject(EpisodeSet& set, const std::string& name) { + int renderAssetIndex = findOrInsertRenderAsset(set.renderAssets_, name); FreeObject freeObj; - freeObj.name_ = name; + freeObj.renderAssetIndex_ = renderAssetIndex; freeObj.needsPostLoadFixup_ = true; // all YCB objects needs this to be upright - const auto baseRot = Mn::Quaternion::rotation(Mn::Deg(-90), Mn::Vector3(1.f, 0.f, 0.f)); + const auto baseRot = + Mn::Quaternion::rotation(Mn::Deg(-90), Mn::Vector3(1.f, 0.f, 0.f)); constexpr int numRotationsAboutUpAxis = 32; for (int i = 0; i < numRotationsAboutUpAxis; i++) { const auto angle = Mn::Deg((float)i * 360.f / numRotationsAboutUpAxis); - const auto rotAboutUpAxis = Mn::Quaternion::rotation(angle, Mn::Vector3(0.f, 1.f, 0.f)); + const auto rotAboutUpAxis = + Mn::Quaternion::rotation(angle, Mn::Vector3(0.f, 1.f, 0.f)); freeObj.startRotations_.push_back((rotAboutUpAxis * baseRot)); } - set.freeObjects_.emplace_back(std::move(freeObj)); + set.freeObjects_.emplace_back(std::move(freeObj)); } -FreeObject createFreeObjectProxyForRobot(const serialize::Collection& collection) { - +FreeObject createFreeObjectProxyForRobot( + const serialize::Collection& collection) { #if 0 const auto& serLink = safeVectorGet(safeVectorGet(collection.robots, 0).links, 0); ESP_CHECK(serLink.linkName == "base_link", "createFreeObjectProxyForRobot failed"); @@ -59,9 +137,9 @@ FreeObject createFreeObjectProxyForRobot(const serialize::Collection& collection int radiusIdx = getCollisionRadiusIndex(collection, serSphere.radius); 
freeObject.collisionSpheres_.push_back({serSphere.origin, radiusIdx}); - freeObject.aabb_.min() = min(freeObject.aabb_.min(), + freeObject.aabb_.min() = min(freeObject.aabb_.min(), serSphere.origin - Mn::Vector3(serSphere.radius, serSphere.radius, serSphere.radius)); - freeObject.aabb_.max() = max(freeObject.aabb_.max(), + freeObject.aabb_.max() = max(freeObject.aabb_.max(), serSphere.origin + Mn::Vector3(serSphere.radius, serSphere.radius, serSphere.radius)); } #endif @@ -70,9 +148,11 @@ FreeObject createFreeObjectProxyForRobot(const serialize::Collection& collection freeObject.startRotations_.push_back(Mn::Quaternion(Mn::Math::IdentityInit)); - auto it = std::find_if(collection.freeObjects.begin(), collection.freeObjects.end(), - [](const auto& item) { return item.name == "robotProxy"; }); - ESP_CHECK(it != collection.freeObjects.end(), "createFreeObjectProxyForRobot failed"); + auto it = + std::find_if(collection.freeObjects.begin(), collection.freeObjects.end(), + [](const auto& item) { return item.name == "robotProxy"; }); + ESP_CHECK(it != collection.freeObjects.end(), + "createFreeObjectProxyForRobot failed"); updateFromSerializeCollection(freeObject, *it, collection); BATCHED_SIM_ASSERT(!freeObject.collisionSpheres_.empty()); @@ -80,45 +160,36 @@ FreeObject createFreeObjectProxyForRobot(const serialize::Collection& collection return freeObject; } -void setFetchJointStartPositions(const EpisodeGeneratorConfig& config, Episode& episode, const serialize::Collection& collection, - core::Random& random) { - +void setFetchJointStartPositions(const EpisodeGeneratorConfig& config, + Episode& episode, + const serialize::Collection& collection, + core::Random& random) { const auto& serRobot = safeVectorGet(collection.robots, 0); - ESP_CHECK(serRobot.actionMap.joints.size() <= Episode::MAX_JOINT_POSITIONS, - "setJointStartPositions: collection.json robot actionMap.joints.size()=" << serRobot.actionMap.joints.size() - << " must be <= Episode::MAX_JOINT_POSITIONS=" << 
Episode::MAX_JOINT_POSITIONS); + ESP_CHECK( + serRobot.actionMap.joints.size() <= Episode::MAX_JOINT_POSITIONS, + "setJointStartPositions: collection.json robot actionMap.joints.size()=" + << serRobot.actionMap.joints.size() + << " must be <= Episode::MAX_JOINT_POSITIONS=" + << Episode::MAX_JOINT_POSITIONS); if (config.useFixedRobotJointStartPositions) { - for (int i = 0; i < serRobot.actionMap.joints.size(); i++) { const auto& pair = serRobot.actionMap.joints[i]; int jointIdx = pair.first; const auto& setup = pair.second; - episode.robotStartJointPositions_[i] = serRobot.startJointPositions[jointIdx]; + episode.robotStartJointPositions_[i] = + serRobot.startJointPositions[jointIdx]; } } else { - constexpr float refAngle = float(Mn::Rad(Mn::Deg(180.f))); // for joints 6-12 constexpr std::array jointsRangeMin = { - -1.6056, - -1.22099996, - -refAngle, - -2.25099993, - -refAngle, - -2.16000009, - -refAngle - }; + -1.6056, -1.22099996, -refAngle, -2.25099993, + -refAngle, -2.16000009, -refAngle}; constexpr std::array jointsRangeMax = { - 1.6056, - 1.51800001, - refAngle, - 2.25099993, - refAngle, - 2.16000009, - refAngle - }; + 1.6056, 1.51800001, refAngle, 2.25099993, + refAngle, 2.16000009, refAngle}; for (int i = 0; i < serRobot.actionMap.joints.size(); i++) { const auto& pair = serRobot.actionMap.joints[i]; @@ -136,39 +207,40 @@ void setFetchJointStartPositions(const EpisodeGeneratorConfig& config, Episode& const float rangeMax = safeVectorGet(jointsRangeMax, i); // near min or max pos = (random.uniform_uint() % 2) - ? random.uniform_float(rangeMin, rangeMin + pad) - : random.uniform_float(rangeMax - pad, rangeMax); + ? 
random.uniform_float(rangeMin, rangeMin + pad) + : random.uniform_float(rangeMax - pad, rangeMax); } else { // random angle over entire range - pos = random.uniform_float(safeVectorGet(jointsRangeMin, i), safeVectorGet(jointsRangeMax, i)); + pos = random.uniform_float(safeVectorGet(jointsRangeMin, i), + safeVectorGet(jointsRangeMax, i)); } } - - } - - } - - -void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, - const serialize::Collection& collection, int stageFixedObjectIndex, - core::Random& random, core::Random& random2, const FreeObject& robotProxy) { +void addEpisode(const EpisodeGeneratorConfig& config, + EpisodeSet& set, + const serialize::Collection& collection, + int staticSceneIndex, + core::Random& random, + core::Random& random2, + const FreeObject& robotProxy) { Episode episode; - episode.stageFixedObjIndex = stageFixedObjectIndex; + episode.staticSceneIndex_ = staticSceneIndex; episode.firstFreeObjectSpawnIndex_ = set.freeObjectSpawns_.size(); - // Use a separate rand generator for joint start positions. This is so that toggling - // random vs fixed joint start positions doesn't affect the rest of episode - // generation. + // Use a separate rand generator for joint start positions. This is so that + // toggling random vs fixed joint start positions doesn't affect the rest of + // episode generation. setFetchJointStartPositions(config, episode, collection, random2); if (config.useFixedRobotStartPos) { episode.agentStartPos_ = Mn::Vector2(2.59f, 0.f); - episode.agentStartYaw_ = config.useFixedRobotStartYaw - ? -float(Mn::Rad(Mn::Deg(90.f))) - : random.uniform_float(-float(Mn::Rad(Mn::Deg(135.f))), -float(Mn::Rad(Mn::Deg(45.f)))); + episode.agentStartYaw_ = + config.useFixedRobotStartYaw + ? 
-float(Mn::Rad(Mn::Deg(90.f))) + : random.uniform_float(-float(Mn::Rad(Mn::Deg(135.f))), + -float(Mn::Rad(Mn::Deg(45.f)))); } else { // set to NAN for now and we'll find a robot start pos later in here episode.agentStartPos_ = Mn::Vector2(NAN, NAN); @@ -176,55 +248,60 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, // 1 spawn for target object, plus nontarget objects constexpr int numTargetObjectSpawns = 1; - int numObjectSpawns = numTargetObjectSpawns - + random.uniform_int(config.minNontargetObjects, config.maxNontargetObjects + 1); + int numObjectSpawns = numTargetObjectSpawns + + random.uniform_int(config.minNontargetObjects, + config.maxNontargetObjects + 1); episode.numFreeObjectSpawns_ = 0; // good for area around staircase and living room constexpr float allowedSnapDown = 0.05f; - Mn::Range3D objectSpawnRange({-2.4f, 0.2f, -1.f}, {4.3f, 2.f, 4.f}); // above the floor - Mn::Range3D robotSpawnRange({1.f, 0.05f, -0.5f}, {3.3f, 0.05f, 7.0f}); // just above the floor - - // good for white bookshelf for stage 5 - // Mn::Range3D objectSpawnRange({0.33f, 0.15f, -0.4f}, {1.18f, 1.85f, -0.25f}); + Mn::Range3D objectSpawnRange({-2.4f, 0.2f, -1.f}, + {4.3f, 2.f, 4.f}); // above the floor + Mn::Range3D robotSpawnRange({1.f, 0.05f, -0.5f}, + {3.3f, 0.05f, 7.0f}); // just above the floor // exclusion range is for legacy useFixedRobotStartPos const auto robotStartPos = Mn::Vector3(2.59, 0.f, 0.f); const auto pad = Mn::Vector3(0.9f, 2.f, 0.9); - const auto exclusionRange = Mn::Range3D(robotStartPos - pad, robotStartPos + pad); + const auto exclusionRange = + Mn::Range3D(robotStartPos - pad, robotStartPos + pad); - const auto& stageFixedObject = safeVectorGet(set.fixedObjects_, episode.stageFixedObjIndex); - const auto& columnGrid = stageFixedObject.columnGridSet_.getColumnGrid(0); + const auto& staticScene = + safeVectorGet(set.staticScenes_, episode.staticSceneIndex_); + const auto& columnGrid = staticScene.columnGridSet_.getColumnGrid(0); // perf 
todo: re-use this across entire set (have extents for set) // todo: find extents for entire EpisodeSet, not just this specific columnGrid constexpr int maxBytes = 1000 * 1024; - // this is tuned assuming a building-scale simulation with household-object-scale obstacles + // this is tuned assuming a building-scale simulation with + // household-object-scale obstacles constexpr float maxGridSpacing = 0.5f; - CollisionBroadphaseGrid colGrid = CollisionBroadphaseGrid(getMaxCollisionRadius(collection), - columnGrid.minX, columnGrid.minZ, - columnGrid.getMaxX(), columnGrid.getMaxZ(), - maxBytes, maxGridSpacing); + CollisionBroadphaseGrid colGrid = CollisionBroadphaseGrid( + getMaxCollisionRadius(collection), columnGrid.minX, columnGrid.minZ, + columnGrid.getMaxX(), columnGrid.getMaxZ(), maxBytes, maxGridSpacing); constexpr int maxFailedPlacements = 1; - PlacementHelper placementHelper(stageFixedObject.columnGridSet_, - colGrid, collection, random, maxFailedPlacements); + PlacementHelper placementHelper(staticScene.columnGridSet_, colGrid, + collection, random, maxFailedPlacements); - episode.targetObjIndex_ = 0; // arbitrary + episode.targetObjIndex_ = 0; // arbitrary int numSpawnAttempts = 4000; bool success = false; for (int i = 0; i < numSpawnAttempts; i++) { - // find a robot spawn after finding all object spawns - bool isRobotPosAttempt = (episode.numFreeObjectSpawns_ == numObjectSpawns) - && std::isnan(episode.agentStartPos_.x()); + bool isRobotPosAttempt = + (episode.numFreeObjectSpawns_ == numObjectSpawns) && + std::isnan(episode.agentStartPos_.x()); - // find a goal position spawn after finding robot spawn and all object spawns - bool isGoalPositionAttempt = (episode.numFreeObjectSpawns_ == numObjectSpawns) - && !isRobotPosAttempt; + // find a goal position spawn after finding robot spawn and all object + // spawns + bool isGoalPositionAttempt = + (episode.numFreeObjectSpawns_ == numObjectSpawns) && !isRobotPosAttempt; - const bool useExclusionRange = 
!isRobotPosAttempt && config.useFixedRobotStartPos; + const bool useExclusionRange = + !isRobotPosAttempt && config.useFixedRobotStartPos; - const auto& spawnRange = isRobotPosAttempt ? robotSpawnRange : objectSpawnRange; + const auto& spawnRange = + isRobotPosAttempt ? robotSpawnRange : objectSpawnRange; FreeObjectSpawn spawn; const FreeObject* freeObjectPtr = nullptr; @@ -235,19 +312,22 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, freeObjectPtr = &robotProxy; robotYaw = config.useFixedRobotStartYaw - ? -float(Mn::Rad(Mn::Deg(90.f))) - : random.uniform_float(-float(Mn::Rad(Mn::Deg(180.f))), float(Mn::Rad(Mn::Deg(180.f)))); + ? -float(Mn::Rad(Mn::Deg(90.f))) + : random.uniform_float(-float(Mn::Rad(Mn::Deg(180.f))), + float(Mn::Rad(Mn::Deg(180.f)))); rotation = yawToRotation(robotYaw); } else { // for the goal position, use the free object correspnding to targetObjIdx - spawn.freeObjIndex_ = isGoalPositionAttempt - ? set.freeObjectSpawns_[episode.targetObjIndex_].freeObjIndex_ - : random.uniform_int(0, set.freeObjects_.size()); + spawn.freeObjIndex_ = + isGoalPositionAttempt + ? 
set.freeObjectSpawns_[episode.targetObjIndex_].freeObjIndex_ + : random.uniform_int(0, set.freeObjects_.size()); freeObjectPtr = &safeVectorGet(set.freeObjects_, spawn.freeObjIndex_); - spawn.startRotationIndex_ = random.uniform_int(0, freeObjectPtr->startRotations_.size()); + spawn.startRotationIndex_ = + random.uniform_int(0, freeObjectPtr->startRotations_.size()); rotation = freeObjectPtr->startRotations_[spawn.startRotationIndex_]; } const auto& freeObject = *freeObjectPtr; @@ -257,9 +337,9 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, while (true) { numAttempts++; randomPos = Mn::Vector3( - random.uniform_float(spawnRange.min().x(), spawnRange.max().x()), - random.uniform_float(spawnRange.min().y(), spawnRange.max().y()), - random.uniform_float(spawnRange.min().z(), spawnRange.max().z())); + random.uniform_float(spawnRange.min().x(), spawnRange.max().x()), + random.uniform_float(spawnRange.min().y(), spawnRange.max().y()), + random.uniform_float(spawnRange.min().z(), spawnRange.max().z())); if (!useExclusionRange || !exclusionRange.contains(randomPos)) { break; @@ -267,8 +347,7 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, BATCHED_SIM_ASSERT(numAttempts < 1000); } - Mn::Matrix4 mat = Mn::Matrix4::from( - rotation.toMatrix(), randomPos); + Mn::Matrix4 mat = Mn::Matrix4::from(rotation.toMatrix(), randomPos); if (placementHelper.place(mat, freeObject)) { auto adjustedSpawnRange = spawnRange; @@ -279,7 +358,8 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& set, spawn.startPos_ = mat.translation(); if (isRobotPosAttempt) { - episode.agentStartPos_ = Mn::Vector2(spawn.startPos_.x(), spawn.startPos_.z()); + episode.agentStartPos_ = + Mn::Vector2(spawn.startPos_.x(), spawn.startPos_.z()); episode.agentStartYaw_ = robotYaw; } else if (isGoalPositionAttempt) { episode.targetObjGoalPos_ = spawn.startPos_; @@ -296,21 +376,23 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet& 
set, } } constexpr int numGoalPositions = 1; - ESP_CHECK(success, "episode-generation failed; couldn't find " - << (numObjectSpawns + numGoalPositions) << " collision-free spawn locations with " - << "stageFixedObjectIndex=" << stageFixedObjectIndex); + ESP_CHECK(success, "episode-generation failed; couldn't find " + << (numObjectSpawns + numGoalPositions) + << " collision-free spawn locations with " + << "staticSceneIndex_=" << episode.staticSceneIndex_); - set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); + set.maxFreeObjects_ = + Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); set.episodes_.emplace_back(std::move(episode)); } -} - -EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, - const BpsSceneMapping& sceneMapping, - const serialize::Collection& collection) { +} // namespace +EpisodeSet generateBenchmarkEpisodeSet( + const EpisodeGeneratorConfig& config, + const BpsSceneMapping& sceneMapping, + const serialize::Collection& collection) { int numEpisodes = config.numEpisodes; core::Random random(config.seed); @@ -318,65 +400,85 @@ EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, EpisodeSet set; - std::vector selectedReplicaCadBakedStages = { - "Baked_sc0_staging_00", - "Baked_sc0_staging_01", - "Baked_sc0_staging_02", - "Baked_sc0_staging_03", - "Baked_sc0_staging_04", - "Baked_sc0_staging_05", - "Baked_sc0_staging_06", - "Baked_sc0_staging_07", - "Baked_sc0_staging_08", - "Baked_sc0_staging_09", - "Baked_sc0_staging_10", - "Baked_sc0_staging_11", - // "Baked_sc0_staging_12", + std::vector selectedReplicaCadBakedScenes = { + "Baked_sc0_staging_00", "Baked_sc0_staging_01", "Baked_sc0_staging_02", + "Baked_sc0_staging_03", "Baked_sc0_staging_04", "Baked_sc0_staging_05", + "Baked_sc0_staging_06", "Baked_sc0_staging_07", "Baked_sc0_staging_08", + "Baked_sc0_staging_09", "Baked_sc0_staging_10", "Baked_sc0_staging_11", + 
"Baked_sc0_staging_12", "Baked_sc0_staging_13", "Baked_sc0_staging_14", + "Baked_sc0_staging_15", "Baked_sc0_staging_16", "Baked_sc0_staging_17", + "Baked_sc0_staging_18", "Baked_sc0_staging_19", "Baked_sc0_staging_20", + "Baked_sc1_staging_00", "Baked_sc1_staging_01", "Baked_sc1_staging_02", + "Baked_sc1_staging_03", "Baked_sc1_staging_04", "Baked_sc1_staging_05", + "Baked_sc1_staging_06", "Baked_sc1_staging_07", "Baked_sc1_staging_08", + "Baked_sc1_staging_09", "Baked_sc1_staging_10", "Baked_sc1_staging_11", + "Baked_sc1_staging_12", "Baked_sc1_staging_13", "Baked_sc1_staging_14", + "Baked_sc1_staging_15", "Baked_sc1_staging_16", "Baked_sc1_staging_17", + "Baked_sc1_staging_18", "Baked_sc1_staging_19", "Baked_sc1_staging_20", + "Baked_sc2_staging_00", "Baked_sc2_staging_01", "Baked_sc2_staging_02", + "Baked_sc2_staging_03", "Baked_sc2_staging_04", "Baked_sc2_staging_05", + "Baked_sc2_staging_06", "Baked_sc2_staging_07", "Baked_sc2_staging_08", + "Baked_sc2_staging_09", "Baked_sc2_staging_10", "Baked_sc2_staging_11", + "Baked_sc2_staging_12", "Baked_sc2_staging_13", "Baked_sc2_staging_14", + "Baked_sc2_staging_15", "Baked_sc2_staging_16", "Baked_sc2_staging_17", + "Baked_sc2_staging_18", "Baked_sc2_staging_19", "Baked_sc2_staging_20", + "Baked_sc3_staging_00", "Baked_sc3_staging_01", "Baked_sc3_staging_02", + "Baked_sc3_staging_03", "Baked_sc3_staging_04", "Baked_sc3_staging_05", + "Baked_sc3_staging_06", "Baked_sc3_staging_07", "Baked_sc3_staging_08", + "Baked_sc3_staging_09", "Baked_sc3_staging_10", "Baked_sc3_staging_11", + "Baked_sc3_staging_12", "Baked_sc3_staging_13", "Baked_sc3_staging_14", + "Baked_sc3_staging_15", "Baked_sc3_staging_16", "Baked_sc3_staging_17", + "Baked_sc3_staging_18", "Baked_sc3_staging_19", "Baked_sc3_staging_20", + }; - ESP_CHECK(config.numStageVariations <= selectedReplicaCadBakedStages.size(), - "generateBenchmarkEpisodeSet: config.numStageVariations=" << config.numStageVariations - << " must be <= 
selectedReplicaCadBakedStages.size()=" << selectedReplicaCadBakedStages.size()); + ESP_CHECK(config.numStageVariations <= selectedReplicaCadBakedScenes.size(), + "generateBenchmarkEpisodeSet: config.numStageVariations=" + << config.numStageVariations + << " must be <= selectedReplicaCadBakedScenes.size()=" + << selectedReplicaCadBakedScenes.size()); for (int i = 0; i < config.numStageVariations; i++) { - addStageFixedObject(set, selectedReplicaCadBakedStages[i]); + addStaticScene(set, selectedReplicaCadBakedScenes[i]); } std::vector selectedYCBObjects = { - //"024_bowl", - "003_cracker_box", - "010_potted_meat_can", - "002_master_chef_can", - "004_sugar_box", - "005_tomato_soup_can", - //"009_gelatin_box", - //"008_pudding_box", - "007_tuna_fish_can", + "024_bowl", + "003_cracker_box", + "010_potted_meat_can", + "002_master_chef_can", + "004_sugar_box", + "005_tomato_soup_can", + "009_gelatin_box", + "008_pudding_box", + "007_tuna_fish_can", }; - ESP_CHECK(config.numObjectVariations <= selectedYCBObjects.size(), - "generateBenchmarkEpisodeSet: config.numObjectVariations=" << config.numObjectVariations - << " must be <= selectedYCBObjects.size()=" << selectedYCBObjects.size()); - + ESP_CHECK(config.numObjectVariations <= selectedYCBObjects.size(), + "generateBenchmarkEpisodeSet: config.numObjectVariations=" + << config.numObjectVariations + << " must be <= selectedYCBObjects.size()=" + << selectedYCBObjects.size()); + for (int i = 0; i < config.numObjectVariations; i++) { addFreeObject(set, selectedYCBObjects[i]); } // sloppy: call postLoadFixup before adding episodes; this means that - // set.maxFreeObjects_ gets computed incorrectly in here (but it will get computed - // correctly, incrementally, in addEpisode). + // set.maxFreeObjects_ gets computed incorrectly in here (but it will get + // computed correctly, incrementally, in addEpisode). 
postLoadFixup(set, sceneMapping, collection); const auto robotProxy = createFreeObjectProxyForRobot(collection); - // distribute stages across episodes + // distribute scenes across episodes for (int i = 0; i < numEpisodes; i++) { - int stageIndex = i * config.numStageVariations / numEpisodes; - addEpisode(config, set, collection, stageIndex, random, random2, robotProxy); + int sceneIndex = i * config.numStageVariations / numEpisodes; + addEpisode(config, set, collection, sceneIndex, random, random2, + robotProxy); } BATCHED_SIM_ASSERT(set.maxFreeObjects_ > 0); return set; } - } // namespace batched_sim } // namespace esp diff --git a/src/esp/batched_sim/EpisodeGenerator.h b/src/esp/batched_sim/EpisodeGenerator.h index ee038512ea..59a8c7f4c8 100644 --- a/src/esp/batched_sim/EpisodeGenerator.h +++ b/src/esp/batched_sim/EpisodeGenerator.h @@ -12,19 +12,20 @@ namespace batched_sim { struct EpisodeGeneratorConfig { int numEpisodes = 100; - int seed = 3; // this is 3 for legacy reason - int numStageVariations = 12; // see selectedReplicaCadBakedStages - int numObjectVariations = 6; // see selectedYCBObjects + int seed = 0; + int numStageVariations = 84; // see selectedReplicaCadBakedStages + int numObjectVariations = 9; // see selectedYCBObjects int minNontargetObjects = 27; int maxNontargetObjects = 32; - bool useFixedRobotStartPos = true; + bool useFixedRobotStartPos = false; bool useFixedRobotStartYaw = false; - bool useFixedRobotJointStartPositions = true; + bool useFixedRobotJointStartPositions = false; ESP_SMART_POINTERS(EpisodeGeneratorConfig); }; -EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, - const BpsSceneMapping& sceneMapping, const serialize::Collection& collection); +EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, + const BpsSceneMapping& sceneMapping, + const serialize::Collection& collection); } // namespace batched_sim } // namespace esp diff --git a/src/esp/batched_sim/EpisodeSet.cpp 
b/src/esp/batched_sim/EpisodeSet.cpp index 58891940b2..21d78c48dc 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -4,6 +4,7 @@ #include "esp/batched_sim/EpisodeSet.h" #include "esp/batched_sim/BatchedSimAssert.h" +#include "esp/batched_sim/GlmUtils.h" #include "esp/core/Check.h" #include "esp/io/json.h" @@ -16,22 +17,30 @@ namespace Mn = Magnum; namespace esp { namespace batched_sim { -void updateFromSerializeCollection(FreeObject& freeObject, const serialize::FreeObject& serFreeObject, const serialize::Collection& collection) { +const std::string& getFreeObjectName(const FreeObject& freeObject, + const EpisodeSet& set) { + return safeVectorGet(set.renderAssets_, freeObject.renderAssetIndex_).name_; +} - freeObject.aabb_ = Mn::Range3D(serFreeObject.collisionBox.min, serFreeObject.collisionBox.max); +void updateFromSerializeCollection(FreeObject& freeObject, + const serialize::FreeObject& serFreeObject, + const serialize::Collection& collection) { + freeObject.aabb_ = Mn::Range3D(serFreeObject.collisionBox.min, + serFreeObject.collisionBox.max); freeObject.heldRotationIndex_ = serFreeObject.heldRotationIndex; - ESP_CHECK(freeObject.heldRotationIndex_ >= 0 - && freeObject.heldRotationIndex_ < freeObject.startRotations_.size(), - "updateFromSerializeCollection: heldRotationIndex " << serFreeObject.heldRotationIndex - << " is out-of-range for FreeObject " - << freeObject.name_ << " with startRotations.size() == " << freeObject.startRotations_.size()); + ESP_CHECK( + freeObject.heldRotationIndex_ >= 0 && + freeObject.heldRotationIndex_ < freeObject.startRotations_.size(), + "updateFromSerializeCollection: heldRotationIndex " + << serFreeObject.heldRotationIndex + << " is out-of-range for FreeObject with startRotations.size() == " + << freeObject.startRotations_.size()); freeObject.collisionSpheres_.clear(); std::vector generatedSpheres; const std::vector* serializeCollisionSpheres = nullptr; if 
(!serFreeObject.generateCollisionSpheresTechnique.empty()) { - auto& spheres = generatedSpheres; serializeCollisionSpheres = &generatedSpheres; @@ -43,11 +52,11 @@ void updateFromSerializeCollection(FreeObject& freeObject, const serialize::Free Mn::Vector3 aabbCenter = aabb.center(); if (serFreeObject.generateCollisionSpheresTechnique == "box") { - // small and medium spheres at each corner // consolidate duplicates at the end - // insert larger spheres first, so that de-duplication (later) leaves larger spheres + // insert larger spheres first, so that de-duplication (later) leaves + // larger spheres for (float r : {largeRadius, mediumRadius, smallRadius}) { if (aabb.size().length() < r * 2.f) { // object is too small for even one sphere of this radius @@ -58,17 +67,22 @@ void updateFromSerializeCollection(FreeObject& freeObject, const serialize::Free } if (r == largeRadius) { - int numX = int(aabb.sizeX() / (largeRadius * 2.f)) + 1; int numY = int(aabb.sizeY() / (largeRadius * 2.f)) + 1; int numZ = int(aabb.sizeZ() / (largeRadius * 2.f)) + 1; Mn::Vector3 origin; for (int ix = 0; ix < numX; ix++) { - origin.x() = Mn::Math::lerp(aabb.min().x() + largeRadius, aabb.max().x() - largeRadius, float(ix) / (numX - 1)); + origin.x() = Mn::Math::lerp(aabb.min().x() + largeRadius, + aabb.max().x() - largeRadius, + float(ix) / (numX - 1)); for (int iy = 0; iy < numY; iy++) { - origin.y() = Mn::Math::lerp(aabb.min().y() + largeRadius, aabb.max().y() - largeRadius, float(iy) / (numY - 1)); + origin.y() = Mn::Math::lerp(aabb.min().y() + largeRadius, + aabb.max().y() - largeRadius, + float(iy) / (numY - 1)); for (int iz = 0; iz < numZ; iz++) { - origin.z() = Mn::Math::lerp(aabb.min().z() + largeRadius, aabb.max().z() - largeRadius, float(iz) / (numZ - 1)); + origin.z() = Mn::Math::lerp(aabb.min().z() + largeRadius, + aabb.max().z() - largeRadius, + float(iz) / (numZ - 1)); spheres.push_back({origin, r}); } } @@ -85,9 +99,10 @@ void updateFromSerializeCollection(FreeObject& 
freeObject, const serialize::Free } } - } else if (serFreeObject.generateCollisionSpheresTechnique == "uprightCylinder") { - - // insert larger spheres first, so that de-duplication (later) leaves larger spheres + } else if (serFreeObject.generateCollisionSpheresTechnique == + "uprightCylinder") { + // insert larger spheres first, so that de-duplication (later) leaves + // larger spheres for (float r : {largeRadius, mediumRadius, smallRadius}) { if (aabb.size().length() < r * 2.f) { // object is too small for even one sphere of this radius @@ -114,20 +129,27 @@ void updateFromSerializeCollection(FreeObject& freeObject, const serialize::Free } } else { - ESP_CHECK(false, "free object generateCollisionSpheresTechnique \"" - << serFreeObject.generateCollisionSpheresTechnique << "\" not recognized. " - "Valid values are empty-string, \"box\", and \"uprightCylinder\""); + ESP_CHECK(false, "free object generateCollisionSpheresTechnique \"" + << serFreeObject.generateCollisionSpheresTechnique + << "\" not recognized. " + "Valid values are empty-string, \"box\", and " + "\"uprightCylinder\""); } - // clamp to fit inside box extents, but don't move sphere center past center of box (to other side) + // clamp to fit inside box extents, but don't move sphere center past center + // of box (to other side) for (auto& sphere : spheres) { Mn::Vector3 clampedOrigin; for (int dim = 0; dim < 3; dim++) { - clampedOrigin[dim] = sphere.origin[dim] < aabbCenter[dim] - ? Mn::Math::clamp(sphere.origin[dim], - Mn::Math::min(aabb.min()[dim] + sphere.radius, aabbCenter[dim]), aabbCenter[dim]) - : Mn::Math::clamp(sphere.origin[dim], - aabbCenter[dim], Mn::Math::max(aabb.max()[dim] - sphere.radius, aabbCenter[dim])); + clampedOrigin[dim] = + sphere.origin[dim] < aabbCenter[dim] + ? 
Mn::Math::clamp(sphere.origin[dim], + Mn::Math::min(aabb.min()[dim] + sphere.radius, + aabbCenter[dim]), + aabbCenter[dim]) + : Mn::Math::clamp(sphere.origin[dim], aabbCenter[dim], + Mn::Math::max(aabb.max()[dim] - sphere.radius, + aabbCenter[dim])); } sphere.origin = clampedOrigin; } @@ -146,8 +168,10 @@ void updateFromSerializeCollection(FreeObject& freeObject, const serialize::Free BATCHED_SIM_ASSERT(!spheres.empty()); } else { - ESP_CHECK(!serFreeObject.collisionSpheres.empty(), "no collision spheres for free object " - << serFreeObject.name << " and generateCollisionSpheresFromBox==false"); + ESP_CHECK(!serFreeObject.collisionSpheres.empty(), + "no collision spheres for free object " + << serFreeObject.name + << " and generateCollisionSpheresFromBox==false"); serializeCollisionSpheres = &serFreeObject.collisionSpheres; } @@ -155,17 +179,19 @@ void updateFromSerializeCollection(FreeObject& freeObject, const serialize::Free int radiusIdx = getCollisionRadiusIndex(collection, serSphere.radius); freeObject.collisionSpheres_.push_back({serSphere.origin, radiusIdx}); } - } -void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& collection) { - +void updateFromSerializeCollection(EpisodeSet& set, + const serialize::Collection& collection) { for (const auto& serFreeObject : collection.freeObjects) { - auto it = std::find_if(set.freeObjects_.begin(), set.freeObjects_.end(), - [&serFreeObject](const auto& item) { return item.name_ == serFreeObject.name; }); + [&](const auto& item) { + return getFreeObjectName(item, set) == + serFreeObject.name; + }); if (it == set.freeObjects_.end()) { - // collection may have info for free objects not used in the current EpisodeSet + // collection may have info for free objects not used in the current + // EpisodeSet continue; } @@ -174,52 +200,78 @@ void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& } } -void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, 
const serialize::Collection& collection) { +void postLoadFixup(EpisodeSet& set, + const BpsSceneMapping& sceneMapping, + const serialize::Collection& collection) { + for (auto& renderAsset : set.renderAssets_) { + renderAsset.instanceBlueprint_ = + sceneMapping.findInstanceBlueprint(renderAsset.name_); + renderAsset.needsPostLoadFixup_ = false; + } for (auto& freeObj : set.freeObjects_) { - freeObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(freeObj.name_); - freeObj.needsPostLoadFixup_ = false; + ESP_CHECK(freeObj.renderAssetIndex_ >= 0 && + freeObj.renderAssetIndex_ < set.renderAssets_.size(), + "postLoadFixup: FreeObject " + << getFreeObjectName(freeObj, set) + << " has out-of-range renderAssetIndex_=" + << freeObj.renderAssetIndex_); for (const auto& rot : freeObj.startRotations_) { - ESP_CHECK(rot.isNormalized(), "postLoadFixup: FreeObject " << freeObj.name_ << " rotation " << rot << " isn't normalized"); + ESP_CHECK(rot.isNormalized(), "postLoadFixup: FreeObject " + << getFreeObjectName(freeObj, set) + << " rotation " << rot + << " isn't normalized"); } } - + set.allEpisodesAABB_ = Mn::Range3D(Mn::Math::ZeroInit); bool isFirstFixedObj = true; - for (auto& fixedObj : set.fixedObjects_) { - fixedObj.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(fixedObj.name_); + for (auto& staticScene : set.staticScenes_) { + for (auto& instance : staticScene.renderAssetInstances_) { + ESP_CHECK(instance.renderAssetIndex_ >= 0 && + instance.renderAssetIndex_ < set.renderAssets_.size(), + "postLoadFixup: StaticScene render asset instance has " + "out-of-range renderAssetIndex_=" + << instance.renderAssetIndex_); + + const auto& transform = instance.transform_; + Mn::Matrix4 mat = + Mn::Matrix4::from(transform.rotation_.toMatrix(), transform.origin_); + mat = mat * Mn::Matrix4::scaling(instance.transform_.scale_); + instance.glMat_ = toGlmMat4x3(mat); + } // sloppy: copy-pasted from addStageFixedObject - std::string columnGridFilepathBase = 
"../data/columngrids/" + fixedObj.name_ + "_stage_only"; - fixedObj.columnGridSet_.load(columnGridFilepathBase); + std::string columnGridFilepathBase = + "../data/columngrids/" + staticScene.columnGridSetName_; + staticScene.columnGridSet_.load(columnGridFilepathBase); - ESP_CHECK(fixedObj.columnGridSet_.getSphereRadii() == collection.collisionRadiusWorkingSet, - "ColumnGridSet " << fixedObj.name_ << " with radii " << fixedObj.columnGridSet_.getSphereRadii() - << " doesn't match collection collision radius working set " - << collection.collisionRadiusWorkingSet); + ESP_CHECK(staticScene.columnGridSet_.getSphereRadii() == + collection.collisionRadiusWorkingSet, + "ColumnGridSet " + << staticScene.columnGridSetName_ << " with radii " + << staticScene.columnGridSet_.getSphereRadii() + << " doesn't match collection collision radius working set " + << collection.collisionRadiusWorkingSet); - fixedObj.needsPostLoadFixup_ = false; + staticScene.needsPostLoadFixup_ = false; // update set.allEpisodesAABB_ { - const auto& columnGrid = fixedObj.columnGridSet_.getColumnGrid(0); + const auto& columnGrid = staticScene.columnGridSet_.getColumnGrid(0); constexpr float dummyMinY = -1e7f; constexpr float dummyMaxY = 1e7f; - Mn::Vector3 cgMin( - columnGrid.minX, - dummyMinY, - columnGrid.minZ); - Mn::Vector3 cgMax( - columnGrid.getMaxX(), - dummyMaxY, - columnGrid.getMaxZ()); + Mn::Vector3 cgMin(columnGrid.minX, dummyMinY, columnGrid.minZ); + Mn::Vector3 cgMax(columnGrid.getMaxX(), dummyMaxY, columnGrid.getMaxZ()); if (isFirstFixedObj) { set.allEpisodesAABB_ = Mn::Range3D(cgMin, cgMax); isFirstFixedObj = false; } else { - set.allEpisodesAABB_.min() = Mn::Math::min(set.allEpisodesAABB_.min(), cgMin); - set.allEpisodesAABB_.max() = Mn::Math::max(set.allEpisodesAABB_.max(), cgMax); + set.allEpisodesAABB_.min() = + Mn::Math::min(set.allEpisodesAABB_.min(), cgMin); + set.allEpisodesAABB_.max() = + Mn::Math::max(set.allEpisodesAABB_.max(), cgMax); } } } @@ -227,32 +279,44 @@ void 
postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, const s set.maxFreeObjects_ = 0; for (int i = 0; i < set.episodes_.size(); i++) { const auto& episode = safeVectorGet(set.episodes_, i); - set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, (int32_t)episode.numFreeObjectSpawns_); + set.maxFreeObjects_ = Mn::Math::max(set.maxFreeObjects_, + (int32_t)episode.numFreeObjectSpawns_); - ESP_CHECK(episode.targetObjGoalRotation_.isNormalized(), - "postLoadFixup: episode " << i << " targetObjGoalRotation " - << episode.targetObjGoalRotation_ << " isn't normalized"); + ESP_CHECK(episode.targetObjGoalRotation_.isNormalized(), + "postLoadFixup: episode " << i << " targetObjGoalRotation " + << episode.targetObjGoalRotation_ + << " isn't normalized"); ESP_CHECK(episode.numFreeObjectSpawns_ > 0, - "postLoadFixup: episode " << i << " has invalid numFreeObjectSpawns_=" << episode.numFreeObjectSpawns_); - ESP_CHECK(episode.stageFixedObjIndex >= 0 && episode.stageFixedObjIndex < set.fixedObjects_.size(), - "postLoadFixup: episode " << i << " has invalid stageFixedObjIndex=" << episode.stageFixedObjIndex); - ESP_CHECK(episode.targetObjIndex_ >= 0 && episode.targetObjIndex_ < episode.numFreeObjectSpawns_, - "postLoadFixup: episode " << i << " has invalid targetObjIndex_=" << episode.targetObjIndex_); + "postLoadFixup: episode " << i + << " has invalid numFreeObjectSpawns_=" + << episode.numFreeObjectSpawns_); + ESP_CHECK(episode.staticSceneIndex_ >= 0 && + episode.staticSceneIndex_ < set.staticScenes_.size(), + "postLoadFixup: episode " << i + << " has invalid staticSceneIndex_=" + << episode.staticSceneIndex_); + ESP_CHECK(episode.targetObjIndex_ >= 0 && + episode.targetObjIndex_ < episode.numFreeObjectSpawns_, + "postLoadFixup: episode " << i << " has invalid targetObjIndex_=" + << episode.targetObjIndex_); } - set.needsPostLoadFixup_ = false; updateFromSerializeCollection(set, collection); -} + for (auto& freeObj : set.freeObjects_) { + 
freeObj.needsPostLoadFixup_ = false; + } + set.needsPostLoadFixup_ = false; +} -bool fromJsonValue(const esp::io::JsonGenericValue& obj, - FreeObject& val) { - // sloppy: some fields here are populated by updateFromSerializeCollection so they - // shouldn't be saved/loaded - esp::io::readMember(obj, "name", val.name_); +bool fromJsonValue(const esp::io::JsonGenericValue& obj, FreeObject& val) { + // sloppy: some fields here are populated by updateFromSerializeCollection so + // they shouldn't be saved/loaded + esp::io::readMember(obj, "renderAssetIndex", val.renderAssetIndex_); // esp::io::readMember(obj, "aabb", val.aabb_); esp::io::readMember(obj, "startRotations", val.startRotations_); - ESP_CHECK(!val.startRotations_.empty(), "FreeObject " << val.name_ << " has empty startRotations"); + ESP_CHECK(!val.startRotations_.empty(), + "FreeObject has empty startRotations"); // esp::io::readMember(obj, "heldRotationIndex", val.heldRotationIndex_); // esp::io::readMember(obj, "collisionSpheres", val.collisionSpheres_); @@ -263,21 +327,21 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, } esp::io::JsonGenericValue toJsonValue(const FreeObject& x, - esp::io::JsonAllocator& allocator) { - // sloppy: some fields here are populated by updateFromSerializeCollection so they - // shouldn't be saved/loaded + esp::io::JsonAllocator& allocator) { + // sloppy: some fields here are populated by updateFromSerializeCollection so + // they shouldn't be saved/loaded esp::io::JsonGenericValue obj(rapidjson::kObjectType); - esp::io::addMember(obj, "name", x.name_, allocator); - //esp::io::addMember(obj, "aabb", x.aabb_, allocator); + esp::io::addMember(obj, "renderAssetIndex", x.renderAssetIndex_, allocator); + // esp::io::addMember(obj, "aabb", x.aabb_, allocator); esp::io::addMember(obj, "startRotations", x.startRotations_, allocator); - //esp::io::addMember(obj, "heldRotationIndex", x.heldRotationIndex_, allocator); - //esp::io::addMember(obj, "collisionSpheres", 
x.collisionSpheres_, allocator); + // esp::io::addMember(obj, "heldRotationIndex", x.heldRotationIndex_, + // allocator); esp::io::addMember(obj, "collisionSpheres", + // x.collisionSpheres_, allocator); return obj; } -bool fromJsonValue(const esp::io::JsonGenericValue& obj, - CollisionSphere& val) { +bool fromJsonValue(const esp::io::JsonGenericValue& obj, CollisionSphere& val) { esp::io::readMember(obj, "origin", val.origin); esp::io::readMember(obj, "radiusIdx", val.radiusIdx); @@ -285,7 +349,7 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, } esp::io::JsonGenericValue toJsonValue(const CollisionSphere& x, - esp::io::JsonAllocator& allocator) { + esp::io::JsonAllocator& allocator) { esp::io::JsonGenericValue obj(rapidjson::kObjectType); esp::io::addMember(obj, "origin", x.origin, allocator); esp::io::addMember(obj, "radiusIdx", x.radiusIdx, allocator); @@ -293,25 +357,79 @@ esp::io::JsonGenericValue toJsonValue(const CollisionSphere& x, return obj; } +bool fromJsonValue(const esp::io::JsonGenericValue& obj, Transform& val) { + esp::io::readMember(obj, "origin", val.origin_); + esp::io::readMember(obj, "rotation", val.rotation_); + esp::io::readMember(obj, "scale", val.scale_); + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const Transform& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "origin", x.origin_, allocator); + esp::io::addMember(obj, "rotation", x.rotation_, allocator); + esp::io::addMember(obj, "scale", x.scale_, allocator); + + return obj; +} + bool fromJsonValue(const esp::io::JsonGenericValue& obj, - FixedObject& val) { + RenderAssetInstance& val) { + esp::io::readMember(obj, "renderAssetIndex", val.renderAssetIndex_); + esp::io::readMember(obj, "transform", val.transform_); + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const RenderAssetInstance& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue 
obj(rapidjson::kObjectType); + esp::io::addMember(obj, "renderAssetIndex", x.renderAssetIndex_, allocator); + esp::io::addMember(obj, "transform", x.transform_, allocator); + + return obj; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, RenderAsset& val) { esp::io::readMember(obj, "name", val.name_); - // for instanceBlueprint_ and columnGridSet_ + // for instanceBlueprint_ val.needsPostLoadFixup_ = true; return true; } -esp::io::JsonGenericValue toJsonValue(const FixedObject& x, - esp::io::JsonAllocator& allocator) { +esp::io::JsonGenericValue toJsonValue(const RenderAsset& x, + esp::io::JsonAllocator& allocator) { esp::io::JsonGenericValue obj(rapidjson::kObjectType); esp::io::addMember(obj, "name", x.name_, allocator); return obj; } -bool fromJsonValue(const esp::io::JsonGenericValue& obj, - FreeObjectSpawn& val) { +bool fromJsonValue(const esp::io::JsonGenericValue& obj, StaticScene& val) { + esp::io::readMember(obj, "name", val.name_); + esp::io::readMember(obj, "renderAssetInstances", val.renderAssetInstances_); + esp::io::readMember(obj, "columnGridSetName", val.columnGridSetName_); + + // for columnGridSet_ + val.needsPostLoadFixup_ = true; + + return true; +} + +esp::io::JsonGenericValue toJsonValue(const StaticScene& x, + esp::io::JsonAllocator& allocator) { + esp::io::JsonGenericValue obj(rapidjson::kObjectType); + esp::io::addMember(obj, "name", x.name_, allocator); + esp::io::addMember(obj, "renderAssetInstances", x.renderAssetInstances_, + allocator); + esp::io::addMember(obj, "columnGridSetName", x.columnGridSetName_, allocator); + return obj; +} + +bool fromJsonValue(const esp::io::JsonGenericValue& obj, FreeObjectSpawn& val) { esp::io::readMember(obj, "freeObjIndex", val.freeObjIndex_); esp::io::readMember(obj, "startRotationIndex", val.startRotationIndex_); esp::io::readMember(obj, "startPos", val.startPos_); @@ -320,52 +438,57 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, } esp::io::JsonGenericValue toJsonValue(const 
FreeObjectSpawn& x, - esp::io::JsonAllocator& allocator) { + esp::io::JsonAllocator& allocator) { esp::io::JsonGenericValue obj(rapidjson::kObjectType); esp::io::addMember(obj, "freeObjIndex", x.freeObjIndex_, allocator); - esp::io::addMember(obj, "startRotationIndex", x.startRotationIndex_, allocator); + esp::io::addMember(obj, "startRotationIndex", x.startRotationIndex_, + allocator); esp::io::addMember(obj, "startPos", x.startPos_, allocator); return obj; } -bool fromJsonValue(const esp::io::JsonGenericValue& obj, - Episode& val) { - esp::io::readMember(obj, "stageFixedObjIndex", val.stageFixedObjIndex); +bool fromJsonValue(const esp::io::JsonGenericValue& obj, Episode& val) { + esp::io::readMember(obj, "staticSceneIndex", val.staticSceneIndex_); esp::io::readMember(obj, "numFreeObjectSpawns", val.numFreeObjectSpawns_); esp::io::readMember(obj, "targetObjIndex", val.targetObjIndex_); - esp::io::readMember(obj, "firstFreeObjectSpawnIndex", val.firstFreeObjectSpawnIndex_); + esp::io::readMember(obj, "firstFreeObjectSpawnIndex", + val.firstFreeObjectSpawnIndex_); esp::io::readMember(obj, "agentStartPos", val.agentStartPos_); esp::io::readMember(obj, "agentStartYaw", val.agentStartYaw_); - esp::io::readMember(obj, "robotStartJointPositions", val.robotStartJointPositions_); + esp::io::readMember(obj, "robotStartJointPositions", + val.robotStartJointPositions_); esp::io::readMember(obj, "targetObjGoalPos", val.targetObjGoalPos_); esp::io::readMember(obj, "targetObjGoalRotation", val.targetObjGoalRotation_); return true; } esp::io::JsonGenericValue toJsonValue(const Episode& x, - esp::io::JsonAllocator& allocator) { + esp::io::JsonAllocator& allocator) { esp::io::JsonGenericValue obj(rapidjson::kObjectType); - esp::io::addMember(obj, "stageFixedObjIndex", x.stageFixedObjIndex, allocator); - esp::io::addMember(obj, "numFreeObjectSpawns", x.numFreeObjectSpawns_, allocator); + esp::io::addMember(obj, "staticSceneIndex", x.staticSceneIndex_, allocator); + 
esp::io::addMember(obj, "numFreeObjectSpawns", x.numFreeObjectSpawns_, + allocator); esp::io::addMember(obj, "targetObjIndex", x.targetObjIndex_, allocator); - esp::io::addMember(obj, "firstFreeObjectSpawnIndex", x.firstFreeObjectSpawnIndex_, allocator); + esp::io::addMember(obj, "firstFreeObjectSpawnIndex", + x.firstFreeObjectSpawnIndex_, allocator); esp::io::addMember(obj, "agentStartPos", x.agentStartPos_, allocator); esp::io::addMember(obj, "agentStartYaw", x.agentStartYaw_, allocator); - esp::io::addMember(obj, "robotStartJointPositions", x.robotStartJointPositions_, allocator); + esp::io::addMember(obj, "robotStartJointPositions", + x.robotStartJointPositions_, allocator); esp::io::addMember(obj, "targetObjGoalPos", x.targetObjGoalPos_, allocator); - esp::io::addMember(obj, "targetObjGoalRotation", x.targetObjGoalRotation_, allocator); + esp::io::addMember(obj, "targetObjGoalRotation", x.targetObjGoalRotation_, + allocator); return obj; } - -bool fromJsonValue(const esp::io::JsonGenericValue& obj, - EpisodeSet& val) { - esp::io::readMember(obj, "episodes", val.episodes_); - esp::io::readMember(obj, "fixedObjects", val.fixedObjects_); - esp::io::readMember(obj, "freeObjectSpawns", val.freeObjectSpawns_); +bool fromJsonValue(const esp::io::JsonGenericValue& obj, EpisodeSet& val) { + esp::io::readMember(obj, "renderAssets", val.renderAssets_); + esp::io::readMember(obj, "staticScenes", val.staticScenes_); esp::io::readMember(obj, "freeObjects", val.freeObjects_); + esp::io::readMember(obj, "freeObjectSpawns", val.freeObjectSpawns_); + esp::io::readMember(obj, "episodes", val.episodes_); // for maxFreeObjects_ and allEpisodesAABB_ val.needsPostLoadFixup_ = true; @@ -374,14 +497,14 @@ bool fromJsonValue(const esp::io::JsonGenericValue& obj, return true; } - esp::io::JsonGenericValue toJsonValue(const EpisodeSet& x, - esp::io::JsonAllocator& allocator) { + esp::io::JsonAllocator& allocator) { esp::io::JsonGenericValue obj(rapidjson::kObjectType); - 
esp::io::addMember(obj, "episodes", x.episodes_, allocator); - esp::io::addMember(obj, "fixedObjects", x.fixedObjects_, allocator); - esp::io::addMember(obj, "freeObjectSpawns", x.freeObjectSpawns_, allocator); + esp::io::addMember(obj, "renderAssets", x.renderAssets_, allocator); + esp::io::addMember(obj, "staticScenes", x.staticScenes_, allocator); esp::io::addMember(obj, "freeObjects", x.freeObjects_, allocator); + esp::io::addMember(obj, "freeObjectSpawns", x.freeObjectSpawns_, allocator); + esp::io::addMember(obj, "episodes", x.episodes_, allocator); // don't write maxFreeObjects_ return obj; @@ -389,25 +512,24 @@ esp::io::JsonGenericValue toJsonValue(const EpisodeSet& x, EpisodeSet EpisodeSet::loadFromFile(const std::string& filepath) { EpisodeSet episodeSet; - ESP_CHECK(Cr::Utility::Directory::exists(filepath), "couldn't find EpisodeSet file " << filepath); + ESP_CHECK(Cr::Utility::Directory::exists(filepath), + "couldn't find EpisodeSet file " << filepath); auto newDoc = esp::io::parseJsonFile(filepath); esp::io::readMember(newDoc, "episodeSet", episodeSet); return episodeSet; } - -void EpisodeSet::saveToFile(const std::string& filepath)const { - +void EpisodeSet::saveToFile(const std::string& filepath) const { rapidjson::Document document(rapidjson::kObjectType); rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); esp::io::addMember(document, "episodeSet", *this, allocator); - + // EpisodeSet use floats (not doubles) so this is plenty of precision const float maxDecimalPlaces = 7; constexpr bool usePrettyWriter = false; bool success = esp::io::writeJsonToFile(document, filepath, usePrettyWriter, - maxDecimalPlaces); + maxDecimalPlaces); ESP_CHECK(success, "failed to save EpisodeSet to " << filepath); } diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 20688f6baf..c6d70f5d03 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -6,17 +6,18 @@ #define 
ESP_BATCHEDSIM_EPISODESET_H_ #include "esp/batched_sim/BatchedSimAssert.h" -#include "esp/batched_sim/ColumnGrid.h" -#include "esp/batched_sim/CollisionBroadphaseGrid.h" #include "esp/batched_sim/BpsSceneMapping.h" +#include "esp/batched_sim/CollisionBroadphaseGrid.h" +#include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/SerializeCollection.h" #include -#include #include +#include -#include +#include #include +#include namespace esp { namespace batched_sim { @@ -25,25 +26,47 @@ namespace batched_sim { struct CollisionSphere { Magnum::Vector3 origin; - int radiusIdx = -1; // see BatchedSimulator::getCollisionRadius() + int radiusIdx = -1; // see BatchedSimulator::getCollisionRadius() }; -class FreeObject { +class RenderAsset { public: std::string name_; BpsSceneMapping::InstanceBlueprint instanceBlueprint_; + bool needsPostLoadFixup_ = true; +}; + +class FreeObject { + public: + int renderAssetIndex_ = -1; Magnum::Range3D aabb_; std::vector startRotations_; - int heldRotationIndex_; // index into startRotations_ + int heldRotationIndex_; // index into startRotations_ std::vector collisionSpheres_; bool needsPostLoadFixup_ = true; }; -class FixedObject { +class Transform { + public: + Magnum::Vector3 origin_; + Magnum::Quaternion rotation_; + Magnum::Vector3 scale_; +}; + +class RenderAssetInstance { + public: + int renderAssetIndex_; + Transform transform_; // for serialization + glm::mat4x3 glMat_; // computed from transform_ +}; + +class StaticScene { public: std::string name_; - bool needsPostLoadFixup_ = true; - BpsSceneMapping::InstanceBlueprint instanceBlueprint_; + std::vector renderAssetInstances_; + std::string columnGridSetName_; + + bool needsPostLoadFixup_ = true; // todo: get rid of all these bools esp::batched_sim::ColumnGridSet columnGridSet_; }; @@ -51,31 +74,36 @@ class FreeObjectSpawn { public: int16_t freeObjIndex_; int16_t startRotationIndex_; - Magnum::Vector3 startPos_; // discretize? + Magnum::Vector3 startPos_; // discretize? 
}; class Episode { public: - int32_t stageFixedObjIndex = -1; // todo: array of fixed objects + int16_t staticSceneIndex_ = -1; int16_t numFreeObjectSpawns_ = 0; - int16_t targetObjIndex_ = -1; // 0..numFreeObjectSpawns - 1, see also freeObjectIndex - int32_t firstFreeObjectSpawnIndex_ = -1; // index into EpisodeSet::freeObjectSpawns_ + int16_t targetObjIndex_ = + -1; // 0..numFreeObjectSpawns - 1, see also freeObjectIndex + int32_t firstFreeObjectSpawnIndex_ = + -1; // index into EpisodeSet::freeObjectSpawns_ Magnum::Vector2 agentStartPos_ = Magnum::Vector2(Magnum::Math::ZeroInit); - float agentStartYaw_ = 0.f; // radians + float agentStartYaw_ = 0.f; // radians static constexpr int MAX_JOINT_POSITIONS = 7; std::array robotStartJointPositions_; // task-specific Magnum::Vector3 targetObjGoalPos_ = Magnum::Vector3(Magnum::Math::ZeroInit); - Magnum::Quaternion targetObjGoalRotation_ = Magnum::Quaternion(Magnum::Math::IdentityInit); + Magnum::Quaternion targetObjGoalRotation_ = + Magnum::Quaternion(Magnum::Math::IdentityInit); }; class EpisodeSet { public: - std::vector episodes_; // ~50,000 - std::vector fixedObjects_; // ~100, max 32K - std::vector freeObjectSpawns_; // num episodes * num-spawns-per-episode, ~5,000,000 - std::vector freeObjects_; // ~100, max 32K + std::vector episodes_; // ~50,000 + std::vector renderAssets_; // ~1000, no max + std::vector staticScenes_; // ~100, max 32K + std::vector + freeObjectSpawns_; // num episodes * num-spawns-per-episode, ~5,000,000 + std::vector freeObjects_; // ~100, max 32K int maxFreeObjects_ = -1; Magnum::Range3D allEpisodesAABB_; bool needsPostLoadFixup_ = true; @@ -87,7 +115,7 @@ class EpisodeSet { class EpisodeInstance { public: int32_t episodeIndex_ = -1; - int32_t stageFixedObjectInstanceId_ = -1; + std::vector staticSceneInstanceIds_; // free obj instance ids stored in freeObjectInstanceIds_ CollisionBroadphaseGrid colGrid_; // todo: more robust storage for moved free objects @@ -99,15 +127,19 @@ class 
EpisodeInstance { class EpisodeInstanceSet { public: - std::vector episodeInstanceByEnv_; // num envs, ~1,000 + std::vector episodeInstanceByEnv_; // num envs, ~1,000 }; -void updateFromSerializeCollection(EpisodeSet& set, const serialize::Collection& collection); +void updateFromSerializeCollection(EpisodeSet& set, + const serialize::Collection& collection); -void updateFromSerializeCollection(FreeObject& freeObject, const serialize::FreeObject& serFreeObject, const serialize::Collection& collection); +void updateFromSerializeCollection(FreeObject& freeObject, + const serialize::FreeObject& serFreeObject, + const serialize::Collection& collection); -void postLoadFixup(EpisodeSet& set, const BpsSceneMapping& sceneMapping, - const serialize::Collection& collection); +void postLoadFixup(EpisodeSet& set, + const BpsSceneMapping& sceneMapping, + const serialize::Collection& collection); } // namespace batched_sim } // namespace esp diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index b0d71e774e..b458074813 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -5,8 +5,8 @@ #include "esp/batched_sim/BatchedSimAssert.h" #include "esp/batched_sim/BatchedSimulator.h" #include "esp/batched_sim/BpsSceneMapping.h" -#include "esp/batched_sim/GlmUtils.h" #include "esp/batched_sim/ColumnGrid.h" +#include "esp/batched_sim/GlmUtils.h" #include "esp/core/logging.h" #include @@ -83,102 +83,157 @@ void saveFrame(const char* fname, // Hacky way to get keypresses at the terminal. Linux only. 
// https://stackoverflow.com/a/67038432 -int key_press() { // not working: ¹ (251), num lock (-144), caps lock (-20), windows key (-91), kontext menu key (-93) - struct termios term; - tcgetattr(0, &term); - while(true) { - term.c_lflag &= ~(ICANON|ECHO); // turn off line buffering and echoing - tcsetattr(0, TCSANOW, &term); - int nbbytes; - ioctl(0, FIONREAD, &nbbytes); // 0 is STDIN - while(!nbbytes) { - sleep(0.01); - fflush(stdout); - ioctl(0, FIONREAD, &nbbytes); // 0 is STDIN - } - int key = (int)getchar(); - if(key==27||key==194||key==195) { // escape, 194/195 is escape for °ß´äöüÄÖÜ - key = (int)getchar(); - if(key==91) { // [ following escape - key = (int)getchar(); // get code of next char after \e[ - if(key==49) { // F5-F8 - key = 62+(int)getchar(); // 53, 55-57 - if(key==115) key++; // F5 code is too low by 1 - getchar(); // take in following ~ (126), but discard code - } else if(key==50) { // insert or F9-F12 - key = (int)getchar(); - if(key==126) { // insert - key = 45; - } else { // F9-F12 - key += 71; // 48, 49, 51, 52 - if(key<121) key++; // F11 and F12 are too low by 1 - getchar(); // take in following ~ (126), but discard code - } - } else if(key==51||key==53||key==54) { // delete, page up/down - getchar(); // take in following ~ (126), but discard code - } - } else if(key==79) { // F1-F4 - key = 32+(int)getchar(); // 80-83 - } - key = -key; // use negative numbers for escaped keys - } - term.c_lflag |= (ICANON|ECHO); // turn on line buffering and echoing - tcsetattr(0, TCSANOW, &term); - switch(key) { - case 127: return 8; // backspace - case -27: return 27; // escape - case -51: return 127; // delete - case -164: return 132; // ä - case -182: return 148; // ö - case -188: return 129; // ü - case -132: return 142; // Ä - case -150: return 153; // Ö - case -156: return 154; // Ü - case -159: return 225; // ß - case -181: return 230; // µ - case -167: return 245; // § - case -176: return 248; // ° - case -178: return 253; // ² - case -179: return 
252; // ³ - case -180: return 239; // ´ - case -65: return -38; // up arrow - case -66: return -40; // down arrow - case -68: return -37; // left arrow - case -67: return -39; // right arrow - case -53: return -33; // page up - case -54: return -34; // page down - case -72: return -36; // pos1 - case -70: return -35; // end - case 0: continue; - case 1: continue; // disable Ctrl + a - case 2: continue; // disable Ctrl + b - case 3: continue; // disable Ctrl + c (terminates program) - case 4: continue; // disable Ctrl + d - case 5: continue; // disable Ctrl + e - case 6: continue; // disable Ctrl + f - case 7: continue; // disable Ctrl + g - case 8: continue; // disable Ctrl + h - //case 9: continue; // disable Ctrl + i (ascii for tab) - //case 10: continue; // disable Ctrl + j (ascii for new line) - case 11: continue; // disable Ctrl + k - case 12: continue; // disable Ctrl + l - case 13: continue; // disable Ctrl + m - case 14: continue; // disable Ctrl + n - case 15: continue; // disable Ctrl + o - case 16: continue; // disable Ctrl + p - case 17: continue; // disable Ctrl + q - case 18: continue; // disable Ctrl + r - case 19: continue; // disable Ctrl + s - case 20: continue; // disable Ctrl + t - case 21: continue; // disable Ctrl + u - case 22: continue; // disable Ctrl + v - case 23: continue; // disable Ctrl + w - case 24: continue; // disable Ctrl + x - case 25: continue; // disable Ctrl + y - case 26: continue; // disable Ctrl + z (terminates program) - default: return key; // any other ASCII character +int key_press() { // not working: ¹ (251), num lock (-144), caps lock (-20), + // windows key (-91), kontext menu key (-93) + struct termios term; + tcgetattr(0, &term); + while (true) { + term.c_lflag &= ~(ICANON | ECHO); // turn off line buffering and echoing + tcsetattr(0, TCSANOW, &term); + int nbbytes; + ioctl(0, FIONREAD, &nbbytes); // 0 is STDIN + while (!nbbytes) { + sleep(0.01); + fflush(stdout); + ioctl(0, FIONREAD, &nbbytes); // 0 is STDIN + } + 
int key = (int)getchar(); + if (key == 27 || key == 194 || + key == 195) { // escape, 194/195 is escape for °ß´äöüÄÖÜ + key = (int)getchar(); + if (key == 91) { // [ following escape + key = (int)getchar(); // get code of next char after \e[ + if (key == 49) { // F5-F8 + key = 62 + (int)getchar(); // 53, 55-57 + if (key == 115) + key++; // F5 code is too low by 1 + getchar(); // take in following ~ (126), but discard code + } else if (key == 50) { // insert or F9-F12 + key = (int)getchar(); + if (key == 126) { // insert + key = 45; + } else { // F9-F12 + key += 71; // 48, 49, 51, 52 + if (key < 121) + key++; // F11 and F12 are too low by 1 + getchar(); // take in following ~ (126), but discard code + } + } else if (key == 51 || key == 53 || + key == 54) { // delete, page up/down + getchar(); // take in following ~ (126), but discard code } + } else if (key == 79) { // F1-F4 + key = 32 + (int)getchar(); // 80-83 + } + key = -key; // use negative numbers for escaped keys + } + term.c_lflag |= (ICANON | ECHO); // turn on line buffering and echoing + tcsetattr(0, TCSANOW, &term); + switch (key) { + case 127: + return 8; // backspace + case -27: + return 27; // escape + case -51: + return 127; // delete + case -164: + return 132; // ä + case -182: + return 148; // ö + case -188: + return 129; // ü + case -132: + return 142; // Ä + case -150: + return 153; // Ö + case -156: + return 154; // Ü + case -159: + return 225; // ß + case -181: + return 230; // µ + case -167: + return 245; // § + case -176: + return 248; // ° + case -178: + return 253; // ² + case -179: + return 252; // ³ + case -180: + return 239; // ´ + case -65: + return -38; // up arrow + case -66: + return -40; // down arrow + case -68: + return -37; // left arrow + case -67: + return -39; // right arrow + case -53: + return -33; // page up + case -54: + return -34; // page down + case -72: + return -36; // pos1 + case -70: + return -35; // end + case 0: + continue; + case 1: + continue; // disable Ctrl + a 
+ case 2: + continue; // disable Ctrl + b + case 3: + continue; // disable Ctrl + c (terminates program) + case 4: + continue; // disable Ctrl + d + case 5: + continue; // disable Ctrl + e + case 6: + continue; // disable Ctrl + f + case 7: + continue; // disable Ctrl + g + case 8: + continue; // disable Ctrl + h + // case 9: continue; // disable Ctrl + i (ascii for tab) + // case 10: continue; // disable Ctrl + j (ascii for new line) + case 11: + continue; // disable Ctrl + k + case 12: + continue; // disable Ctrl + l + case 13: + continue; // disable Ctrl + m + case 14: + continue; // disable Ctrl + n + case 15: + continue; // disable Ctrl + o + case 16: + continue; // disable Ctrl + p + case 17: + continue; // disable Ctrl + q + case 18: + continue; // disable Ctrl + r + case 19: + continue; // disable Ctrl + s + case 20: + continue; // disable Ctrl + t + case 21: + continue; // disable Ctrl + u + case 22: + continue; // disable Ctrl + v + case 23: + continue; // disable Ctrl + w + case 24: + continue; // disable Ctrl + x + case 25: + continue; // disable Ctrl + y + case 26: + continue; // disable Ctrl + z (terminates program) + default: + return key; // any other ASCII character } + } } float calcLerpFraction(float x, float src0, float src1) { @@ -191,13 +246,13 @@ float calcLerpFraction(float x, float src0, float src1) { class BatchedSimulatorTest : public ::testing::Test {}; TEST_F(BatchedSimulatorTest, basic) { - esp::logging::LoggingContext loggingContext; constexpr bool doOverlapPhysics = false; constexpr bool doFreeCam = false; bool doTuneRobotCam = false; - constexpr bool doSaveAllFramesForVideo = false; // see make_video_from_image_files.py + constexpr bool doSaveAllFramesForVideo = + false; // see make_video_from_image_files.py constexpr bool includeDebugSensor = true; constexpr bool includeDepth = false; constexpr bool includeColor = true; @@ -206,35 +261,36 @@ TEST_F(BatchedSimulatorTest, basic) { const bool forceRandomActions = doFreeCam || 
doTuneRobotCam; EpisodeGeneratorConfig generatorConfig{ - .numEpisodes = 100, - .seed = 0, - .numStageVariations = 12, - .numObjectVariations = 6, - .minNontargetObjects = 27, - .maxNontargetObjects = 32, - .useFixedRobotStartPos = true, - .useFixedRobotStartYaw = true, - .useFixedRobotJointStartPositions = true - }; - + .numEpisodes = 84, + .seed = 0, + .numStageVariations = 84, + .numObjectVariations = 9, + .minNontargetObjects = 27, + .maxNontargetObjects = 32, + .useFixedRobotStartPos = true, + .useFixedRobotStartYaw = true, + .useFixedRobotJointStartPositions = true}; BatchedSimulatorConfig config{ - .numEnvs = 1, .gpuId = 0, + .numEnvs = 1, + .gpuId = 0, .includeDepth = includeDepth, .includeColor = includeColor, .sensor0 = {.width = 768, .height = 768}, .numDebugEnvs = 1, - .debugSensor {.width = 768, .height = 768}, + .debugSensor{.width = 768, .height = 768}, .forceRandomActions = forceRandomActions, .doAsyncPhysicsStep = doOverlapPhysics, .numSubsteps = 1, .enableRobotCollision = true, .enableHeldObjectCollision = true, .doProceduralEpisodeSet = true, - //.episodeSetFilepath = "../data/generated.episode_set.json", .episodeGeneratorConfig = generatorConfig, - .collectionFilepath = "../data/replicacad_composite.collection.json" - }; + //.episodeSetFilepath = "../data/generated.episode_set.json", + .collectionFilepath = "../data/replicacad_composite.collection.json", + .renderAssetCompositeFilepath = + "../data/bps_data/composite/composite.bps", + }; BatchedSimulator bsim(config); Mn::Vector3 camPos; @@ -272,20 +328,28 @@ TEST_F(BatchedSimulatorTest, basic) { float moveSpeed = 1.f; const int envIndex = 0; - //int sphereGreenInstance = bsim.addDebugInstance("sphere_green_wireframe", envIndex); - //int sphereOrangeInstance = bsim.addDebugInstance("sphere_orange_wireframe", envIndex); - // int testModelInstance = bsim.addDebugInstance("cube_gray_shaded", envIndex, - // Mn::Matrix4(Mn::Math::IdentityInit), /*persistent*/true); - // auto& bpsEnv = 
bsim.getBpsEnvironment(0); + // int sphereGreenInstance = bsim.addDebugInstance("sphere_green_wireframe", + // envIndex); int sphereOrangeInstance = + // bsim.addDebugInstance("sphere_orange_wireframe", envIndex); + // int testModelInstance = bsim.addDebugInstance("cube_gray_shaded", + // envIndex, + // Mn::Matrix4(Mn::Math::IdentityInit), /*persistent*/true); + // auto& bpsEnv = bsim.getBpsEnvironment(0); // esp::batched_sim::ColumnGridSource source; // source.load("../data/columngrids/Baked_sc0_staging_00_stage_only.columngrid"); - std::cout << "Open ./latest_env0.bmp in VS Code or another viewer that supports hot-reload." << std::endl; + std::cout << "Open ./latest_env0.bmp in VS Code or another viewer that " + "supports hot-reload." + << std::endl; if (doFreeCam || doTuneRobotCam) { - std::cout << "Camera controls: press WASDQE/arrow keys to move/look, +/- to adjust speed, or ESC to quit." << std::endl; + std::cout << "Camera controls: press WASDQE/arrow keys to move/look, +/- " + "to adjust speed, or ESC to quit." + << std::endl; } else { - std::cout << "Robot controls: press WASD/arrow keys to move, G to grab/drop, +/- to adjust speed, or ESC to quit." << std::endl; + std::cout << "Robot controls: press WASD/arrow keys to move, G to " + "grab/drop, +/- to adjust speed, or ESC to quit." 
+ << std::endl; } const auto& actionMap = bsim.getSerializeCollection().robots[0].actionMap; @@ -318,19 +382,20 @@ TEST_F(BatchedSimulatorTest, basic) { int autoplayProgress = -1; while (true) { - if (doOverlapPhysics) { bsim.startRender(); if (doAdvanceSim) { - bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); + bsim.startStepPhysicsOrReset(std::vector(actions), + std::vector(resets)); doAdvanceSim = false; } bsim.waitRender(); bsim.waitStepPhysicsOrReset(); } else { if (doAdvanceSim) { - bsim.startStepPhysicsOrReset(std::vector(actions), std::vector(resets)); - bsim.waitStepPhysicsOrReset(); + bsim.startStepPhysicsOrReset(std::vector(actions), + std::vector(resets)); + bsim.waitStepPhysicsOrReset(); doAdvanceSim = false; } bsim.startRender(); @@ -341,37 +406,37 @@ TEST_F(BatchedSimulatorTest, basic) { envStates = bsim.getEnvironmentStates(); for (bool isDebug : {false, true}) { - if (isDebug && !includeDebugSensor) { continue; } - uint8_t* base_color_ptr = isDebug - ? bsim.getDebugBpsRenderer().getColorPointer() - : bsim.getBpsRenderer().getColorPointer(); + uint8_t* base_color_ptr = + isDebug ? bsim.getDebugBpsRenderer().getColorPointer() + : bsim.getBpsRenderer().getColorPointer(); // float* base_depth_ptr = isDebug // ? nullptr // : bsim.getBpsRenderer().getDepthPointer(); - glm::u32vec2 out_dim = isDebug - ? glm::u32vec2(config.debugSensor.width, config.debugSensor.height) - : glm::u32vec2(config.sensor0.width, config.sensor0.height); + glm::u32vec2 out_dim = + isDebug ? glm::u32vec2(config.debugSensor.width, + config.debugSensor.height) + : glm::u32vec2(config.sensor0.width, config.sensor0.height); const int numEnvs = isDebug ? config.numDebugEnvs : config.numEnvs; for (int b = 0; b < numEnvs; b++) { std::stringstream ss; - ss << (isDebug ? "./debugenv" : "./env") - << b << "_frame" - << std::setfill('0') << std::setw(4) << frameIdx << ".bmp"; + ss << (isDebug ? 
"./debugenv" : "./env") << b << "_frame" + << std::setfill('0') << std::setw(4) << frameIdx << ".bmp"; saveFrame(doSaveAllFramesForVideo ? ss.str().c_str() : nullptr, - ((isDebug ? "./latest_debugenv" : "./latest_env") - + std::to_string(b) + ".bmp").c_str(), + ((isDebug ? "./latest_debugenv" : "./latest_env") + + std::to_string(b) + ".bmp") + .c_str(), base_color_ptr + b * out_dim.x * out_dim.y * 4, out_dim.x, out_dim.y, 4); // saveFrame(("./out_depth_" + std::to_string(b) + ".bmp").c_str(), - // base_depth_ptr + b * out_dim.x * out_dim.y, out_dim.x, out_dim.y, - // 1); + // base_depth_ptr + b * out_dim.x * out_dim.y, out_dim.x, + // out_dim.y, 1); } } @@ -382,12 +447,11 @@ TEST_F(BatchedSimulatorTest, basic) { key = key_press(); } - if (key == 27) { // ESC + if (key == 27) { // ESC break; } - + if (!(doFreeCam || doTuneRobotCam)) { - bool doGrapRelease = false; float targetUpDown = 0.f; float targetLeftRight = 0.f; @@ -399,13 +463,13 @@ TEST_F(BatchedSimulatorTest, basic) { doAdvanceSim = true; - if (key == -38) { // up + if (key == -38) { // up targetUpDown += 1.f; - } else if (key == -40) { // down + } else if (key == -40) { // down targetUpDown -= 1.f; - } else if (key == -37) { // left + } else if (key == -37) { // left targetLeftRight += 1.f; - } else if (key == -39) { // right + } else if (key == -39) { // right targetLeftRight -= 1.f; } else if (key >= '1' && key <= '8') { int keyIdx = key - '1'; @@ -423,7 +487,7 @@ TEST_F(BatchedSimulatorTest, basic) { baseUpDown = 0.1f; } else if (key == 'q') { baseUpDown = -0.1f; - }else if (key == '=') { // + + } else if (key == '=') { // + moveSpeed = Mn::Math::min(moveSpeed * 2.f, 1.f); } else if (key == '-') { moveSpeed = moveSpeed * 0.5f; @@ -450,12 +514,13 @@ TEST_F(BatchedSimulatorTest, basic) { constexpr float eps = 0.01; if (doGrapRelease) { bool isHolding = (envState.held_obj_idx != -1); - actionsForEnv[actionMap.graspRelease.actionIdx] = !isHolding - ? 
actionMap.graspRelease.thresholds[1] + eps - : actionMap.graspRelease.thresholds[0] - eps; + actionsForEnv[actionMap.graspRelease.actionIdx] = + !isHolding ? actionMap.graspRelease.thresholds[1] + eps + : actionMap.graspRelease.thresholds[0] - eps; } else { // do nothing - actionsForEnv[actionMap.graspRelease.actionIdx] = actionMap.graspRelease.thresholds[0] + eps; + actionsForEnv[actionMap.graspRelease.actionIdx] = + actionMap.graspRelease.thresholds[0] + eps; } actionsForEnv[actionMap.baseRotate.actionIdx] = baseYaw * moveSpeed; actionsForEnv[actionMap.baseMove.actionIdx] = baseForward * moveSpeed; @@ -463,12 +528,13 @@ TEST_F(BatchedSimulatorTest, basic) { if (jointPosIdx != -1) { BATCHED_SIM_ASSERT(jointPosIdx < actionMap.joints.size()); const auto& jointActionSetup = actionMap.joints[jointPosIdx]; - // actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus > 0.f + // actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus > + // 0.f // ? jointPlusMinus * jointActionSetup.second.stepMax * moveSpeed // : -jointPlusMinus * jointActionSetup.second.stepMin * moveSpeed; - actionsForEnv[jointActionSetup.second.actionIdx] = jointPlusMinus * moveSpeed; + actionsForEnv[jointActionSetup.second.actionIdx] = + jointPlusMinus * moveSpeed; } - // 0-1 I don't know these // 2 is torso up/down @@ -481,7 +547,7 @@ TEST_F(BatchedSimulatorTest, basic) { // 12 wrist twist, + is right // 13 gripper finger 1 // 14 gripper finger 2 - //actionsForEnv[3 + 8] = -targetLeftRight * rotSpeed; + // actionsForEnv[3 + 8] = -targetLeftRight * rotSpeed; // actionsForEnv[3 + 2] = baseUpDown * moveSpeed; // actionsForEnv[3 + 11] = -targetUpDown * moveSpeed; // actionsForEnv[3 + 12] = -targetLeftRight * moveSpeed; @@ -494,26 +560,36 @@ TEST_F(BatchedSimulatorTest, basic) { const float camMoveSpeed = 0.2f * moveSpeed; const float camRotateSpeed = 20.f * moveSpeed; // free camera with spacebar to advance sim - if (key == -38) { // up - camRot = camRot * 
Mn::Quaternion::rotation(Mn::Deg(camRotateSpeed), Mn::Vector3(1.f, 0.f, 0.f)); - } else if (key == -40) { // down - camRot = camRot * Mn::Quaternion::rotation(Mn::Deg(-camRotateSpeed), Mn::Vector3(1.f, 0.f, 0.f)); - } else if (key == -37) { // left - camRot = Mn::Quaternion::rotation(Mn::Deg(camRotateSpeed), Mn::Vector3(0.f, 1.f, 0.f)) * camRot; - } else if (key == -39) { // right - camRot = Mn::Quaternion::rotation(Mn::Deg(-camRotateSpeed), Mn::Vector3(0.f, 1.f, 0.f)) * camRot; - } else if (key == '=') { // + + if (key == -38) { // up + camRot = camRot * Mn::Quaternion::rotation(Mn::Deg(camRotateSpeed), + Mn::Vector3(1.f, 0.f, 0.f)); + } else if (key == -40) { // down + camRot = camRot * Mn::Quaternion::rotation(Mn::Deg(-camRotateSpeed), + Mn::Vector3(1.f, 0.f, 0.f)); + } else if (key == -37) { // left + camRot = Mn::Quaternion::rotation(Mn::Deg(camRotateSpeed), + Mn::Vector3(0.f, 1.f, 0.f)) * + camRot; + } else if (key == -39) { // right + camRot = Mn::Quaternion::rotation(Mn::Deg(-camRotateSpeed), + Mn::Vector3(0.f, 1.f, 0.f)) * + camRot; + } else if (key == '=') { // + moveSpeed *= 1.5f; } else if (key == '-') { moveSpeed /= 1.5f; } else if (key == 'w') { - camPos += camRot.transformVector(Mn::Vector3(0.f, 0.f, -1.f)) * camMoveSpeed; + camPos += + camRot.transformVector(Mn::Vector3(0.f, 0.f, -1.f)) * camMoveSpeed; } else if (key == 's') { - camPos += camRot.transformVector(Mn::Vector3(0.f, 0.f, 1.f)) * camMoveSpeed; + camPos += + camRot.transformVector(Mn::Vector3(0.f, 0.f, 1.f)) * camMoveSpeed; } else if (key == 'a') { - camPos += camRot.transformVector(Mn::Vector3(-1.f, 0.f, 0.f)) * camMoveSpeed; + camPos += + camRot.transformVector(Mn::Vector3(-1.f, 0.f, 0.f)) * camMoveSpeed; } else if (key == 'd') { - camPos += camRot.transformVector(Mn::Vector3(1.f, 0.f, 0.f)) * camMoveSpeed; + camPos += + camRot.transformVector(Mn::Vector3(1.f, 0.f, 0.f)) * camMoveSpeed; } else if (key == 'e') { camPos.y() += camMoveSpeed; } else if (key == 'q') { @@ -534,8 +610,10 @@ 
TEST_F(BatchedSimulatorTest, basic) { } else { const auto cameraMat = Mn::Matrix4::from(camRot.toMatrix(), camPos); ESP_DEBUG() << "camPos: " << camPos << ", camRot: " << camRot; - bsim.setCamera("sensor0", camPos, camRot, cameraHfov, cameraAttachLinkName); - bsim.setCamera("debug", camPos, camRot, cameraHfov, cameraAttachLinkName); + bsim.setCamera("sensor0", camPos, camRot, cameraHfov, + cameraAttachLinkName); + bsim.setCamera("debug", camPos, camRot, cameraHfov, + cameraAttachLinkName); } } @@ -544,12 +622,12 @@ TEST_F(BatchedSimulatorTest, basic) { autoplayProgress = 0; } if (isAutoplay) { - constexpr int animDuration = 50; constexpr int renderInc = (250 + 450) / animDuration; int prevProgress = autoplayProgress; autoplayProgress++; - bsim.debugRenderColumnGrids(prevProgress * renderInc, autoplayProgress * renderInc); + bsim.debugRenderColumnGrids(prevProgress * renderInc, + autoplayProgress * renderInc); // move camera slightly camPos.y() -= 0.01f; @@ -561,19 +639,18 @@ TEST_F(BatchedSimulatorTest, basic) { } } - #if 1 +#if 1 for (int b = 0; b < config.numEnvs; b++) { // end episode on collision? - if (/*envStates[b].did_collide */ false - || doReloadAll) { + if (/*envStates[b].did_collide */ false || doReloadAll) { resets[b] = getNextEpisode(); } else { resets[b] = -1; } } - #endif +#endif - #if 0 // test columnGrid collision +#if 0 // test columnGrid collision { constexpr float sphereDist = 1.f; constexpr float sphereRadius = 0.1f; // todo: get from ColumnGrid @@ -584,26 +661,26 @@ TEST_F(BatchedSimulatorTest, basic) { Mn::Matrix4 mat = Mn::Matrix4::translation(spherePos) * Mn::Matrix4::scaling(Mn::Vector3(sphereRadius, sphereRadius, sphereRadius)); - + int instanceToShow = contact ? sphereOrangeInstance : sphereGreenInstance; int instanceToHide = contact ? 
sphereGreenInstance : sphereOrangeInstance; CORRADE_INTERNAL_ASSERT(instanceToShow != -1); bpsEnv.updateInstanceTransform(instanceToShow, toGlmMat4x3(mat)); - bpsEnv.updateInstanceTransform(instanceToHide, + bpsEnv.updateInstanceTransform(instanceToHide, toGlmMat4x3(Mn::Matrix4::translation({1000.f, 1000.f, 1000.f}))); } - #endif +#endif frameIdx++; if (frameIdx % 20 == 0) { - ESP_DEBUG() << "batched_sim stats: " << bsim.getRecentStatsAndReset(); + ESP_DEBUG() << "batched_sim stats: " << bsim.getRecentStatsAndReset(); } } bsim.close(); - bsim.close(); // try a redundant close + bsim.close(); // try a redundant close } -} -} \ No newline at end of file +} // namespace batched_sim +} // namespace esp diff --git a/src/utils/viewer/viewer.cpp b/src/utils/viewer/viewer.cpp index 777070c5a2..f6a1652d4b 100644 --- a/src/utils/viewer/viewer.cpp +++ b/src/utils/viewer/viewer.cpp @@ -24,6 +24,7 @@ #include #include #include +#include "esp/core/Check.h" #include "esp/core/configure.h" #include "esp/gfx/RenderCamera.h" #include "esp/gfx/Renderer.h" @@ -55,12 +56,12 @@ #include "esp/geo/VoxelUtils.h" #endif +#include "esp/batched_sim/ColumnGridBuilder.h" #include "esp/physics/configure.h" #include "esp/sensor/CameraSensor.h" #include "esp/sensor/EquirectangularSensor.h" #include "esp/sensor/FisheyeSensor.h" #include "esp/sim/Simulator.h" -#include "esp/batched_sim/ColumnGridBuilder.h" #include "ObjectPickingHelper.h" @@ -73,8 +74,8 @@ constexpr float agentActionsPerSecond = 60.0f; namespace Cr = Corrade; namespace Mn = Magnum; -using esp::batched_sim::ColumnGridSource; using esp::batched_sim::ColumnGridBuilder; +using esp::batched_sim::ColumnGridSource; namespace { @@ -82,8 +83,8 @@ esp::physics::ManagedArticulatedObject::ptr g_ao; ColumnGridSource columnGrid_; -void drawColumnGridSource(const ColumnGridSource& source, esp::gfx::DebugLineRender& lineRender) { - +void drawColumnGridSource(const ColumnGridSource& source, + esp::gfx::DebugLineRender& lineRender) { for (int 
layerIdx = 0; layerIdx < source.layers.size(); layerIdx++) { for (int cellZ = 0; cellZ < source.dimZ; cellZ++) { for (int cellX = 0; cellX < source.dimX; cellX++) { @@ -91,32 +92,28 @@ void drawColumnGridSource(const ColumnGridSource& source, esp::gfx::DebugLineRen if (col.freeMinY == source.INVALID_Y) { continue; } - Mn::Vector3 colBottom( - source.minX + cellX * source.gridSpacing, - col.freeMinY, - source.minZ + cellZ * source.gridSpacing); - Mn::Vector3 colTop( - colBottom.x(), - col.freeMaxY, - colBottom.z()); - - #if 0 + Mn::Vector3 colBottom(source.minX + cellX * source.gridSpacing, + col.freeMinY, + source.minZ + cellZ * source.gridSpacing); + Mn::Vector3 colTop(colBottom.x(), col.freeMaxY, colBottom.z()); + +#if 0 constexpr int numSegments = 16; lineRender.drawSphere(colBottom, source.sphereRadius, Magnum::Color4::red(), numSegments); lineRender.drawSphere(colTop, source.sphereRadius, Magnum::Color4::green(), numSegments); lineRender.drawLine(colBottom, colTop, Magnum::Color4::red(), Magnum::Color4::green()); - #else +#else constexpr int numSegments = 4; - lineRender.drawCircle(colBottom, source.gridSpacing / 2, Magnum::Color4::red(), numSegments); - lineRender.drawCircle(colTop, source.gridSpacing / 2, Magnum::Color4::green(), numSegments); - #endif + lineRender.drawCircle(colBottom, source.gridSpacing / 2, + Magnum::Color4::red(), numSegments); + lineRender.drawCircle(colTop, source.gridSpacing / 2, + Magnum::Color4::green(), numSegments); +#endif } } } }; - - //! return current time as string in format //! "year_month_day_hour-minutes-seconds" std::string getCurrentTimeString() { @@ -367,7 +364,9 @@ Key Commands: )"; //! Print viewer help text to terminal output. 
- void printHelpText() { ESP_DEBUG() << helpText; }; + void printHelpText() { + ESP_DEBUG() << helpText; + }; // single inline for logging agent state msgs, so can be easily modified inline void showAgentStateMsg(bool showPos, bool showOrient) { @@ -794,7 +793,8 @@ Viewer::Viewer(const Arguments& arguments) simConfig_.enablePhysics = args.isSet("enable-physics"); simConfig_.frustumCulling = true; simConfig_.requiresTextures = true; - simConfig_.enableGfxReplaySave = !gfxReplayRecordFilepath_.empty(); + simConfig_.enableGfxReplaySave = + true; // temp force reply record !gfxReplayRecordFilepath_.empty(); if (args.isSet("stage-requires-lighting")) { ESP_DEBUG() << "Stage using DEFAULT_LIGHTING_KEY"; simConfig_.sceneLightSetup = esp::DEFAULT_LIGHTING_KEY; @@ -881,8 +881,9 @@ Viewer::Viewer(const Arguments& arguments) // Per frame profiler will average measurements taken over previous 50 frames profiler_.setup(profilerValues, 50); - // load or build column grid on startup - { +// load or build column grid on startup +#if 0 + { const std::string baseName = Cr::Utility::Directory::splitExtension( Cr::Utility::Directory::splitExtension( @@ -913,6 +914,25 @@ Viewer::Viewer(const Arguments& arguments) } } } +#endif + + // write keyframe + { + const auto recorder = simulator_->getGfxReplayManager()->getRecorder(); + ESP_CHECK(recorder, "need replay recorder"); + + const std::string baseName = + Cr::Utility::Directory::splitExtension( + Cr::Utility::Directory::splitExtension( + Cr::Utility::Directory::filename(simConfig_.activeSceneName)) + .first) + .first; + + recorder->saveKeyframe(); + const std::string filepath = "../data/replays/" + baseName + ".replay.json"; + + recorder->writeSavedKeyframesToFile(filepath); + } printHelpText(); } // end Viewer::Viewer @@ -1336,7 +1356,8 @@ void Viewer::drawEvent() { { const auto extents = simulator_->getCollisionExtents(); - simulator_->getDebugLineRender()->drawBox(extents.min(), extents.max(), Mn::Color4::yellow()); + 
simulator_->getDebugLineRender()->drawBox(extents.min(), extents.max(), + Mn::Color4::yellow()); } drawColumnGridSource(columnGrid_, *simulator_->getDebugLineRender().get()); @@ -1672,17 +1693,18 @@ void Viewer::mousePressEvent(MouseEvent& event) { esp::physics::RaycastResults raycastResults = simulator_->castRay(ray); if (raycastResults.hasHits()) { - { const auto& gridCenter = raycastResults.hits[0].point; static const Mn::Vector3 gridHalfSize(1.0, 2.5f, 1.0); static float gridSpacing = 0.03f; static float sphereRadius = 0.03; - auto gridAabb = Mn::Range3D(gridCenter - gridHalfSize, gridCenter + gridHalfSize); + auto gridAabb = Mn::Range3D(gridCenter - gridHalfSize, + gridCenter + gridHalfSize); ColumnGridBuilder builder; - columnGrid_ = builder.build(*simulator_.get(), gridAabb, sphereRadius, gridSpacing); + columnGrid_ = builder.build(*simulator_.get(), gridAabb, + sphereRadius, gridSpacing); return; } From ce017743c5c9c033e33746ed5deb62902a36e443 Mon Sep 17 00:00:00 2001 From: Alexander Clegg Date: Tue, 3 May 2022 14:28:28 -0700 Subject: [PATCH 34/53] hack update to latest version of datasets_download.py in habitat-sim/main --- .pre-commit-config.yaml | 2 +- habitat_sim/utils/datasets_download.py | 142 ++++++++++++++++++++++--- 2 files changed, 127 insertions(+), 17 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ea7981045..015040bd46 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,7 @@ repos: exclude: docs/ - repo: https://github.com/ambv/black - rev: 21.5b2 + rev: 22.3.0 hooks: - id: black exclude: ^examples/tutorials/(nb_python|colabs) diff --git a/habitat_sim/utils/datasets_download.py b/habitat_sim/utils/datasets_download.py index 9f15e7e277..c1d04c8b31 100644 --- a/habitat_sim/utils/datasets_download.py +++ b/habitat_sim/utils/datasets_download.py @@ -35,6 +35,20 @@ def hm3d_train_configs_post(extract_dir: str) -> List[str]: return [link_name] +def hm3d_semantic_configs_post(extract_dir: 
str) -> List[str]: + all_scene_dataset_cfg = os.path.join( + extract_dir, "hm3d_annotated_basis.scene_dataset_config.json" + ) + assert os.path.exists(all_scene_dataset_cfg) + + dst_name = os.path.join( + extract_dir, "..", "hm3d_annotated_basis.scene_dataset_config.json" + ) + os.replace(all_scene_dataset_cfg, dst_name) + + return [dst_name] + + def initialize_test_data_sources(data_path): global data_sources global data_groups @@ -75,10 +89,10 @@ def initialize_test_data_sources(data_path): "version": "0.2", }, "mp3d_example_scene": { - "source": "http://dl.fbaipublicfiles.com/habitat/mp3d_example.zip", - "package_name": "mp3d_example.zip", + "source": "http://dl.fbaipublicfiles.com/habitat/mp3d/mp3d_example_v1.1.zip", + "package_name": "mp3d_example_v1.1.zip", "link": data_path + "scene_datasets/mp3d_example", - "version": "1.0", + "version": "1.1", }, "coda_scene": { "source": "https://dl.fbaipublicfiles.com/habitat/coda_v1.0.zip", @@ -95,22 +109,22 @@ def initialize_test_data_sources(data_path): "version": "1.0", }, "replica_cad_dataset": { - "source": "https://dl.fbaipublicfiles.com/habitat/ReplicaCAD/ReplicaCAD_dataset_v1.1.zip", - "package_name": "ReplicaCAD_dataset_v1.1.zip", + "source": "https://dl.fbaipublicfiles.com/habitat/ReplicaCAD/ReplicaCAD_dataset_v1.5.zip", + "package_name": "ReplicaCAD_dataset_v1.5.zip", "link": data_path + "replica_cad", - "version": "1.1", + "version": "1.5", }, "replica_cad_baked_lighting": { - "source": "https://dl.fbaipublicfiles.com/habitat/ReplicaCAD/ReplicaCAD_baked_lighting_v1.1.zip", - "package_name": "ReplicaCAD_baked_lighting_v1.1.zip", + "source": "https://dl.fbaipublicfiles.com/habitat/ReplicaCAD/ReplicaCAD_baked_lighting_v1.5.zip", + "package_name": "ReplicaCAD_baked_lighting_v1.5.zip", "link": data_path + "replica_cad_baked_lighting", - "version": "1.1", + "version": "1.5", }, "ycb": { - "source": "https://dl.fbaipublicfiles.com/habitat/ycb/hab_ycb_v1.0.zip", - "package_name": "hab_ycb_v1.0.zip", + "source": 
"https://dl.fbaipublicfiles.com/habitat/ycb/hab_ycb_v1.1.zip", + "package_name": "hab_ycb_v1.1.zip", "link": data_path + "objects/ycb", - "version": "1.0", + "version": "1.1", }, "hab_fetch": { "source": "http://dl.fbaipublicfiles.com/habitat/hab_fetch_v1.0.zip", @@ -124,6 +138,18 @@ def initialize_test_data_sources(data_path): "link": data_path + "datasets/rearrange_pick/replica_cad/v0", "version": "1.0", }, + "rearrange_dataset_v1": { + "source": "https://dl.fbaipublicfiles.com/habitat/data/datasets/replica_cad/v1.zip", + "package_name": "v1.zip", + "link": data_path + "datasets/replica_cad/rearrange", + "version": "1.0", + }, + "hab2_bench_assets": { + "source": "https://dl.fbaipublicfiles.com/habitat/ReplicaCAD/hab2_bench_assets.zip", + "package_name": "hab2_bench_assets.zip", + "link": data_path + "hab2_bench_assets", + "version": "1.0", + }, } data_sources.update( @@ -179,6 +205,59 @@ def initialize_test_data_sources(data_path): } ) + data_sources.update( + { + f"hm3d_{split}_semantic_{data_format}_v0.1": { + "source": "https://api.matterport.com/resources/habitat/hm3d-{split}-semantic-{data_format}-v0.1.tar{ext}".format( + ext=".gz" if data_format == "annots" else "", + split=split, + data_format=data_format, + ), + "download_pre_args": "--location", + "package_name": "hm3d-{split}-semantic-{data_format}-v0.1.tar{ext}".format( + ext=".gz" if data_format == "annots" else "", + split=split, + data_format=data_format, + ), + "link": data_path + "scene_datasets/hm3d", + "version": "1.0", + "version_dir": "hm3d-{version}/hm3d", + "extract_postfix": f"{split}", + "downloaded_file_list": f"hm3d-{{version}}/{split}-semantic-{data_format}-files.json.gz", + "requires_auth": True, + "use_curl": True, + "post_extract_fn": hm3d_semantic_configs_post + if data_format == "configs" + else None, + } + for split, data_format in itertools.product( + ["minival", "train", "val"], + ["annots", "configs"], + ) + } + ) + + data_sources.update( + { + 
f"hm3d_example_semantic_{data_format}_v0.1": { + "source": "https://github.com/matterport/habitat-matterport-3dresearch/raw/main/example/hm3d-example-semantic-{data_format}-v0.1.tar{ext}".format( + ext=".gz" if data_format == "annots" else "", + data_format=data_format, + ), + "package_name": "hm3d-example-semantic-{data_format}-v0.1.tar{ext}".format( + ext=".gz" if data_format == "annots" else "", + data_format=data_format, + ), + "link": data_path + "scene_datasets/hm3d", + "version": "1.0", + "version_dir": "hm3d-{version}/hm3d", + "extract_postfix": "example", + "downloaded_file_list": f"hm3d-{{version}}/example-semantic-{data_format}-files.json.gz", + } + for data_format in ["annots", "configs"] + } + ) + # data sources can be grouped for batch commands with a new uid data_groups = { "ci_test_assets": [ @@ -196,11 +275,42 @@ def initialize_test_data_sources(data_path): "hab_fetch", "ycb", "rearrange_pick_dataset_v0", + "rearrange_dataset_v1", + ], + "hm3d_example": [ + "hm3d_example_habitat", + "hm3d_example_configs", + "hm3d_example_semantic_annots_v0.1", + "hm3d_example_semantic_configs_v0.1", + ], + "hm3d_val": [ + "hm3d_val_habitat", + "hm3d_val_configs", + "hm3d_val_semantic_annots_v0.1", + "hm3d_val_semantic_configs_v0.1", + ], + "hm3d_train": [ + "hm3d_train_habitat", + "hm3d_train_configs", + "hm3d_train_semantic_annots_v0.1", + "hm3d_train_semantic_configs_v0.1", + ], + "hm3d_minival": [ + "hm3d_minival_habitat", + "hm3d_minival_configs", + "hm3d_minival_semantic_annots_v0.1", + "hm3d_minival_semantic_configs_v0.1", + ], + "hm3d_semantics": [ + "hm3d_example_semantic_annots_v0.1", + "hm3d_example_semantic_configs_v0.1", + "hm3d_val_semantic_annots_v0.1", + "hm3d_val_semantic_configs_v0.1", + "hm3d_train_semantic_annots_v0.1", + "hm3d_train_semantic_configs_v0.1", + "hm3d_minival_semantic_annots_v0.1", + "hm3d_minival_semantic_configs_v0.1", ], - "hm3d_example": ["hm3d_example_habitat", "hm3d_example_configs"], - "hm3d_val": ["hm3d_val_habitat", 
"hm3d_val_configs"], - "hm3d_train": ["hm3d_train_habitat", "hm3d_train_configs"], - "hm3d_minival": ["hm3d_minival_habitat", "hm3d_minival_configs"], "hm3d_full": list(filter(lambda k: k.startswith("hm3d_"), data_sources.keys())), } From 0f836de844caadfecc68137313e7ed10b3f6c789 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Fri, 13 May 2022 15:46:48 -0400 Subject: [PATCH 35/53] gfx-replay scaling fix (relevant for URDF scaling) --- src/esp/gfx/replay/Recorder.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/esp/gfx/replay/Recorder.cpp b/src/esp/gfx/replay/Recorder.cpp index 894986cf9c..4dec701ed7 100644 --- a/src/esp/gfx/replay/Recorder.cpp +++ b/src/esp/gfx/replay/Recorder.cpp @@ -54,7 +54,18 @@ void Recorder::onCreateRenderAssetInstance( RenderAssetInstanceKey instanceKey = getNewInstanceKey(); - getKeyframe().creations.emplace_back(std::make_pair(instanceKey, creation)); + auto adjustedCreation = creation; + + // bake node scale into creation + auto nodeScale = node->absoluteTransformation().scaling(); + if (nodeScale != Mn::Vector3(1.f, 1.f, 1.f)) { + adjustedCreation.scale = adjustedCreation.scale + ? *adjustedCreation.scale * nodeScale + : nodeScale; + } + + getKeyframe().creations.emplace_back( + std::make_pair(instanceKey, adjustedCreation)); // Constructing NodeDeletionHelper here is equivalent to calling // node->addFeature. 
We keep a pointer to deletionHelper so we can delete it From ad3ebfb493126ef4927e49e1a050dbc4acee1c66 Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Sun, 15 May 2022 09:56:27 -0400 Subject: [PATCH 36/53] add --do-gala-kinematic-preprocess to legacy viewer --- src/utils/viewer/viewer.cpp | 107 +++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 44 deletions(-) diff --git a/src/utils/viewer/viewer.cpp b/src/utils/viewer/viewer.cpp index f6a1652d4b..337a6c3b9a 100644 --- a/src/utils/viewer/viewer.cpp +++ b/src/utils/viewer/viewer.cpp @@ -711,6 +711,10 @@ Viewer::Viewer(const Arguments& arguments) .addOption("agent-transform-filepath") .setHelp("agent-transform-filepath", "Specify path to load camera transform from.") + .addBooleanOption("do-gala-kinematic-preprocess") + .setHelp("do-gala-kinematic-preprocess", + "Generate ../replays/myscene.replay.json and " + "../columngrids/myscene.k.columngrid files and then exit.") .parse(arguments.argc, arguments.argv); const auto viewportSize = Mn::GL::defaultFramebuffer.viewport().size(); @@ -881,60 +885,75 @@ Viewer::Viewer(const Arguments& arguments) // Per frame profiler will average measurements taken over previous 50 frames profiler_.setup(profilerValues, 50); -// load or build column grid on startup -#if 0 - { + if (args.isSet("do-gala-kinematic-preprocess")) { + ESP_CHECK(args.isSet("enable-physics"), + "For --do-gala-kinematic-preprocess, you should also set " + "--enable-physics"); - const std::string baseName = Cr::Utility::Directory::splitExtension( - Cr::Utility::Directory::splitExtension( - Cr::Utility::Directory::filename(simConfig_.activeSceneName)).first).first; + // write replay file + { + const auto recorder = simulator_->getGfxReplayManager()->getRecorder(); + ESP_CHECK(recorder, "need replay recorder"); - std::array sphereRadii = {0.015, 0.05, 0.12}; - for (int i = 0; i < sphereRadii.size(); i++) { + const std::string baseName = + Cr::Utility::Directory::splitExtension( + 
Cr::Utility::Directory::splitExtension( + Cr::Utility::Directory::filename(simConfig_.activeSceneName)) + .first) + .first; - const std::string filepath = "../data/columngrids/" + baseName + std::to_string(i) + ".columngrid"; + recorder->saveKeyframe(); + const std::string filepath = + "../data/replays/" + baseName + ".replay.json"; - if (Cr::Utility::Directory::exists(filepath)) { - ESP_DEBUG() << "loading ColumnGrid from " << filepath; - columnGrid_.load(filepath); - // temp - exit(0); - } else { + ESP_DEBUG() << "Writing " << filepath; + recorder->writeSavedKeyframesToFile(filepath); + } - ESP_DEBUG() << "building ColumnGrid..."; - const auto extents = simulator_->getCollisionExtents(); - const float sphereRadius = sphereRadii[i]; - constexpr float gridSpacing = 0.03; - ColumnGridBuilder builder; - columnGrid_ = builder.build(*simulator_.get(), extents, sphereRadius, gridSpacing); + // write columngrid files + { +#ifndef NDEBUG + ESP_WARNING() << "--do-gala-kinematic-preprocess is compute-heavy and " + "you are not running a release build."; +#endif - ESP_DEBUG() << "Done. Saving ColumnGrid to " << filepath; - columnGrid_.save(filepath); - exit(0); + const std::string baseName = + Cr::Utility::Directory::splitExtension( + Cr::Utility::Directory::splitExtension( + Cr::Utility::Directory::filename(simConfig_.activeSceneName)) + .first) + .first; + + std::array sphereRadii = {0.015, 0.05, 0.12}; + for (int i = 0; i < sphereRadii.size(); i++) { + const std::string filepath = "../data/columngrids/" + baseName + "." 
+ + std::to_string(i) + ".columngrid"; + + if (Cr::Utility::Directory::exists(filepath)) { + ESP_WARNING() << "skipping " << filepath + << " because it already exists."; + // ESP_DEBUG() << "loading ColumnGrid from " << filepath; + // columnGrid_.load(filepath); + } else { + ESP_DEBUG() << "building ColumnGrid..."; + const auto extents = simulator_->getCollisionExtents(); + const float sphereRadius = sphereRadii[i]; + constexpr float gridSpacing = 0.03; + ColumnGridBuilder builder; + columnGrid_ = builder.build(*simulator_.get(), extents, sphereRadius, + gridSpacing); + + ESP_DEBUG() << "Done. Saving ColumnGrid to " << filepath; + columnGrid_.save(filepath); + } } } - } -#endif - - // write keyframe - { - const auto recorder = simulator_->getGfxReplayManager()->getRecorder(); - ESP_CHECK(recorder, "need replay recorder"); - const std::string baseName = - Cr::Utility::Directory::splitExtension( - Cr::Utility::Directory::splitExtension( - Cr::Utility::Directory::filename(simConfig_.activeSceneName)) - .first) - .first; - - recorder->saveKeyframe(); - const std::string filepath = "../data/replays/" + baseName + ".replay.json"; - - recorder->writeSavedKeyframesToFile(filepath); + // exit after preprocessing + exit(0); + } else { + printHelpText(); } - - printHelpText(); } // end Viewer::Viewer void Viewer::initSimPostReconfigure() { From d770933bab8c611805625051e4ed59f50e93629c Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Mon, 16 May 2022 20:35:34 -0400 Subject: [PATCH 37/53] add file-write error-checking to Recorder::writeSavedKeyframesToFile --- src/esp/gfx/replay/Recorder.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/esp/gfx/replay/Recorder.cpp b/src/esp/gfx/replay/Recorder.cpp index 4dec701ed7..0766e85ff4 100644 --- a/src/esp/gfx/replay/Recorder.cpp +++ b/src/esp/gfx/replay/Recorder.cpp @@ -5,6 +5,7 @@ #include "Recorder.h" #include "esp/assets/RenderAssetInstanceCreationInfo.h" +#include "esp/core/Check.h" #include 
"esp/io/JsonAllTypes.h" #include "esp/io/json.h" #include "esp/scene/SceneNode.h" @@ -186,8 +187,9 @@ void Recorder::writeSavedKeyframesToFile(const std::string& filepath, auto document = writeKeyframesToJsonDocument(); // replay::Keyframes use floats (not doubles) so this is plenty of precision const float maxDecimalPlaces = 7; - esp::io::writeJsonToFile(document, filepath, usePrettyWriter, - maxDecimalPlaces); + auto ok = esp::io::writeJsonToFile(document, filepath, usePrettyWriter, + maxDecimalPlaces); + ESP_CHECK(ok, "writeSavedKeyframesToFile: unable to write to " << filepath); consolidateSavedKeyframes(); } From ed3dfb87d05c7141e34ae1eee4b3648ae5df2b2f Mon Sep 17 00:00:00 2001 From: Eric Undersander Date: Tue, 17 May 2022 07:33:49 -0400 Subject: [PATCH 38/53] nicer file IO error for ColumnGrid --- src/esp/batched_sim/ColumnGrid.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/esp/batched_sim/ColumnGrid.cpp b/src/esp/batched_sim/ColumnGrid.cpp index 82b275443c..49d6744a6b 100644 --- a/src/esp/batched_sim/ColumnGrid.cpp +++ b/src/esp/batched_sim/ColumnGrid.cpp @@ -228,7 +228,9 @@ void ColumnGridSet::load(const std::string& filepathBase) { } else { if (columnGridFilepathNumber == 1) { ESP_CHECK(!columnGrids_.empty(), - "couldn't find " << columnGridFilepath); + "couldn't find " << columnGridFilepath + << " . Did you remember to unzip the latest " + "data/columngrids.zip?"); } break; } From 0f1026b50f6780c2ac0336e5cbdbe620e86e1843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Mon, 20 Dec 2021 20:04:25 +0100 Subject: [PATCH 39/53] Updated Magnum submodules for the batch renderer. All on master except for magnum which currently uses the scenedata-optimizations branch. 
--- src/deps/corrade | 2 +- src/deps/magnum | 2 +- src/deps/magnum-bindings | 2 +- src/deps/magnum-integration | 2 +- src/deps/magnum-plugins | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/deps/corrade b/src/deps/corrade index a8065db3c5..3acef468e5 160000 --- a/src/deps/corrade +++ b/src/deps/corrade @@ -1 +1 @@ -Subproject commit a8065db3c55aec214aa4e4887c4073289b2988e2 +Subproject commit 3acef468e5bf982f3fc506c0895995139bf91f88 diff --git a/src/deps/magnum b/src/deps/magnum index 360ca834e1..da27aaca3b 160000 --- a/src/deps/magnum +++ b/src/deps/magnum @@ -1 +1 @@ -Subproject commit 360ca834e17870e56f987bb6ecca308ab9889409 +Subproject commit da27aaca3ba30731cbf1ac43d0c60be7648570ac diff --git a/src/deps/magnum-bindings b/src/deps/magnum-bindings index 7ede64e7d4..5994150a68 160000 --- a/src/deps/magnum-bindings +++ b/src/deps/magnum-bindings @@ -1 +1 @@ -Subproject commit 7ede64e7d42f82b7a576f59f200dd8e3f2098223 +Subproject commit 5994150a68a216621582f76ecf394d1b42758abc diff --git a/src/deps/magnum-integration b/src/deps/magnum-integration index 789f813284..da3c1ab2fb 160000 --- a/src/deps/magnum-integration +++ b/src/deps/magnum-integration @@ -1 +1 @@ -Subproject commit 789f8132842536f077548bc4f2d2c3685e9f491e +Subproject commit da3c1ab2fb19e547d7da6dfcd942a52958b8bd5a diff --git a/src/deps/magnum-plugins b/src/deps/magnum-plugins index 39ab2a207a..9e30f511ee 160000 --- a/src/deps/magnum-plugins +++ b/src/deps/magnum-plugins @@ -1 +1 @@ -Subproject commit 39ab2a207af5fff3434f74f37bc4e6badd2ec569 +Subproject commit 9e30f511ee37d88c95367acda3ed72be707dd725 From 66403f4816eb7dc72e35e703997e1a8568ca02fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 19 May 2022 12:46:02 +0200 Subject: [PATCH 40/53] cmake: update Magnum Find modules. 
--- src/cmake/FindAssimp.cmake | 235 +++++++++++++++++++++----- src/cmake/FindBullet.cmake | 28 ++- src/cmake/FindCorrade.cmake | 157 +++++++++-------- src/cmake/FindEGL.cmake | 2 +- src/cmake/FindGLFW.cmake | 4 +- src/cmake/FindMagnum.cmake | 220 +++++++++++++++++------- src/cmake/FindMagnumBindings.cmake | 75 ++++++-- src/cmake/FindMagnumIntegration.cmake | 70 ++++++-- src/cmake/FindMagnumPlugins.cmake | 137 ++++++++++++--- 9 files changed, 700 insertions(+), 228 deletions(-) diff --git a/src/cmake/FindAssimp.cmake b/src/cmake/FindAssimp.cmake index 1158eba040..3d828bd8ec 100644 --- a/src/cmake/FindAssimp.cmake +++ b/src/cmake/FindAssimp.cmake @@ -9,15 +9,15 @@ # # Additionally these variables are defined for internal usage: # -# ASSIMP_LIBRARY - Assimp library -# ASSIMP_INCLUDE_DIR - Include dir +# Assimp_LIBRARY - Assimp library +# Assimp_INCLUDE_DIR - Include dir # # # This file is part of Magnum. # # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # Copyright © 2017 Jonathan Hale # # Permission is hereby granted, free of charge, to any person obtaining a @@ -39,7 +39,138 @@ # DEALINGS IN THE SOFTWARE. # -find_path(ASSIMP_INCLUDE_DIR NAMES assimp/anim.h HINTS include) +# For Vcpkg we *have to* use CMake config files as there it's a huge tree of +# static dependencies that would be impossible to figure out otherwise (see +# https://github.com/microsoft/vcpkg/pull/14554 for details), however vanilla +# Assimp config files are completely broken and thus we have to attempt to +# detect & ignore / patch them in most cases: +# +# 1. On macOS, assimp::assimp references `libassimpd.dylib.5` while it should +# be `libassimpd.5.dylib`. The assimpTargets.cmake then commits an +# irreversible suicide as the file does not exist. 
This got fixed in +# https://github.com/assimp/assimp/commit/ae3236f4819f8b41e197694fd5f7a6df0f63c323 +# and later replaced with https://github.com/assimp/assimp/pull/3455 but +# there's no version containing either of those yet. The only sane way to +# fix this is to not do find_package(assimp CONFIG) on macOS. We do that by +# setting CMAKE_DISABLE_FIND_PACKAGE_assimp so it's still possible to +# disable this behavior from the outside (for example when using vcpkg, or +# latest master) by explicitly setting that to OFF. (The automagic but +# insane way to fix this would be by overriding message() to ignore the +# FATAL_ERROR printed by assimpTargets.cmake, but overriden macros stay +# alive forever, even in outer scopes, and that's not something I want to +# live with. I had to do a similar hack for SPIRV-Tools already and I feel +# dirty.) +# 2. On anything except Windows, the files doen't set IMPORTED_CONFIGURATIONS, +# resulting in a warning about +# IMPORTED_LOCATION not set for imported target "assimp::assimp" +# and subsequently failing with +# ninja: error: 'assimp::assimp-NOTFOUND', needed by '', +# missing and no known rule to make it +# This got fixed with https://github.com/assimp/assimp/pull/3215 and again +# later replaced with https://github.com/assimp/assimp/pull/3455 but there's +# no version containing those yet either. This fortunately can be fixed "in +# post" so we just detect that case below and skip aliasing the target to +# assimp::assimp. +# 3. It doesn't end there though -- on static builds there's nothing that would +# describe the actual static dependencies, which is THE MAIN REASON I wanted +# to use the config file. So instead, when it looks like a static build and +# there's no information about INTERFACE_LINK_LIBRARIES, we attempt to do it +# ourselves instead. +# 4. 
Furthermore, the vanilla config file contains if(ON) and because its CMake +# requirement is set to an insanely low version 2.6, CMake complains about a +# policy change. To suppress it, neither cmake_policy(SET CMP0012 NEW) nor +# find_package(... NO_POLICY_SCOPE) does anything, fortunately there's this +# variable that's able to punch through the scope created by find_package(): +# https://cmake.org/cmake/help/latest/variable/CMAKE_POLICY_DEFAULT_CMPNNNN.html +# +# In conclusion, dynamic builds on MSVC are the only case where vanilla config +# files work, but we still need to go through this pain for vcpkg for the +# static dependencies. + +# Assimp installs a config file that can give us all its dependencies in case +# of a static build. In case the assimp target is already present, we have it +# as a CMake subproject, so don't attempt to look for it again. +if(NOT TARGET assimp) + # See Exhibit 1 above for details + if(APPLE AND NOT DEFINED CMAKE_DISABLE_FIND_PACKAGE_assimp) + set(CMAKE_DISABLE_FIND_PACKAGE_assimp ON) + endif() + # See Exhibit 4 above for details + set(CMAKE_POLICY_DEFAULT_CMP0012 NEW) + find_package(assimp CONFIG QUIET) + unset(CMAKE_POLICY_DEFAULT_CMP0012) + + # Old config files (Assimp 3.2, i.e.) 
don't define any target at all, in + # which case we don't even attempt to use anything from the config file + if(assimp_FOUND AND TARGET assimp::assimp) + # See Exhibit 2 above for details + get_target_property(_ASSIMP_IMPORTED_CONFIGURATIONS assimp::assimp IMPORTED_CONFIGURATIONS) + if(NOT _ASSIMP_IMPORTED_CONFIGURATIONS) + set(_ASSIMP_HAS_USELESS_CONFIG ON) + endif() + + # See Exhibit 3 above for details + get_target_property(_ASSIMP_INTERFACE_LINK_LIBRARIES assimp::assimp INTERFACE_LINK_LIBRARIES) + if(NOT ASSIMP_BUILD_SHARED_LIBS AND NOT _ASSIMP_INTERFACE_LINK_LIBRARIES) + set(_ASSIMP_HAS_USELESS_CONFIG ON) + endif() + endif() +endif() + +# In case we have Assimp as a CMake subproject or the config file was present, +# simply alias our target to that. The assimp config file is actually +# assimp::assimp and while the CMake subproject defines assimp::assimp as well, +# it's like that only since version 5: +# https://github.com/assimp/assimp/commit/b43cf9233703305cfd8dfe7844fce959879b4f0c +# https://github.com/assimp/assimp/commit/30d3c8c6a37a3b098702dfb714fe8e5e2abbfa5e +# The target aliasing is skipped in case the config files are crap, see above +if((TARGET assimp OR TARGET assimp::assimp) AND NOT _ASSIMP_HAS_USELESS_CONFIG) + if(TARGET assimp) + set(_ASSIMP_TARGET assimp) + else() + set(_ASSIMP_TARGET assimp::assimp) + endif() + + get_target_property(_ASSIMP_INTERFACE_INCLUDE_DIRECTORIES ${_ASSIMP_TARGET} INTERFACE_INCLUDE_DIRECTORIES) + # In case of a CMake subproject (which always has the assimp target, not + # assimp::assimp, so we don't need to use ${_ASSIMP_TARGET}), the target + # doesn't define any usable INTERFACE_INCLUDE_DIRECTORIES for some reason + # (the $ in there doesn't get expanded), so let's extract + # that from the SOURCE_DIR property instead. 
+ if(_ASSIMP_INTERFACE_INCLUDE_DIRECTORIES MATCHES ">:${ASSIMP_IRRXML_LIBRARY_RELEASE}>) - endif() - if(ASSIMP_IRRXML_LIBRARY_DEBUG) - set_property(TARGET Assimp::Assimp APPEND PROPERTY - INTERFACE_LINK_LIBRARIES $<$:${ASSIMP_IRRXML_LIBRARY_DEBUG}>) + if(Assimp_LIBRARY_RELEASE) + set_property(TARGET Assimp::Assimp APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(Assimp::Assimp PROPERTIES IMPORTED_LOCATION_RELEASE ${Assimp_LIBRARY_RELEASE}) endif() + # Link to IrrXML / zlib as well, if found. See the comment above for + # details. Allow mixing up debug and release libraries because that's what + # the IMPORTED_LOCATION does as well -- if building as Debug and a Debug + # library is not available, it picks the Release one. + foreach(_extra IRRXML ZLIB) + if(Assimp_${_extra}_LIBRARY_RELEASE AND Assimp_${_extra}_LIBRARY_DEBUG) + set_property(TARGET Assimp::Assimp APPEND PROPERTY + INTERFACE_LINK_LIBRARIES $<$>:${Assimp_${_extra}_LIBRARY_RELEASE}>) + set_property(TARGET Assimp::Assimp APPEND PROPERTY + INTERFACE_LINK_LIBRARIES $<$:${Assimp_${_extra}_LIBRARY_DEBUG}>) + elseif(Assimp_${_extra}_LIBRARY_DEBUG) + set_property(TARGET Assimp::Assimp APPEND PROPERTY + INTERFACE_LINK_LIBRARIES ${Assimp_${_extra}_LIBRARY_DEBUG}) + elseif(Assimp_${_extra}_LIBRARY_RELEASE) + set_property(TARGET Assimp::Assimp APPEND PROPERTY + INTERFACE_LINK_LIBRARIES ${Assimp_${_extra}_LIBRARY_RELEASE}) + endif() + endforeach() + set_target_properties(Assimp::Assimp PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES ${ASSIMP_INCLUDE_DIR}) + INTERFACE_INCLUDE_DIRECTORIES ${Assimp_INCLUDE_DIR}) endif() diff --git a/src/cmake/FindBullet.cmake b/src/cmake/FindBullet.cmake index c26499d08b..7bc20569e0 100644 --- a/src/cmake/FindBullet.cmake +++ b/src/cmake/FindBullet.cmake @@ -27,7 +27,7 @@ # This file is part of Magnum. 
# # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -125,13 +125,25 @@ if(TARGET BulletCollision) get_filename_component(_BULLET_INTERFACE_INCLUDE_DIRECTORIES ${_BULLET_INTERFACE_INCLUDE_DIRECTORIES} DIRECTORY) endif() - # Why, Bullet, why? + # Compile definitions, which is basically just USE_DOUBLE_PRECISION. If + # Bullet was found externally, this is contained in the BULLET_DEFINITIONS + # variable (which might be empty). If the variable isn't defined, it means + # we have a Bullet subproject. OF COURSE this isn't propagated in + # INTERFACE_COMPILE_DEFINITIONS, we can't expect any modicum of usability + # there, so we have to fetch that from the CMake option instead. + if(NOT DEFINED BULLET_DEFINITIONS AND USE_DOUBLE_PRECISION) + set(BULLET_DEFINITIONS "-DBT_USE_DOUBLE_PRECISION") + endif() + + # Why, Bullet, why this library has to have such a different name? if(NOT TARGET Bullet::LinearMath) # Aliases of (global) targets [..] CMake 3.11 [...], as above add_library(Bullet::LinearMath INTERFACE IMPORTED) set_target_properties(Bullet::LinearMath PROPERTIES INTERFACE_LINK_LIBRARIES LinearMath - INTERFACE_INCLUDE_DIRECTORIES ${_BULLET_INTERFACE_INCLUDE_DIRECTORIES}) + INTERFACE_INCLUDE_DIRECTORIES ${_BULLET_INTERFACE_INCLUDE_DIRECTORIES} + # This might define BT_USE_DOUBLE_PRECISION, or not + INTERFACE_COMPILE_OPTIONS "${BULLET_DEFINITIONS}") endif() # Just to make FPHSA print some meaningful location, nothing else. 
Luckily @@ -211,10 +223,18 @@ foreach(_library ${_BULLET_LIBRARIES}) IMPORTED_LOCATION_DEBUG ${Bullet_${_library}_LIBRARY_DEBUG}) endif() - # Everything depends on LinearMath, so put the include dir there + # Everything depends on LinearMath, so put the include dir as well as + # compile definitions (such as BT_USE_DOUBLE_PRECISION) there if(_library STREQUAL LinearMath) set_property(TARGET Bullet::${_library} APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${Bullet_INCLUDE_DIR}) + # BULLET_DEFINITIONS gets defined by BulletConfig from the + # find_package() we did at first. That's also the only useful + # thing from it, the rest we can find by hand as well. + if(BULLET_DEFINITIONS) + set_property(TARGET Bullet::${_library} APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS "${BULLET_DEFINITIONS}") + endif() # Collision depends on LinearMath elseif(_library STREQUAL Collision) diff --git a/src/cmake/FindCorrade.cmake b/src/cmake/FindCorrade.cmake index 8a5fd6f1b5..780932b466 100644 --- a/src/cmake/FindCorrade.cmake +++ b/src/cmake/FindCorrade.cmake @@ -11,9 +11,6 @@ # # Corrade_FOUND - Whether the base library was found # CORRADE_LIB_SUFFIX_MODULE - Path to CorradeLibSuffix.cmake module -# CORRADE_INCLUDE_INSTALL_PREFIX - Prefix where to put platform-independent -# include and other files, defaults to ``.``. If a relative path is used, -# it's relative to :variable:`CMAKE_INSTALL_PREFIX`. 
# # This command will try to find only the base library, not the optional # components, which are: @@ -65,8 +62,8 @@ # # Features of found Corrade library are exposed in these variables: # -# CORRADE_MSVC2019_COMPATIBILITY - Defined if compiled with compatibility -# mode for MSVC 2019 +# CORRADE_MSVC_COMPATIBILITY - Defined if compiled with compatibility +# mode for MSVC 2019+ without the /permissive- flag set # CORRADE_MSVC2017_COMPATIBILITY - Defined if compiled with compatibility # mode for MSVC 2017 # CORRADE_MSVC2015_COMPATIBILITY - Defined if compiled with compatibility @@ -79,7 +76,7 @@ # globals unique even across different shared libraries. Enabled by default # for static builds. # CORRADE_BUILD_MULTITHREADED - Defined if compiled in a way that makes it -# possible to safely use certain Corrade features simultaenously in multiple +# possible to safely use certain Corrade features simultaneously in multiple # threads # CORRADE_TARGET_UNIX - Defined if compiled for some Unix flavor # (Linux, BSD, macOS) @@ -103,7 +100,7 @@ # CORRADE_TARGET_MINGW - Defined if compiling under MinGW # CORRADE_PLUGINMANAGER_NO_DYNAMIC_PLUGIN_SUPPORT - Defined if PluginManager # doesn't support dynamic plugin loading due to platform limitations -# CORRADE_TESTSUITE_TARGET_XCTEST - Defined if TestSuite is targetting Xcode +# CORRADE_TESTSUITE_TARGET_XCTEST - Defined if TestSuite is targeting Xcode # XCTest # CORRADE_UTILITY_USE_ANSI_COLORS - Defined if ANSI escape sequences are used # for colored output with Utility::Debug on Windows @@ -123,15 +120,10 @@ # CORRADE_PEDANTIC_COMPILER_DEFINITIONS - List of pedantic compiler # definitions used for targets with :prop_tgt:`CORRADE_USE_PEDANTIC_FLAGS` # enabled -# -# Workflows without :prop_tgt:`IMPORTED` targets are deprecated and the -# following variables are included just for backwards compatibility and only if -# :variable:`CORRADE_BUILD_DEPRECATED` is enabled: -# -# CORRADE_CXX_FLAGS - Pedantic compile flags. 
Use -# :prop_tgt:`CORRADE_USE_PEDANTIC_FLAGS` property or -# :variable:`CORRADE_PEDANTIC_COMPILER_DEFINITIONS` / -# :variable:`CORRADE_PEDANTIC_COMPILER_OPTIONS` list variables instead. +# CORRADE_CXX{11,14,17,20}_STANDARD_FLAG - Compiler flag to use for targeting +# C++11, 14, 17 or 20 in cases where it's not possible to use +# :prop_tgt:`CORRADE_CXX_STANDARD`. Not defined if a standard switch is +# already present in :variable:`CMAKE_CXX_FLAGS`. # # Corrade provides these macros and functions: # @@ -272,7 +264,8 @@ # This file is part of Corrade. # # Copyright © 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, -# 2017, 2018, 2019, 2020 Vladimír VondruÅ¡ +# 2017, 2018, 2019, 2020, 2021, 2022 +# Vladimír VondruÅ¡ # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -319,7 +312,7 @@ string(REGEX REPLACE "\n" ";" _corradeConfigure "${_corradeConfigure}") set(_corradeFlags MSVC2015_COMPATIBILITY MSVC2017_COMPATIBILITY - MSVC2019_COMPATIBILITY + MSVC_COMPATIBILITY BUILD_DEPRECATED BUILD_STATIC BUILD_STATIC_UNIQUE_GLOBALS @@ -355,40 +348,47 @@ mark_as_advanced(_CORRADE_MODULE_DIR) set(CORRADE_USE_MODULE ${_CORRADE_MODULE_DIR}/UseCorrade.cmake) set(CORRADE_LIB_SUFFIX_MODULE ${_CORRADE_MODULE_DIR}/CorradeLibSuffix.cmake) +# Component distinction (listing them explicitly to avoid mistakes with finding +# unknown components) +set(_CORRADE_LIBRARY_COMPONENTS + Containers Interconnect Main PluginManager TestSuite Utility) +set(_CORRADE_HEADER_ONLY_COMPONENTS Containers) +if(NOT CORRADE_TARGET_WINDOWS) + # CorradeMain is a real library only on windows, a dummy target elsewhere + list(APPEND _CORRADE_HEADER_ONLY_COMPONENTS Main) +endif() +set(_CORRADE_EXECUTABLE_COMPONENTS rc) +# Currently everything is enabled implicitly. Keep in sync with Corrade's root +# CMakeLists.txt. 
+set(_CORRADE_IMPLICITLY_ENABLED_COMPONENTS + Containers Interconnect Main PluginManager TestSuite Utility rc) + +# Inter-component dependencies +set(_CORRADE_Containers_DEPENDENCIES Utility) +set(_CORRADE_Interconnect_DEPENDENCIES Containers Utility) +set(_CORRADE_PluginManager_DEPENDENCIES Containers Utility rc) +set(_CORRADE_TestSuite_DEPENDENCIES Containers Utility Main) # see below +set(_CORRADE_Utility_DEPENDENCIES Containers rc) + # Ensure that all inter-component dependencies are specified as well foreach(_component ${Corrade_FIND_COMPONENTS}) - string(TOUPPER ${_component} _COMPONENT) - - if(_component STREQUAL Containers) - set(_CORRADE_${_COMPONENT}_DEPENDENCIES Utility) - elseif(_component STREQUAL Interconnect) - set(_CORRADE_${_COMPONENT}_DEPENDENCIES Utility) - elseif(_component STREQUAL PluginManager) - set(_CORRADE_${_COMPONENT}_DEPENDENCIES Containers Utility rc) - elseif(_component STREQUAL TestSuite) - set(_CORRADE_${_COMPONENT}_DEPENDENCIES Utility Main) # see below - elseif(_component STREQUAL Utility) - set(_CORRADE_${_COMPONENT}_DEPENDENCIES Containers rc) - endif() - # Mark the dependencies as required if the component is also required if(Corrade_FIND_REQUIRED_${_component}) - foreach(_dependency ${_CORRADE_${_COMPONENT}_DEPENDENCIES}) + foreach(_dependency ${_CORRADE_${_component}_DEPENDENCIES}) set(Corrade_FIND_REQUIRED_${_dependency} TRUE) endforeach() endif() - list(APPEND _CORRADE_ADDITIONAL_COMPONENTS ${_CORRADE_${_COMPONENT}_DEPENDENCIES}) - - # Main is linked only in corrade_add_test(), not to everything that depends - # on TestSuite, so remove it from the list again once we filled the above - # variables - if(_component STREQUAL TestSuite) - set(_CORRADE_${_COMPONENT}_DEPENDENCIES Utility) - endif() + list(APPEND _CORRADE_ADDITIONAL_COMPONENTS ${_CORRADE_${_component}_DEPENDENCIES}) endforeach() +# Main is linked only in corrade_add_test(), not to everything that depends on +# TestSuite, so remove it from the list again once we 
filled the above +# variables +set(_CORRADE_TestSuite_DEPENDENCIES Containers Utility) + # Join the lists, remove duplicate components +set(_CORRADE_ORIGINAL_FIND_COMPONENTS ${Corrade_FIND_COMPONENTS}) if(_CORRADE_ADDITIONAL_COMPONENTS) list(INSERT Corrade_FIND_COMPONENTS 0 ${_CORRADE_ADDITIONAL_COMPONENTS}) endif() @@ -396,16 +396,6 @@ if(Corrade_FIND_COMPONENTS) list(REMOVE_DUPLICATES Corrade_FIND_COMPONENTS) endif() -# Component distinction -set(_CORRADE_LIBRARY_COMPONENTS "^(Containers|Interconnect|Main|PluginManager|TestSuite|Utility)$") -if(CORRADE_TARGET_WINDOWS) - # CorradeMain is a real library only on windows, a dummy target elsewhere - set(_CORRADE_HEADER_ONLY_COMPONENTS "^(Containers)$") -else() - set(_CORRADE_HEADER_ONLY_COMPONENTS "^(Containers|Main)$") -endif() -set(_CORRADE_EXECUTABLE_COMPONENTS "^(rc)$") - # Find all components foreach(_component ${Corrade_FIND_COMPONENTS}) string(TOUPPER ${_component} _COMPONENT) @@ -417,7 +407,7 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) set(Corrade_${_component}_FOUND TRUE) else() # Library (and not header-only) components - if(_component MATCHES ${_CORRADE_LIBRARY_COMPONENTS} AND NOT _component MATCHES ${_CORRADE_HEADER_ONLY_COMPONENTS}) + if(_component IN_LIST _CORRADE_LIBRARY_COMPONENTS AND NOT _component IN_LIST _CORRADE_HEADER_ONLY_COMPONENTS) add_library(Corrade::${_component} UNKNOWN IMPORTED) # Try to find both debug and release version @@ -442,19 +432,19 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) endif() # Header-only library components - if(_component MATCHES ${_CORRADE_HEADER_ONLY_COMPONENTS}) + if(_component IN_LIST _CORRADE_HEADER_ONLY_COMPONENTS) add_library(Corrade::${_component} INTERFACE IMPORTED) endif() # Default include path names to look for for library / header-only # components - if(_component MATCHES ${_CORRADE_LIBRARY_COMPONENTS}) + if(_component IN_LIST _CORRADE_LIBRARY_COMPONENTS) set(_CORRADE_${_COMPONENT}_INCLUDE_PATH_SUFFIX Corrade/${_component}) 
set(_CORRADE_${_COMPONENT}_INCLUDE_PATH_NAMES ${_component}.h) endif() # Executable components - if(_component MATCHES ${_CORRADE_EXECUTABLE_COMPONENTS}) + if(_component IN_LIST _CORRADE_EXECUTABLE_COMPONENTS) add_executable(Corrade::${_component} IMPORTED) find_program(CORRADE_${_COMPONENT}_EXECUTABLE corrade-${_component}) @@ -539,7 +529,7 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) set_property(TARGET Corrade::${_component} APPEND PROPERTY COMPATIBLE_INTERFACE_NUMBER_MAX CORRADE_CXX_STANDARD) - # Directory::libraryLocation() needs this + # Path::libraryLocation() needs this if(CORRADE_TARGET_UNIX) set_property(TARGET Corrade::${_component} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS}) @@ -552,7 +542,7 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) endif() # Find library includes - if(_component MATCHES ${_CORRADE_LIBRARY_COMPONENTS}) + if(_component IN_LIST _CORRADE_LIBRARY_COMPONENTS) find_path(_CORRADE_${_COMPONENT}_INCLUDE_DIR NAMES ${_CORRADE_${_COMPONENT}_INCLUDE_PATH_NAMES} HINTS ${CORRADE_INCLUDE_DIR}/${_CORRADE_${_COMPONENT}_INCLUDE_PATH_SUFFIX}) @@ -560,9 +550,9 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) endif() # Add inter-library dependencies - if(_component MATCHES ${_CORRADE_LIBRARY_COMPONENTS} OR _component MATCHES ${_CORRADE_HEADER_ONLY_COMPONENTS}) - foreach(_dependency ${_CORRADE_${_COMPONENT}_DEPENDENCIES}) - if(_dependency MATCHES ${_CORRADE_LIBRARY_COMPONENTS} OR _dependency MATCHES ${_CORRADE_HEADER_ONLY_COMPONENTS}) + if(_component IN_LIST _CORRADE_LIBRARY_COMPONENTS OR _component IN_LIST _CORRADE_HEADER_ONLY_COMPONENTS) + foreach(_dependency ${_CORRADE_${_component}_DEPENDENCIES}) + if(_dependency IN_LIST _CORRADE_LIBRARY_COMPONENTS OR _dependency IN_LIST _CORRADE_HEADER_ONLY_COMPONENTS) set_property(TARGET Corrade::${_component} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Corrade::${_dependency}) endif() @@ -570,7 +560,7 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) endif() # Decide if the component was found 
- if((_component MATCHES ${_CORRADE_LIBRARY_COMPONENTS} AND _CORRADE_${_COMPONENT}_INCLUDE_DIR AND (_component MATCHES ${_CORRADE_HEADER_ONLY_COMPONENTS} OR CORRADE_${_COMPONENT}_LIBRARY_RELEASE OR CORRADE_${_COMPONENT}_LIBRARY_DEBUG)) OR (_component MATCHES ${_CORRADE_EXECUTABLE_COMPONENTS} AND CORRADE_${_COMPONENT}_EXECUTABLE)) + if((_component IN_LIST _CORRADE_LIBRARY_COMPONENTS AND _CORRADE_${_COMPONENT}_INCLUDE_DIR AND (_component IN_LIST _CORRADE_HEADER_ONLY_COMPONENTS OR CORRADE_${_COMPONENT}_LIBRARY_RELEASE OR CORRADE_${_COMPONENT}_LIBRARY_DEBUG)) OR (_component IN_LIST _CORRADE_EXECUTABLE_COMPONENTS AND CORRADE_${_COMPONENT}_EXECUTABLE)) set(Corrade_${_component}_FOUND TRUE) else() set(Corrade_${_component}_FOUND FALSE) @@ -578,6 +568,39 @@ foreach(_component ${Corrade_FIND_COMPONENTS}) endif() endforeach() +# For CMake 3.16+ with REASON_FAILURE_MESSAGE, provide additional potentially +# useful info about the failed components. +if(NOT CMAKE_VERSION VERSION_LESS 3.16) + set(_CORRADE_REASON_FAILURE_MESSAGE ) + # Go only through the originally specified find_package() components, not + # the dependencies added by us afterwards + foreach(_component ${_CORRADE_ORIGINAL_FIND_COMPONENTS}) + if(Corrade_${_component}_FOUND) + continue() + endif() + + # If it's not known at all, tell the user -- it might be a new library + # and an old Find module, or something platform-specific. + if(NOT _component IN_LIST _CORRADE_LIBRARY_COMPONENTS AND NOT _component IN_LIST _CORRADE_EXECUTABLE_COMPONENTS) + list(APPEND _CORRADE_REASON_FAILURE_MESSAGE "${_component} is not a known component on this platform.") + # Otherwise, if it's not among implicitly built components, hint that + # the user may need to enable it. + # TODO: currently, the _FOUND variable doesn't reflect if dependencies + # were found. When it will, this needs to be updated to avoid + # misleading messages. 
+ elseif(NOT _component IN_LIST _CORRADE_IMPLICITLY_ENABLED_COMPONENTS) + string(TOUPPER ${_component} _COMPONENT) + list(APPEND _CORRADE_REASON_FAILURE_MESSAGE "${_component} is not built by default. Make sure you enabled WITH_${_COMPONENT} when building Corrade.") + # Otherwise we have no idea. Better be silent than to print something + # misleading. + else() + endif() + endforeach() + + string(REPLACE ";" " " _CORRADE_REASON_FAILURE_MESSAGE "${_CORRADE_REASON_FAILURE_MESSAGE}") + set(_CORRADE_REASON_FAILURE_MESSAGE REASON_FAILURE_MESSAGE "${_CORRADE_REASON_FAILURE_MESSAGE}") +endif() + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Corrade REQUIRED_VARS CORRADE_INCLUDE_DIR @@ -586,13 +609,15 @@ find_package_handle_standard_args(Corrade REQUIRED_VARS ${CORRADE_TESTSUITE_XCTEST_RUNNER_NEEDED} ${CORRADE_TESTSUITE_ADB_RUNNER_NEEDED} ${CORRADE_TESTSUITE_EMSCRIPTEN_RUNNER_NEEDED} - HANDLE_COMPONENTS) + HANDLE_COMPONENTS + ${_CORRADE_REASON_FAILURE_MESSAGE}) # Finalize the finding process include(${CORRADE_USE_MODULE}) -# Installation dirs -set(CORRADE_INCLUDE_INSTALL_PREFIX "." - CACHE STRING "Prefix where to put platform-independent include and other files") +set(CORRADE_INCLUDE_INSTALL_DIR include/Corrade) -set(CORRADE_INCLUDE_INSTALL_DIR ${CORRADE_INCLUDE_INSTALL_PREFIX}/include/Corrade) +if(CORRADE_BUILD_DEPRECATED AND CORRADE_INCLUDE_INSTALL_PREFIX AND NOT CORRADE_INCLUDE_INSTALL_PREFIX STREQUAL ".") + message(DEPRECATION "CORRADE_INCLUDE_INSTALL_PREFIX is obsolete as its primary use was for old Android NDK versions. 
Please switch to the NDK r19+ layout instead of using this variable and recreate your build directory to get rid of this warning.") + set(CORRADE_INCLUDE_INSTALL_DIR ${CORRADE_INCLUDE_INSTALL_PREFIX}/${CORRADE_INCLUDE_INSTALL_DIR}) +endif() diff --git a/src/cmake/FindEGL.cmake b/src/cmake/FindEGL.cmake index cc4210ef36..024dde36d4 100644 --- a/src/cmake/FindEGL.cmake +++ b/src/cmake/FindEGL.cmake @@ -17,7 +17,7 @@ # This file is part of Magnum. # # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), diff --git a/src/cmake/FindGLFW.cmake b/src/cmake/FindGLFW.cmake index 3d7d99b6d7..5e0463ff91 100644 --- a/src/cmake/FindGLFW.cmake +++ b/src/cmake/FindGLFW.cmake @@ -21,7 +21,7 @@ # This file is part of Magnum. # # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # Copyright © 2016 Jonathan Hale # # Permission is hereby granted, free of charge, to any person obtaining a @@ -87,6 +87,8 @@ if(CORRADE_TARGET_WINDOWS) set(_GLFW_LIBRARY_PATH_SUFFIX lib-vc2017) elseif(MSVC_VERSION VERSION_LESS 1930) set(_GLFW_LIBRARY_PATH_SUFFIX lib-vc2019) + elseif(MSVC_VERSION VERSION_LESS 1940) + set(_GLFW_LIBRARY_PATH_SUFFIX lib-vc2022) else() message(FATAL_ERROR "Unsupported MSVC version") endif() diff --git a/src/cmake/FindMagnum.cmake b/src/cmake/FindMagnum.cmake index 156c4be58b..b65de5a790 100644 --- a/src/cmake/FindMagnum.cmake +++ b/src/cmake/FindMagnum.cmake @@ -13,9 +13,6 @@ # MAGNUM_DEPLOY_PREFIX - Prefix where to put final application # executables, defaults to ``.``. If a relative path is used, it's relative # to :variable:`CMAKE_INSTALL_PREFIX`. 
-# MAGNUM_INCLUDE_INSTALL_PREFIX - Prefix where to put platform-independent -# include and other files, defaults to ``.``. If a relative path is used, -# it's relative to :variable:`CMAKE_INSTALL_PREFIX`. # MAGNUM_PLUGINS_DEBUG_DIR - Base directory with dynamic plugins for # debug builds, defaults to magnum-d/ subdirectory of dir where Magnum # library was found @@ -63,7 +60,9 @@ # MeshTools - MeshTools library # Primitives - Primitives library # SceneGraph - SceneGraph library +# SceneTools - SceneTools library # Shaders - Shaders library +# ShaderTools - ShaderTools library # Text - Text library # TextureTools - TextureTools library # Trade - Trade library @@ -85,6 +84,7 @@ # GlxContext - GLX context # WglContext - WGL context # OpenGLTester - OpenGLTester class +# VulkanTester - VulkanTester class # MagnumFont - Magnum bitmap font plugin # MagnumFontConverter - Magnum bitmap font converter plugin # ObjImporter - OBJ importer plugin @@ -95,7 +95,9 @@ # fontconverter - magnum-fontconverter executable # imageconverter - magnum-imageconverter executable # sceneconverterter - magnum-sceneconverter executable +# shaderconverterter - magnum-shaderconverter executable # gl-info - magnum-gl-info executable +# vk-info - magnum-vk-info executable # al-info - magnum-al-info executable # # Example usage with specifying additional components is:: @@ -164,6 +166,10 @@ # installation directory # MAGNUM_PLUGINS_[DEBUG|RELEASE]_LIBRARY_INSTALL_DIR - Plugin library # installation directory +# MAGNUM_PLUGINS_SHADERCONVERTER_[DEBUG|RELEASE]_BINARY_INSTALL_DIR - Shader +# converter plugin binary installation directory +# MAGNUM_PLUGINS_SHADERCONVERTER_[DEBUG|RELEASE]_LIBRARY_INSTALL_DIR - Shader +# converter plugin library installation directory # MAGNUM_PLUGINS_FONT_[DEBUG|RELEASE]_BINARY_INSTALL_DIR - Font plugin binary # installation directory # MAGNUM_PLUGINS_FONT_[DEBUG|RELEASE]_LIBRARY_INSTALL_DIR - Font plugin @@ -196,7 +202,7 @@ # This file is part of Magnum. 
# # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -224,7 +230,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) # Unrolling the transitive dependencies here so this doesn't need to be # after resolving inter-component dependencies. Listing also all plugins. - if(_component MATCHES "^(Audio|DebugTools|MeshTools|Primitives|Text|TextureTools|Trade|.+Importer|.+ImageConverter|.+Font)$") + if(_component MATCHES "^(Audio|DebugTools|MeshTools|Primitives|SceneTools|ShaderTools|Text|TextureTools|Trade|.+Importer|.+ImageConverter|.+Font|.+ShaderConverter)$") set(_MAGNUM_${_COMPONENT}_CORRADE_DEPENDENCIES PluginManager) endif() @@ -338,12 +344,6 @@ if(NOT TARGET Magnum::Magnum) # Include directories set_property(TARGET Magnum::Magnum APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${MAGNUM_INCLUDE_DIR}) - # Some deprecated APIs use headers (but not externally defined symbols) - # from the GL library, link those includes as well - if(MAGNUM_BUILD_DEPRECATED AND MAGNUM_TARGET_GL) - set_property(TARGET Magnum::Magnum APPEND PROPERTY - INTERFACE_INCLUDE_DIRECTORIES ${MAGNUM_INCLUDE_DIR}/MagnumExternal/OpenGL) - endif() # Dependent libraries set_property(TARGET Magnum::Magnum APPEND PROPERTY INTERFACE_LINK_LIBRARIES @@ -354,22 +354,52 @@ endif() # Component distinction (listing them explicitly to avoid mistakes with finding # components from other repositories) -set(_MAGNUM_LIBRARY_COMPONENT_LIST - Audio DebugTools GL MeshTools Primitives SceneGraph Shaders Text - TextureTools Trade Vk - AndroidApplication EmscriptenApplication GlfwApplication GlxApplication - Sdl2Application XEglApplication WindowlessCglApplication - WindowlessEglApplication WindowlessGlxApplication WindowlessIosApplication - WindowlessWglApplication 
WindowlessWindowsEglApplication - CglContext EglContext GlxContext WglContext - OpenGLTester) -set(_MAGNUM_PLUGIN_COMPONENT_LIST +set(_MAGNUM_LIBRARY_COMPONENTS + Audio DebugTools GL MeshTools Primitives SceneGraph SceneTools Shaders + ShaderTools Text TextureTools Trade + WindowlessEglApplication EglContext OpenGLTester) +set(_MAGNUM_PLUGIN_COMPONENTS AnyAudioImporter AnyImageConverter AnyImageImporter AnySceneConverter AnySceneImporter MagnumFont MagnumFontConverter ObjImporter TgaImageConverter TgaImporter WavAudioImporter) -set(_MAGNUM_EXECUTABLE_COMPONENT_LIST - distancefieldconverter fontconverter imageconverter sceneconverter gl-info - al-info) +set(_MAGNUM_EXECUTABLE_COMPONENTS + imageconverter sceneconverter shaderconverter gl-info al-info) +# Audio and Vk libs aren't enabled by default, and none of the Context, +# Application, Tester libs nor plugins are. Keep in sync with Magnum's root +# CMakeLists.txt. +set(_MAGNUM_IMPLICITLY_ENABLED_COMPONENTS + DebugTools MeshTools SceneGraph Shaders ShaderTools Text TextureTools Trade + GL Primitives) +if(NOT CORRADE_TARGET_EMSCRIPTEN) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS Vk VulkanTester) + list(APPEND _MAGNUM_EXECUTABLE_COMPONENTS vk-info) +endif() +if(NOT CORRADE_TARGET_ANDROID) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS Sdl2Application) +endif() +if(NOT CORRADE_TARGET_ANDROID AND NOT CORRADE_TARGET_IOS AND NOT CORRADE_TARGET_EMSCRIPTEN) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS GlfwApplication) +endif() +if(CORRADE_TARGET_ANDROID) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS AndroidApplication) +endif() +if(CORRADE_TARGET_EMSCRIPTEN) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS EmscriptenApplication) +endif() +if(CORRADE_TARGET_IOS) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS WindowlessIosApplication) +elseif(CORRADE_TARGET_APPLE AND NOT MAGNUM_TARGET_GLES) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS WindowlessCglApplication CglContext) +endif() +if(CORRADE_TARGET_UNIX AND NOT CORRADE_TARGET_APPLE) + list(APPEND 
_MAGNUM_LIBRARY_COMPONENTS GlxApplication XEglApplication WindowlessGlxApplication GlxContext) +endif() +if(CORRADE_TARGET_WINDOWS) + list(APPEND _MAGNUM_LIBRARY_COMPONENTS WindowlessWglApplication WglContext WindowlessWindowsEglApplication) +endif() +if(CORRADE_TARGET_UNIX OR CORRADE_TARGET_WINDOWS) + list(APPEND _MAGNUM_EXECUTABLE_COMPONENTS fontconverter distancefieldconverter) +endif() # Inter-component dependencies set(_MAGNUM_Audio_DEPENDENCIES ) @@ -400,7 +430,7 @@ if(MAGNUM_TARGET_HEADLESS OR CORRADE_TARGET_EMSCRIPTEN OR CORRADE_TARGET_ANDROID list(APPEND _MAGNUM_OpenGLTester_DEPENDENCIES WindowlessEglApplication) elseif(CORRADE_TARGET_IOS) list(APPEND _MAGNUM_OpenGLTester_DEPENDENCIES WindowlessIosApplication) -elseif(CORRADE_TARGET_APPLE) +elseif(CORRADE_TARGET_APPLE AND NOT MAGNUM_TARGET_GLES) list(APPEND _MAGNUM_OpenGLTester_DEPENDENCIES WindowlessCglApplication) elseif(CORRADE_TARGET_UNIX) if(MAGNUM_TARGET_GLES AND NOT MAGNUM_TARGET_DESKTOP_GLES) @@ -421,8 +451,11 @@ if(MAGNUM_TARGET_GL) # GL not required by Primitives themselves, but transitively by MeshTools list(APPEND _MAGNUM_Primitives_DEPENDENCIES GL) endif() + set(_MAGNUM_SceneGraph_DEPENDENCIES ) +set(_MAGNUM_SceneTools_DEPENDENCIES Trade) set(_MAGNUM_Shaders_DEPENDENCIES GL) + set(_MAGNUM_Text_DEPENDENCIES TextureTools) if(MAGNUM_TARGET_GL) list(APPEND _MAGNUM_Text_DEPENDENCIES GL) @@ -434,7 +467,9 @@ if(MAGNUM_TARGET_GL) endif() set(_MAGNUM_Trade_DEPENDENCIES ) +set(_MAGNUM_VulkanTester_DEPENDENCIES Vk) set(_MAGNUM_AndroidApplication_DEPENDENCIES GL) + set(_MAGNUM_EmscriptenApplication_DEPENDENCIES) if(MAGNUM_TARGET_GL) list(APPEND _MAGNUM_EmscriptenApplication_DEPENDENCIES GL) @@ -467,9 +502,11 @@ set(_MAGNUM_WglContext_DEPENDENCIES GL) set(_MAGNUM_MagnumFont_DEPENDENCIES Trade TgaImporter GL) # and below set(_MAGNUM_MagnumFontConverter_DEPENDENCIES Trade TgaImageConverter) # and below set(_MAGNUM_ObjImporter_DEPENDENCIES MeshTools) # and below -foreach(_component 
${_MAGNUM_PLUGIN_COMPONENT_LIST}) +foreach(_component ${_MAGNUM_PLUGIN_COMPONENTS}) if(_component MATCHES ".+AudioImporter") list(APPEND _MAGNUM_${_component}_DEPENDENCIES Audio) + elseif(_component MATCHES ".+ShaderConverter") + list(APPEND _MAGNUM_${_component}_DEPENDENCIES ShaderTools) elseif(_component MATCHES ".+(Importer|ImageConverter|SceneConverter)") list(APPEND _MAGNUM_${_component}_DEPENDENCIES Trade) elseif(_component MATCHES ".+(Font|FontConverter)") @@ -495,6 +532,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) endforeach() # Join the lists, remove duplicate components +set(_MAGNUM_ORIGINAL_FIND_COMPONENTS ${Magnum_FIND_COMPONENTS}) if(_MAGNUM_ADDITIONAL_COMPONENTS) list(INSERT Magnum_FIND_COMPONENTS 0 ${_MAGNUM_ADDITIONAL_COMPONENTS}) endif() @@ -502,13 +540,6 @@ if(Magnum_FIND_COMPONENTS) list(REMOVE_DUPLICATES Magnum_FIND_COMPONENTS) endif() -# Convert components lists to regular expressions so I can use if(MATCHES). -# TODO: Drop this once CMake 3.3 and if(IN_LIST) can be used -foreach(_WHAT LIBRARY PLUGIN EXECUTABLE) - string(REPLACE ";" "|" _MAGNUM_${_WHAT}_COMPONENTS "${_MAGNUM_${_WHAT}_COMPONENT_LIST}") - set(_MAGNUM_${_WHAT}_COMPONENTS "^(${_MAGNUM_${_WHAT}_COMPONENTS})$") -endforeach() - # Find all components. Maintain a list of components that'll need to have # their optional dependencies checked. 
set(_MAGNUM_OPTIONAL_DEPENDENCIES_TO_ADD ) @@ -522,7 +553,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) set(Magnum_${_component}_FOUND TRUE) else() # Library components - if(_component MATCHES ${_MAGNUM_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUM_LIBRARY_COMPONENTS) add_library(Magnum::${_component} UNKNOWN IMPORTED) # Set library defaults, find the library @@ -534,10 +565,9 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) find_library(MAGNUM_${_COMPONENT}_LIBRARY_RELEASE Magnum${_component}) mark_as_advanced(MAGNUM_${_COMPONENT}_LIBRARY_DEBUG MAGNUM_${_COMPONENT}_LIBRARY_RELEASE) - endif() # Plugin components - if(_component MATCHES ${_MAGNUM_PLUGIN_COMPONENTS}) + elseif(_component IN_LIST _MAGNUM_PLUGIN_COMPONENTS) add_library(Magnum::${_component} UNKNOWN IMPORTED) # AudioImporter plugin specific name suffixes @@ -549,6 +579,10 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) string(REPLACE "AudioImporter" "Importer" _MAGNUM_${_COMPONENT}_HEADER_NAME "${_component}") set(_MAGNUM_${_COMPONENT}_INCLUDE_PATH_NAMES ${_MAGNUM_${_COMPONENT}_HEADER_NAME}.h) + # ShaderConverter plugin specific name suffixes + elseif(_component MATCHES ".+ShaderConverter$") + set(_MAGNUM_${_COMPONENT}_PATH_SUFFIX shaderconverters) + # Importer plugin specific name suffixes elseif(_component MATCHES ".+Importer$") set(_MAGNUM_${_COMPONENT}_PATH_SUFFIX importers) @@ -578,7 +612,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) # Dynamic plugins don't have any prefix (e.g. 
`lib` on Linux), # search with empty prefix and then reset that back so we don't - # accidentaly break something else + # accidentally break something else set(_tmp_prefixes "${CMAKE_FIND_LIBRARY_PREFIXES}") set(CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES};") @@ -601,10 +635,26 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) # Reset back set(CMAKE_FIND_LIBRARY_PREFIXES "${_tmp_prefixes}") + + # Executables + elseif(_component IN_LIST _MAGNUM_EXECUTABLE_COMPONENTS) + add_executable(Magnum::${_component} IMPORTED) + + find_program(MAGNUM_${_COMPONENT}_EXECUTABLE magnum-${_component}) + mark_as_advanced(MAGNUM_${_COMPONENT}_EXECUTABLE) + + if(MAGNUM_${_COMPONENT}_EXECUTABLE) + set_property(TARGET Magnum::${_component} PROPERTY + IMPORTED_LOCATION ${MAGNUM_${_COMPONENT}_EXECUTABLE}) + endif() + + # Something unknown, skip. FPHSA will take care of handling this below. + else() + continue() endif() # Library location for libraries/plugins - if(_component MATCHES ${_MAGNUM_LIBRARY_COMPONENTS} OR _component MATCHES ${_MAGNUM_PLUGIN_COMPONENTS}) + if(_component IN_LIST _MAGNUM_LIBRARY_COMPONENTS OR _component IN_LIST _MAGNUM_PLUGIN_COMPONENTS) if(MAGNUM_${_COMPONENT}_LIBRARY_RELEASE) set_property(TARGET Magnum::${_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) @@ -620,19 +670,6 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) endif() endif() - # Executables - if(_component MATCHES ${_MAGNUM_EXECUTABLE_COMPONENTS}) - add_executable(Magnum::${_component} IMPORTED) - - find_program(MAGNUM_${_COMPONENT}_EXECUTABLE magnum-${_component}) - mark_as_advanced(MAGNUM_${_COMPONENT}_EXECUTABLE) - - if(MAGNUM_${_COMPONENT}_EXECUTABLE) - set_property(TARGET Magnum::${_component} PROPERTY - IMPORTED_LOCATION ${MAGNUM_${_COMPONENT}_EXECUTABLE}) - endif() - endif() - # Applications if(_component MATCHES ".+Application") set(_MAGNUM_${_COMPONENT}_INCLUDE_PATH_SUFFIX Magnum/Platform) @@ -857,11 +894,21 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) 
elseif(_component STREQUAL OpenGLTester) set(_MAGNUM_${_COMPONENT}_INCLUDE_PATH_SUFFIX Magnum/GL) + # VulkanTester library + elseif(_component STREQUAL VulkanTester) + set(_MAGNUM_${_COMPONENT}_INCLUDE_PATH_SUFFIX Magnum/Vk) + # Primitives library elseif(_component STREQUAL Primitives) set(_MAGNUM_${_COMPONENT}_INCLUDE_PATH_NAMES Cube.h) # No special setup for SceneGraph library + + # ShaderTools library + elseif(_component STREQUAL ShaderTools) + set_property(TARGET Magnum::${_component} APPEND PROPERTY + INTERFACE_LINK_LIBRARIES Corrade::PluginManager) + # No special setup for Shaders library # Text library @@ -897,7 +944,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) # No special setup for WavAudioImporter plugin # Find library/plugin includes - if(_component MATCHES ${_MAGNUM_LIBRARY_COMPONENTS} OR _component MATCHES ${_MAGNUM_PLUGIN_COMPONENTS}) + if(_component IN_LIST _MAGNUM_LIBRARY_COMPONENTS OR _component IN_LIST _MAGNUM_PLUGIN_COMPONENTS) find_path(_MAGNUM_${_COMPONENT}_INCLUDE_DIR NAMES ${_MAGNUM_${_COMPONENT}_INCLUDE_PATH_NAMES} HINTS ${MAGNUM_INCLUDE_DIR}/${_MAGNUM_${_COMPONENT}_INCLUDE_PATH_SUFFIX}) @@ -906,7 +953,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) # Automatic import of static plugins. Skip in case the include dir was # not found -- that'll fail later with a proper message. - if(_component MATCHES ${_MAGNUM_PLUGIN_COMPONENTS} AND _MAGNUM_${_COMPONENT}_INCLUDE_DIR) + if(_component IN_LIST _MAGNUM_PLUGIN_COMPONENTS AND _MAGNUM_${_COMPONENT}_INCLUDE_DIR) # Automatic import of static plugins file(READ ${_MAGNUM_${_COMPONENT}_INCLUDE_DIR}/configure.h _magnum${_component}Configure) string(FIND "${_magnum${_component}Configure}" "#define MAGNUM_${_COMPONENT}_BUILD_STATIC" _magnum${_component}_BUILD_STATIC) @@ -919,7 +966,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) # Link to core Magnum library, add inter-library dependencies. If there # are optional dependencies, defer adding them to later once we know if # they were found or not. 
- if(_component MATCHES ${_MAGNUM_LIBRARY_COMPONENTS} OR _component MATCHES ${_MAGNUM_PLUGIN_COMPONENTS}) + if(_component IN_LIST _MAGNUM_LIBRARY_COMPONENTS OR _component IN_LIST _MAGNUM_PLUGIN_COMPONENTS) set_property(TARGET Magnum::${_component} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Magnum::Magnum) set(_MAGNUM_${component}_OPTIONAL_DEPENDENCIES_TO_ADD ) @@ -938,7 +985,7 @@ foreach(_component ${Magnum_FIND_COMPONENTS}) endif() # Decide if the library was found - if(((_component MATCHES ${_MAGNUM_LIBRARY_COMPONENTS} OR _component MATCHES ${_MAGNUM_PLUGIN_COMPONENTS}) AND _MAGNUM_${_COMPONENT}_INCLUDE_DIR AND (MAGNUM_${_COMPONENT}_LIBRARY_DEBUG OR MAGNUM_${_COMPONENT}_LIBRARY_RELEASE)) OR (_component MATCHES ${_MAGNUM_EXECUTABLE_COMPONENTS} AND MAGNUM_${_COMPONENT}_EXECUTABLE)) + if(((_component IN_LIST _MAGNUM_LIBRARY_COMPONENTS OR _component IN_LIST _MAGNUM_PLUGIN_COMPONENTS) AND _MAGNUM_${_COMPONENT}_INCLUDE_DIR AND (MAGNUM_${_COMPONENT}_LIBRARY_DEBUG OR MAGNUM_${_COMPONENT}_LIBRARY_RELEASE)) OR (_component IN_LIST _MAGNUM_EXECUTABLE_COMPONENTS AND MAGNUM_${_COMPONENT}_EXECUTABLE)) set(Magnum_${_component}_FOUND TRUE) else() set(Magnum_${_component}_FOUND FALSE) @@ -995,16 +1042,54 @@ if(CORRADE_TARGET_EMSCRIPTEN) # IN_LIST as an operator since 3.1 (Emscripten needs at least 3.7), but # it's behind a policy, so enable that one as well. cmake_policy(SET CMP0057 NEW) + # TODO since 1.39.19 it's possible to use `-sUSE_WEBGL2=1`, which can be + # then passed via target_link_libraries() etc. 
without requiring CMake + # 3.13: https://github.com/emscripten-core/emscripten/blob/main/ChangeLog.md#13919-07072020 + # -- change to that once we drop support for older Emscripten versions if(CMAKE_VERSION VERSION_LESS 3.13 AND GL IN_LIST Magnum_FIND_COMPONENTS AND NOT MAGNUM_TARGET_GLES2 AND NOT CMAKE_EXE_LINKER_FLAGS MATCHES "-s USE_WEBGL2=1") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s USE_WEBGL2=1") endif() endif() +# For CMake 3.16+ with REASON_FAILURE_MESSAGE, provide additional potentially +# useful info about the failed components. +if(NOT CMAKE_VERSION VERSION_LESS 3.16) + set(_MAGNUM_REASON_FAILURE_MESSAGE ) + # Go only through the originally specified find_package() components, not + # the dependencies added by us afterwards + foreach(_component ${_MAGNUM_ORIGINAL_FIND_COMPONENTS}) + if(Magnum_${_component}_FOUND) + continue() + endif() + + # If it's not known at all, tell the user -- it might be a new library + # and an old Find module, or something platform-specific. + if(NOT _component IN_LIST _MAGNUM_LIBRARY_COMPONENTS AND NOT _component IN_LIST _MAGNUM_PLUGIN_COMPONENTS AND NOT _component IN_LIST _MAGNUM_EXECUTABLE_COMPONENTS) + list(APPEND _MAGNUM_REASON_FAILURE_MESSAGE "${_component} is not a known component on this platform.") + # Otherwise, if it's not among implicitly built components, hint that + # the user may need to enable it + # TODO: currently, the _FOUND variable doesn't reflect if dependencies + # were found. When it will, this needs to be updated to avoid + # misleading messages. + elseif(NOT _component IN_LIST _MAGNUM_IMPLICITLY_ENABLED_COMPONENTS) + string(TOUPPER ${_component} _COMPONENT) + list(APPEND _MAGNUM_REASON_FAILURE_MESSAGE "${_component} is not built by default. Make sure you enabled WITH_${_COMPONENT} when building Magnum.") + # Otherwise we have no idea. Better be silent than to print something + # misleading. 
+ else() + endif() + endforeach() + + string(REPLACE ";" " " _MAGNUM_REASON_FAILURE_MESSAGE "${_MAGNUM_REASON_FAILURE_MESSAGE}") + set(_MAGNUM_REASON_FAILURE_MESSAGE REASON_FAILURE_MESSAGE "${_MAGNUM_REASON_FAILURE_MESSAGE}") +endif() + # Complete the check with also all components include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Magnum REQUIRED_VARS MAGNUM_INCLUDE_DIR MAGNUM_LIBRARY ${MAGNUM_EXTRAS_NEEDED} - HANDLE_COMPONENTS) + HANDLE_COMPONENTS + ${_MAGNUM_REASON_FAILURE_MESSAGE}) # Components with optional dependencies -- add them once we know if they were # found or not. @@ -1105,17 +1190,31 @@ endif() # Installation and deploy dirs set(MAGNUM_DEPLOY_PREFIX "." CACHE STRING "Prefix where to put final application executables") -set(MAGNUM_INCLUDE_INSTALL_PREFIX "." - CACHE STRING "Prefix where to put platform-independent include and other files") include(${CORRADE_LIB_SUFFIX_MODULE}) set(MAGNUM_BINARY_INSTALL_DIR bin) set(MAGNUM_LIBRARY_INSTALL_DIR lib${LIB_SUFFIX}) -set(MAGNUM_DATA_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/share/magnum) +set(MAGNUM_DATA_INSTALL_DIR share/magnum) +set(MAGNUM_INCLUDE_INSTALL_DIR include/Magnum) +set(MAGNUM_EXTERNAL_INCLUDE_INSTALL_DIR include/MagnumExternal) +set(MAGNUM_PLUGINS_INCLUDE_INSTALL_DIR include/MagnumPlugins) +if(MAGNUM_BUILD_DEPRECATED AND MAGNUM_INCLUDE_INSTALL_PREFIX AND NOT MAGNUM_INCLUDE_INSTALL_PREFIX STREQUAL ".") + message(DEPRECATION "MAGNUM_INCLUDE_INSTALL_PREFIX is obsolete as its primary use was for old Android NDK versions. 
Please switch to the NDK r19+ layout instead of using this variable and recreate your build directory to get rid of this warning.") + set(MAGNUM_DATA_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/${MAGNUM_DATA_INSTALL_DIR}) + set(MAGNUM_INCLUDE_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/${MAGNUM_INCLUDE_INSTALL_DIR}) + set(MAGNUM_EXTERNAL_INCLUDE_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/${MAGNUM_EXTERNAL_INCLUDE_INSTALL_DIR}) + set(MAGNUM_PLUGINS_INCLUDE_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/${MAGNUM_PLUGINS_INCLUDE_INSTALL_DIR}) +endif() + set(MAGNUM_PLUGINS_DEBUG_BINARY_INSTALL_DIR ${MAGNUM_BINARY_INSTALL_DIR}/magnum-d) set(MAGNUM_PLUGINS_DEBUG_LIBRARY_INSTALL_DIR ${MAGNUM_LIBRARY_INSTALL_DIR}/magnum-d) set(MAGNUM_PLUGINS_RELEASE_BINARY_INSTALL_DIR ${MAGNUM_BINARY_INSTALL_DIR}/magnum) set(MAGNUM_PLUGINS_RELEASE_LIBRARY_INSTALL_DIR ${MAGNUM_LIBRARY_INSTALL_DIR}/magnum) + +set(MAGNUM_PLUGINS_SHADERCONVERTER_DEBUG_BINARY_INSTALL_DIR ${MAGNUM_PLUGINS_DEBUG_BINARY_INSTALL_DIR}/shaderconverters) +set(MAGNUM_PLUGINS_SHADERCONVERTER_DEBUG_LIBRARY_INSTALL_DIR ${MAGNUM_PLUGINS_DEBUG_LIBRARY_INSTALL_DIR}/shaderconverters) +set(MAGNUM_PLUGINS_SHADERCONVERTER_RELEASE_LIBRARY_INSTALL_DIR ${MAGNUM_PLUGINS_RELEASE_LIBRARY_INSTALL_DIR}/shaderconverters) +set(MAGNUM_PLUGINS_SHADERCONVERTER_RELEASE_BINARY_INSTALL_DIR ${MAGNUM_PLUGINS_RELEASE_BINARY_INSTALL_DIR}/shaderconverters) set(MAGNUM_PLUGINS_FONT_DEBUG_BINARY_INSTALL_DIR ${MAGNUM_PLUGINS_DEBUG_BINARY_INSTALL_DIR}/fonts) set(MAGNUM_PLUGINS_FONT_DEBUG_LIBRARY_INSTALL_DIR ${MAGNUM_PLUGINS_DEBUG_LIBRARY_INSTALL_DIR}/fonts) set(MAGNUM_PLUGINS_FONT_RELEASE_BINARY_INSTALL_DIR ${MAGNUM_PLUGINS_RELEASE_BINARY_INSTALL_DIR}/fonts) @@ -1138,9 +1237,6 @@ set(MAGNUM_PLUGINS_AUDIOIMPORTER_DEBUG_BINARY_INSTALL_DIR ${MAGNUM_PLUGINS_DEBUG set(MAGNUM_PLUGINS_AUDIOIMPORTER_DEBUG_LIBRARY_INSTALL_DIR ${MAGNUM_PLUGINS_DEBUG_LIBRARY_INSTALL_DIR}/audioimporters) set(MAGNUM_PLUGINS_AUDIOIMPORTER_RELEASE_BINARY_INSTALL_DIR 
${MAGNUM_PLUGINS_RELEASE_BINARY_INSTALL_DIR}/audioimporters) set(MAGNUM_PLUGINS_AUDIOIMPORTER_RELEASE_LIBRARY_INSTALL_DIR ${MAGNUM_PLUGINS_RELEASE_LIBRARY_INSTALL_DIR}/audioimporters) -set(MAGNUM_INCLUDE_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/include/Magnum) -set(MAGNUM_EXTERNAL_INCLUDE_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/include/MagnumExternal) -set(MAGNUM_PLUGINS_INCLUDE_INSTALL_DIR ${MAGNUM_INCLUDE_INSTALL_PREFIX}/include/MagnumPlugins) # Get base plugin directory from main library location. This is *not* PATH, # because CMake always converts the path to an absolute location internally, diff --git a/src/cmake/FindMagnumBindings.cmake b/src/cmake/FindMagnumBindings.cmake index 5fad00d92b..0bcbb1a8ba 100644 --- a/src/cmake/FindMagnumBindings.cmake +++ b/src/cmake/FindMagnumBindings.cmake @@ -35,7 +35,7 @@ # This file is part of Magnum. # # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -65,15 +65,34 @@ mark_as_advanced(MAGNUMBINDINGS_INCLUDE_DIR) # Component distinction (listing them explicitly to avoid mistakes with finding # components from other repositories) -set(_MAGNUMBINDINGS_HEADER_ONLY_COMPONENT_LIST Python) +set(_MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS Python) +# Nothing is enabled by default right now +set(_MAGNUMBINDINGS_IMPLICITLY_ENABLED_COMPONENTS ) -# Convert components lists to regular expressions so I can use if(MATCHES). 
-# TODO: Drop this once CMake 3.3 and if(IN_LIST) can be used -foreach(_WHAT HEADER_ONLY) - string(REPLACE ";" "|" _MAGNUMBINDINGS_${_WHAT}_COMPONENTS "${_MAGNUMBINDINGS_${_WHAT}_COMPONENT_LIST}") - set(_MAGNUMBINDINGS_${_WHAT}_COMPONENTS "^(${_MAGNUMBINDINGS_${_WHAT}_COMPONENTS})$") +# No inter-component dependencies right now + +# Ensure that all inter-component dependencies are specified as well +set(_MAGNUMBINDINGS_ADDITIONAL_COMPONENTS ) +foreach(_component ${MagnumBindings_FIND_COMPONENTS}) + # Mark the dependencies as required if the component is also required + if(MagnumBindings_FIND_REQUIRED_${_component}) + foreach(_dependency ${_MAGNUMBINDINGS_${_component}_DEPENDENCIES}) + set(MagnumBindings_FIND_REQUIRED_${_dependency} TRUE) + endforeach() + endif() + + list(APPEND _MAGNUMBINDINGS_ADDITIONAL_COMPONENTS ${_MAGNUMBINDINGS_${_component}_DEPENDENCIES}) endforeach() +# Join the lists, remove duplicate components +set(_MAGNUMBINDINGS_ORIGINAL_FIND_COMPONENTS ${MagnumBindings_FIND_COMPONENTS}) +if(_MAGNUMBINDINGS_ADDITIONAL_COMPONENTS) + list(INSERT MagnumBindings_FIND_COMPONENTS 0 ${_MAGNUMBINDINGS_ADDITIONAL_COMPONENTS}) +endif() +if(MagnumBindings_FIND_COMPONENTS) + list(REMOVE_DUPLICATES MagnumBindings_FIND_COMPONENTS) +endif() + # Find all components foreach(_component ${MagnumBindings_FIND_COMPONENTS}) string(TOUPPER ${_component} _COMPONENT) @@ -85,7 +104,7 @@ foreach(_component ${MagnumBindings_FIND_COMPONENTS}) set(MagnumBindings_${_component}_FOUND TRUE) else() # Header-only components - if(_component MATCHES ${_MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS}) + if(_component IN_LIST _MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS) add_library(MagnumBindings::${_component} INTERFACE IMPORTED) endif() @@ -94,7 +113,7 @@ foreach(_component ${MagnumBindings_FIND_COMPONENTS}) set(_MAGNUMBINDINGS_${_COMPONENT}_INCLUDE_PATH_NAMES Magnum/SceneGraph/PythonBindings.h) endif() - if(_component MATCHES ${_MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS}) + if(_component IN_LIST 
_MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS)
         # Find includes
         find_path(_MAGNUMBINDINGS_${_COMPONENT}_INCLUDE_DIR
             NAMES ${_MAGNUMBINDINGS_${_COMPONENT}_INCLUDE_PATH_NAMES}
@@ -111,7 +130,7 @@ foreach(_component ${MagnumBindings_FIND_COMPONENTS})
         endif()
 
         # Decide if the component was found
-        if(_component MATCHES ${_MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS} AND _MAGNUMBINDINGS_${_COMPONENT}_INCLUDE_DIR)
+        if(_component IN_LIST _MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS AND _MAGNUMBINDINGS_${_COMPONENT}_INCLUDE_DIR)
             set(MagnumBindings_${_component}_FOUND TRUE)
         else()
             set(MagnumBindings_${_component}_FOUND FALSE)
@@ -119,7 +138,41 @@ foreach(_component ${MagnumBindings_FIND_COMPONENTS})
     endif()
 endforeach()
 
+# For CMake 3.16+ with REASON_FAILURE_MESSAGE, provide additional potentially
+# useful info about the failed components.
+if(NOT CMAKE_VERSION VERSION_LESS 3.16)
+    set(_MAGNUMBINDINGS_REASON_FAILURE_MESSAGE)
+    # Go only through the originally specified find_package() components, not
+    # the dependencies added by us afterwards
+    foreach(_component ${_MAGNUMBINDINGS_ORIGINAL_FIND_COMPONENTS})
+        if(MagnumBindings_${_component}_FOUND)
+            continue()
+        endif()
+
+        # If it's not known at all, tell the user -- it might be a new library
+        # and an old Find module, or something platform-specific.
+        if(NOT _component IN_LIST _MAGNUMBINDINGS_HEADER_ONLY_COMPONENTS)
+            list(APPEND _MAGNUMBINDINGS_REASON_FAILURE_MESSAGE "${_component} is not a known component on this platform.")
+        # Otherwise, if it's not among implicitly built components, hint that
+        # the user may need to enable it
+        # TODO: currently, the _FOUND variable doesn't reflect if dependencies
+        # were found. When it will, this needs to be updated to avoid
+        # misleading messages.
+        elseif(NOT _component IN_LIST _MAGNUMBINDINGS_IMPLICITLY_ENABLED_COMPONENTS)
+            string(TOUPPER ${_component} _COMPONENT)
+            list(APPEND _MAGNUMBINDINGS_REASON_FAILURE_MESSAGE "${_component} is not built by default. Make sure you enabled WITH_${_COMPONENT} when building Magnum Bindings.")
+        # Otherwise we have no idea. Better be silent than to print something
+        # misleading.
+        else()
+        endif()
+    endforeach()
+
+    string(REPLACE ";" " " _MAGNUMBINDINGS_REASON_FAILURE_MESSAGE "${_MAGNUMBINDINGS_REASON_FAILURE_MESSAGE}")
+    set(_MAGNUMBINDINGS_REASON_FAILURE_MESSAGE REASON_FAILURE_MESSAGE "${_MAGNUMBINDINGS_REASON_FAILURE_MESSAGE}")
+endif()
+
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(MagnumBindings REQUIRED_VARS
     MAGNUMBINDINGS_INCLUDE_DIR
-    HANDLE_COMPONENTS)
+    HANDLE_COMPONENTS
+    ${_MAGNUMBINDINGS_REASON_FAILURE_MESSAGE})
diff --git a/src/cmake/FindMagnumIntegration.cmake b/src/cmake/FindMagnumIntegration.cmake
index b2b81c7bf8..f0d535198e 100644
--- a/src/cmake/FindMagnumIntegration.cmake
+++ b/src/cmake/FindMagnumIntegration.cmake
@@ -48,7 +48,7 @@
 # This file is part of Magnum.
# # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # Copyright © 2018 Konstantinos Chatzilygeroudis # # Permission is hereby granted, free of charge, to any person obtaining a @@ -96,8 +96,13 @@ mark_as_advanced(MAGNUMINTEGRATION_INCLUDE_DIR) # Component distinction (listing them explicitly to avoid mistakes with finding # components from other repositories) -set(_MAGNUMINTEGRATION_LIBRARY_COMPONENT_LIST Bullet Dart Eigen ImGui Glm Ovr) -set(_MAGNUMINTEGRATION_HEADER_ONLY_COMPONENT_LIST Eigen) +set(_MAGNUMINTEGRATION_LIBRARY_COMPONENTS Bullet Dart Eigen ImGui Glm) +if(CORRADE_TARGET_WINDOWS) + list(APPEND _MAGNUMINTEGRATION_LIBRARY_COMPONENTS Ovr) +endif() +set(_MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS Eigen) +# Nothing is enabled by default right now +set(_MAGNUMINTEGRATION_IMPLICITLY_ENABLED_COMPONENTS ) # Inter-component dependencies (none yet) # set(_MAGNUMINTEGRATION_Component_DEPENDENCIES Dependency) @@ -116,6 +121,7 @@ foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) endforeach() # Join the lists, remove duplicate components +set(_MAGNUMINTEGRATION_ORIGINAL_FIND_COMPONENTS ${MagnumIntegration_FIND_COMPONENTS}) if(_MAGNUMINTEGRATION_ADDITIONAL_COMPONENTS) list(INSERT MagnumIntegration_FIND_COMPONENTS 0 ${_MAGNUMINTEGRATION_ADDITIONAL_COMPONENTS}) endif() @@ -123,13 +129,6 @@ if(MagnumIntegration_FIND_COMPONENTS) list(REMOVE_DUPLICATES MagnumIntegration_FIND_COMPONENTS) endif() -# Convert components lists to regular expressions so I can use if(MATCHES). 
-# TODO: Drop this once CMake 3.3 and if(IN_LIST) can be used -foreach(_WHAT LIBRARY HEADER_ONLY) - string(REPLACE ";" "|" _MAGNUMINTEGRATION_${_WHAT}_COMPONENTS "${_MAGNUMINTEGRATION_${_WHAT}_COMPONENT_LIST}") - set(_MAGNUMINTEGRATION_${_WHAT}_COMPONENTS "^(${_MAGNUMINTEGRATION_${_WHAT}_COMPONENTS})$") -endforeach() - # Find all components foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) string(TOUPPER ${_component} _COMPONENT) @@ -141,7 +140,7 @@ foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) set(MagnumIntegration_${_component}_FOUND TRUE) else() # Library components - if(_component MATCHES ${_MAGNUMINTEGRATION_LIBRARY_COMPONENTS} AND NOT _component MATCHES ${_MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS}) + if(_component IN_LIST _MAGNUMINTEGRATION_LIBRARY_COMPONENTS AND NOT _component IN_LIST _MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS) add_library(MagnumIntegration::${_component} UNKNOWN IMPORTED) # Try to find both debug and release version @@ -163,11 +162,14 @@ foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) set_property(TARGET MagnumIntegration::${_component} PROPERTY IMPORTED_LOCATION_DEBUG ${MAGNUMINTEGRATION_${_COMPONENT}_LIBRARY_DEBUG}) endif() - endif() # Header-only library components - if(_component MATCHES ${_MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS}) + elseif(_component IN_LIST _MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS) add_library(MagnumIntegration::${_component} INTERFACE IMPORTED) + + # Something unknown, skip. FPHSA will take care of handling this below. 
+ else() + continue() endif() # Bullet integration library @@ -251,14 +253,14 @@ foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) endif() # Find library includes - if(_component MATCHES ${_MAGNUMINTEGRATION_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUMINTEGRATION_LIBRARY_COMPONENTS) find_path(_MAGNUMINTEGRATION_${_COMPONENT}_INCLUDE_DIR NAMES ${_MAGNUMINTEGRATION_${_COMPONENT}_INCLUDE_PATH_NAMES} HINTS ${MAGNUMINTEGRATION_INCLUDE_DIR}/Magnum/${_component}Integration) mark_as_advanced(_MAGNUMINTEGRATION_${_COMPONENT}_INCLUDE_DIR) endif() - if(_component MATCHES ${_MAGNUMINTEGRATION_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUMINTEGRATION_LIBRARY_COMPONENTS) # Link to core Magnum library, add other Magnum required and # optional dependencies set_property(TARGET MagnumIntegration::${_component} APPEND PROPERTY @@ -282,7 +284,7 @@ foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) endif() # Decide if the library was found - if(_component MATCHES ${_MAGNUMINTEGRATION_LIBRARY_COMPONENTS} AND _MAGNUMINTEGRATION_${_COMPONENT}_INCLUDE_DIR AND (_component MATCHES ${_MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS} OR MAGNUMINTEGRATION_${_COMPONENT}_LIBRARY_DEBUG OR MAGNUMINTEGRATION_${_COMPONENT}_LIBRARY_RELEASE)) + if(_component IN_LIST _MAGNUMINTEGRATION_LIBRARY_COMPONENTS AND _MAGNUMINTEGRATION_${_COMPONENT}_INCLUDE_DIR AND (_component IN_LIST _MAGNUMINTEGRATION_HEADER_ONLY_COMPONENTS OR MAGNUMINTEGRATION_${_COMPONENT}_LIBRARY_DEBUG OR MAGNUMINTEGRATION_${_COMPONENT}_LIBRARY_RELEASE)) set(MagnumIntegration_${_component}_FOUND TRUE) else() set(MagnumIntegration_${_component}_FOUND FALSE) @@ -290,7 +292,41 @@ foreach(_component ${MagnumIntegration_FIND_COMPONENTS}) endif() endforeach() +# For CMake 3.16+ with REASON_FAILURE_MESSAGE, provide additional potentially +# useful info about the failed components. 
+if(NOT CMAKE_VERSION VERSION_LESS 3.16) + set(_MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE ) + # Go only through the originally specified find_package() components, not + # the dependencies added by us afterwards + foreach(_component ${_MAGNUMINTEGRATION_ORIGINAL_FIND_COMPONENTS}) + if(MagnumIntegration_${_component}_FOUND) + continue() + endif() + + # If it's not known at all, tell the user -- it might be a new library + # and an old Find module, or something platform-specific. + if(NOT _component IN_LIST _MAGNUMINTEGRATION_LIBRARY_COMPONENTS) + list(APPEND _MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE "${_component} is not a known component on this platform.") + # Otherwise, if it's not among implicitly built components, hint that + # the user may need to enable it + # TODO: currently, the _FOUND variable doesn't reflect if dependencies + # were found. When it will, this needs to be updated to avoid + # misleading messages. + elseif(NOT _component IN_LIST _MAGNUMINTEGRATION_IMPLICITLY_ENABLED_COMPONENTS) + string(TOUPPER ${_component} _COMPONENT) + list(APPEND _MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE "${_component} is not built by default. Make sure you enabled WITH_${_COMPONENT} when building Magnum Integration.") + # Otherwise we have no idea. Better be silent than to print something + # misleading. 
+ else() + endif() + endforeach() + + string(REPLACE ";" " " _MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE "${_MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE}") + set(_MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE REASON_FAILURE_MESSAGE "${_MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE}") +endif() + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(MagnumIntegration REQUIRED_VARS MAGNUMINTEGRATION_INCLUDE_DIR - HANDLE_COMPONENTS) + HANDLE_COMPONENTS + ${_MAGNUMINTEGRATION_REASON_FAILURE_MESSAGE}) diff --git a/src/cmake/FindMagnumPlugins.cmake b/src/cmake/FindMagnumPlugins.cmake index 3255f795fd..98ca188332 100644 --- a/src/cmake/FindMagnumPlugins.cmake +++ b/src/cmake/FindMagnumPlugins.cmake @@ -22,23 +22,35 @@ # DrWavAudioImporter - WAV audio importer using dr_wav # Faad2AudioImporter - AAC audio importer using FAAD2 # FreeTypeFont - FreeType font +# GlslangShaderConverter - Glslang shader converter +# GltfImporter - glTF importer +# GltfSceneConverter - glTF converter # HarfBuzzFont - HarfBuzz font # IcoImporter - ICO importer # JpegImageConverter - JPEG image converter # JpegImporter - JPEG importer +# KtxImageConverter - KTX image converter +# KtxImporter - KTX importer # MeshOptimizerSceneConverter - MeshOptimizer scene converter # MiniExrImageConverter - OpenEXR image converter using miniexr # OpenGexImporter - OpenGEX importer # PngImageConverter - PNG image converter # PngImporter - PNG importer # PrimitiveImporter - Primitive importer +# SpirvToolsShaderConverter - SPIR-V Tools shader converter # StanfordImporter - Stanford PLY importer # StanfordSceneConverter - Stanford PLY converter +# StbDxtImageConverter - BC1/BC3 image compressor using stb_dxt # StbImageConverter - Image converter using stb_image_write # StbImageImporter - Image importer using stb_image # StbTrueTypeFont - TrueType font using stb_truetype # StbVorbisAudioImporter - OGG audio importer using stb_vorbis # StlImporter - STL importer +# +# If Magnum is built with 
MAGNUM_BUILD_DEPRECATED enabled, these additional +# plugins are available for backwards compatibility purposes: +# +# CgltfImporter - glTF importer using cgltf # TinyGltfImporter - GLTF importer using tiny_gltf # # Some plugins expose their internal state through separate libraries. The @@ -75,7 +87,7 @@ # This file is part of Magnum. # # Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, -# 2020 Vladimír VondruÅ¡ +# 2020, 2021, 2022 Vladimír VondruÅ¡ # Copyright © 2019 Jonathan Hale # # Permission is hereby granted, free of charge, to any person obtaining a @@ -110,6 +122,11 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) if(_component STREQUAL AssimpImporter) list(APPEND _MAGNUMPLUGINS_${_component}_MAGNUM_DEPENDENCIES AnyImageImporter) + elseif(_component STREQUAL CgltfImporter) + # TODO remove when the deprecated plugin is gone + list(APPEND _MAGNUMPLUGINS_${_component}_MAGNUM_DEPENDENCIES AnyImageImporter) + elseif(_component STREQUAL GltfImporter) + list(APPEND _MAGNUMPLUGINS_${_component}_MAGNUM_DEPENDENCIES AnyImageImporter) elseif(_component STREQUAL MeshOptimizerSceneConverter) list(APPEND _MAGNUMPLUGINS_${_component}_MAGNUM_DEPENDENCIES MeshTools) elseif(_component STREQUAL OpenGexImporter) @@ -121,6 +138,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) elseif(_component STREQUAL StanfordSceneConverter) list(APPEND _MAGNUMPLUGINS_${_component}_MAGNUM_DEPENDENCIES MeshTools) elseif(_component STREQUAL TinyGltfImporter) + # TODO remove when the deprecated plugin is gone list(APPEND _MAGNUMPLUGINS_${_component}_MAGNUM_DEPENDENCIES AnyImageImporter) endif() @@ -135,21 +153,31 @@ mark_as_advanced(MAGNUMPLUGINS_INCLUDE_DIR) # Component distinction (listing them explicitly to avoid mistakes with finding # components from other repositories) -set(_MAGNUMPLUGINS_LIBRARY_COMPONENT_LIST OpenDdl) -set(_MAGNUMPLUGINS_PLUGIN_COMPONENT_LIST +set(_MAGNUMPLUGINS_LIBRARY_COMPONENTS OpenDdl) +set(_MAGNUMPLUGINS_PLUGIN_COMPONENTS 
AssimpImporter BasisImageConverter BasisImporter DdsImporter DevIlImageImporter DrFlacAudioImporter DrMp3AudioImporter - DrWavAudioImporter Faad2AudioImporter FreeTypeFont HarfBuzzFont IcoImporter - JpegImageConverter JpegImporter MeshOptimizerSceneConverter - MiniExrImageConverter OpenGexImporter PngImageConverter PngImporter - PrimitiveImporter StanfordImporter StanfordSceneConverter StbImageConverter - StbImageImporter StbTrueTypeFont StbVorbisAudioImporter StlImporter - TinyGltfImporter) + DrWavAudioImporter Faad2AudioImporter FreeTypeFont GlslangShaderConverter + GltfImporter GltfSceneConverter HarfBuzzFont IcoImporter JpegImageConverter + JpegImporter KtxImageConverter KtxImporter MeshOptimizerSceneConverter + MiniExrImageConverter OpenExrImageConverter OpenExrImporter + OpenGexImporter PngImageConverter PngImporter PrimitiveImporter + SpirvToolsShaderConverter StanfordImporter StanfordSceneConverter + StbDxtImageConverter StbImageConverter StbImageImporter + StbTrueTypeFont StbVorbisAudioImporter StlImporter) +# Nothing is enabled by default right now +set(_MAGNUMPLUGINS_IMPLICITLY_ENABLED_COMPONENTS ) # Inter-component dependencies set(_MAGNUMPLUGINS_HarfBuzzFont_DEPENDENCIES FreeTypeFont) set(_MAGNUMPLUGINS_OpenGexImporter_DEPENDENCIES OpenDdl) +# CgltfImporter and TinyGltfImporter, available only on a deprecated build +if(MAGNUM_BUILD_DEPRECATED) + list(APPEND _MAGNUMPLUGINS_PLUGIN_COMPONENTS CgltfImporter TinyGltfImporter) + set(_MAGNUMPLUGINS_CgltfImporter_DEPENDENCIES GltfImporter) +endif() + # Ensure that all inter-component dependencies are specified as well set(_MAGNUMPLUGINS_ADDITIONAL_COMPONENTS ) foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) @@ -164,6 +192,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endforeach() # Join the lists, remove duplicate components +set(_MAGNUMPLUGINS_ORIGINAL_FIND_COMPONENTS ${MagnumPlugins_FIND_COMPONENTS}) if(_MAGNUMPLUGINS_ADDITIONAL_COMPONENTS) list(INSERT MagnumPlugins_FIND_COMPONENTS 0 
${_MAGNUMPLUGINS_ADDITIONAL_COMPONENTS}) endif() @@ -171,13 +200,6 @@ if(MagnumPlugins_FIND_COMPONENTS) list(REMOVE_DUPLICATES MagnumPlugins_FIND_COMPONENTS) endif() -# Convert components lists to regular expressions so I can use if(MATCHES). -# TODO: Drop this once CMake 3.3 and if(IN_LIST) can be used -foreach(_WHAT LIBRARY PLUGIN) - string(REPLACE ";" "|" _MAGNUMPLUGINS_${_WHAT}_COMPONENTS "${_MAGNUMPLUGINS_${_WHAT}_COMPONENT_LIST}") - set(_MAGNUMPLUGINS_${_WHAT}_COMPONENTS "^(${_MAGNUMPLUGINS_${_WHAT}_COMPONENTS})$") -endforeach() - # Find all components foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) string(TOUPPER ${_component} _COMPONENT) @@ -189,7 +211,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) set(MagnumPlugins_${_component}_FOUND TRUE) else() # Library components - if(_component MATCHES ${_MAGNUMPLUGINS_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUMPLUGINS_LIBRARY_COMPONENTS) add_library(MagnumPlugins::${_component} UNKNOWN IMPORTED) # Set library defaults, find the library @@ -204,7 +226,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() # Plugin components - if(_component MATCHES ${_MAGNUMPLUGINS_PLUGIN_COMPONENTS}) + if(_component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS) add_library(MagnumPlugins::${_component} UNKNOWN IMPORTED) # AudioImporter plugin specific name suffixes @@ -271,7 +293,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() # Library location for plugins/libraries - if(_component MATCHES ${_MAGNUMPLUGINS_PLUGIN_COMPONENTS} OR _component MATCHES ${_MAGNUMPLUGINS_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS OR _component IN_LIST _MAGNUMPLUGINS_LIBRARY_COMPONENTS) if(MAGNUMPLUGINS_${_COMPONENT}_LIBRARY_RELEASE) set_property(TARGET MagnumPlugins::${_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) @@ -312,6 +334,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) INTERFACE_LINK_LIBRARIES basisu_transcoder) endif() + # CgltfImporter has 
no dependencies # DdsImporter has no dependencies # DevIlImageImporter plugin dependencies @@ -344,6 +367,15 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) INTERFACE_LINK_LIBRARIES ${FREETYPE_LIBRARIES}) endif() + # GlslangShaderConverter plugin dependencies + elseif(_component STREQUAL GlslangShaderConverter) + find_package(Glslang REQUIRED) + set_property(TARGET MagnumPlugins::${_component} APPEND PROPERTY + INTERFACE_LINK_LIBRARIES Glslang::Glslang) + + # GltfImporter has no dependencies + # GltfSceneConverter has no dependencies + # HarfBuzzFont plugin dependencies elseif(_component STREQUAL HarfBuzzFont) find_package(Freetype) @@ -377,6 +409,9 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) INTERFACE_LINK_LIBRARIES ${JPEG_LIBRARIES}) endif() + # KtxImageConverter has no dependencies + # KtxImporter has no dependencies + # MeshOptimizerSceneConverter plugin dependencies elseif(_component STREQUAL MeshOptimizerSceneConverter) if(NOT TARGET meshoptimizer) @@ -389,6 +424,15 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() # MiniExrImageConverter has no dependencies + + # OpenExrImporter / OpenExrImageConverter plugin dependencies + elseif(_component STREQUAL OpenExrImporter OR _component STREQUAL OpenExrImageConverter) + # Force our own FindOpenEXR module, which then delegates to the + # config if appropriate + find_package(OpenEXR REQUIRED MODULE) + set_property(TARGET MagnumPlugins::${_component} APPEND PROPERTY + INTERFACE_LINK_LIBRARIES OpenEXR::IlmImf) + # No special setup for the OpenDdl library # OpenGexImporter has no dependencies @@ -409,11 +453,18 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) set_property(TARGET MagnumPlugins::${_component} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${PNG_LIBRARIES}) endif() - endif() # PrimitiveImporter has no dependencies + + # SpirvToolsShaderConverter plugin dependencies + elseif(_component STREQUAL SpirvToolsShaderConverter) + find_package(SpirvTools REQUIRED) + 
set_property(TARGET MagnumPlugins::${_component} APPEND PROPERTY + INTERFACE_LINK_LIBRARIES SpirvTools::SpirvTools SpirvTools::Opt) + # StanfordImporter has no dependencies # StanfordSceneConverter has no dependencies + # StbDxtImageConverter has no dependencies # StbImageConverter has no dependencies # StbImageImporter has no dependencies # StbTrueTypeFont has no dependencies @@ -421,8 +472,10 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) # StlImporter has no dependencies # TinyGltfImporter has no dependencies + endif() + # Find plugin/library includes - if(_component MATCHES ${_MAGNUMPLUGINS_PLUGIN_COMPONENTS} OR _component MATCHES ${_MAGNUMPLUGINS_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS OR _component IN_LIST _MAGNUMPLUGINS_LIBRARY_COMPONENTS) find_path(_MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_DIR NAMES ${_MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_PATH_NAMES} HINTS ${MAGNUMPLUGINS_INCLUDE_DIR}/${_MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_PATH_SUFFIX}) @@ -430,7 +483,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() # Automatic import of static plugins - if(_component MATCHES ${_MAGNUMPLUGINS_PLUGIN_COMPONENTS}) + if(_component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS AND _MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_DIR) file(READ ${_MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_DIR}/configure.h _magnumPlugins${_component}Configure) string(FIND "${_magnumPlugins${_component}Configure}" "#define MAGNUM_${_COMPONENT}_BUILD_STATIC" _magnumPlugins${_component}_BUILD_STATIC) if(NOT _magnumPlugins${_component}_BUILD_STATIC EQUAL -1) @@ -439,7 +492,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() endif() - if(_component MATCHES ${_MAGNUMPLUGINS_PLUGIN_COMPONENTS} OR _component MATCHES ${_MAGNUMPLUGINS_LIBRARY_COMPONENTS}) + if(_component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS OR _component IN_LIST _MAGNUMPLUGINS_LIBRARY_COMPONENTS) # Link to core Magnum library, add other Magnum dependencies set_property(TARGET 
MagnumPlugins::${_component} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Magnum::Magnum) @@ -456,7 +509,7 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() # Decide if the plugin/library was found - if((_component MATCHES ${_MAGNUMPLUGINS_PLUGIN_COMPONENTS} OR _component MATCHES ${_MAGNUMPLUGINS_LIBRARY_COMPONENTS}) AND _MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_DIR AND (MAGNUMPLUGINS_${_COMPONENT}_LIBRARY_DEBUG OR MAGNUMPLUGINS_${_COMPONENT}_LIBRARY_RELEASE)) + if((_component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS OR _component IN_LIST _MAGNUMPLUGINS_LIBRARY_COMPONENTS) AND _MAGNUMPLUGINS_${_COMPONENT}_INCLUDE_DIR AND (MAGNUMPLUGINS_${_COMPONENT}_LIBRARY_DEBUG OR MAGNUMPLUGINS_${_COMPONENT}_LIBRARY_RELEASE)) set(MagnumPlugins_${_component}_FOUND TRUE) else() set(MagnumPlugins_${_component}_FOUND FALSE) @@ -464,7 +517,41 @@ foreach(_component ${MagnumPlugins_FIND_COMPONENTS}) endif() endforeach() +# For CMake 3.16+ with REASON_FAILURE_MESSAGE, provide additional potentially +# useful info about the failed components. +if(NOT CMAKE_VERSION VERSION_LESS 3.16) + set(_MAGNUMPLUGINS_REASON_FAILURE_MESSAGE) + # Go only through the originally specified find_package() components, not + # the dependencies added by us afterwards + foreach(_component ${_MAGNUMPLUGINS_ORIGINAL_FIND_COMPONENTS}) + if(MagnumPlugins_${_component}_FOUND) + continue() + endif() + + # If it's not known at all, tell the user -- it might be a new library + # and an old Find module, or something platform-specific. + if(NOT _component IN_LIST _MAGNUMPLUGINS_LIBRARY_COMPONENTS AND NOT _component IN_LIST _MAGNUMPLUGINS_PLUGIN_COMPONENTS) + list(APPEND _MAGNUMPLUGINS_REASON_FAILURE_MESSAGE "${_component} is not a known component on this platform.") + # Otherwise, if it's not among implicitly built components, hint that + # the user may need to enable it + # TODO: currently, the _FOUND variable doesn't reflect if dependencies + # were found. 
When it will, this needs to be updated to avoid + # misleading messages. + elseif(NOT _component IN_LIST _MAGNUMPLUGINS_IMPLICITLY_ENABLED_COMPONENTS) + string(TOUPPER ${_component} _COMPONENT) + list(APPEND _MAGNUMPLUGINS_REASON_FAILURE_MESSAGE "${_component} is not built by default. Make sure you enabled WITH_${_COMPONENT} when building Magnum Plugins.") + # Otherwise we have no idea. Better be silent than to print something + # misleading. + else() + endif() + endforeach() + + string(REPLACE ";" " " _MAGNUMPLUGINS_REASON_FAILURE_MESSAGE "${_MAGNUMPLUGINS_REASON_FAILURE_MESSAGE}") + set(_MAGNUMPLUGINS_REASON_FAILURE_MESSAGE REASON_FAILURE_MESSAGE "${_MAGNUMPLUGINS_REASON_FAILURE_MESSAGE}") +endif() + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(MagnumPlugins REQUIRED_VARS MAGNUMPLUGINS_INCLUDE_DIR - HANDLE_COMPONENTS) + HANDLE_COMPONENTS + ${_MAGNUMPLUGINS_REASON_FAILURE_MESSAGE}) From dc084205a91e85bd2cbf3b8171bc6d5e4869141f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Mon, 20 Dec 2021 21:31:45 +0100 Subject: [PATCH 41/53] Bundle new Basis Universal 1_15_update2, including Zstd. This is a difference to Habitat master, which omits the Zstd files. But otherwise the KTX files won't load. 
--- src/cmake/dependencies.cmake | 1 + src/deps/basis-universal/basisu_comp.cpp | 1206 - src/deps/basis-universal/basisu_etc.cpp | 1074 - src/deps/basis-universal/basisu_pvrtc1_4.cpp | 269 - src/deps/basis-universal/encoder/apg_bmp.c | 541 + src/deps/basis-universal/encoder/apg_bmp.h | 123 + .../{ => encoder}/basisu_astc_decomp.cpp | 15 +- .../{ => encoder}/basisu_astc_decomp.h | 2 +- .../{ => encoder}/basisu_backend.cpp | 305 +- .../{ => encoder}/basisu_backend.h | 35 +- .../{ => encoder}/basisu_basis_file.cpp | 105 +- .../{ => encoder}/basisu_basis_file.h | 4 +- .../basis-universal/encoder/basisu_bc7enc.cpp | 1983 + .../basis-universal/encoder/basisu_bc7enc.h | 132 + .../basis-universal/encoder/basisu_comp.cpp | 2119 + .../{ => encoder}/basisu_comp.h | 259 +- .../{ => encoder}/basisu_enc.cpp | 837 +- .../{ => encoder}/basisu_enc.h | 410 +- .../basis-universal/encoder/basisu_etc.cpp | 1593 + .../{ => encoder}/basisu_etc.h | 158 +- .../{ => encoder}/basisu_frontend.cpp | 897 +- .../{ => encoder}/basisu_frontend.h | 46 +- ...basisu_global_selector_palette_helpers.cpp | 0 .../basisu_global_selector_palette_helpers.h | 6 +- .../{ => encoder}/basisu_gpu_texture.cpp | 625 +- .../{ => encoder}/basisu_gpu_texture.h | 24 +- .../encoder/basisu_kernels_sse.cpp | 161 + .../basis-universal/encoder/basisu_miniz.h | 2514 + .../encoder/basisu_pvrtc1_4.cpp | 564 + .../{ => encoder}/basisu_pvrtc1_4.h | 100 +- .../{ => encoder}/basisu_resample_filters.cpp | 24 +- .../{ => encoder}/basisu_resampler.cpp | 8 - .../{ => encoder}/basisu_resampler.h | 2 +- .../{ => encoder}/basisu_resampler_filters.h | 2 +- .../{ => encoder}/basisu_ssim.cpp | 0 .../{ => encoder}/basisu_ssim.h | 0 .../encoder/basisu_uastc_enc.cpp | 4189 ++ .../encoder/basisu_uastc_enc.h | 140 + src/deps/basis-universal/encoder/jpgd.cpp | 3241 ++ src/deps/basis-universal/encoder/jpgd.h | 347 + .../basis-universal/{ => encoder}/lodepng.cpp | 2 + .../basis-universal/{ => encoder}/lodepng.h | 0 
src/deps/basis-universal/transcoder/basisu.h | 152 +- .../transcoder/basisu_containers.h | 1908 + .../transcoder/basisu_containers_impl.h | 311 + .../transcoder/basisu_file_headers.h | 45 +- .../transcoder/basisu_global_selector_cb.h | 2 +- .../basisu_global_selector_palette.h | 8 +- .../transcoder/basisu_transcoder.cpp | 10942 ++++- .../transcoder/basisu_transcoder.h | 758 +- .../transcoder/basisu_transcoder_internal.h | 84 +- .../basisu_transcoder_tables_astc.inc | 2 +- .../basisu_transcoder_tables_astc_0_255.inc | 2 +- .../basisu_transcoder_tables_atc_55.inc | 481 + .../basisu_transcoder_tables_atc_56.inc | 481 + .../basisu_transcoder_tables_bc7_m5_alpha.inc | 2 +- .../basisu_transcoder_tables_bc7_m5_color.inc | 2 +- .../basisu_transcoder_tables_dxt1_5.inc | 2 +- .../basisu_transcoder_tables_dxt1_6.inc | 2 +- .../basisu_transcoder_tables_pvrtc2_45.inc | 481 + ...sisu_transcoder_tables_pvrtc2_alpha_33.inc | 481 + .../transcoder/basisu_transcoder_uastc.h | 297 + src/deps/basis-universal/zstd/zstd.c | 38717 ++++++++++++++++ src/deps/basis-universal/zstd/zstd.h | 2450 + 64 files changed, 76257 insertions(+), 5416 deletions(-) delete mode 100644 src/deps/basis-universal/basisu_comp.cpp delete mode 100644 src/deps/basis-universal/basisu_etc.cpp delete mode 100644 src/deps/basis-universal/basisu_pvrtc1_4.cpp create mode 100644 src/deps/basis-universal/encoder/apg_bmp.c create mode 100644 src/deps/basis-universal/encoder/apg_bmp.h rename src/deps/basis-universal/{ => encoder}/basisu_astc_decomp.cpp (99%) rename src/deps/basis-universal/{ => encoder}/basisu_astc_decomp.h (94%) rename src/deps/basis-universal/{ => encoder}/basisu_backend.cpp (84%) rename src/deps/basis-universal/{ => encoder}/basisu_backend.h (87%) rename src/deps/basis-universal/{ => encoder}/basisu_basis_file.cpp (67%) rename src/deps/basis-universal/{ => encoder}/basisu_basis_file.h (95%) create mode 100644 src/deps/basis-universal/encoder/basisu_bc7enc.cpp create mode 100644 
src/deps/basis-universal/encoder/basisu_bc7enc.h create mode 100644 src/deps/basis-universal/encoder/basisu_comp.cpp rename src/deps/basis-universal/{ => encoder}/basisu_comp.h (56%) rename src/deps/basis-universal/{ => encoder}/basisu_enc.cpp (58%) rename src/deps/basis-universal/{ => encoder}/basisu_enc.h (87%) create mode 100644 src/deps/basis-universal/encoder/basisu_etc.cpp rename src/deps/basis-universal/{ => encoder}/basisu_etc.h (86%) rename src/deps/basis-universal/{ => encoder}/basisu_frontend.cpp (75%) rename src/deps/basis-universal/{ => encoder}/basisu_frontend.h (89%) rename src/deps/basis-universal/{ => encoder}/basisu_global_selector_palette_helpers.cpp (100%) rename src/deps/basis-universal/{ => encoder}/basisu_global_selector_palette_helpers.h (92%) rename src/deps/basis-universal/{ => encoder}/basisu_gpu_texture.cpp (71%) rename src/deps/basis-universal/{ => encoder}/basisu_gpu_texture.h (87%) create mode 100644 src/deps/basis-universal/encoder/basisu_kernels_sse.cpp create mode 100644 src/deps/basis-universal/encoder/basisu_miniz.h create mode 100644 src/deps/basis-universal/encoder/basisu_pvrtc1_4.cpp rename src/deps/basis-universal/{ => encoder}/basisu_pvrtc1_4.h (77%) rename src/deps/basis-universal/{ => encoder}/basisu_resample_filters.cpp (87%) rename src/deps/basis-universal/{ => encoder}/basisu_resampler.cpp (99%) rename src/deps/basis-universal/{ => encoder}/basisu_resampler.h (99%) rename src/deps/basis-universal/{ => encoder}/basisu_resampler_filters.h (96%) rename src/deps/basis-universal/{ => encoder}/basisu_ssim.cpp (100%) rename src/deps/basis-universal/{ => encoder}/basisu_ssim.h (100%) create mode 100644 src/deps/basis-universal/encoder/basisu_uastc_enc.cpp create mode 100644 src/deps/basis-universal/encoder/basisu_uastc_enc.h create mode 100644 src/deps/basis-universal/encoder/jpgd.cpp create mode 100644 src/deps/basis-universal/encoder/jpgd.h rename src/deps/basis-universal/{ => encoder}/lodepng.cpp (99%) rename 
src/deps/basis-universal/{ => encoder}/lodepng.h (100%) create mode 100644 src/deps/basis-universal/transcoder/basisu_containers.h create mode 100644 src/deps/basis-universal/transcoder/basisu_containers_impl.h create mode 100644 src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_55.inc create mode 100644 src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_56.inc create mode 100644 src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc create mode 100644 src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc create mode 100644 src/deps/basis-universal/transcoder/basisu_transcoder_uastc.h create mode 100644 src/deps/basis-universal/zstd/zstd.c create mode 100644 src/deps/basis-universal/zstd/zstd.h diff --git a/src/cmake/dependencies.cmake b/src/cmake/dependencies.cmake index f8cf14379e..59ed1ad41e 100644 --- a/src/cmake/dependencies.cmake +++ b/src/cmake/dependencies.cmake @@ -211,6 +211,7 @@ if(NOT USE_SYSTEM_MAGNUM) # submodule we bundle just the transcoder files, and only a subset of the # formats (BC7 mode 6 has > 1 MB tables, ATC/FXT1/PVRTC2 are quite rare and # not supported by Magnum). + set(CMAKE_DISABLE_FIND_PACKAGE_Zstd ON) set(BASIS_UNIVERSAL_DIR "${DEPS_DIR}/basis-universal") set( CMAKE_CXX_FLAGS diff --git a/src/deps/basis-universal/basisu_comp.cpp b/src/deps/basis-universal/basisu_comp.cpp deleted file mode 100644 index 1e4679311c..0000000000 --- a/src/deps/basis-universal/basisu_comp.cpp +++ /dev/null @@ -1,1206 +0,0 @@ -// basisu_comp.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "basisu_comp.h" -#include "basisu_enc.h" -#include - -#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 -#define DEBUG_CROP_TEXTURE_TO_64x64 (0) -#define DEBUG_RESIZE_TEXTURE (0) -#define DEBUG_EXTRACT_SINGLE_BLOCK (0) - -namespace basisu -{ - basis_compressor::basis_compressor() : - m_total_blocks(0), - m_auto_global_sel_pal(false), - m_basis_file_size(0), - m_basis_bits_per_texel(0), - m_any_source_image_has_alpha(false) - { - debug_printf("basis_compressor::basis_compressor\n"); - } - - bool basis_compressor::init(const basis_compressor_params ¶ms) - { - debug_printf("basis_compressor::init\n"); - - m_params = params; - - if (m_params.m_debug) - { - debug_printf("basis_compressor::init:\n"); - -#define PRINT_BOOL_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); -#define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); -#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); -#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); - - debug_printf("Has global selector codebook: %i\n", m_params.m_pSel_codebook != nullptr); - - debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i\n", - (uint32_t)m_params.m_source_images.size(), (uint32_t)m_params.m_source_filenames.size(), (uint32_t)m_params.m_source_alpha_filenames.size()); - - 
PRINT_BOOL_VALUE(m_y_flip); - PRINT_BOOL_VALUE(m_debug); - PRINT_BOOL_VALUE(m_debug_images); - PRINT_BOOL_VALUE(m_global_sel_pal); - PRINT_BOOL_VALUE(m_auto_global_sel_pal); - PRINT_BOOL_VALUE(m_compression_level); - PRINT_BOOL_VALUE(m_no_hybrid_sel_cb); - PRINT_BOOL_VALUE(m_perceptual); - PRINT_BOOL_VALUE(m_no_endpoint_rdo); - PRINT_BOOL_VALUE(m_no_selector_rdo); - PRINT_BOOL_VALUE(m_read_source_images); - PRINT_BOOL_VALUE(m_write_output_basis_files); - PRINT_BOOL_VALUE(m_compute_stats); - PRINT_BOOL_VALUE(m_check_for_alpha) - PRINT_BOOL_VALUE(m_force_alpha) - PRINT_BOOL_VALUE(m_seperate_rg_to_color_alpha); - PRINT_BOOL_VALUE(m_multithreading); - PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); - - PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh); - - PRINT_INT_VALUE(m_global_pal_bits); - PRINT_INT_VALUE(m_global_mod_bits); - - PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); - PRINT_FLOAT_VALUE(m_selector_rdo_thresh); - - PRINT_BOOL_VALUE(m_mip_gen); - PRINT_BOOL_VALUE(m_mip_renormalize); - PRINT_BOOL_VALUE(m_mip_wrapping); - PRINT_BOOL_VALUE(m_mip_srgb); - PRINT_FLOAT_VALUE(m_mip_premultiplied); - PRINT_FLOAT_VALUE(m_mip_scale); - PRINT_INT_VALUE(m_mip_smallest_dimension); - debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str()); - - debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters); - debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters); - debug_printf("m_quality_level: %i\n", m_params.m_quality_level); - - debug_printf("m_tex_type: %u\n", m_params.m_tex_type); - debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); - debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 
1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); - -#undef PRINT_BOOL_VALUE -#undef PRINT_INT_VALUE -#undef PRINT_UINT_VALUE -#undef PRINT_FLOAT_VALUE - } - - if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) - { - assert(0); - return false; - } - - return true; - } - - basis_compressor::error_code basis_compressor::process() - { - debug_printf("basis_compressor::process\n"); - - if (!read_source_images()) - return cECFailedReadingSourceImages; - - if (!validate_texture_type_constraints()) - return cECFailedValidating; - - if (!process_frontend()) - return cECFailedFrontEnd; - - if (!extract_frontend_texture_data()) - return cECFailedFontendExtract; - - if (!process_backend()) - return cECFailedBackend; - - if (!create_basis_file_and_transcode()) - return cECFailedCreateBasisFile; - - if (!write_output_files_and_compute_stats()) - return cECFailedWritingOutput; - - return cECSuccess; - } - - bool basis_compressor::generate_mipmaps(const image &img, std::vector &mips, bool has_alpha) - { - debug_printf("basis_compressor::generate_mipmaps\n"); - - uint32_t total_levels = 1; - uint32_t w = img.get_width(), h = img.get_height(); - while (maximum(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) - { - w = maximum(w >> 1U, 1U); - h = maximum(h >> 1U, 1U); - total_levels++; - } - -#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN - // Requires stb_image_resize - stbir_filter filter = STBIR_FILTER_DEFAULT; - if (m_params.m_mip_filter == "box") - filter = STBIR_FILTER_BOX; - else if (m_params.m_mip_filter == "triangle") - filter = STBIR_FILTER_TRIANGLE; - else if (m_params.m_mip_filter == "cubic") - filter = STBIR_FILTER_CUBICBSPLINE; - else if (m_params.m_mip_filter == "catmull") - filter = STBIR_FILTER_CATMULLROM; - else if (m_params.m_mip_filter == "mitchell") - filter = STBIR_FILTER_MITCHELL; - - for (uint32_t level = 1; level < total_levels; level++) - { - const uint32_t level_width = maximum(1, img.get_width() >> level); - const uint32_t 
level_height = maximum(1, img.get_height() >> level); - - image &level_img = *enlarge_vector(mips, 1); - level_img.resize(level_width, level_height); - - int result = stbir_resize_uint8_generic( - (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), - (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), - has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, - m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, - nullptr); - - if (result == 0) - { - error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); - return false; - } - - if (m_params.m_mip_renormalize) - level_img.renormalize_normal_map(); - } -#else - for (uint32_t level = 1; level < total_levels; level++) - { - const uint32_t level_width = maximum(1, img.get_width() >> level); - const uint32_t level_height = maximum(1, img.get_height() >> level); - - image &level_img = *enlarge_vector(mips, 1); - level_img.resize(level_width, level_height); - - bool status = image_resample(img, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); - if (!status) - { - error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); - return false; - } - - if (m_params.m_mip_renormalize) - level_img.renormalize_normal_map(); - } -#endif - - return true; - } - - bool basis_compressor::read_source_images() - { - debug_printf("basis_compressor::read_source_images\n"); - - const uint32_t total_source_files = m_params.m_read_source_images ? 
(uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size(); - if (!total_source_files) - return false; - - m_stats.resize(0); - m_slice_descs.resize(0); - m_slice_images.resize(0); - - m_total_blocks = 0; - uint32_t total_macroblocks = 0; - - m_any_source_image_has_alpha = false; - - std::vector source_images; - std::vector source_filenames; - - // First load all source images, and determine if any have an alpha channel. - for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) - { - const char *pSource_filename = ""; - - image file_image; - - if (m_params.m_read_source_images) - { - pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); - - // Load the source image - if (!load_png(pSource_filename, file_image)) - { - error_printf("Failed reading source image: %s\n", pSource_filename); - return false; - } - - printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); - - // Optionally load another image and put a grayscale version of it into the alpha channel. 
- if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) - { - const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); - - image alpha_data; - - if (!load_png(pSource_alpha_image, alpha_data)) - { - error_printf("Failed reading source image: %s\n", pSource_alpha_image); - return false; - } - - printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); - - alpha_data.crop(file_image.get_width(), file_image.get_height()); - - for (uint32_t y = 0; y < file_image.get_height(); y++) - for (uint32_t x = 0; x < file_image.get_width(); x++) - file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); - } - } - else - { - file_image = m_params.m_source_images[source_file_index]; - } - - if (m_params.m_seperate_rg_to_color_alpha) - { - // Used for XY normal maps in RG - puts X in color, Y in alpha - for (uint32_t y = 0; y < file_image.get_height(); y++) - for (uint32_t x = 0; x < file_image.get_width(); x++) - { - const color_rgba &c = file_image(x, y); - file_image(x, y).set_noclamp_rgba(c.r, c.r, c.r, c.g); - } - } - - bool has_alpha = false; - if ((m_params.m_force_alpha) || (m_params.m_seperate_rg_to_color_alpha)) - has_alpha = true; - else if (!m_params.m_check_for_alpha) - file_image.set_alpha(255); - else if (file_image.has_alpha()) - has_alpha = true; - - if (has_alpha) - m_any_source_image_has_alpha = true; - - debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); - - if (m_params.m_y_flip) - file_image.flip_y(); - -#if DEBUG_EXTRACT_SINGLE_BLOCK - image block_image(4, 4); - const uint32_t block_x = 0; - const uint32_t block_y = 0; - block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); - file_image = block_image; -#endif - -#if 
DEBUG_CROP_TEXTURE_TO_64x64 - file_image.resize(64, 64); -#endif -#if DEBUG_RESIZE_TEXTURE - image temp_img((file_image.get_width() + 1) / 2, (file_image.get_height() + 1) / 2); - image_resample(file_image, temp_img, m_params.m_perceptual, "kaiser"); - temp_img.swap(file_image); -#endif - - if ((!file_image.get_width()) || (!file_image.get_height())) - { - error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); - return false; - } - - if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) - { - error_printf("basis_compressor::read_source_images: Source image is too large!\n"); - return false; - } - - source_images.push_back(file_image); - source_filenames.push_back(pSource_filename); - } - - debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); - - for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) - { - image &file_image = source_images[source_file_index]; - const std::string &source_filename = source_filenames[source_file_index]; - - std::vector slices; - - slices.reserve(32); - slices.push_back(file_image); - - if (m_params.m_mip_gen) - { - if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha)) - return false; - } - - uint_vec mip_indices(slices.size()); - for (uint32_t i = 0; i < slices.size(); i++) - mip_indices[i] = i; - - if (m_any_source_image_has_alpha) - { - // If source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. 
- std::vector alpha_slices; - uint_vec new_mip_indices; - - alpha_slices.reserve(slices.size() * 2); - - for (uint32_t i = 0; i < slices.size(); i++) - { - image lvl_rgb(slices[i]); - image lvl_a(lvl_rgb); - - for (uint32_t y = 0; y < lvl_a.get_height(); y++) - { - for (uint32_t x = 0; x < lvl_a.get_width(); x++) - { - uint8_t a = lvl_a(x, y).a; - lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); - } - } - - lvl_rgb.set_alpha(255); - - alpha_slices.push_back(lvl_rgb); - new_mip_indices.push_back(i); - - alpha_slices.push_back(lvl_a); - new_mip_indices.push_back(i); - } - - slices.swap(alpha_slices); - mip_indices.swap(new_mip_indices); - } - - assert(slices.size() == mip_indices.size()); - - for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) - { - const bool is_alpha_slice = m_any_source_image_has_alpha && ((slice_index & 1) != 0); - - image &slice_image = slices[slice_index]; - const uint32_t orig_width = slice_image.get_width(); - const uint32_t orig_height = slice_image.get_height(); - - // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. 
- slice_image.crop_dup_borders(slice_image.get_block_width(4) * 4, slice_image.get_block_height(4) * 4); - - if (m_params.m_debug_images) - { - save_png(string_format("basis_debug_source_image_%u_%u.png", source_file_index, slice_index).c_str(), slice_image); - } - - enlarge_vector(m_stats, 1); - enlarge_vector(m_slice_images, 1); - enlarge_vector(m_slice_descs, 1); - - const uint32_t dest_image_index = (uint32_t)m_stats.size() - 1; - - m_stats[dest_image_index].m_filename = source_filename.c_str(); - m_stats[dest_image_index].m_width = orig_width; - m_stats[dest_image_index].m_height = orig_height; - - m_slice_images[dest_image_index] = slice_image; - - debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height()); - - basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; - - slice_desc.m_first_block_index = m_total_blocks; - - slice_desc.m_orig_width = orig_width; - slice_desc.m_orig_height = orig_height; - - slice_desc.m_width = slice_image.get_width(); - slice_desc.m_height = slice_image.get_height(); - - slice_desc.m_num_blocks_x = slice_image.get_block_width(4); - slice_desc.m_num_blocks_y = slice_image.get_block_height(4); - - slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; - slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; - - slice_desc.m_source_file_index = source_file_index; - - slice_desc.m_mip_index = mip_indices[slice_index]; - - slice_desc.m_alpha = is_alpha_slice; - slice_desc.m_iframe = false; - if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) - { - slice_desc.m_iframe = (source_file_index == 0); - } - - m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y; 
- - } // slice_index - - } // source_file_index - - debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks); - - // Make sure we don't have too many slices - if (m_slice_descs.size() > BASISU_MAX_SLICES) - { - error_printf("Too many slices!\n"); - return false; - } - - // Basic sanity check on the slices - for (uint32_t i = 1; i < m_slice_descs.size(); i++) - { - const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; - const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; - - // Make sure images are in order - int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; - if (image_delta > 1) - return false; - - // Make sure mipmap levels are in order - if (!image_delta) - { - int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index; - if (level_delta > 1) - return false; - } - } - - printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size()); - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; - - printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", - i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); - - if (m_any_source_image_has_alpha) - { - // Alpha slices must be at odd slice indices - if (slice_desc.m_alpha) - { - if ((i & 1) == 0) - return false; - - const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; - - // Make sure previous slice has this image's color data - if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index) - return false; - if (prev_slice_desc.m_alpha) - return false; - if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index) - return 
false; - if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x) - return false; - if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y) - return false; - } - else if (i & 1) - return false; - } - else if (slice_desc.m_alpha) - { - return false; - } - - if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) - return false; - if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) - { - if (!slice_desc.m_iframe) - return false; - } - } - - return true; - } - - // Do some basic validation for 2D arrays, cubemaps, video, and volumes. - bool basis_compressor::validate_texture_type_constraints() - { - debug_printf("basis_compressor::validate_texture_type_constraints\n"); - - // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). - if (m_params.m_tex_type == basist::cBASISTexType2D) - return true; - - uint32_t total_basis_images = 0; - - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - total_basis_images = maximum(total_basis_images, slice_desc.m_source_file_index + 1); - } - - if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) - { - // For cubemaps, validate that the total # of Basis images is a multiple of 6. - if ((total_basis_images % 6) != 0) - { - error_printf("basis_compressor::validate_texture_type_constraints: For cubemaps the total number of input images is not a multiple of 6!\n"); - return false; - } - } - - // Now validate that all the mip0's have the same dimensions, and that each image has the same # of mipmap levels. 
- uint_vec image_mipmap_levels(total_basis_images); - - int width = -1, height = -1; - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); - - if (slice_desc.m_mip_index != 0) - continue; - - if (width < 0) - { - width = slice_desc.m_orig_width; - height = slice_desc.m_orig_height; - } - else if ((width != (int)slice_desc.m_orig_width) || (height != (int)slice_desc.m_orig_height)) - { - error_printf("basis_compressor::validate_texture_type_constraints: The source image resolutions are not all equal!\n"); - return false; - } - } - - for (size_t i = 1; i < image_mipmap_levels.size(); i++) - { - if (image_mipmap_levels[0] != image_mipmap_levels[i]) - { - error_printf("basis_compressor::validate_texture_type_constraints: Each image must have the same number of mipmap levels!\n"); - return false; - } - } - - return true; - } - - bool basis_compressor::process_frontend() - { - debug_printf("basis_compressor::process_frontend\n"); - - m_source_blocks.resize(m_total_blocks); - - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; - const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; - - const image &source_image = m_slice_images[slice_index]; - - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); - } - -#if 0 - // TODO - basis_etc1_pack_params pack_params; - pack_params.m_quality = cETCQualityMedium; - 
pack_params.m_perceptual = m_params.m_perceptual; - pack_params.m_use_color4 = false; - - pack_etc1_block_context pack_context; - - std::unordered_set endpoint_hash; - std::unordered_set selector_hash; - - for (uint32_t i = 0; i < m_source_blocks.size(); i++) - { - etc_block blk; - pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context); - - const color_rgba c0(blk.get_block_color(0, false)); - endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16)); - - const color_rgba c1(blk.get_block_color(1, false)); - endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16)); - - selector_hash.insert(blk.get_raw_selector_bits()); - } - - const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size(); - const uint32_t total_unique_selectors = (uint32_t)selector_hash.size(); - - if (m_params.m_debug) - { - debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors); - } -#endif - - const double total_texels = m_total_blocks * 16.0f; - - int endpoint_clusters = m_params.m_max_endpoint_clusters; - int selector_clusters = m_params.m_max_selector_clusters; - - if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters) - { - error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters); - return false; - } - if (selector_clusters > basisu_frontend::cMaxSelectorClusters) - { - error_printf("Too many selector clusters! 
(%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); - return false; - } - - if (m_params.m_quality_level != -1) - { - const float quality = saturate(m_params.m_quality_level / 255.0f); - - const float bits_per_endpoint_cluster = 14.0f; - const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f - int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); - - const float mid = 128.0f / 255.0f; - - float color_endpoint_quality = quality; - - const float endpoint_split_point = 0.5f; - if (color_endpoint_quality <= mid) - { - color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f)); - - max_endpoints = clamp(max_endpoints, 256, 3072); - max_endpoints = minimum(max_endpoints, m_total_blocks); - - if (max_endpoints < 64) - max_endpoints = 64; - endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); - } - else - { - color_endpoint_quality = powf((color_endpoint_quality - mid) / (1.0f - mid), 1.6f); - - max_endpoints = clamp(max_endpoints, 256, 8192); - max_endpoints = minimum(max_endpoints, m_total_blocks); - - if (max_endpoints < 3072) - max_endpoints = 3072; - endpoint_clusters = clamp((uint32_t)(.5f + lerp(3072, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); - } - - float bits_per_selector_cluster = m_params.m_global_sel_pal ? 
21.0f : 14.0f; - - const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f - int max_selectors = static_cast((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster); - max_selectors = clamp(max_selectors, 256, basisu_frontend::cMaxSelectorClusters); - max_selectors = minimum(max_selectors, m_total_blocks); - - float color_selector_quality = quality; - //color_selector_quality = powf(color_selector_quality, 1.65f); - color_selector_quality = powf(color_selector_quality, 2.62f); - - if (max_selectors < 96) - max_selectors = 96; - selector_clusters = clamp((uint32_t)(.5f + lerp(96, static_cast(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters); - - debug_printf("Max endpoints: %u, max selectors: %u\n", endpoint_clusters, selector_clusters); - - if (m_params.m_quality_level >= 223) - { - if (!m_params.m_selector_rdo_thresh.was_changed()) - { - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= .25f; - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= .25f; - } - } - else if (m_params.m_quality_level >= 192) - { - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= .5f; - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= .5f; - } - else if (m_params.m_quality_level >= 160) - { - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= .75f; - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= .75f; - } - else if (m_params.m_quality_level >= 129) - { - float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); - - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= lerp(1.0f, .75f, l); - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); - } - } - - m_auto_global_sel_pal = false; - 
if (!m_params.m_global_sel_pal && m_params.m_auto_global_sel_pal) - { - const float bits_per_selector_cluster = 31.0f; - double selector_codebook_bpp_est = (bits_per_selector_cluster * selector_clusters) / total_texels; - debug_printf("selector_codebook_bpp_est: %f\n", selector_codebook_bpp_est); - const float force_global_sel_pal_bpp_threshold = .15f; - if ((total_texels <= 128.0f*128.0f) && (selector_codebook_bpp_est > force_global_sel_pal_bpp_threshold)) - { - m_auto_global_sel_pal = true; - debug_printf("Auto global selector palette enabled\n"); - } - } - - basisu_frontend::params p; - p.m_num_source_blocks = m_total_blocks; - p.m_pSource_blocks = &m_source_blocks[0]; - p.m_max_endpoint_clusters = endpoint_clusters; - p.m_max_selector_clusters = selector_clusters; - p.m_perceptual = m_params.m_perceptual; - p.m_debug_stats = m_params.m_debug; - p.m_debug_images = m_params.m_debug_images; - p.m_compression_level = m_params.m_compression_level; - p.m_tex_type = m_params.m_tex_type; - p.m_multithreaded = m_params.m_multithreading; - p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks; - p.m_pJob_pool = m_params.m_pJob_pool; - - if ((m_params.m_global_sel_pal) || (m_auto_global_sel_pal)) - { - p.m_pGlobal_sel_codebook = m_params.m_pSel_codebook; - p.m_num_global_sel_codebook_pal_bits = m_params.m_global_pal_bits; - p.m_num_global_sel_codebook_mod_bits = m_params.m_global_mod_bits; - p.m_use_hybrid_selector_codebooks = !m_params.m_no_hybrid_sel_cb; - p.m_hybrid_codebook_quality_thresh = m_params.m_hybrid_sel_cb_quality_thresh; - } - - if (!m_frontend.init(p)) - { - error_printf("basisu_frontend::init() failed!\n"); - return false; - } - - m_frontend.compress(); - - if (m_params.m_debug_images) - { - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - char filename[1024]; -#ifdef _WIN32 - sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#else - snprintf(filename, 
sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#endif - m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); - -#ifdef _WIN32 - sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#else - snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#endif - m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); - } - } - - return true; - } - - bool basis_compressor::extract_frontend_texture_data() - { - debug_printf("basis_compressor::extract_frontend_texture_data\n"); - - m_frontend_output_textures.resize(m_slice_descs.size()); - m_best_etc1s_images.resize(m_slice_descs.size()); - m_best_etc1s_images_unpacked.resize(m_slice_descs.size()); - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; - - const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; - const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; - - const uint32_t width = num_blocks_x * 4; - const uint32_t height = num_blocks_y * 4; - - m_frontend_output_textures[i].init(texture_format::cETC1, width, height); - - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); - -#if 0 - if (m_params.m_debug_images) - { - char filename[1024]; - sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i); - write_etc1_vis_images(m_frontend_output_textures[i], filename); - } -#endif - - m_best_etc1s_images[i].init(texture_format::cETC1, width, height); - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t 
block_x = 0; block_x < num_blocks_x; block_x++) - memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); - - m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); - } - - return true; - } - - bool basis_compressor::process_backend() - { - debug_printf("basis_compressor::process_backend\n"); - - basisu_backend_params backend_params; - backend_params.m_debug = m_params.m_debug; - backend_params.m_debug_images = m_params.m_debug_images; - backend_params.m_etc1s = true; - backend_params.m_compression_level = m_params.m_compression_level; - - if (!m_params.m_no_endpoint_rdo) - backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; - - if (!m_params.m_no_selector_rdo) - backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; - - backend_params.m_use_global_sel_codebook = (m_frontend.get_params().m_pGlobal_sel_codebook != NULL); - backend_params.m_global_sel_codebook_pal_bits = m_frontend.get_params().m_num_global_sel_codebook_pal_bits; - backend_params.m_global_sel_codebook_mod_bits = m_frontend.get_params().m_num_global_sel_codebook_mod_bits; - backend_params.m_use_hybrid_sel_codebooks = m_frontend.get_params().m_use_hybrid_selector_codebooks; - - m_backend.init(&m_frontend, backend_params, m_slice_descs, m_params.m_pSel_codebook); - uint32_t total_packed_bytes = m_backend.encode(); - - if (!total_packed_bytes) - { - error_printf("basis_compressor::encode() failed!\n"); - return false; - } - - debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes); - - return true; - } - - bool basis_compressor::create_basis_file_and_transcode() - { - debug_printf("basis_compressor::create_basis_file_and_transcode\n"); - - const basisu_backend_output &encoded_output = m_backend.get_output(); - - if (!m_basis_file.init(encoded_output, m_params.m_tex_type, m_params.m_userdata0, 
m_params.m_userdata1, m_params.m_y_flip, m_params.m_us_per_frame)) - { - error_printf("basis_compressor::write_output_files_and_compute_stats: basisu_backend:init() failed!\n"); - return false; - } - - const uint8_vec &comp_data = m_basis_file.get_compressed_data(); - - m_output_basis_file = comp_data; - - // Verify the compressed data by transcoding it to ETC1/BC1 and validating the CRC's. - basist::basisu_transcoder decoder(m_params.m_pSel_codebook); - if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true)) - { - error_printf("decoder.validate_file_checksums() failed!\n"); - return false; - } - - m_decoded_output_textures.resize(m_slice_descs.size()); - m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); - - m_decoded_output_textures_bc1.resize(m_slice_descs.size()); - m_decoded_output_textures_unpacked_bc1.resize(m_slice_descs.size()); - - interval_timer tm; - tm.start(); - - if (!decoder.start_transcoding(&comp_data[0], (uint32_t)comp_data.size())) - { - error_printf("decoder.start_transcoding() failed!\n"); - return false; - } - - debug_printf("basisu_comppressor::start_transcoding() took %3.3fms\n", tm.get_elapsed_ms()); - - uint32_t total_orig_pixels = 0; - uint32_t total_texels = 0; - - double total_time_etc1 = 0; - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - gpu_image decoded_texture; - decoded_texture.init(texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); - - tm.start(); - - if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cETC1, 8)) - { - error_printf("Transcoding failed to ETC1 on slice %u!\n", i); - return false; - } - - total_time_etc1 += tm.get_elapsed_secs(); - - uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0); - if (image_crc16 != 
m_backend.get_output().m_slice_image_crcs[i]) - { - error_printf("Decoded image data CRC check failed on slice %u!\n", i); - return false; - } - debug_printf("Decoded image data CRC check succeeded on slice %i\n", i); - - m_decoded_output_textures[i] = decoded_texture; - - total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height; - total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height; - } - - tm.start(); - - basist::basisu_transcoder_init(); - - debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); - - double total_time_bc1 = 0; - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - gpu_image decoded_texture; - decoded_texture.init(texture_format::cBC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); - - tm.start(); - - if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC1, 8)) - { - error_printf("Transcoding failed to BC1 on slice %u!\n", i); - return false; - } - - total_time_bc1 += tm.get_elapsed_secs(); - - m_decoded_output_textures_bc1[i] = decoded_texture; - } - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); - m_decoded_output_textures_bc1[i].unpack(m_decoded_output_textures_unpacked_bc1[i]); - } - - debug_printf("Transcoded to ETC1 in %3.3fms, %f texels/sec\n", total_time_etc1 * 1000.0f, total_orig_pixels / total_time_etc1); - - debug_printf("Transcoded to BC1 in %3.3fms, %f texels/sec\n", total_time_bc1 * 1000.0f, total_orig_pixels / total_time_bc1); - - debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); - - m_output_blocks.resize(0); - - uint32_t total_orig_texels = 0; - for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); 
slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; - - const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - - assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); - - memcpy(enlarge_vector(m_output_blocks, total_blocks), m_decoded_output_textures[slice_index].get_ptr(), sizeof(etc_block) * total_blocks); - } - - m_basis_file_size = (uint32_t)comp_data.size(); - m_basis_bits_per_texel = (comp_data.size() * 8.0f) / total_orig_texels; - - return true; - } - - bool basis_compressor::write_output_files_and_compute_stats() - { - debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); - - if (m_params.m_write_output_basis_files) - { - const uint8_vec &comp_data = m_basis_file.get_compressed_data(); - - const std::string& basis_filename = m_params.m_out_filename; - - if (!write_vec_to_file(basis_filename.c_str(), comp_data)) - { - error_printf("Failed writing output data to file \"%s\"\n", basis_filename.c_str()); - return false; - } - - printf("Wrote output .basis file \"%s\"\n", basis_filename.c_str()); - } - - m_stats.resize(m_slice_descs.size()); - - uint32_t total_orig_texels = 0; - - for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; - - if (m_params.m_compute_stats) - { - printf("Slice: %u\n", slice_index); - - image_stats &s = m_stats[slice_index]; - - // TODO: We used to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back. 
- - image_metrics em; - - // ---- .basis ETC1S stats - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); - em.print(".basis ETC1S 709 Luma: "); - - s.m_basis_etc1s_luma_709_psnr = static_cast(em.m_psnr); - s.m_basis_etc1s_luma_709_ssim = static_cast(em.m_ssim); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); - em.print(".basis ETC1S 601 Luma: "); - - s.m_basis_etc1s_luma_601_psnr = static_cast(em.m_psnr); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); - em.print(".basis ETC1S RGB Avg: "); - - s.m_basis_etc1s_rgb_avg_psnr = em.m_psnr; - - if (m_slice_descs.size() == 1) - { - debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_etc1s_luma_709_psnr / ((m_backend.get_output().get_output_size_estimate() * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); - } - - // ---- .basis BC1 stats - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 0); - em.print(".basis BC1 709 Luma: "); - - s.m_basis_bc1_luma_709_psnr = static_cast(em.m_psnr); - s.m_basis_bc1_luma_709_ssim = static_cast(em.m_ssim); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 0, true, true); - em.print(".basis BC1 601 Luma: "); - - s.m_basis_bc1_luma_601_psnr = static_cast(em.m_psnr); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 3); - em.print(".basis BC1 RGB Avg: "); - - s.m_basis_bc1_rgb_avg_psnr = static_cast(em.m_psnr); - - // ---- Nearly best possible ETC1S stats - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0); - em.print("Unquantized ETC1S 709 Luma: "); - - s.m_best_luma_709_psnr = static_cast(em.m_psnr); - s.m_best_luma_709_ssim = static_cast(em.m_ssim); - - em.calc(m_slice_images[slice_index], 
m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true); - em.print("Unquantized ETC1S 601 Luma: "); - - s.m_best_luma_601_psnr = static_cast(em.m_psnr); - - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); - em.print("Unquantized ETC1S RGB Avg: "); - - s.m_best_rgb_avg_psnr = static_cast(em.m_psnr); - } - - if (m_frontend.get_params().m_debug_images) - { - std::string out_basename; - if (m_params.m_out_filename.size()) - string_get_filename(m_params.m_out_filename.c_str(), out_basename); - else if (m_params.m_source_filenames.size()) - string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); - - string_remove_extension(out_basename); - out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); - - // Write "best" ETC1S debug images - { - gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); - best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image); - - image best_etc1s_unpacked; - best_etc1s_gpu_image.unpack(best_etc1s_unpacked); - save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); - } - - // Write decoded ETC1S debug images - { - gpu_image decoded_etc1s(m_decoded_output_textures[slice_index]); - decoded_etc1s.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_decoded_etc1s.ktx").c_str(), decoded_etc1s); - - image temp(m_decoded_output_textures_unpacked[slice_index]); - temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); - save_png(out_basename + "_decoded_etc1s.png", temp); - } - - // Write decoded BC1 debug images - { - gpu_image decoded_bc1(m_decoded_output_textures_bc1[slice_index]); - decoded_bc1.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - 
write_compressed_texture_file((out_basename + "_decoded_bc1.ktx").c_str(), decoded_bc1); - - image temp(m_decoded_output_textures_unpacked_bc1[slice_index]); - temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); - save_png(out_basename + "_decoded_bc1.png", temp); - } - } - } - - return true; - } - -} // namespace basisu diff --git a/src/deps/basis-universal/basisu_etc.cpp b/src/deps/basis-universal/basisu_etc.cpp deleted file mode 100644 index 244f1d2e6b..0000000000 --- a/src/deps/basis-universal/basisu_etc.cpp +++ /dev/null @@ -1,1074 +0,0 @@ -// basis_etc.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#include "basisu_etc.h" - -#define BASISU_DEBUG_ETC_ENCODER 0 -#define BASISU_DEBUG_ETC_ENCODER_DEEPER 0 - -namespace basisu -{ - const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; - - static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = - { - { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, - { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, - { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, - { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, - { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, - { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, - { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, - { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, - { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, - { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, - { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, - { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, - { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, - { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, - { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, - { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, - { { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, - { { 2, 2, 4, 0 } },{ 
{ 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, - { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, - { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, - { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, - { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, - { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, - { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, - { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, - { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, - { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, - { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, - { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, - { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, - { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 1, 0, 0, 7 } }, - { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, - { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } - }; - - const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = - { - { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, - { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } - }; - - const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; - const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - - // 
[flip][subblock][pixel_index] - const etc_coord2 g_etc1_pixel_coords[2][2][8] = - { - { - { - { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, - { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } - }, - { - { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, - { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } - } - }, - { - { - { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, - { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } - }, - { - { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, - { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } - }, - } - }; - - // [flip][subblock][pixel_index] - const uint32_t g_etc1_pixel_indices[2][2][8] = - { - { - { - 0 + 4 * 0, 0 + 4 * 1, 0 + 4 * 2, 0 + 4 * 3, - 1 + 4 * 0, 1 + 4 * 1, 1 + 4 * 2, 1 + 4 * 3 - }, - { - 2 + 4 * 0, 2 + 4 * 1, 2 + 4 * 2, 2 + 4 * 3, - 3 + 4 * 0, 3 + 4 * 1, 3 + 4 * 2, 3 + 4 * 3 - } - }, - { - { - 0 + 4 * 0, 1 + 4 * 0, 2 + 4 * 0, 3 + 4 * 0, - 0 + 4 * 1, 1 + 4 * 1, 2 + 4 * 1, 3 + 4 * 1 - }, - { - 0 + 4 * 2, 1 + 4 * 2, 2 + 4 * 2, 3 + 4 * 2, - 0 + 4 * 3, 1 + 4 * 3, 2 + 4 * 3, 3 + 4 * 3 - }, - } - }; - - uint16_t etc_block::pack_color5(const color_rgba& color, bool scaled, uint32_t bias) - { - return pack_color5(color.r, color.g, color.b, scaled, bias); - } - - uint16_t etc_block::pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) - { - if (scaled) - { - r = (r * 31U + bias) / 255U; - g = (g * 31U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } - - r = minimum(r, 31U); - g = minimum(g, 31U); - b = minimum(b, 31U); - - return static_cast(b | (g << 5U) | (r << 10U)); - } - - color_rgba etc_block::unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) - { - uint32_t b = packed_color5 & 31U; - uint32_t g = (packed_color5 >> 5U) & 31U; - uint32_t r = (packed_color5 >> 10U) & 31U; - - if (scaled) - { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); - } - - void etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled) - { - result = 
unpack_color5(packed_color5, scaled, 255); - } - - void etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) - { - color_rgba c(unpack_color5(packed_color5, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; - } - - bool etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) - { - color_rgba_i16 dc(unpack_delta3(packed_delta3)); - - int b = (packed_color5 & 31U) + dc.b; - int g = ((packed_color5 >> 5U) & 31U) + dc.g; - int r = ((packed_color5 >> 10U) & 31U) + dc.r; - - bool success = true; - if (static_cast(r | g | b) > 31U) - { - success = false; - r = clamp(r, 0, 31); - g = clamp(g, 0, 31); - b = clamp(b, 0, 31); - } - - if (scaled) - { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - result.set_noclamp_rgba(r, g, b, minimum(alpha, 255U)); - return success; - } - - bool etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) - { - color_rgba result; - const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); - r = result.r; - g = result.g; - b = result.b; - return success; - } - - uint16_t etc_block::pack_delta3(const color_rgba_i16& color) - { - return pack_delta3(color.r, color.g, color.b); - } - - uint16_t etc_block::pack_delta3(int r, int g, int b) - { - assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); - assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); - assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); - if (r < 0) r += 8; - if (g < 0) g += 8; - if (b < 0) b += 8; - return static_cast(b | (g << 3) | (r << 6)); - } - - color_rgba_i16 etc_block::unpack_delta3(uint16_t packed_delta3) - { - int r = (packed_delta3 >> 6) & 7; - int g = (packed_delta3 >> 3) & 7; - int b = packed_delta3 & 7; - if (r >= 4) r -= 8; - if (g >= 4) g -= 8; - if (b >= 4) b -= 8; 
- return color_rgba_i16(r, g, b, 255); - } - - void etc_block::unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) - { - r = (packed_delta3 >> 6) & 7; - g = (packed_delta3 >> 3) & 7; - b = packed_delta3 & 7; - if (r >= 4) r -= 8; - if (g >= 4) g -= 8; - if (b >= 4) b -= 8; - } - - uint16_t etc_block::pack_color4(const color_rgba& color, bool scaled, uint32_t bias) - { - return pack_color4(color.r, color.g, color.b, scaled, bias); - } - - uint16_t etc_block::pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) - { - if (scaled) - { - r = (r * 15U + bias) / 255U; - g = (g * 15U + bias) / 255U; - b = (b * 15U + bias) / 255U; - } - - r = minimum(r, 15U); - g = minimum(g, 15U); - b = minimum(b, 15U); - - return static_cast(b | (g << 4U) | (r << 8U)); - } - - color_rgba etc_block::unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) - { - uint32_t b = packed_color4 & 15U; - uint32_t g = (packed_color4 >> 4U) & 15U; - uint32_t r = (packed_color4 >> 8U) & 15U; - - if (scaled) - { - b = (b << 4U) | b; - g = (g << 4U) | g; - r = (r << 4U) | r; - } - - return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); - } - - void etc_block::unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) - { - color_rgba c(unpack_color4(packed_color4, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; - } - - void etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx) - { - assert(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint32_t r, g, b; - unpack_color5(r, g, b, packed_color5, true); - - const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0, 255); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1, 255); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + 
y2, ig + y2, ib + y2, 255); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3, 255); - } - - bool etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx) - { - assert(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint32_t r, g, b; - bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); - - const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0, 255); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1, 255); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2, 255); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3, 255); - - return success; - } - - void etc_block::get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx) - { - assert(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint32_t r, g, b; - unpack_color4(r, g, b, packed_color4, true); - - const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0, 255); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1, 255); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2, 255); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3, 255); - } - - bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) - { - const bool diff_flag = block.get_diff_bit(); - const bool flip_flag = block.get_flip_bit(); - const uint32_t table_index0 = block.get_inten_table(0); - const uint32_t table_index1 = block.get_inten_table(1); - - color_rgba 
subblock_colors0[4]; - color_rgba subblock_colors1[4]; - - if (diff_flag) - { - const uint16_t base_color5 = block.get_base5_color(); - const uint16_t delta_color3 = block.get_delta3_color(); - etc_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - - if (!etc_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) - return false; - } - else - { - const uint16_t base_color4_0 = block.get_base4_color(0); - etc_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); - - const uint16_t base_color4_1 = block.get_base4_color(1); - etc_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); - } - - if (preserve_alpha) - { - if (flip_flag) - { - for (uint32_t y = 0; y < 2; y++) - { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); - pDst += 4; - } - - for (uint32_t y = 2; y < 4; y++) - { - pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } - else - { - for (uint32_t y = 0; y < 4; y++) - { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } - } - else - { - if (flip_flag) - { - // 0000 - // 0000 - // 1111 - // 1111 - for (uint32_t y = 0; y < 2; y++) - { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors0[block.get_selector(2, y)]; - pDst[3] = 
subblock_colors0[block.get_selector(3, y)]; - pDst += 4; - } - - for (uint32_t y = 2; y < 4; y++) - { - pDst[0] = subblock_colors1[block.get_selector(0, y)]; - pDst[1] = subblock_colors1[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } - else - { - // 0011 - // 0011 - // 0011 - // 0011 - for (uint32_t y = 0; y < 4; y++) - { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } - } - - return true; - } - - inline int extend_6_to_8(uint32_t n) - { - return (n << 2) | (n >> 4); - } - - inline int extend_7_to_8(uint32_t n) - { - return (n << 1) | (n >> 6); - } - - inline int extend_4_to_8(uint32_t n) - { - return (n << 4) | n; - } - - uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const - { - color_rgba unpacked_block[16]; - - unpack_etc1(*this, unpacked_block); - - uint64_t total_error = 0; - - if (subblock_index < 0) - { - for (uint32_t i = 0; i < 16; i++) - total_error += color_distance(perceptual, pBlock_pixels[i], unpacked_block[i], false); - } - else - { - const bool flip_bit = get_flip_bit(); - - for (uint32_t i = 0; i < 8; i++) - { - const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; - - total_error += color_distance(perceptual, pBlock_pixels[idx], unpacked_block[idx], false); - } - } - - return total_error; - } - - void etc_block::get_subblock_pixels(color_rgba* pPixels, int subblock_index) const - { - if (subblock_index < 0) - unpack_etc1(*this, pPixels); - else - { - color_rgba unpacked_block[16]; - - unpack_etc1(*this, unpacked_block); - - const bool flip_bit = get_flip_bit(); - - for (uint32_t i = 0; i < 8; i++) - { - const uint32_t idx = 
g_etc1_pixel_indices[flip_bit][subblock_index][i]; - - pPixels[i] = unpacked_block[idx]; - } - } - } - - bool etc1_optimizer::compute() - { - assert(m_pResult->m_pSelectors); - - if ((m_pParams->m_pForce_selectors) || (m_pParams->m_pEval_solution_override)) - { - assert(m_pParams->m_quality >= cETCQualitySlow); - } - - const uint32_t n = m_pParams->m_num_src_pixels; - - if (m_pParams->m_cluster_fit) - { - if (m_pParams->m_quality == cETCQualityFast) - compute_internal_cluster_fit(4); - else if (m_pParams->m_quality == cETCQualityMedium) - compute_internal_cluster_fit(32); - else if (m_pParams->m_quality == cETCQualitySlow) - compute_internal_cluster_fit(64); - else - compute_internal_cluster_fit(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE); - } - else - compute_internal_neighborhood(m_br, m_bg, m_bb); - - if (!m_best_solution.m_valid) - { - m_pResult->m_error = UINT32_MAX; - return false; - } - - const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; - -#ifdef BASISU_BUILD_DEBUG - if (m_pParams->m_pEval_solution_override == nullptr) - { - color_rgba block_colors[4]; - m_best_solution.m_coords.get_block_colors(block_colors); - - const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - uint64_t actual_error = 0; - for (uint32_t i = 0; i < n; i++) - { - if ((m_pParams->m_perceptual) && (m_pParams->m_quality >= cETCQualitySlow)) - actual_error += color_distance(true, pSrc_pixels[i], block_colors[pSelectors[i]], false); - else - actual_error += color_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); - } - assert(actual_error == m_best_solution.m_error); - } -#endif - - m_pResult->m_error = m_best_solution.m_error; - - m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; - m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; - - m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; - memcpy(m_pResult->m_pSelectors, pSelectors, n); - m_pResult->m_n = n; - - return true; - } - - void 
etc1_optimizer::refine_solution(uint32_t max_refinement_trials) - { - // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. - // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors: - // The goal is: - // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 - // Rearranging this: - // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 - // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 - // So what this means: - // optimal_block_color = avg_input - avg_inten_delta - // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. - // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. 
- // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. - - const uint32_t n = m_pParams->m_num_src_pixels; - - for (uint32_t refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) - { - const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; - const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; - - int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; - const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); - for (uint32_t r = 0; r < n; r++) - { - const uint32_t s = *pSelectors++; - const int yd_temp = pInten_table[s]; - // Compute actual delta being applied to each pixel, taking into account clamping. - delta_sum_r += clamp(base_color.r + yd_temp, 0, 255) - base_color.r; - delta_sum_g += clamp(base_color.g + yd_temp, 0, 255) - base_color.g; - delta_sum_b += clamp(base_color.b + yd_temp, 0, 255) - base_color.b; - } - - if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) - break; - - const float avg_delta_r_f = static_cast(delta_sum_r) / n; - const float avg_delta_g_f = static_cast(delta_sum_g) / n; - const float avg_delta_b_f = static_cast(delta_sum_b) / n; - const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Refinement trial %u, avg_delta %f %f %f\n", refinement_trial, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); -#endif - - if (!evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution)) - break; - - } // refinement_trial - } - - void etc1_optimizer::compute_internal_neighborhood(int scan_r, int scan_g, int scan_b) - { - if 
(m_best_solution.m_error == 0) - return; - - const uint32_t n = m_pParams->m_num_src_pixels; - const int scan_delta_size = m_pParams->m_scan_delta_size; - - // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. - // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. - for (int zdi = 0; zdi < scan_delta_size; zdi++) - { - const int zd = m_pParams->m_pScan_deltas[zdi]; - const int mbb = scan_b + zd; - if (mbb < 0) continue; else if (mbb > m_limit) break; - - for (int ydi = 0; ydi < scan_delta_size; ydi++) - { - const int yd = m_pParams->m_pScan_deltas[ydi]; - const int mbg = scan_g + yd; - if (mbg < 0) continue; else if (mbg > m_limit) break; - - for (int xdi = 0; xdi < scan_delta_size; xdi++) - { - const int xd = m_pParams->m_pScan_deltas[xdi]; - const int mbr = scan_r + xd; - if (mbr < 0) continue; else if (mbr > m_limit) break; - - etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); - - if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) - continue; - - if (m_pParams->m_refinement) - { - refine_solution((m_pParams->m_quality == cETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2)); - } - - } // xdi - } // ydi - } // zdi - } - - void etc1_optimizer::compute_internal_cluster_fit(uint32_t total_perms_to_try) - { - if ((!m_best_solution.m_valid) || ((m_br != m_best_solution.m_coords.m_unscaled_color.r) || (m_bg != m_best_solution.m_coords.m_unscaled_color.g) || (m_bb != m_best_solution.m_coords.m_unscaled_color.b))) - { - evaluate_solution(etc1_solution_coordinates(m_br, m_bg, m_bb, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); - } - - if ((m_best_solution.m_error == 0) || (!m_best_solution.m_valid)) - return; - - for (uint32_t i = 0; i < total_perms_to_try; i++) - { - int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; - - const int *pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; - const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); - - const uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v; - - for (uint32_t q = 0; q < 4; q++) - { - const int yd_temp = pInten_table[q]; - - delta_sum_r += pNum_selectors[q] * (clamp(base_color.r + yd_temp, 0, 255) - base_color.r); - delta_sum_g += pNum_selectors[q] * (clamp(base_color.g + yd_temp, 0, 255) - base_color.g); - delta_sum_b += pNum_selectors[q] * (clamp(base_color.b + yd_temp, 0, 255) - base_color.b); - } - - if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) - continue; - - const float avg_delta_r_f = static_cast(delta_sum_r) / 8; - const float avg_delta_g_f = static_cast(delta_sum_g) / 8; - const float avg_delta_b_f = static_cast(delta_sum_b) / 8; - - const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Second refinement trial %u, avg_delta %f %f %f\n", i, avg_delta_r_f, avg_delta_g_f, 
avg_delta_b_f); -#endif - - evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); - - if (m_best_solution.m_error == 0) - break; - } - } - - void etc1_optimizer::init(const params& params, results& result) - { - m_pParams = ¶ms; - m_pResult = &result; - - const uint32_t n = m_pParams->m_num_src_pixels; - - m_selectors.resize(n); - m_best_selectors.resize(n); - m_temp_selectors.resize(n); - m_trial_solution.m_selectors.resize(n); - m_best_solution.m_selectors.resize(n); - - m_limit = m_pParams->m_use_color4 ? 15 : 31; - - vec3F avg_color(0.0f); - - m_luma.resize(n); - m_sorted_luma_indices.resize(n); - m_sorted_luma.resize(n); - - for (uint32_t i = 0; i < n; i++) - { - const color_rgba& c = m_pParams->m_pSrc_pixels[i]; - const vec3F fc(c.r, c.g, c.b); - - avg_color += fc; - - m_luma[i] = static_cast(c.r + c.g + c.b); - m_sorted_luma_indices[i] = i; - } - avg_color /= static_cast(n); - m_avg_color = avg_color; - - m_br = clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); - m_bg = clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); - m_bb = clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Avg block color: %u %u %u\n", m_br, m_bg, m_bb); -#endif - - if (m_pParams->m_quality <= cETCQualityMedium) - { - indirect_sort(n, &m_sorted_luma_indices[0], &m_luma[0]); - - m_pSorted_luma = &m_sorted_luma[0]; - m_pSorted_luma_indices = &m_sorted_luma_indices[0]; - - for (uint32_t i = 0; i < n; i++) - m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; - } - - m_best_solution.m_coords.clear(); - m_best_solution.m_valid = false; - m_best_solution.m_error = UINT64_MAX; - - m_solutions_tried.clear(); - } - - bool etc1_optimizer::evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) - { - uint32_t k = 
coords.m_unscaled_color.r | (coords.m_unscaled_color.g << 8) | (coords.m_unscaled_color.b << 16); - if (!m_solutions_tried.insert(k).second) - return false; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval solution: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); -#endif - - trial_solution.m_valid = false; - - if (m_pParams->m_constrain_against_base_color5) - { - const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; - const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; - const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; - - if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) - { -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); -#endif - return false; - } - } - - const color_rgba base_color(coords.get_scaled_color()); - - const uint32_t n = m_pParams->m_num_src_pixels; - assert(trial_solution.m_selectors.size() == n); - - trial_solution.m_error = UINT64_MAX; - - const uint8_t *pSelectors_to_use = m_pParams->m_pForce_selectors; - - for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) - { - const int* pInten_table = g_etc1_inten_tables[inten_table]; - - color_rgba block_colors[4]; - for (uint32_t s = 0; s < 4; s++) - { - const int yd = pInten_table[s]; - block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); - } - - uint64_t total_error = 0; - - const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - for (uint32_t c = 0; c < n; c++) - { - const color_rgba& src_pixel = *pSrc_pixels++; - - uint32_t best_selector_index = 0; - uint32_t best_error = 0; - - if (pSelectors_to_use) - { - best_selector_index = pSelectors_to_use[c]; - best_error = color_distance(m_pParams->m_perceptual, src_pixel, 
block_colors[best_selector_index], false); - } - else - { - best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[0], false); - - uint32_t trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[1], false); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 1; - } - - trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[2], false); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 2; - } - - trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[3], false); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 3; - } - } - - m_temp_selectors[c] = static_cast(best_selector_index); - - total_error += best_error; - if ((m_pParams->m_pEval_solution_override == nullptr) && (total_error >= trial_solution.m_error)) - break; - } - - if (m_pParams->m_pEval_solution_override) - { - if (!(*m_pParams->m_pEval_solution_override)(total_error, *m_pParams, block_colors, &m_temp_selectors[0], coords)) - return false; - } - - if (total_error < trial_solution.m_error) - { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - trial_solution.m_selectors.swap(m_temp_selectors); - trial_solution.m_valid = true; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); -#endif - - bool success = false; - if (pBest_solution) - { - if (trial_solution.m_error < pBest_solution->m_error) - { - *pBest_solution = trial_solution; - success = true; - } - } - - return success; - } - - bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, 
potential_solution& trial_solution, potential_solution* pBest_solution) - { - uint32_t k = coords.m_unscaled_color.r | (coords.m_unscaled_color.g << 8) | (coords.m_unscaled_color.b << 16); - if (!m_solutions_tried.insert(k).second) - return false; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval solution fast: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); -#endif - - if (m_pParams->m_constrain_against_base_color5) - { - const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; - const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; - const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; - - if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) - { - trial_solution.m_valid = false; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); -#endif - return false; - } - } - - const color_rgba base_color(coords.get_scaled_color()); - - const uint32_t n = m_pParams->m_num_src_pixels; - assert(trial_solution.m_selectors.size() == n); - - trial_solution.m_error = UINT64_MAX; - - for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) - { - const int* pInten_table = g_etc1_inten_tables[inten_table]; - - uint32_t block_inten[4]; - color_rgba block_colors[4]; - for (uint32_t s = 0; s < 4; s++) - { - const int yd = pInten_table[s]; - color_rgba block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); - block_colors[s] = block_color; - block_inten[s] = block_color.r + block_color.g + block_color.b; - } - - // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. 
- // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. - // 0 1 2 3 - // 01 12 23 - const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; - - uint64_t total_error = 0; - const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) - { - if (block_inten[0] > m_pSorted_luma[n - 1]) - { - const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 0, n); - - for (uint32_t c = 0; c < n; c++) - total_error += color_distance(block_colors[0], pSrc_pixels[c], false); - } - else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) - { - if (m_pSorted_luma[0] > block_inten[3]) - { - const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 3, n); - - for (uint32_t c = 0; c < n; c++) - total_error += color_distance(block_colors[3], pSrc_pixels[c], false); - } - else - { - uint32_t cur_selector = 0, c; - for (c = 0; c < n; c++) - { - const uint32_t y = m_pSorted_luma[c]; - while ((y * 2) >= block_inten_midpoints[cur_selector]) - if (++cur_selector > 2) - goto done; - const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); - total_error += color_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); - } - done: - while (c < n) - { - const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = 3; - total_error += color_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); - ++c; - } - } - - if (total_error < trial_solution.m_error) 
- { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - trial_solution.m_selectors.swap(m_temp_selectors); - trial_solution.m_valid = true; - if (!total_error) - break; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); -#endif - - bool success = false; - if (pBest_solution) - { - if (trial_solution.m_error < pBest_solution->m_error) - { - *pBest_solution = trial_solution; - success = true; - } - } - - return success; - } - -} // namespace basisu diff --git a/src/deps/basis-universal/basisu_pvrtc1_4.cpp b/src/deps/basis-universal/basisu_pvrtc1_4.cpp deleted file mode 100644 index f0122fcb6c..0000000000 --- a/src/deps/basis-universal/basisu_pvrtc1_4.cpp +++ /dev/null @@ -1,269 +0,0 @@ -// basisu_pvrtc1_4.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#include "basisu_pvrtc1_4.h" - -namespace basisu -{ - uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) - { - assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); - - uint32_t min_d = width, max_v = y; - if (height < width) - { - min_d = height; - max_v = x; - } - - // Interleave the XY LSB's - uint32_t shift_ofs = 0, swizzled = 0; - for (uint32_t s_bit = 1, d_bit = 1; s_bit < min_d; s_bit <<= 1, d_bit <<= 2, ++shift_ofs) - { - if (y & s_bit) swizzled |= d_bit; - if (x & s_bit) swizzled |= (2 * d_bit); - } - - max_v >>= shift_ofs; - - // OR in the rest of the bits from the largest dimension - swizzled |= (max_v << (2 * shift_ofs)); - - return swizzled; - } - - color_rgba pvrtc4_block::get_endpoint(uint32_t endpoint_index, bool unpack) const - { - assert(endpoint_index < 2); - const uint32_t packed = m_endpoints >> (endpoint_index * 16); - - uint32_t r, g, b, a; - if (packed & 0x8000) - { - // opaque 554 or 555 - if (!endpoint_index) - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = (packed >> 1) & 15; - - if (unpack) - { - b = (b << 1) | (b >> 3); - } - } - else - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = packed & 31; - } - - a = unpack ? 
255 : 7; - } - else - { - // translucent 4433 or 4443 - if (!endpoint_index) - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = (packed >> 1) & 7; - - if (unpack) - { - a = (a << 1); - a = (a << 4) | a; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 2) | (b >> 1); - } - } - else - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = packed & 15; - - if (unpack) - { - a = (a << 1); - a = (a << 4) | a; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 1) | (b >> 3); - } - } - } - - if (unpack) - { - r = (r << 3) | (r >> 2); - g = (g << 3) | (g >> 2); - b = (b << 3) | (b >> 2); - } - - assert((r < 256) && (g < 256) && (b < 256) && (a < 256)); - - return color_rgba(r, g, b, a); - } - - color_rgba pvrtc4_block::get_endpoint_5554(uint32_t endpoint_index) const - { - assert(endpoint_index < 2); - const uint32_t packed = m_endpoints >> (endpoint_index * 16); - - uint32_t r, g, b, a; - if (packed & 0x8000) - { - // opaque 554 or 555 - if (!endpoint_index) - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = (packed >> 1) & 15; - - b = (b << 1) | (b >> 3); - } - else - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = packed & 31; - } - - a = 15; - } - else - { - // translucent 4433 or 4443 - if (!endpoint_index) - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = (packed >> 1) & 7; - - a = a << 1; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 2) | (b >> 1); - } - else - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = packed & 15; - - a = a << 1; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 1) | (b >> 3); - } - } - - assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - - return color_rgba(r, g, b, a); - } - - bool pvrtc4_image::get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const - { - assert((x < m_width) 
&& (y < m_height)); - - int block_x0 = (static_cast(x) - 2) >> 2; - int block_x1 = block_x0 + 1; - int block_y0 = (static_cast(y) - 2) >> 2; - int block_y1 = block_y0 + 1; - - block_x0 = posmod(block_x0, m_block_width); - block_x1 = posmod(block_x1, m_block_width); - block_y0 = posmod(block_y0, m_block_height); - block_y1 = posmod(block_y1, m_block_height); - - pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); - pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); - - if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) - { - for (uint32_t c = 0; c < 4; c++) - { - uint32_t m = (pColors[0][c] + pColors[3][c]) / 2; - pColors[1][c] = static_cast(m); - pColors[2][c] = static_cast(m); - } - pColors[2][3] = 0; - return true; - } - - for (uint32_t c = 0; c < 4; c++) - { - pColors[1][c] = static_cast((pColors[0][c] * 5 + pColors[3][c] * 3) / 8); - pColors[2][c] = static_cast((pColors[0][c] * 3 + pColors[3][c] * 5) / 8); - } - - return false; - } - - color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const - { - assert((x < m_width) && (y < m_height)); - - int block_x0 = (static_cast(x) - 2) >> 2; - int block_x1 = block_x0 + 1; - int block_y0 = (static_cast(y) - 2) >> 2; - int block_y1 = block_y0 + 1; - - block_x0 = posmod(block_x0, m_block_width); - block_x1 = posmod(block_x1, m_block_width); - block_y0 = posmod(block_y0, m_block_height); - block_y1 = posmod(block_y1, m_block_height); - - if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) - { - if (m == 0) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, 
block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); - else if (m == 3) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); - - color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); - color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); - - return color_rgba((l[0] + h[0]) / 2, (l[1] + h[1]) / 2, (l[2] + h[2]) / 2, (m == 2) ? 0 : (l[3] + h[3]) / 2); - } - else - { - if (m == 0) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); - else if (m == 3) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); - - color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); - color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); - - if (m == 2) - return 
color_rgba((l[0] * 3 + h[0] * 5) / 8, (l[1] * 3 + h[1] * 5) / 8, (l[2] * 3 + h[2] * 5) / 8, (l[3] * 3 + h[3] * 5) / 8); - else - return color_rgba((l[0] * 5 + h[0] * 3) / 8, (l[1] * 5 + h[1] * 3) / 8, (l[2] * 5 + h[2] * 3) / 8, (l[3] * 5 + h[3] * 3) / 8); - } - } - -} // basisu diff --git a/src/deps/basis-universal/encoder/apg_bmp.c b/src/deps/basis-universal/encoder/apg_bmp.c new file mode 100644 index 0000000000..d342b20fc8 --- /dev/null +++ b/src/deps/basis-universal/encoder/apg_bmp.c @@ -0,0 +1,541 @@ +/* +BMP File Reader/Writer Implementation +Anton Gerdelan +Version: 3 +Licence: see apg_bmp.h +C99 +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS 1 +#endif + +#include "apg_bmp.h" +#include +#include +#include +#include +#include +#include + +/* Maximum pixel dimensions of width or height of an image. Should accommodate max used in graphics APIs. + NOTE: 65536*65536 is the biggest number storable in 32 bits. + This needs to be multiplied by n_channels so actual memory indices are not uint32 but size_t to avoid overflow. + Note this will crash stb_image_write et al at maximum size which use 32bits, so reduce max size to accom. */ +#define _BMP_MAX_DIMS 65536 +#define _BMP_FILE_HDR_SZ 14 +#define _BMP_MIN_DIB_HDR_SZ 40 +#define _BMP_MIN_HDR_SZ ( _BMP_FILE_HDR_SZ + _BMP_MIN_DIB_HDR_SZ ) +#define _BMP_MAX_IMAGE_FILE_SIZE (1024ULL*1024ULL*1024ULL) + +#pragma pack( push, 1 ) // supported on GCC in addition to individual packing attribs +/* All BMP files, regardless of type, start with this file header */ +typedef struct _bmp_file_header_t { + char file_type[2]; + uint32_t file_sz; + uint16_t reserved1; + uint16_t reserved2; + uint32_t image_data_offset; +} _bmp_file_header_t; + +/* Following the file header is the BMP type header. 
this is the most commonly used format */ +typedef struct _bmp_dib_BITMAPINFOHEADER_t { + uint32_t this_header_sz; + int32_t w; // in older headers w & h these are shorts and may be unsigned + int32_t h; // + uint16_t n_planes; // must be 1 + uint16_t bpp; // bits per pixel. 1,4,8,16,24,32. + uint32_t compression_method; // 16 and 32-bit images must have a value of 3 here + uint32_t image_uncompressed_sz; // not consistently used in the wild, so ignored here. + int32_t horiz_pixels_per_meter; // not used. + int32_t vert_pixels_per_meter; // not used. + uint32_t n_colours_in_palette; // + uint32_t n_important_colours; // not used. + /* NOTE(Anton) a DIB header may end here at 40-bytes. be careful using sizeof() */ + /* if 'compression' value, above, is set to 3 ie the image is 16 or 32-bit, then these colour channel masks follow the headers. + these are big-endian order bit masks to assign bits of each pixel to different colours. bits used must be contiguous and not overlap. */ + uint32_t bitmask_r; + uint32_t bitmask_g; + uint32_t bitmask_b; +} _bmp_dib_BITMAPINFOHEADER_t; +#pragma pack( pop ) + +typedef enum _bmp_compression_t { + BI_RGB = 0, + BI_RLE8 = 1, + BI_RLE4 = 2, + BI_BITFIELDS = 3, + BI_JPEG = 4, + BI_PNG = 5, + BI_ALPHABITFIELDS = 6, + BI_CMYK = 11, + BI_CMYKRLE8 = 12, + BI_CMYRLE4 = 13 +} _bmp_compression_t; + +/* convenience struct and file->memory function */ +typedef struct _entire_file_t { + void* data; + size_t sz; +} _entire_file_t; + +/* +RETURNS +- true on success. record->data is allocated memory and must be freed by the caller. +- false on any error. Any allocated memory is freed if false is returned */ +static bool _read_entire_file( const char* filename, _entire_file_t* record ) { + FILE* fp = fopen( filename, "rb" ); + if ( !fp ) { return false; } + fseek( fp, 0L, SEEK_END ); + record->sz = (size_t)ftell( fp ); + + // Immediately bail on anything larger than _BMP_MAX_IMAGE_FILE_SIZE. 
+ if (record->sz > _BMP_MAX_IMAGE_FILE_SIZE) { + fclose( fp ); + return false; + } + + record->data = malloc( record->sz ); + if ( !record->data ) { + fclose( fp ); + return false; + } + rewind( fp ); + size_t nr = fread( record->data, record->sz, 1, fp ); + fclose( fp ); + if ( 1 != nr ) { return false; } + return true; +} + +static bool _validate_file_hdr( _bmp_file_header_t* file_hdr_ptr, size_t file_sz ) { + if ( !file_hdr_ptr ) { return false; } + if ( file_hdr_ptr->file_type[0] != 'B' || file_hdr_ptr->file_type[1] != 'M' ) { return false; } + if ( file_hdr_ptr->image_data_offset > file_sz ) { return false; } + return true; +} + +static bool _validate_dib_hdr( _bmp_dib_BITMAPINFOHEADER_t* dib_hdr_ptr, size_t file_sz ) { + if ( !dib_hdr_ptr ) { return false; } + if ( _BMP_FILE_HDR_SZ + dib_hdr_ptr->this_header_sz > file_sz ) { return false; } + if ( ( 32 == dib_hdr_ptr->bpp || 16 == dib_hdr_ptr->bpp ) && ( BI_BITFIELDS != dib_hdr_ptr->compression_method && BI_ALPHABITFIELDS != dib_hdr_ptr->compression_method ) ) { + return false; + } + if ( BI_RGB != dib_hdr_ptr->compression_method && BI_BITFIELDS != dib_hdr_ptr->compression_method && BI_ALPHABITFIELDS != dib_hdr_ptr->compression_method ) { + return false; + } + // NOTE(Anton) using abs() in the if-statement was blowing up on large negative numbers. switched to labs() + if ( 0 == dib_hdr_ptr->w || 0 == dib_hdr_ptr->h || labs( dib_hdr_ptr->w ) > _BMP_MAX_DIMS || labs( dib_hdr_ptr->h ) > _BMP_MAX_DIMS ) { return false; } + + /* NOTE(Anton) if images reliably used n_colours_in_palette we could have done a palette/file size integrity check here. 
+ because some always set 0 then we have to check every palette indexing as we read them */ + return true; +} + +/* NOTE(Anton) this could have ifdef branches on different compilers for the intrinsics versions for perf */ +static uint32_t _bitscan( uint32_t dword ) { + for ( uint32_t i = 0; i < 32; i++ ) { + if ( 1 & dword ) { return i; } + dword = dword >> 1; + } + return (uint32_t)-1; +} + +unsigned char* apg_bmp_read( const char* filename, int* w, int* h, unsigned int* n_chans ) { + if ( !filename || !w || !h || !n_chans ) { return NULL; } + + // read in the whole file into memory first - much faster than parsing on-the-fly + _entire_file_t record; + if ( !_read_entire_file( filename, &record ) ) { return NULL; } + if ( record.sz < _BMP_MIN_HDR_SZ ) { + free( record.data ); + return NULL; + } + + // grab and validate the first, file, header + _bmp_file_header_t* file_hdr_ptr = (_bmp_file_header_t*)record.data; + if ( !_validate_file_hdr( file_hdr_ptr, record.sz ) ) { + free( record.data ); + return NULL; + } + + // grad and validate the second, DIB, header + _bmp_dib_BITMAPINFOHEADER_t* dib_hdr_ptr = (_bmp_dib_BITMAPINFOHEADER_t*)( (uint8_t*)record.data + _BMP_FILE_HDR_SZ ); + if ( !_validate_dib_hdr( dib_hdr_ptr, record.sz ) ) { + free( record.data ); + return NULL; + } + + // bitmaps can have negative dims to indicate the image should be flipped + uint32_t width = *w = abs( dib_hdr_ptr->w ); + uint32_t height = *h = abs( dib_hdr_ptr->h ); + + // TODO(Anton) flip image memory at the end if this is true. because doing it per row was making me write bugs. + // bool vertically_flip = dib_hdr_ptr->h > 0 ? 
false : true; + + // channel count and palette are not well defined in the header so we make a good guess here + uint32_t n_dst_chans = 3, n_src_chans = 3; + bool has_palette = false; + switch ( dib_hdr_ptr->bpp ) { + case 32: n_dst_chans = n_src_chans = 4; break; // technically can be RGB but not supported + case 24: n_dst_chans = n_src_chans = 3; break; // technically can be RGBA but not supported + case 8: // seems to always use a BGR0 palette, even for greyscale + n_dst_chans = 3; + has_palette = true; + n_src_chans = 1; + break; + case 4: // always has a palette - needed for a MS-saved BMP + n_dst_chans = 3; + has_palette = true; + n_src_chans = 1; + break; + case 1: // 1-bpp means the palette has 3 colour channels with 2 colours i.e. monochrome but not always black & white + n_dst_chans = 3; + has_palette = true; + n_src_chans = 1; + break; + default: // this includes 2bpp and 16bpp + free( record.data ); + return NULL; + } // endswitch + *n_chans = n_dst_chans; + // NOTE(Anton) some image formats are not allowed a palette - could check for a bad header spec here also + if ( dib_hdr_ptr->n_colours_in_palette > 0 ) { has_palette = true; } + +#ifdef APG_BMP_DEBUG_OUTPUT + printf( "apg_bmp_debug: reading image\n|-filename `%s`\n|-dims %ux%u pixels\n|-bpp %u\n|-n_src_chans %u\n|-n_dst_chans %u\n", filename, *w, *h, + dib_hdr_ptr->bpp, n_src_chans, n_dst_chans ); +#endif + + uint32_t palette_offset = _BMP_FILE_HDR_SZ + dib_hdr_ptr->this_header_sz; + bool has_bitmasks = false; + if ( BI_BITFIELDS == dib_hdr_ptr->compression_method || BI_ALPHABITFIELDS == dib_hdr_ptr->compression_method ) { + has_bitmasks = true; + palette_offset += 12; + } + if ( palette_offset > record.sz ) { + free( record.data ); + return NULL; + } + + // work out if any padding how much to skip at end of each row + uint32_t unpadded_row_sz = width * n_src_chans; + // bit-encoded palette indices have different padding properties + if ( 4 == dib_hdr_ptr->bpp ) { + unpadded_row_sz = width % 2 > 0 
? width / 2 + 1 : width / 2; // find how many whole bytes required for this bit width + } + if ( 1 == dib_hdr_ptr->bpp ) { + unpadded_row_sz = width % 8 > 0 ? width / 8 + 1 : width / 8; // find how many whole bytes required for this bit width + } + uint32_t row_padding_sz = 0 == unpadded_row_sz % 4 ? 0 : 4 - ( unpadded_row_sz % 4 ); // NOTE(Anton) didn't expect operator precedence of - over % + + // another file size integrity check: partially validate source image data size + // 'image_data_offset' is by row padded to 4 bytes and is either colour data or palette indices. + if ( file_hdr_ptr->image_data_offset + ( unpadded_row_sz + row_padding_sz ) * height > record.sz ) { + free( record.data ); + return NULL; + } + + // find which bit number each colour channel starts at, so we can separate colours out + uint32_t bitshift_rgba[4] = {0, 0, 0, 0}; // NOTE(Anton) noticed this was int and not uint32_t so changed it. 17 Mar 2020 + uint32_t bitmask_a = 0; + if ( has_bitmasks ) { + bitmask_a = ~( dib_hdr_ptr->bitmask_r | dib_hdr_ptr->bitmask_g | dib_hdr_ptr->bitmask_b ); + bitshift_rgba[0] = _bitscan( dib_hdr_ptr->bitmask_r ); + bitshift_rgba[1] = _bitscan( dib_hdr_ptr->bitmask_g ); + bitshift_rgba[2] = _bitscan( dib_hdr_ptr->bitmask_b ); + bitshift_rgba[3] = _bitscan( bitmask_a ); + } + + // allocate memory for the output pixels block. cast to size_t in case width and height are both the max of 65536 and n_dst_chans > 1 + unsigned char* dst_img_ptr = (unsigned char*)malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans ); + if ( !dst_img_ptr ) { + free( record.data ); + return NULL; + } + + uint8_t* palette_data_ptr = (uint8_t*)record.data + palette_offset; + uint8_t* src_img_ptr = (uint8_t*)record.data + file_hdr_ptr->image_data_offset; + size_t dst_stride_sz = width * n_dst_chans; + + // == 32-bpp -> 32-bit RGBA. 
== 32-bit and 16-bit require bitmasks + if ( 32 == dib_hdr_ptr->bpp ) { + // check source image has enough data in it to read from + if ( (size_t)file_hdr_ptr->image_data_offset + (size_t)height * (size_t)width * (size_t)n_src_chans > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = r * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + uint32_t pixel; + memcpy( &pixel, &src_img_ptr[src_byte_idx], 4 ); + // NOTE(Anton) the below assumes 32-bits is always RGBA 1 byte per channel. 10,10,10 RGB exists though and isn't handled. + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & dib_hdr_ptr->bitmask_r ) >> bitshift_rgba[0] ); + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & dib_hdr_ptr->bitmask_g ) >> bitshift_rgba[1] ); + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & dib_hdr_ptr->bitmask_b ) >> bitshift_rgba[2] ); + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & bitmask_a ) >> bitshift_rgba[3] ); + src_byte_idx += 4; + } + src_byte_idx += row_padding_sz; + } + + // == 8-bpp -> 24-bit RGB == + } else if ( 8 == dib_hdr_ptr->bpp && has_palette ) { + // validate indices (body of image data) fits in file + if ( file_hdr_ptr->image_data_offset + height * width > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + // "most palettes are 4 bytes in RGB0 order but 3 for..." 
- it was actually BRG0 in old images -- Anton + uint8_t index = src_img_ptr[src_byte_idx]; // 8-bit index value per pixel + + if ( palette_offset + index * 4 + 2 >= record.sz ) { + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[index * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[index * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[index * 4 + 0]; + src_byte_idx++; + } + src_byte_idx += row_padding_sz; + } + + // == 4-bpp (16-colour) -> 24-bit RGB == + } else if ( 4 == dib_hdr_ptr->bpp && has_palette ) { + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + if ( file_hdr_ptr->image_data_offset + src_byte_idx > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + // handle 2 pixels at a time + uint8_t pixel_duo = src_img_ptr[src_byte_idx]; + uint8_t a_index = ( 0xFF & pixel_duo ) >> 4; + uint8_t b_index = 0xF & pixel_duo; + + if ( palette_offset + a_index * 4 + 2 >= record.sz ) { // invalid src image + free( record.data ); + return dst_img_ptr; + } + if ( dst_pixels_idx + 3 > width * height * n_dst_chans ) { // done + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[a_index * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[a_index * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[a_index * 4 + 0]; + if ( ++c >= width ) { // advance a column + c = 0; + r++; + if ( r >= height ) { // done. no need to get second pixel. eg a 1x1 pixel image. + free( record.data ); + return dst_img_ptr; + } + dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + } + + if ( palette_offset + b_index * 4 + 2 >= record.sz ) { // invalid src image + free( record.data ); + return dst_img_ptr; + } + if ( dst_pixels_idx + 3 > width * height * n_dst_chans ) { // done. 
probably redundant check since checking r >= height. + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[b_index * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[b_index * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[b_index * 4 + 0]; + src_byte_idx++; + } + src_byte_idx += row_padding_sz; + } + + // == 1-bpp -> 24-bit RGB == + } else if ( 1 == dib_hdr_ptr->bpp && has_palette ) { + /* encoding method for monochrome is not well documented. + a 2x2 pixel image is stored as 4 1-bit palette indexes + the palette is stored as any 2 RGB0 colours (not necessarily B&W) + so for an image with indexes like so: + 1 1 + 0 1 + it is bit-encoded as follows, starting at MSB: + 01000000 00000000 00000000 00000000 (first byte val 64) + 11000000 00000000 00000000 00000000 (first byte val 192) + data is still split by row and each row padded to 4 byte multiples + */ + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + uint8_t bit_idx = 0; // used in monochrome + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + if ( 8 == bit_idx ) { // start reading from the next byte + src_byte_idx++; + bit_idx = 0; + } + if ( file_hdr_ptr->image_data_offset + src_byte_idx > record.sz ) { + free( record.data ); + return dst_img_ptr; + } + uint8_t pixel_oct = src_img_ptr[src_byte_idx]; + uint8_t bit = 128 >> bit_idx; + uint8_t masked = pixel_oct & bit; + uint8_t palette_idx = masked > 0 ? 
1 : 0; + + if ( palette_offset + palette_idx * 4 + 2 >= record.sz ) { + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[palette_idx * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[palette_idx * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[palette_idx * 4 + 0]; + bit_idx++; + } + src_byte_idx += ( row_padding_sz + 1 ); // 1bpp is special here + } + + // == 24-bpp -> 24-bit RGB == (but also should handle some other n_chans cases) + } else { + // NOTE(Anton) this only supports 1 byte per channel + if ( file_hdr_ptr->image_data_offset + height * width * n_dst_chans > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + // re-orders from BGR to RGB + if ( n_dst_chans > 3 ) { dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx + 3]; } + if ( n_dst_chans > 2 ) { dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx + 2]; } + if ( n_dst_chans > 1 ) { dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx + 1]; } + dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx]; + src_byte_idx += n_src_chans; + } + src_byte_idx += row_padding_sz; + } + } // endif bpp + + free( record.data ); + return dst_img_ptr; +} + +void apg_bmp_free( unsigned char* pixels_ptr ) { + if ( !pixels_ptr ) { return; } + free( pixels_ptr ); +} + +unsigned int apg_bmp_write( const char* filename, unsigned char* pixels_ptr, int w, int h, unsigned int n_chans ) { + if ( !filename || !pixels_ptr ) { return 0; } + if ( 0 == w || 0 == h ) { return 0; } + if ( labs( w ) > _BMP_MAX_DIMS || labs( h ) > _BMP_MAX_DIMS ) { return 0; } + if ( n_chans != 3 && n_chans != 4 ) { return 0; } + + uint32_t height = (uint32_t)labs( h ); + uint32_t width = (uint32_t)labs( w ); + // work out if any padding how much to skip at end of 
each row + const size_t unpadded_row_sz = width * n_chans; + const size_t row_padding_sz = 0 == unpadded_row_sz % 4 ? 0 : 4 - unpadded_row_sz % 4; + const size_t row_sz = unpadded_row_sz + row_padding_sz; + const size_t dst_pixels_padded_sz = row_sz * height; + + const size_t dib_hdr_sz = sizeof( _bmp_dib_BITMAPINFOHEADER_t ); + _bmp_file_header_t file_hdr; + { + file_hdr.file_type[0] = 'B'; + file_hdr.file_type[1] = 'M'; + file_hdr.file_sz = _BMP_FILE_HDR_SZ + (uint32_t)dib_hdr_sz + (uint32_t)dst_pixels_padded_sz; + file_hdr.reserved1 = 0; + file_hdr.reserved2 = 0; + file_hdr.image_data_offset = _BMP_FILE_HDR_SZ + (uint32_t)dib_hdr_sz; + } + _bmp_dib_BITMAPINFOHEADER_t dib_hdr; + { + dib_hdr.this_header_sz = _BMP_MIN_DIB_HDR_SZ; // NOTE: must be 40 and not include the bitmask memory in size here + dib_hdr.w = w; + dib_hdr.h = h; + dib_hdr.n_planes = 1; + dib_hdr.bpp = 3 == n_chans ? 24 : 32; + dib_hdr.compression_method = 3 == n_chans ? BI_RGB : BI_BITFIELDS; + dib_hdr.image_uncompressed_sz = 0; + dib_hdr.horiz_pixels_per_meter = 0; + dib_hdr.vert_pixels_per_meter = 0; + dib_hdr.n_colours_in_palette = 0; + dib_hdr.n_important_colours = 0; + // big-endian masks. 
only used in BI_BITFIELDS and BI_ALPHABITFIELDS ( 16 and 32-bit images ) + // important note: GIMP stores BMP data in this array order for 32-bit: [A][B][G][R] + dib_hdr.bitmask_r = 0xFF000000; + dib_hdr.bitmask_g = 0x00FF0000; + dib_hdr.bitmask_b = 0x0000FF00; + } + + uint8_t* dst_pixels_ptr = (uint8_t*)malloc( dst_pixels_padded_sz ); + if ( !dst_pixels_ptr ) { return 0; } + { + size_t dst_byte_idx = 0; + uint8_t padding[4] = {0, 0, 0, 0}; + uint8_t rgba[4] = {0, 0, 0, 0}; + uint8_t bgra[4] = {0, 0, 0, 0}; + + for ( uint32_t row = 0; row < height; row++ ) { + size_t src_byte_idx = ( height - 1 - row ) * n_chans * width; + for ( uint32_t col = 0; col < width; col++ ) { + for ( uint32_t chan = 0; chan < n_chans; chan++ ) { rgba[chan] = pixels_ptr[src_byte_idx++]; } + if ( 3 == n_chans ) { + bgra[0] = rgba[2]; + bgra[1] = rgba[1]; + bgra[2] = rgba[0]; + } else { + /* NOTE(Anton) RGBA with alpha channel would be better supported with an extended DIB header */ + bgra[0] = rgba[3]; + bgra[1] = rgba[2]; + bgra[2] = rgba[1]; + bgra[3] = rgba[0]; // alpha + } + memcpy( &dst_pixels_ptr[dst_byte_idx], bgra, n_chans ); + dst_byte_idx += (size_t)n_chans; + } // endfor col + if ( row_padding_sz > 0 ) { + memcpy( &dst_pixels_ptr[dst_byte_idx], padding, row_padding_sz ); + dst_byte_idx += row_padding_sz; + } + } // endfor row + } + { + FILE* fp = fopen( filename, "wb" ); + if ( !fp ) { + free( dst_pixels_ptr ); + return 0; + } + if ( 1 != fwrite( &file_hdr, _BMP_FILE_HDR_SZ, 1, fp ) ) { + free( dst_pixels_ptr ); + fclose( fp ); + return 0; + } + if ( 1 != fwrite( &dib_hdr, dib_hdr_sz, 1, fp ) ) { + free( dst_pixels_ptr ); + fclose( fp ); + return 0; + } + if ( 1 != fwrite( dst_pixels_ptr, dst_pixels_padded_sz, 1, fp ) ) { + free( dst_pixels_ptr ); + fclose( fp ); + return 0; + } + fclose( fp ); + } + free( dst_pixels_ptr ); + + return 1; +} diff --git a/src/deps/basis-universal/encoder/apg_bmp.h b/src/deps/basis-universal/encoder/apg_bmp.h new file mode 100644 index 
0000000000..8cd73b62e0 --- /dev/null +++ b/src/deps/basis-universal/encoder/apg_bmp.h @@ -0,0 +1,123 @@ +/* +BMP File Reader/Writer Implementation +Anton Gerdelan +Version: 3.1 18 March 2020. +Licence: see bottom of file. +C89 ( Implementation is C99 ) + +Contributors: +- Anton Gerdelan - Initial code. +- Saija Sorsa - Fuzz testing. + +Instructions: +- Just drop this header, and the matching .c file into your project. +- To get debug printouts during parsing define APG_BMP_DEBUG_OUTPUT. + +Advantages: +- The implementation is fast, simple, and supports more formats than most BMP reader libraries. +- The reader function is fuzzed with AFL https://lcamtuf.coredump.cx/afl/. +- The reader is robust to large files and malformed files, and will return any valid partial data in an image. +- Reader supports 32bpp (with alpha channel), 24bpp, 8bpp, 4bpp, and 1bpp monochrome BMP images. +- Reader handles indexed BMP images using a colour palette. +- Writer supports 32bpp RGBA and 24bpp uncompressed RGB images. + +Current Limitations: +- 16-bit images not supported (don't have any samples to test on). +- No support for interleaved channel bit layouts eg RGB101010 RGB555 RGB565. +- No support for compressed BMP images, although in practice these are not used. +- Output images with alpha channel are written in BITMAPINFOHEADER format. + For better alpha support in other apps the 124-bit v5 header could be used instead, + at the cost of some backward compatibility and bloat. + +To Do: +- FUZZING + - create a unique fuzz test set for (8,4,1 BPP). +- (maybe) FEATURE Flipping the image based on negative width and height in header, and/or function arguments. +- (maybe) PERF ifdef intrinsics/asm for bitscan. Platform-specific code so won't include unless necessary. +- (maybe) FEATURE Add parameter for padding output memory to eg 4-byte alignment or n channels. +- (maybe) FEATURE Improved apps support in alpha channel writing (using v5 header). 
+*/ + +#ifndef APG_BMP_H_ +#define APG_BMP_H_ + +#ifdef __cplusplus +extern "C" { +#endif /* CPP */ + +/* Reads a bitmap from a file, allocates memory for the raw image data, and returns it. +PARAMS + * w,h, - Retrieves the width and height of the BMP in pixels. + * n_chans - Retrieves the number of channels in the BMP. +RETURNS + * Tightly-packed pixel memory in RGBA order. The caller must call free() on the memory. + * NULL on any error. Any allocated memory is freed before returning NULL. */ +unsigned char* apg_bmp_read( const char* filename, int* w, int* h, unsigned int* n_chans ); + +/* Calls free() on memory created by apg_bmp_read */ +void apg_bmp_free( unsigned char* pixels_ptr ); + +/* Writes a bitmap to a file. +PARAMS + * filename - e.g."my_bitmap.bmp". Must not be NULL. + * pixels_ptr - Pointer to tightly-packed pixel memory in RGBA order. Must not be NULL. There must be abs(w)*abs(h)*n_chans bytes in the memory pointed to. + * w,h, - Width and height of the image in pixels. + * n_chans - The number of channels in the BMP. 3 or 4 supported for writing, which means RGB or RGBA memory, respectively. +RETURNS + * Zero on any error, non zero on success. */ +unsigned int apg_bmp_write( const char* filename, unsigned char* pixels_ptr, int w, int h, unsigned int n_chans ); + +#ifdef __cplusplus +} +#endif /* CPP */ + +#endif /*_APG_BMP_H_ */ + +/* +------------------------------------------------------------------------------------- +This software is available under two licences - you may use it under either licence. +------------------------------------------------------------------------------------- +FIRST LICENCE OPTION + +> Apache License +> Version 2.0, January 2004 +> http://www.apache.org/licenses/ +> Copyright 2019 Anton Gerdelan. +> Licensed under the Apache License, Version 2.0 (the "License"); +> you may not use this file except in compliance with the License. 
+> You may obtain a copy of the License at +> http://www.apache.org/licenses/LICENSE-2.0 +> Unless required by applicable law or agreed to in writing, software +> distributed under the License is distributed on an "AS IS" BASIS, +> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +> See the License for the specific language governing permissions and +> limitations under the License. +------------------------------------------------------------------------------------- +SECOND LICENCE OPTION + +> This is free and unencumbered software released into the public domain. +> +> Anyone is free to copy, modify, publish, use, compile, sell, or +> distribute this software, either in source code form or as a compiled +> binary, for any purpose, commercial or non-commercial, and by any +> means. +> +> In jurisdictions that recognize copyright laws, the author or authors +> of this software dedicate any and all copyright interest in the +> software to the public domain. We make this dedication for the benefit +> of the public at large and to the detriment of our heirs and +> successors. We intend this dedication to be an overt act of +> relinquishment in perpetuity of all present and future rights to this +> software under copyright law. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +> IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +> OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +> ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +> OTHER DEALINGS IN THE SOFTWARE. 
+> +> For more information, please refer to <http://unlicense.org> +------------------------------------------------------------------------------------- +*/ diff --git a/src/deps/basis-universal/basisu_astc_decomp.cpp b/src/deps/basis-universal/encoder/basisu_astc_decomp.cpp similarity index 99% rename from src/deps/basis-universal/basisu_astc_decomp.cpp rename to src/deps/basis-universal/encoder/basisu_astc_decomp.cpp index cc0a6ced7a..53bccfc515 100644 --- a/src/deps/basis-universal/basisu_astc_decomp.cpp +++ b/src/deps/basis-universal/encoder/basisu_astc_decomp.cpp @@ -50,6 +50,13 @@ typedef uint64_t deUint64; #define DE_ASSERT assert +#ifdef _MSC_VER +#pragma warning (disable:4505) // unreferenced local function has been removed +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + namespace basisu_astc { static bool inBounds(int v, int l, int h) @@ -150,7 +157,7 @@ namespace basisu_astc UVec4 asUint() const { - return UVec4(std::max(0, m_c[0]), std::max(0, m_c[1]), std::max(0, m_c[2]), std::max(0, m_c[3])); + return UVec4(basisu::maximum(0, m_c[0]), basisu::maximum(0, m_c[1]), basisu::maximum(0, m_c[2]), basisu::maximum(0, m_c[3])); } int32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } @@ -1256,7 +1263,7 @@ void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeigh const int numWeightsPerTexel = blockMode.isDualPlane ? 
2 : 1; const deUint32 scaleX = (1024 + blockWidth/2) / (blockWidth-1); const deUint32 scaleY = (1024 + blockHeight/2) / (blockHeight-1); - DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= DE_LENGTH_OF_ARRAY(unquantizedWeights)); + DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= (int)DE_LENGTH_OF_ARRAY(unquantizedWeights)); for (int texelY = 0; texelY < blockHeight; texelY++) { for (int texelX = 0; texelX < blockWidth; texelX++) @@ -1548,3 +1555,7 @@ bool decompress(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth } // astc } // basisu_astc + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/src/deps/basis-universal/basisu_astc_decomp.h b/src/deps/basis-universal/encoder/basisu_astc_decomp.h similarity index 94% rename from src/deps/basis-universal/basisu_astc_decomp.h rename to src/deps/basis-universal/encoder/basisu_astc_decomp.h index 6cd053b7b6..9ec2e46076 100644 --- a/src/deps/basis-universal/basisu_astc_decomp.h +++ b/src/deps/basis-universal/encoder/basisu_astc_decomp.h @@ -23,7 +23,7 @@ * \brief ASTC Utilities. *//*--------------------------------------------------------------------*/ -#include "transcoder/basisu.h" // to pick up the iterator debug level madness +#include "../transcoder/basisu.h" // to pick up the iterator debug level madness #include #include diff --git a/src/deps/basis-universal/basisu_backend.cpp b/src/deps/basis-universal/encoder/basisu_backend.cpp similarity index 84% rename from src/deps/basis-universal/basisu_backend.cpp rename to src/deps/basis-universal/encoder/basisu_backend.cpp index 3a689e58d7..19911fcbb4 100644 --- a/src/deps/basis-universal/basisu_backend.cpp +++ b/src/deps/basis-universal/encoder/basisu_backend.cpp @@ -1,5 +1,5 @@ // basisu_backend.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,11 @@ // #include "basisu_backend.h" +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + #define BASISU_FASTER_SELECTOR_REORDERING 0 #define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__); @@ -176,64 +181,117 @@ namespace basisu void basisu_backend::reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec& all_endpoint_indices) { basisu_frontend& r = *m_pFront_end; - const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + //const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; - if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0)) + if (m_params.m_used_global_codebooks) { - // We're changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed) - uint_vec new_block_endpoints(get_total_blocks()); - - for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + m_endpoint_remap_table_old_to_new.clear(); + m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++) + m_endpoint_remap_table_old_to_new[i] = i; + } + else + { + //if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0)) + if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 1)) { - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; - const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + // We've changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed) + 
uint_vec new_block_endpoints(get_total_blocks()); - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index; - } + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; - int_vec old_to_new_endpoint_indices; - r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true); + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index; + } - create_endpoint_palette(); + int_vec old_to_new_endpoint_indices; + r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true); - for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) - { - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + create_endpoint_palette(); - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; - const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; - const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; - - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + + //const uint32_t width = 
m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { - const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; - encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); - m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index]; - } // block_x - } // block_y - } // slice_index + m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index]; + } // block_x + } // block_y + } // slice_index + + for (uint32_t i = 0; i < all_endpoint_indices.size(); i++) + all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]]; + + } //if (total_block_endpoints_remapped) + + // Sort endpoint codebook + palette_index_reorderer reorderer; + reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0); + m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); + } - for (uint32_t i = 0; i < all_endpoint_indices.size(); i++) - all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]]; + // For endpoints, old_to_new[] may not be bijective! + // Some "old" entries may be unused and don't get remapped into the "new" array. 
- } //if (total_block_endpoints_remapped) + m_old_endpoint_was_used.clear(); + m_old_endpoint_was_used.resize(r.get_total_endpoint_clusters()); + uint32_t first_old_entry_index = UINT32_MAX; - // Sort endpoint codebook - palette_index_reorderer reorderer; - reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0); - m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y; + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + const uint32_t old_endpoint_index = m.m_endpoint_index; + m_old_endpoint_was_used[old_endpoint_index] = true; + first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index); + } // block_x + } // block_y + } // slice_index + + debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index); + + m_new_endpoint_was_used.clear(); + m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters()); + + m_endpoint_remap_table_new_to_old.clear(); m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); - for (uint32_t i = 0; i < m_endpoint_remap_table_old_to_new.size(); i++) - m_endpoint_remap_table_new_to_old[m_endpoint_remap_table_old_to_new[i]] = i; + + // Set unused entries in the new array to point to the first used entry in the old array. 
+ m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index); + + for (uint32_t old_index = 0; old_index < m_endpoint_remap_table_old_to_new.size(); old_index++) + { + if (m_old_endpoint_was_used[old_index]) + { + const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index]; + + m_new_endpoint_was_used[new_index] = true; + + m_endpoint_remap_table_new_to_old[new_index] = old_index; + } + } } void basisu_backend::sort_selector_codebook() @@ -242,7 +300,7 @@ namespace basisu m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters()); - if (m_params.m_compression_level == 0) + if ((m_params.m_compression_level == 0) || (m_params.m_used_global_codebooks)) { for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) m_selector_remap_table_new_to_old[i] = i; @@ -336,10 +394,10 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { const bool is_iframe = m_slices[slice_index].m_iframe; - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; const int prev_frame_slice_index = find_video_frame(slice_index, -1); @@ -393,6 +451,7 @@ namespace basisu BASISU_BACKEND_VERIFY(total_invalid_crs == 0); } + void basisu_backend::create_encoder_blocks() { basisu_frontend& r = *m_pFront_end; @@ -411,8 +470,8 @@ namespace basisu const bool is_iframe = m_slices[slice_index].m_iframe; const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = 
m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; @@ -590,7 +649,7 @@ namespace basisu { for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; const uint32_t width = m_slices[slice_index].m_width; const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; @@ -603,7 +662,7 @@ namespace basisu { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) { - const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); @@ -662,7 +721,7 @@ namespace basisu histogram selector_histogram(r.get_total_selector_clusters() + basist::MAX_SELECTOR_HISTORY_BUF_SIZE + 1); histogram selector_history_buf_rle_histogram(1 << basist::SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - std::vector selector_syms(m_slices.size()); + basisu::vector selector_syms(m_slices.size()); const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters(); const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + basist::MAX_SELECTOR_HISTORY_BUF_SIZE; @@ -672,7 +731,7 @@ namespace basisu histogram delta_endpoint_histogram(r.get_total_endpoint_clusters()); histogram endpoint_pred_histogram(basist::ENDPOINT_PRED_TOTAL_SYMBOLS); - std::vector endpoint_pred_syms(m_slices.size()); + basisu::vector endpoint_pred_syms(m_slices.size()); uint32_t total_endpoint_indices_remapped = 0; @@ -680,11 
+739,11 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1; - const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1; + //const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1; + //const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1; const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; @@ -702,7 +761,7 @@ namespace basisu { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) { - const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); @@ -723,6 +782,7 @@ namespace basisu } // block_x } // block_y + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) @@ -821,6 +881,10 @@ namespace basisu if (trial_idx == new_endpoint_index) continue; + // Skip it if this new endpoint palette entry is actually never used. 
+ if (!m_new_endpoint_was_used[trial_idx]) + continue; + const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; trial_etc_blk.set_block_color5_etc1s(p.m_color5); trial_etc_blk.set_inten_tables_etc1s(p.m_inten5); @@ -884,23 +948,32 @@ namespace basisu { const pixel_block& src_pixels = r.get_source_pixel_block(block_index); - etc_block etc_blk(r.get_output_block(block_index)); + const etc_block& etc_blk = r.get_output_block(block_index); color_rgba etc_blk_unpacked[16]; unpack_etc1(etc_blk, etc_blk_unpacked); uint64_t cur_err = 0; - for (uint32_t p = 0; p < 16; p++) - cur_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); - + if (r.get_params().m_perceptual) + { + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(true, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + } + else + { + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(false, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + } + uint64_t best_trial_err = UINT64_MAX; int best_trial_idx = 0; uint32_t best_trial_history_buf_idx = 0; - const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f; const bool use_strict_search = (m_params.m_compression_level == 0) && (selector_remap_thresh == 1.0f); + const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh); + for (uint32_t j = 0; j < selector_history_buf.size(); j++) { const int trial_idx = selector_history_buf[j]; @@ -917,31 +990,43 @@ namespace basisu } else { - for (uint32_t sy = 0; sy < 4; sy++) - for (uint32_t sx = 0; sx < 4; sx++) - etc_blk.set_selector(sx, sy, m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](sx, sy)); + uint64_t trial_err = 0; + const uint64_t thresh_err = minimum(limit_err, best_trial_err); - // TODO: Optimize this - unpack_etc1(etc_blk, etc_blk_unpacked); + color_rgba block_colors[4]; + etc_blk.get_block_colors(block_colors, 
0); - uint64_t trial_err = 0; - const uint64_t thresh_err = minimum((uint64_t)ceilf(cur_err * selector_remap_thresh), best_trial_err); - for (uint32_t p = 0; p < 16; p++) + const uint8_t* pSelectors = &m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](0, 0); + + if (r.get_params().m_perceptual) { - trial_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); - if (trial_err > thresh_err) - break; + for (uint32_t p = 0; p < 16; p++) + { + uint32_t sel = pSelectors[p]; + trial_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[sel], false); + if (trial_err > thresh_err) + break; + } } - - if (trial_err <= cur_err * selector_remap_thresh) + else { - if (trial_err < best_trial_err) + for (uint32_t p = 0; p < 16; p++) { - best_trial_err = trial_err; - best_trial_idx = trial_idx; - best_trial_history_buf_idx = j; + uint32_t sel = pSelectors[p]; + trial_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[sel], false); + if (trial_err > thresh_err) + break; } } + + if ((trial_err < best_trial_err) && (trial_err <= thresh_err)) + { + assert(trial_err <= limit_err); + + best_trial_err = trial_err; + best_trial_idx = trial_idx; + best_trial_history_buf_idx = j; + } } } @@ -1086,7 +1171,8 @@ namespace basisu total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / get_total_blocks(), total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / get_total_blocks()); - if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0)) + //if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0)) + if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 1) && (!m_params.m_used_global_codebooks)) { int_vec unused; r.reoptimize_remapped_endpoints(block_endpoint_indices, unused, false, &block_selector_indices); @@ -1168,8 +1254,8 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slices.size(); 
slice_index++) { - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; @@ -1296,10 +1382,53 @@ namespace basisu { const basisu_frontend& r = *m_pFront_end; + // The endpoint indices may have been changed by the backend's RDO step, so go and figure out which ones are actually used again. + bool_vec old_endpoint_was_used(r.get_total_endpoint_clusters()); + uint32_t first_old_entry_index = UINT32_MAX; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y; + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + const uint32_t old_endpoint_index = m.m_endpoint_index; + + old_endpoint_was_used[old_endpoint_index] = true; + first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index); + } // block_x + } // block_y + } // slice_index + + debug_printf("basisu_backend::encode_endpoint_palette: first_old_entry_index: %u\n", first_old_entry_index); + // Maps NEW to OLD endpoints - uint_vec endpoint_remap_table_inv(r.get_total_endpoint_clusters()); + uint_vec endpoint_remap_table_new_to_old(r.get_total_endpoint_clusters()); + endpoint_remap_table_new_to_old.set_all(first_old_entry_index); + + bool_vec new_endpoint_was_used(r.get_total_endpoint_clusters()); + for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++) - 
endpoint_remap_table_inv[m_endpoint_remap_table_old_to_new[old_endpoint_index]] = old_endpoint_index; + { + if (old_endpoint_was_used[old_endpoint_index]) + { + const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index]; + + new_endpoint_was_used[new_endpoint_index] = true; + + endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index; + } + } + + // TODO: Some new endpoint palette entries may actually be unused and aren't worth coding. Fix that. + + uint32_t total_unused_new_entries = 0; + for (uint32_t i = 0; i < new_endpoint_was_used.size(); i++) + if (!new_endpoint_was_used[i]) + total_unused_new_entries++; + debug_printf("basisu_backend::encode_endpoint_palette: total_unused_new_entries: %u out of %u\n", total_unused_new_entries, new_endpoint_was_used.size()); bool is_grayscale = true; for (uint32_t old_endpoint_index = 0; old_endpoint_index < (uint32_t)m_endpoint_palette.size(); old_endpoint_index++) @@ -1324,7 +1453,7 @@ namespace basisu for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++) { - const uint32_t old_endpoint_index = endpoint_remap_table_inv[new_endpoint_index]; + const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index]; int delta_inten = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten; inten_delta_hist.inc(delta_inten & 7); @@ -1390,7 +1519,7 @@ namespace basisu for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++) { - const uint32_t old_endpoint_index = endpoint_remap_table_inv[new_endpoint_index]; + const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index]; int delta_inten = (m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten) & 7; coder.put_code(delta_inten, inten_delta_model); @@ -1644,9 +1773,11 @@ namespace basisu uint32_t basisu_backend::encode() { - const bool is_video = 
m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + //const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; m_output.m_slice_desc = m_slices; m_output.m_etc1s = m_params.m_etc1s; + m_output.m_uses_global_codebooks = m_params.m_used_global_codebooks; + m_output.m_srgb = m_pFront_end->get_params().m_perceptual; create_endpoint_palette(); create_selector_palette(); diff --git a/src/deps/basis-universal/basisu_backend.h b/src/deps/basis-universal/encoder/basisu_backend.h similarity index 87% rename from src/deps/basis-universal/basisu_backend.h rename to src/deps/basis-universal/encoder/basisu_backend.h index 1c72fa8cc8..393dccd22f 100644 --- a/src/deps/basis-universal/basisu_backend.h +++ b/src/deps/basis-universal/encoder/basisu_backend.h @@ -1,5 +1,5 @@ // basisu_backend.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ // limitations under the License. 
#pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_enc.h" -#include "transcoder/basisu_transcoder_internal.h" -#include "transcoder/basisu_global_selector_palette.h" +#include "../transcoder/basisu_transcoder_internal.h" +#include "../transcoder/basisu_global_selector_palette.h" #include "basisu_frontend.h" namespace basisu @@ -49,7 +49,7 @@ namespace basisu } }; - typedef std::vector encoder_block_vec; + typedef basisu::vector encoder_block_vec; typedef vector2D encoder_block_vec2D; struct etc1_endpoint_palette_entry @@ -69,7 +69,7 @@ namespace basisu } }; - typedef std::vector etc1_endpoint_palette_entry_vec; + typedef basisu::vector etc1_endpoint_palette_entry_vec; struct basisu_backend_params { @@ -84,6 +84,8 @@ namespace basisu uint32_t m_global_sel_codebook_mod_bits; bool m_use_hybrid_sel_codebooks; + bool m_used_global_codebooks; + basisu_backend_params() { clear(); @@ -102,6 +104,7 @@ namespace basisu m_global_sel_codebook_pal_bits = ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS; m_global_sel_codebook_mod_bits = basist::etc1_global_palette_entry_modifier::cTotalBits; m_use_hybrid_sel_codebooks = false; + m_used_global_codebooks = false; } }; @@ -111,10 +114,12 @@ namespace basisu { clear(); } + void clear() { clear_obj(*this); } + uint32_t m_first_block_index; uint32_t m_orig_width; @@ -135,11 +140,15 @@ namespace basisu bool m_iframe; }; - typedef std::vector basisu_backend_slice_desc_vec; + typedef basisu::vector basisu_backend_slice_desc_vec; struct basisu_backend_output { + basist::basis_tex_format m_tex_format; + bool m_etc1s; + bool m_uses_global_codebooks; + bool m_srgb; uint32_t m_num_endpoints; uint32_t m_num_selectors; @@ -150,7 +159,7 @@ namespace basisu basisu_backend_slice_desc_vec m_slice_desc; uint8_vec m_slice_image_tables; - std::vector m_slice_image_data; + basisu::vector m_slice_image_data; uint16_vec m_slice_image_crcs; basisu_backend_output() @@ -160,7 +169,10 @@ namespace basisu void clear() { 
+ m_tex_format = basist::basis_tex_format::cETC1S; m_etc1s = false; + m_uses_global_codebooks = false; + m_srgb = true; m_num_endpoints = 0; m_num_selectors = 0; @@ -198,6 +210,7 @@ namespace basisu uint32_t encode(); const basisu_backend_output &get_output() const { return m_output; } + const basisu_backend_params& get_params() const { return m_params; } private: basisu_frontend *m_pFront_end; @@ -216,15 +229,17 @@ namespace basisu bool m_was_used; }; - typedef std::vector etc1_global_selector_cb_entry_desc_vec; + typedef basisu::vector etc1_global_selector_cb_entry_desc_vec; etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc; - std::vector m_slice_encoder_blocks; + basisu::vector m_slice_encoder_blocks; // Maps OLD to NEW endpoint/selector indices uint_vec m_endpoint_remap_table_old_to_new; uint_vec m_endpoint_remap_table_new_to_old; + bool_vec m_old_endpoint_was_used; + bool_vec m_new_endpoint_was_used; uint_vec m_selector_remap_table_old_to_new; diff --git a/src/deps/basis-universal/basisu_basis_file.cpp b/src/deps/basis-universal/encoder/basisu_basis_file.cpp similarity index 67% rename from src/deps/basis-universal/basisu_basis_file.cpp rename to src/deps/basis-universal/encoder/basisu_basis_file.cpp index 3e6b1906b9..f4c77bef23 100644 --- a/src/deps/basis-universal/basisu_basis_file.cpp +++ b/src/deps/basis-universal/encoder/basisu_basis_file.cpp @@ -1,5 +1,5 @@ // basisu_basis_file.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "basisu_basis_file.h" -#include "transcoder/basisu_transcoder.h" +#include "../transcoder/basisu_transcoder.h" // The output file version. 
Keep in sync with BASISD_SUPPORTED_BASIS_VERSION. #define BASIS_FILE_VERSION (0x13) @@ -31,15 +31,26 @@ namespace basisu m_header.m_total_images = 0; for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) m_header.m_total_images = maximum(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1); - - m_header.m_format = 0;// basist::block_format::cETC1; + + m_header.m_tex_format = (int)encoder_output.m_tex_format; m_header.m_flags = 0; if (encoder_output.m_etc1s) + { + assert(encoder_output.m_tex_format == basist::basis_tex_format::cETC1S); m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagETC1S; + } + else + { + assert(encoder_output.m_tex_format != basist::basis_tex_format::cETC1S); + } if (y_flipped) m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagYFlipped; + if (encoder_output.m_uses_global_codebooks) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagUsesGlobalCodebook; + if (encoder_output.m_srgb) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagSRGB; for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) { @@ -57,12 +68,26 @@ namespace basisu m_header.m_userdata1 = userdata1; m_header.m_total_endpoints = encoder_output.m_num_endpoints; - m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs; - m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size(); + if (!encoder_output.m_uses_global_codebooks) + { + m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs; + m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size(); + } + else + { + assert(!m_endpoint_cb_file_ofs); + } m_header.m_total_selectors = encoder_output.m_num_selectors; - m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs; - m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size(); + if (!encoder_output.m_uses_global_codebooks) + { + m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs; + 
m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size(); + } + else + { + assert(!m_selector_cb_file_ofs); + } m_header.m_tables_file_ofs = m_tables_file_ofs; m_header.m_tables_file_size = (uint32_t)encoder_output.m_slice_image_tables.size(); @@ -85,7 +110,7 @@ namespace basisu m_images_descs[i].m_level_index = slice_descs[i].m_mip_index; if (slice_descs[i].m_alpha) - m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsIsAlphaData; + m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsHasAlpha; if (slice_descs[i].m_iframe) m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsFrameIsIFrame; @@ -127,14 +152,26 @@ namespace basisu assert(m_comp_data.size() == m_slice_descs_file_ofs); append_vector(m_comp_data, reinterpret_cast(&m_images_descs[0]), m_images_descs.size() * sizeof(m_images_descs[0])); - assert(m_comp_data.size() == m_endpoint_cb_file_ofs); - append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size()); + if (!encoder_output.m_uses_global_codebooks) + { + if (encoder_output.m_endpoint_palette.size()) + { + assert(m_comp_data.size() == m_endpoint_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size()); + } - assert(m_comp_data.size() == m_selector_cb_file_ofs); - append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size()); + if (encoder_output.m_selector_palette.size()) + { + assert(m_comp_data.size() == m_selector_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size()); + } + } - assert(m_comp_data.size() == m_tables_file_ofs); - append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_slice_image_tables[0]), 
encoder_output.m_slice_image_tables.size()); + if (encoder_output.m_slice_image_tables.size()) + { + assert(m_comp_data.size() == m_tables_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size()); + } assert(m_comp_data.size() == m_first_image_file_ofs); for (uint32_t i = 0; i < slice_descs.size(); i++) @@ -163,8 +200,17 @@ namespace basisu const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; // The Basis file uses 32-bit fields for lots of stuff, so make sure it's not too large. - uint64_t check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + uint64_t check_size = 0; + if (!encoder_output.m_uses_global_codebooks) + { + check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + (uint64_t)encoder_output.m_endpoint_palette.size() + (uint64_t)encoder_output.m_selector_palette.size() + (uint64_t)encoder_output.m_slice_image_tables.size(); + } + else + { + check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + (uint64_t)encoder_output.m_slice_image_tables.size(); + } if (check_size >= 0xFFFF0000ULL) { error_printf("basisu_file::init: File is too large!\n"); @@ -173,10 +219,29 @@ namespace basisu m_header_file_ofs = 0; m_slice_descs_file_ofs = sizeof(basist::basis_file_header); - m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); - m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size(); - m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size(); - m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size(); + if (encoder_output.m_tex_format == 
basist::basis_tex_format::cETC1S) + { + if (encoder_output.m_uses_global_codebooks) + { + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + } + else + { + m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size(); + m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size(); + } + m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size(); + } + else + { + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = 0; + m_first_image_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + } uint64_t total_file_size = m_first_image_file_ofs; for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++) diff --git a/src/deps/basis-universal/basisu_basis_file.h b/src/deps/basis-universal/encoder/basisu_basis_file.h similarity index 95% rename from src/deps/basis-universal/basisu_basis_file.h rename to src/deps/basis-universal/encoder/basisu_basis_file.h index df3abbdcfd..98498a0121 100644 --- a/src/deps/basis-universal/basisu_basis_file.h +++ b/src/deps/basis-universal/encoder/basisu_basis_file.h @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#pragma once -#include "transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_file_headers.h" #include "basisu_backend.h" namespace basisu @@ -49,7 +49,7 @@ namespace basisu private: basist::basis_file_header m_header; - std::vector m_images_descs; + basisu::vector m_images_descs; uint8_vec m_comp_data; diff --git a/src/deps/basis-universal/encoder/basisu_bc7enc.cpp b/src/deps/basis-universal/encoder/basisu_bc7enc.cpp new file mode 100644 index 0000000000..22fdfa603f --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_bc7enc.cpp @@ -0,0 +1,1983 @@ +// File: basisu_bc7enc.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_bc7enc.h" + +#ifdef _DEBUG +#define BC7ENC_CHECK_OVERALL_ERROR 1 +#else +#define BC7ENC_CHECK_OVERALL_ERROR 0 +#endif + +using namespace basist; + +namespace basisu +{ + +// Helpers +static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->m_c[0] = (uint8_t)clampi(r, 0, 255); pRes->m_c[1] = (uint8_t)clampi(g, 0, 255); pRes->m_c[2] = (uint8_t)clampi(b, 0, 255); pRes->m_c[3] = (uint8_t)clampi(a, 0, 255); return pRes; } +static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->m_c[0] = (uint8_t)r; pRes->m_c[1] = (uint8_t)g; pRes->m_c[2] = (uint8_t)b; pRes->m_c[3] = (uint8_t)a; return pRes; } +static inline bc7enc_bool color_quad_u8_notequals(const color_quad_u8 *pLHS, const color_quad_u8 *pRHS) { return (pLHS->m_c[0] != pRHS->m_c[0]) || (pLHS->m_c[1] != pRHS->m_c[1]) || (pLHS->m_c[2] != pRHS->m_c[2]) || (pLHS->m_c[3] != pRHS->m_c[3]); } +static inline bc7enc_vec4F*vec4F_set_scalar(bc7enc_vec4F*pV, float x) { pV->m_c[0] = x; pV->m_c[1] = x; pV->m_c[2] = x; pV->m_c[3] = x; return pV; } +static inline bc7enc_vec4F*vec4F_set(bc7enc_vec4F*pV, float x, float y, float z, float w) { pV->m_c[0] = x; pV->m_c[1] = y; pV->m_c[2] = z; pV->m_c[3] = w; return pV; } +static inline bc7enc_vec4F*vec4F_saturate_in_place(bc7enc_vec4F*pV) { pV->m_c[0] = saturate(pV->m_c[0]); pV->m_c[1] = saturate(pV->m_c[1]); pV->m_c[2] = saturate(pV->m_c[2]); pV->m_c[3] = saturate(pV->m_c[3]); return pV; } +static inline bc7enc_vec4F vec4F_saturate(const bc7enc_vec4F*pV) { bc7enc_vec4F res; res.m_c[0] = saturate(pV->m_c[0]); res.m_c[1] = saturate(pV->m_c[1]); res.m_c[2] = saturate(pV->m_c[2]); res.m_c[3] = saturate(pV->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_from_color(const color_quad_u8 *pC) { bc7enc_vec4F res; vec4F_set(&res, pC->m_c[0], pC->m_c[1], pC->m_c[2], pC->m_c[3]); return res; } 
+static inline bc7enc_vec4F vec4F_add(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] + pRHS->m_c[0], pLHS->m_c[1] + pRHS->m_c[1], pLHS->m_c[2] + pRHS->m_c[2], pLHS->m_c[3] + pRHS->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_sub(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] - pRHS->m_c[0], pLHS->m_c[1] - pRHS->m_c[1], pLHS->m_c[2] - pRHS->m_c[2], pLHS->m_c[3] - pRHS->m_c[3]); return res; } +static inline float vec4F_dot(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { return pLHS->m_c[0] * pRHS->m_c[0] + pLHS->m_c[1] * pRHS->m_c[1] + pLHS->m_c[2] * pRHS->m_c[2] + pLHS->m_c[3] * pRHS->m_c[3]; } +static inline bc7enc_vec4F vec4F_mul(const bc7enc_vec4F*pLHS, float s) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] * s, pLHS->m_c[1] * s, pLHS->m_c[2] * s, pLHS->m_c[3] * s); return res; } +static inline bc7enc_vec4F* vec4F_normalize_in_place(bc7enc_vec4F*pV) { float s = pV->m_c[0] * pV->m_c[0] + pV->m_c[1] * pV->m_c[1] + pV->m_c[2] * pV->m_c[2] + pV->m_c[3] * pV->m_c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->m_c[0] *= s; pV->m_c[1] *= s; pV->m_c[2] *= s; pV->m_c[3] *= s; } return pV; } + +// Precomputed weight constants used during least fit determination. 
For each entry in g_bc7_weights[]: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w +const float g_bc7_weights1x[2 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights2x[4 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.107666f, 0.220459f, 0.451416f, 0.328125f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights3x[8 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.019775f, 0.120850f, 0.738525f, 0.140625f, 0.079102f, 0.202148f, 0.516602f, 0.281250f, 0.177979f, 0.243896f, 0.334229f, 0.421875f, 0.334229f, 0.243896f, 0.177979f, 0.578125f, 0.516602f, 0.202148f, + 0.079102f, 0.718750f, 0.738525f, 0.120850f, 0.019775f, 0.859375f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights4x[16 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.019775f, 0.120850f, 0.738525f, 0.140625f, 0.041260f, 0.161865f, 0.635010f, 0.203125f, 0.070557f, 0.195068f, 0.539307f, 0.265625f, 0.107666f, 0.220459f, + 0.451416f, 0.328125f, 0.165039f, 0.241211f, 0.352539f, 0.406250f, 0.219727f, 0.249023f, 0.282227f, 0.468750f, 0.282227f, 0.249023f, 0.219727f, 0.531250f, 0.352539f, 0.241211f, 0.165039f, 0.593750f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 0.539307f, 0.195068f, 0.070557f, 0.734375f, + 0.635010f, 0.161865f, 0.041260f, 0.796875f, 0.738525f, 0.120850f, 0.019775f, 0.859375f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights4x[16 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.015625f, 0.109375f, 0.765625f, 0.125000f, 0.035156f, 0.152344f, 0.660156f, 0.187500f, 0.070557f, 0.195068f, 0.539307f, 0.265625f, 0.107666f, 0.220459f, + 0.451416f, 0.328125f, 0.152588f, 0.238037f, 0.371338f, 0.390625f, 0.205322f, 0.247803f, 0.299072f, 0.453125f, 0.299072f, 
0.247803f, 0.205322f, 0.546875f, 0.371338f, 0.238037f, 0.152588f, 0.609375f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 0.539307f, 0.195068f, 0.070557f, 0.734375f, + 0.660156f, 0.152344f, 0.035156f, 0.812500f, 0.765625f, 0.109375f, 0.015625f, 0.875000f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights5x[32 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000977f, 0.030273f, 0.938477f, 0.031250f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.008789f, 0.084961f, 0.821289f, + 0.093750f, 0.015625f, 0.109375f, 0.765625f, 0.125000f, 0.024414f, 0.131836f, 0.711914f, 0.156250f, 0.035156f, 0.152344f, 0.660156f, 0.187500f, 0.047852f, 0.170898f, 0.610352f, 0.218750f, 0.062500f, 0.187500f, + 0.562500f, 0.250000f, 0.079102f, 0.202148f, 0.516602f, 0.281250f, 0.097656f, 0.214844f, 0.472656f, 0.312500f, 0.118164f, 0.225586f, 0.430664f, 0.343750f, 0.140625f, 0.234375f, 0.390625f, 0.375000f, 0.165039f, + 0.241211f, 0.352539f, 0.406250f, 0.191406f, 0.246094f, 0.316406f, 0.437500f, 0.219727f, 0.249023f, 0.282227f, 0.468750f, 0.282227f, 0.249023f, 0.219727f, 0.531250f, 0.316406f, 0.246094f, 0.191406f, 0.562500f, + 0.352539f, 0.241211f, 0.165039f, 0.593750f, 0.390625f, 0.234375f, 0.140625f, 0.625000f, 0.430664f, 0.225586f, 0.118164f, 0.656250f, 0.472656f, 0.214844f, 0.097656f, 0.687500f, 0.516602f, 0.202148f, 0.079102f, + 0.718750f, 0.562500f, 0.187500f, 0.062500f, 0.750000f, 0.610352f, 0.170898f, 0.047852f, 0.781250f, 0.660156f, 0.152344f, 0.035156f, 0.812500f, 0.711914f, 0.131836f, 0.024414f, 0.843750f, 0.765625f, 0.109375f, + 0.015625f, 0.875000f, 0.821289f, 0.084961f, 0.008789f, 0.906250f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 0.938477f, 0.030273f, 0.000977f, 0.968750f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights_3levelsx[3 * 4] = { + 0.000000f, 0.000000f, 1.000000f, 0.000000f, + .5f * .5f, (1.0f - .5f) * .5f, (1.0f - .5f) * (1.0f - .5f), .5f, + 1.000000f, 0.000000f, 
0.000000f, 1.000000f }; + +static endpoint_err g_bc7_mode_1_optimal_endpoints[256][2]; // [c][pbit] +static const uint32_t BC7ENC_MODE_1_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_4bit_3bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_4bit_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range7_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range13_4bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_range13_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range11_5bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX = 13; // not 1, which is optimal, because 26 losslessly maps to BC7 4-bit weights + +astc_quant_bin g_astc_sorted_order_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + +static uint8_t g_astc_nearest_sorted_index[BC7ENC_TOTAL_ASTC_RANGES][256]; + +static void astc_init() +{ + for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++) + { + if (!astc_is_valid_endpoint_range(range)) + continue; + + const uint32_t levels = astc_get_levels(range); + + uint32_t vals[256]; + // TODO + for (uint32_t i = 0; i < levels; i++) + vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; + + std::sort(vals, vals + levels); + + for (uint32_t i = 0; i < levels; i++) + { + uint32_t order = vals[i] & 0xFF; + uint32_t unq = vals[i] >> 8; + + g_astc_sorted_order_unquant[range][i].m_unquant = (uint8_t)unq; + g_astc_sorted_order_unquant[range][i].m_index = (uint8_t)order; + + } // i + +#if 0 + if (g_astc_bise_range_table[range][1] || g_astc_bise_range_table[range][2]) + { + printf("// 
Range: %u, Levels: %u, Bits: %u, Trits: %u, Quints: %u\n", range, levels, g_astc_bise_range_table[range][0], g_astc_bise_range_table[range][1], g_astc_bise_range_table[range][2]); + + printf("{"); + for (uint32_t i = 0; i < levels; i++) + { + printf("{%u,%u}", g_astc_sorted_order_unquant[range][i].m_index, g_astc_sorted_order_unquant[range][i].m_unquant); + if (i != (levels - 1)) + printf(","); + } + printf("}\n"); + } +#endif + +#if 0 + if (g_astc_bise_range_table[range][1] || g_astc_bise_range_table[range][2]) + { + printf("// Range: %u, Levels: %u, Bits: %u, Trits: %u, Quints: %u\n", range, levels, g_astc_bise_range_table[range][0], g_astc_bise_range_table[range][1], g_astc_bise_range_table[range][2]); + + printf("{"); + for (uint32_t i = 0; i < levels; i++) + { + printf("{%u,%u}", g_astc_unquant[range][i].m_index, g_astc_unquant[range][i].m_unquant); + if (i != (levels - 1)) + printf(","); + } + printf("}\n"); + } +#endif + + for (uint32_t i = 0; i < 256; i++) + { + uint32_t best_index = 0; + int best_err = INT32_MAX; + + for (uint32_t j = 0; j < levels; j++) + { + int err = g_astc_sorted_order_unquant[range][j].m_unquant - i; + if (err < 0) + err = -err; + if (err < best_err) + { + best_err = err; + best_index = j; + } + } + + g_astc_nearest_sorted_index[range][i] = (uint8_t)best_index; + } // i + } // range +} + +static inline uint32_t astc_interpolate_linear(uint32_t l, uint32_t h, uint32_t w) +{ + l = (l << 8) | l; + h = (h << 8) | h; + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; + return k >> 8; +} + +// Initialize the lookup table used for optimal single color compression in mode 1. Must be called before encoding. 
+void bc7enc_compress_block_init() +{ + astc_init(); + + // BC7 666.1 + for (int c = 0; c < 256; c++) + { + for (uint32_t lp = 0; lp < 2; lp++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 64; l++) + { + uint32_t low = ((l << 1) | lp) << 1; + low |= (low >> 7); + for (uint32_t h = 0; h < 64; h++) + { + uint32_t high = ((h << 1) | lp) << 1; + high |= (high >> 7); + const int k = (low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6; + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + g_bc7_mode_1_optimal_endpoints[c][lp] = best; + } // lp + } // c + + // ASTC [0,15] 3-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 16; l++) + { + uint32_t low = (l << 4) | l; + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t high = (h << 4) | h; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_4bit_3bit_optimal_endpoints[c] = best; + + } // c + + // ASTC [0,15] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 16; l++) + { + uint32_t low = (l << 4) | l; + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t high = (h << 4) | h; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_4bit_2bit_optimal_endpoints[c] = best; + 
+ } // c + + // ASTC range 7 [0,11] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 12; l++) + { + uint32_t low = g_astc_sorted_order_unquant[7][l].m_unquant; + + for (uint32_t h = 0; h < 12; h++) + { + uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range7_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 13 [0,47] 4-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 48; l++) + { + uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; + + for (uint32_t h = 0; h < 48; h++) + { + uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range13_4bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 13 [0,47] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 48; l++) + { + uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; + + for (uint32_t h = 0; h < 48; h++) + { + uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } 
// l + + g_astc_range13_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 11 [0,31] 5-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 32; l++) + { + uint32_t low = g_astc_sorted_order_unquant[11][l].m_unquant; + + for (uint32_t h = 0; h < 32; h++) + { + uint32_t high = g_astc_sorted_order_unquant[11][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range11_5bit_optimal_endpoints[c] = best; + + } // c +} + +static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
+ double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + double q00_a = 0.0f, q10_a = 0.0f, t_a = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + q10_a = t_a - q00_a; + + z01 = z10; + + double det = z00 * z11 - z01 * z10; + if (det != 0.0f) + det = 1.0f / det; + + double iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = (float)(iz00 * q00_a + iz01 * q10_a); pXh->m_c[3] = (float)(iz10 * q00_a + iz11 * q10_a); + + for (uint32_t c = 0; c < 4; c++) + { + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < N; i++) + { + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); + } + + if (lo_v == hi_v) + { + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; + } + } + } +} + +static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F*pSelector_weights, bc7enc_vec4F*pXl, 
bc7enc_vec4F*pXh, const color_quad_u8 *pColors) +{ + double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + double det = z00 * z11 - z01 * z10; + if (det != 0.0f) + det = 1.0f / det; + + double iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = 255.0f; pXh->m_c[3] = 255.0f; + + for (uint32_t c = 0; c < 3; c++) + { + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < N; i++) + { + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); + } + + if (lo_v == hi_v) + { + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; + } + } + } +} + +static inline color_quad_u8 scale_color(const color_quad_u8* pC, const color_cell_compressor_params* pParams) +{ + color_quad_u8 results; + + if (pParams->m_astc_endpoint_range) + { + for (uint32_t i = 0; i < 4; i++) + { + results.m_c[i] = 
g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pC->m_c[i]].m_unquant; + } + } + else + { + const uint32_t n = pParams->m_comp_bits + (pParams->m_has_pbits ? 1 : 0); + assert((n >= 4) && (n <= 8)); + + for (uint32_t i = 0; i < 4; i++) + { + uint32_t v = pC->m_c[i] << (8 - n); + v |= (v >> n); + assert(v <= 255); + results.m_c[i] = (uint8_t)(v); + } + } + + return results; +} + +static inline uint64_t compute_color_distance_rgb(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +{ + int dr, dg, db; + + if (perceptual) + { + const int l1 = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + const int cr1 = ((int)pE1->m_c[0] << 9) - l1; + const int cb1 = ((int)pE1->m_c[2] << 9) - l1; + const int l2 = pE2->m_c[0] * 109 + pE2->m_c[1] * 366 + pE2->m_c[2] * 37; + const int cr2 = ((int)pE2->m_c[0] << 9) - l2; + const int cb2 = ((int)pE2->m_c[2] << 9) - l2; + dr = (l1 - l2) >> 8; + dg = (cr1 - cr2) >> 8; + db = (cb1 - cb2) >> 8; + } + else + { + dr = (int)pE1->m_c[0] - (int)pE2->m_c[0]; + dg = (int)pE1->m_c[1] - (int)pE2->m_c[1]; + db = (int)pE1->m_c[2] - (int)pE2->m_c[2]; + } + + return weights[0] * (uint32_t)(dr * dr) + weights[1] * (uint32_t)(dg * dg) + weights[2] * (uint32_t)(db * db); +} + +static inline uint64_t compute_color_distance_rgba(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +{ + int da = (int)pE1->m_c[3] - (int)pE2->m_c[3]; + return compute_color_distance_rgb(pE1, pE2, perceptual, weights) + (weights[3] * (uint32_t)(da * da)); +} + +static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + uint32_t best_err = UINT_MAX; + uint32_t best_p = 0; + + for (uint32_t p = 0; p < 2; p++) + { + uint32_t err = g_bc7_mode_1_optimal_endpoints[r][p].m_error + g_bc7_mode_1_optimal_endpoints[g][p].m_error + 
g_bc7_mode_1_optimal_endpoints[b][p].m_error; + if (err < best_err) + { + best_err = err; + best_p = p; + } + } + + const endpoint_err *pEr = &g_bc7_mode_1_optimal_endpoints[r][best_p]; + const endpoint_err *pEg = &g_bc7_mode_1_optimal_endpoints[g][best_p]; + const endpoint_err *pEb = &g_bc7_mode_1_optimal_endpoints[b][best_p]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = best_p; + pResults->m_pbits[1] = 0; + + memset(pSelectors, BC7ENC_MODE_1_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = ((pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; + low |= (low >> 7); + + uint32_t high = ((pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; + high |= (high >> 7); + + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + const endpoint_err *pEr = &g_astc_4bit_3bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_4bit_3bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_4bit_3bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for 
(uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint32_t a, uint8_t *pSelectors) +{ + const endpoint_err *pEr = &g_astc_4bit_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_4bit_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_4bit_2bit_optimal_endpoints[b]; + const endpoint_err *pEa = &g_astc_4bit_2bit_optimal_endpoints[a]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, pEa->m_lo); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, pEa->m_hi); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + 
pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgba(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 7 && pParams->m_num_selector_weights == 4); + + const endpoint_err *pEr = &g_astc_range7_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_range7_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_range7_2bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX, 
pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 13 && pParams->m_num_selector_weights == 4 && !pParams->m_has_alpha); + + const endpoint_err *pEr = &g_astc_range13_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_range13_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_range13_2bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 47); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 47); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = 
g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range11_5bit_to_one_color(const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t* pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 11 && pParams->m_num_selector_weights == 32 && !pParams->m_has_alpha); + + const endpoint_err* pEr = &g_astc_range11_5bit_optimal_endpoints[r]; + const endpoint_err* pEg = &g_astc_range11_5bit_optimal_endpoints[g]; + const endpoint_err* pEb = &g_astc_range11_5bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 31); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 31); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = g_astc_sorted_order_unquant[11][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[11][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] 
= (uint8_t)astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + color_quad_u8 quantMinColor = *pLow; + color_quad_u8 quantMaxColor = *pHigh; + + if (pParams->m_has_pbits) + { + uint32_t minPBit, maxPBit; + + if (pParams->m_endpoints_share_pbit) + maxPBit = minPBit = pbits[0]; + else + { + minPBit = pbits[0]; + maxPBit = pbits[1]; + } + + quantMinColor.m_c[0] = (uint8_t)((pLow->m_c[0] << 1) | minPBit); + quantMinColor.m_c[1] = (uint8_t)((pLow->m_c[1] << 1) | minPBit); + quantMinColor.m_c[2] = (uint8_t)((pLow->m_c[2] << 1) | minPBit); + quantMinColor.m_c[3] = (uint8_t)((pLow->m_c[3] << 1) | minPBit); + + quantMaxColor.m_c[0] = (uint8_t)((pHigh->m_c[0] << 1) | maxPBit); + quantMaxColor.m_c[1] = (uint8_t)((pHigh->m_c[1] << 1) | maxPBit); + quantMaxColor.m_c[2] = (uint8_t)((pHigh->m_c[2] << 1) | maxPBit); + quantMaxColor.m_c[3] = (uint8_t)((pHigh->m_c[3] << 1) | maxPBit); + } + + color_quad_u8 actualMinColor = scale_color(&quantMinColor, pParams); + color_quad_u8 actualMaxColor = scale_color(&quantMaxColor, pParams); + + const uint32_t N = pParams->m_num_selector_weights; + assert(N >= 1 && N <= 32); + + color_quad_u8 weightedColors[32]; + weightedColors[0] = actualMinColor; + weightedColors[N - 1] = actualMaxColor; + + const uint32_t nc = pParams->m_has_alpha ? 
4 : 3; + if (pParams->m_astc_endpoint_range) + { + for (uint32_t i = 1; i < (N - 1); i++) + { + for (uint32_t j = 0; j < nc; j++) + weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate_linear(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i])); + } + } + else + { + for (uint32_t i = 1; i < (N - 1); i++) + for (uint32_t j = 0; j < nc; j++) + weightedColors[i].m_c[j] = (uint8_t)((actualMinColor.m_c[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor.m_c[j] * pParams->m_pSelector_weights[i] + 32) >> 6); + } + + const int lr = actualMinColor.m_c[0]; + const int lg = actualMinColor.m_c[1]; + const int lb = actualMinColor.m_c[2]; + const int dr = actualMaxColor.m_c[0] - lr; + const int dg = actualMaxColor.m_c[1] - lg; + const int db = actualMaxColor.m_c[2] - lb; + + uint64_t total_err = 0; + + if (pParams->m_pForce_selectors) + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8* pC = &pParams->m_pPixels[i]; + + const uint8_t sel = pParams->m_pForce_selectors[i]; + assert(sel < N); + + total_err += (pParams->m_has_alpha ? 
compute_color_distance_rgba : compute_color_distance_rgb)(&weightedColors[sel], pC, pParams->m_perceptual, pParams->m_weights); + + pResults->m_pSelectors_temp[i] = sel; + } + } + else if (!pParams->m_perceptual) + { + if (pParams->m_has_alpha) + { + const int la = actualMinColor.m_c[3]; + const int da = actualMaxColor.m_c[3] - la; + + const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + squarei(da) + .00000125f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + int a = pC->m_c[3]; + + int best_sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db + (a - la) * da) * f + .5f); + best_sel = clampi(best_sel, 1, N - 1); + + uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, BC7ENC_FALSE, pParams->m_weights); + + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 1) == 0) + best_sel = 0; + } + else if (err1 > err0) + { + err1 = err0; + --best_sel; + } + total_err += err1; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + else + { + const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + + int sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db) * f + .5f); + sel = clampi(sel, 1, N - 1); + + uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, BC7ENC_FALSE, pParams->m_weights); + + int best_sel = sel; + uint64_t best_err = err1; + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 
1) == 0) + best_sel = 0; + } + else if (err0 < best_err) + { + best_err = err0; + best_sel = sel - 1; + } + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + } + else + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_sel = 0; + + if (pParams->m_has_alpha) + { + for (uint32_t j = 0; j < N; j++) + { + uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + if (err < best_err) + { + best_err = err; + best_sel = j; + } + // Prefer non-interpolation + else if ((err == best_err) && (j == (N - 1))) + best_sel = j; + } + } + else + { + for (uint32_t j = 0; j < N; j++) + { + uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + if (err < best_err) + { + best_err = err; + best_sel = j; + } + // Prefer non-interpolation + else if ((err == best_err) && (j == (N - 1))) + best_sel = j; + } + } + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + + if (total_err < pResults->m_best_overall_err) + { + pResults->m_best_overall_err = total_err; + + pResults->m_low_endpoint = *pLow; + pResults->m_high_endpoint = *pHigh; + + pResults->m_pbits[0] = pbits[0]; + pResults->m_pbits[1] = pbits[1]; + + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + } + + return total_err; +} + +static bool areDegenerateEndpoints(color_quad_u8* pTrialMinColor, color_quad_u8* pTrialMaxColor, const bc7enc_vec4F* pXl, const bc7enc_vec4F* pXh) +{ + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.0f) + return true; + } + } + + return false; +} + +static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, color_quad_u8 *pTrialMaxColor, const 
bc7enc_vec4F*pXl, const bc7enc_vec4F*pXh, uint32_t iscale, int flags)
+{
+	if (mode == 255)
+	{
+		for (uint32_t i = 0; i < 3; i++)
+		{
+			if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i])
+			{
+				if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.000125f)
+				{
+					if (flags & 1)
+					{
+						if (pTrialMinColor->m_c[i] > 0)
+							pTrialMinColor->m_c[i]--;
+					}
+					if (flags & 2)
+					{
+						if (pTrialMaxColor->m_c[i] < iscale)
+							pTrialMaxColor->m_c[i]++;
+					}
+				}
+			}
+		}
+	}
+	else if (mode == 1)
+	{
+		// fix degenerate case where the input collapses to a single colorspace voxel, and we lose all freedom (test with grayscale ramps)
+		for (uint32_t i = 0; i < 3; i++)
+		{
+			if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i])
+			{
+				if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.000125f)
+				{
+					if (pTrialMinColor->m_c[i] > (iscale >> 1))
+					{
+						if (pTrialMinColor->m_c[i] > 0)
+							pTrialMinColor->m_c[i]--;
+						else
+							if (pTrialMaxColor->m_c[i] < iscale)
+								pTrialMaxColor->m_c[i]++;
+					}
+					else
+					{
+						if (pTrialMaxColor->m_c[i] < iscale)
+							pTrialMaxColor->m_c[i]++;
+						else if (pTrialMinColor->m_c[i] > 0)
+							pTrialMinColor->m_c[i]--;
+					}
+				}
+			}
+		}
+	}
+}
+
+static uint64_t find_optimal_solution(uint32_t mode, bc7enc_vec4F xl, bc7enc_vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults)
+{
+	vec4F_saturate_in_place(&xl); vec4F_saturate_in_place(&xh);
+
+	if (pParams->m_astc_endpoint_range)
+	{
+		const uint32_t levels = astc_get_levels(pParams->m_astc_endpoint_range);
+
+		const float scale = 255.0f;
+
+		color_quad_u8 trialMinColor8Bit, trialMaxColor8Bit;
+		color_quad_u8_set_clamped(&trialMinColor8Bit, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f));
+		color_quad_u8_set_clamped(&trialMaxColor8Bit, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f));
+
+		color_quad_u8 trialMinColor, trialMaxColor;
+		for (uint32_t i = 0; i < 4; 
i++) + { + trialMinColor.m_c[i] = g_astc_nearest_sorted_index[pParams->m_astc_endpoint_range][trialMinColor8Bit.m_c[i]]; + trialMaxColor.m_c[i] = g_astc_nearest_sorted_index[pParams->m_astc_endpoint_range][trialMaxColor8Bit.m_c[i]]; + } + + if (areDegenerateEndpoints(&trialMinColor, &trialMaxColor, &xl, &xh)) + { + color_quad_u8 trialMinColorOrig(trialMinColor), trialMaxColorOrig(trialMaxColor); + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 1); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 0); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 2); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 3); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || 
color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + else + { + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + { + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + } + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + } + else if (pParams->m_has_pbits) + { + const int iscalep = (1 << (pParams->m_comp_bits + 1)) - 1; + const float scalep = (float)iscalep; + + const int32_t totalComps = pParams->m_has_alpha ? 4 : 3; + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + + if (!pParams->m_endpoints_share_pbit) + { + float best_err0 = 1e+9; + float best_err1 = 1e+9; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + + // Notes: The pbit controls which quantization intervals are selected. + // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. 
+ // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value + // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) + // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); + color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + + float err0 = 0, err1 = 0; + for (int i = 0; i < totalComps; i++) + { + err0 += squaref(scaledLow.m_c[i] - xl.m_c[i] * 255.0f); + err1 += squaref(scaledHigh.m_c[i] - xh.m_c[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } + } + } + else + { + // Endpoints share pbits + float best_err = 1e+9; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); + color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + + float err = 0; 
+ for (int i = 0; i < totalComps; i++) + err += squaref((scaledLow.m_c[i] / 255.0f) - xl.m_c[i]) + squaref((scaledHigh.m_c[i] / 255.0f) - xh.m_c[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + } + } + + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, 0); + + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) + evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults); + } + else + { + const int iscale = (1 << pParams->m_comp_bits) - 1; + const float scale = (float)iscale; + + color_quad_u8 trialMinColor, trialMaxColor; + color_quad_u8_set_clamped(&trialMinColor, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale, 0); + + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + + return pResults->m_best_overall_err; +} + +void check_best_overall_error(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + const uint32_t n = pParams->m_num_selector_weights; + + assert(n <= 32); + + color_quad_u8 colors[32]; + for 
(uint32_t c = 0; c < 4; c++) + { + colors[0].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_low_endpoint.m_c[c]].m_unquant; + assert(colors[0].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[c]].m_unquant); + + colors[n-1].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_high_endpoint.m_c[c]].m_unquant; + assert(colors[n-1].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[c]].m_unquant); + } + + for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); + + uint64_t total_err = 0; + for (uint32_t p = 0; p < pParams->m_num_pixels; p++) + { + const color_quad_u8 &orig = pParams->m_pPixels[p]; + const color_quad_u8 &packed = colors[pResults->m_pSelectors[p]]; + + if (pParams->m_has_alpha) + total_err += compute_color_distance_rgba(&orig, &packed, pParams->m_perceptual, pParams->m_weights); + else + total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights); + } + assert(total_err == pResults->m_best_overall_err); + + // HACK HACK + //if (total_err != pResults->m_best_overall_err) + // printf("X"); +} + +static bool is_solid_rgb(const color_cell_compressor_params *pParams, uint32_t &r, uint32_t &g, uint32_t &b) +{ + r = pParams->m_pPixels[0].m_c[0]; + g = pParams->m_pPixels[0].m_c[1]; + b = pParams->m_pPixels[0].m_c[2]; + + bool allSame = true; + for (uint32_t i = 1; i < pParams->m_num_pixels; i++) + { + if ((r != pParams->m_pPixels[i].m_c[0]) || (g != pParams->m_pPixels[i].m_c[1]) || (b != pParams->m_pPixels[i].m_c[2])) + { + allSame = false; + break; + } + } + + return allSame; +} + +static bool is_solid_rgba(const color_cell_compressor_params *pParams, uint32_t &r, uint32_t &g, uint32_t &b, uint32_t &a) +{ + r 
= pParams->m_pPixels[0].m_c[0]; + g = pParams->m_pPixels[0].m_c[1]; + b = pParams->m_pPixels[0].m_c[2]; + a = pParams->m_pPixels[0].m_c[3]; + + bool allSame = true; + for (uint32_t i = 1; i < pParams->m_num_pixels; i++) + { + if ((r != pParams->m_pPixels[i].m_c[0]) || (g != pParams->m_pPixels[i].m_c[1]) || (b != pParams->m_pPixels[i].m_c[2]) || (a != pParams->m_pPixels[i].m_c[3])) + { + allSame = false; + break; + } + } + + return allSame; +} + +uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, const bc7enc_compress_block_params *pComp_params) +{ + if (!pParams->m_astc_endpoint_range) + { + assert((mode == 6) || (!pParams->m_has_alpha)); + } + assert(pParams->m_num_selector_weights >= 1 && pParams->m_num_selector_weights <= 32); + assert(pParams->m_pSelector_weights[0] == 0); + assert(pParams->m_pSelector_weights[pParams->m_num_selector_weights - 1] == 64); + + pResults->m_best_overall_err = UINT64_MAX; + + uint32_t cr, cg, cb, ca; + + // If the partition's colors are all the same, then just pack them as a single color. 
+ if (!pParams->m_pForce_selectors) + { + if (mode == 1) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_mode1_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 8) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_4bit_3bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 7) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range7_2bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 4) && (pParams->m_has_alpha)) + { + if (is_solid_rgba(pParams, cr, cg, cb, ca)) + return pack_astc_4bit_2bit_to_one_color_rgba(pParams, pResults, cr, cg, cb, ca, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 13) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range13_2bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 11) && (pParams->m_num_selector_weights == 32) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range11_5bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + } + + // Compute partition's mean color and principle axis. 
+ bc7enc_vec4F meanColor, axis; + vec4F_set_scalar(&meanColor, 0.0f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + meanColor = vec4F_add(&meanColor, &color); + } + + bc7enc_vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels)); + + meanColor = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels * 255.0f)); + vec4F_saturate_in_place(&meanColor); + + if (pParams->m_has_alpha) + { + // Use incremental PCA for RGBA PCA, because it's simple. + vec4F_set_scalar(&axis, 0.0f); + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + color = vec4F_sub(&color, &meanColorScaled); + bc7enc_vec4F a = vec4F_mul(&color, color.m_c[0]); + bc7enc_vec4F b = vec4F_mul(&color, color.m_c[1]); + bc7enc_vec4F c = vec4F_mul(&color, color.m_c[2]); + bc7enc_vec4F d = vec4F_mul(&color, color.m_c[3]); + bc7enc_vec4F n = i ? axis : color; + vec4F_normalize_in_place(&n); + axis.m_c[0] += vec4F_dot(&a, &n); + axis.m_c[1] += vec4F_dot(&b, &n); + axis.m_c[2] += vec4F_dot(&c, &n); + axis.m_c[3] += vec4F_dot(&d, &n); + } + vec4F_normalize_in_place(&axis); + } + else + { + // Use covar technique for RGB PCA, because it doesn't require per-pixel normalization. 
+ float cov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pV = &pParams->m_pPixels[i]; + float r = pV->m_c[0] - meanColorScaled.m_c[0]; + float g = pV->m_c[1] - meanColorScaled.m_c[1]; + float b = pV->m_c[2] - meanColorScaled.m_c[2]; + cov[0] += r*r; cov[1] += r*g; cov[2] += r*b; cov[3] += g*g; cov[4] += g*b; cov[5] += b*b; + } + + float xr = .9f, xg = 1.0f, xb = .7f; + for (uint32_t iter = 0; iter < 3; iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + if (m > 1e-10f) + { + m = 1.0f / m; + r *= m; g *= m; b *= m; + } + + xr = r; xg = g; xb = b; + } + + float len = xr * xr + xg * xg + xb * xb; + if (len < 1e-10f) + vec4F_set_scalar(&axis, 0.0f); + else + { + len = 1.0f / sqrtf(len); + xr *= len; xg *= len; xb *= len; + vec4F_set(&axis, xr, xg, xb, 0); + } + } + + if (vec4F_dot(&axis, &axis) < .5f) + { + if (pParams->m_perceptual) + vec4F_set(&axis, .213f, .715f, .072f, pParams->m_has_alpha ? .715f : 0); + else + vec4F_set(&axis, 1.0f, 1.0f, 1.0f, pParams->m_has_alpha ? 
1.0f : 0); + vec4F_normalize_in_place(&axis); + } + + bc7enc_vec4F minColor, maxColor; + + float l = 1e+9f, h = -1e+9f; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + + bc7enc_vec4F q = vec4F_sub(&color, &meanColorScaled); + float d = vec4F_dot(&q, &axis); + + l = minimumf(l, d); + h = maximumf(h, d); + } + + l *= (1.0f / 255.0f); + h *= (1.0f / 255.0f); + + bc7enc_vec4F b0 = vec4F_mul(&axis, l); + bc7enc_vec4F b1 = vec4F_mul(&axis, h); + bc7enc_vec4F c0 = vec4F_add(&meanColor, &b0); + bc7enc_vec4F c1 = vec4F_add(&meanColor, &b1); + minColor = vec4F_saturate(&c0); + maxColor = vec4F_saturate(&c1); + + bc7enc_vec4F whiteVec; + vec4F_set_scalar(&whiteVec, 1.0f); + if (vec4F_dot(&minColor, &whiteVec) > vec4F_dot(&maxColor, &whiteVec)) + { +#if 1 + std::swap(minColor.m_c[0], maxColor.m_c[0]); + std::swap(minColor.m_c[1], maxColor.m_c[1]); + std::swap(minColor.m_c[2], maxColor.m_c[2]); + std::swap(minColor.m_c[3], maxColor.m_c[3]); +#elif 0 + // Fails to compile correctly with MSVC 2019 (code generation bug) + std::swap(minColor, maxColor); +#else + // Fails with MSVC 2019 + bc7enc_vec4F temp = minColor; + minColor = maxColor; + maxColor = temp; +#endif + } + + // First find a solution using the block's PCA. + if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pComp_params->m_least_squares_passes; i++) + { + // Now try to refine the solution using least squares by computing the optimal endpoints from the current selectors. 
+ bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, pResults->m_pSelectors, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, pResults->m_pSelectors, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + } + + if ((!pParams->m_pForce_selectors) && (pComp_params->m_uber_level > 0)) + { + // In uber level 1, try varying the selectors a little, somewhat like cluster fit would. First try incrementing the minimum selectors, + // then try decrementing the selectrors, then try both. + uint8_t selectors_temp[16], selectors_temp1[16]; + memcpy(selectors_temp, pResults->m_pSelectors, pParams->m_num_pixels); + + const int max_selector = pParams->m_num_selector_weights - 1; + + uint32_t min_sel = 256; + uint32_t max_sel = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + min_sel = minimumu(min_sel, sel); + max_sel = maximumu(max_sel, sel); + } + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == min_sel) && (sel < (pParams->m_num_selector_weights - 1))) + sel++; + selectors_temp1[i] = (uint8_t)sel; + } + + bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if 
(!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == max_sel) && (sel > 0)) + sel--; + selectors_temp1[i] = (uint8_t)sel; + } + + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == min_sel) && (sel < (pParams->m_num_selector_weights - 1))) + sel++; + else if ((sel == max_sel) && (sel > 0)) + sel--; + selectors_temp1[i] = (uint8_t)sel; + } + + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + // In uber levels 2+, try taking more advantage of endpoint extrapolation by scaling the selectors in one direction or another. + const uint32_t uber_err_thresh = (pParams->m_num_pixels * 56) >> 4; + if ((pComp_params->m_uber_level >= 2) && (pResults->m_best_overall_err > uber_err_thresh)) + { + const int Q = (pComp_params->m_uber_level >= 4) ? 
(pComp_params->m_uber_level - 2) : 1; + for (int ly = -Q; ly <= 1; ly++) + { + for (int hy = max_selector - 1; hy <= (max_selector + Q); hy++) + { + if ((ly == 0) && (hy == max_selector)) + continue; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + selectors_temp1[i] = (uint8_t)clampf(floorf((float)max_selector * ((float)selectors_temp[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_selector); + + //bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + } + } + } + } + + if (!pParams->m_pForce_selectors) + { + // Try encoding the partition as a single color by using the optimal single colors tables to encode the block to its mean. 
+ if (mode == 1) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_mode1_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 8) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_4bit_3bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 7) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range7_2bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if 
((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 4) && (pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f), a = (int)(.5f + meanColor.m_c[3] * 255.0f); + uint64_t avg_err = pack_astc_4bit_2bit_to_one_color_rgba(pParams, &avg_results, r, g, b, a, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 13) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range13_2bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 11) && (pParams->m_num_selector_weights == 32) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range11_5bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, 
pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + } + +#if BC7ENC_CHECK_OVERALL_ERROR + check_best_overall_error(pParams, pResults); +#endif + + return pResults->m_best_overall_err; +} + +uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t *pWeight_table, + uint32_t num_pixels, const color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]) +{ + assert(num_comps == 3 || num_comps == 4); + assert(num_weights >= 1 && num_weights <= 32); + assert(pWeight_table[0] == 0 && pWeight_table[num_weights - 1] == 64); + + // Find RGB bounds as an approximation of the block's principle axis + uint32_t lr = 255, lg = 255, lb = 255, la = 255; + uint32_t hr = 0, hg = 0, hb = 0, ha = 0; + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[3] < la) la = pC->m_c[3]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + if (pC->m_c[3] > ha) ha = pC->m_c[3]; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + } + la = 255; + ha = 255; + } + + color_quad_u8 lowColor, highColor; + color_quad_u8_set(&lowColor, lr, lg, lb, la); + color_quad_u8_set(&highColor, hr, hg, hb, ha); + + // Place endpoints at bbox diagonals and compute interpolated colors + color_quad_u8 weightedColors[32]; + + weightedColors[0] = lowColor; + weightedColors[num_weights - 1] = highColor; + for 
(uint32_t i = 1; i < (num_weights - 1); i++) + { + weightedColors[i].m_c[0] = (uint8_t)astc_interpolate_linear(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]); + weightedColors[i].m_c[1] = (uint8_t)astc_interpolate_linear(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]); + weightedColors[i].m_c[2] = (uint8_t)astc_interpolate_linear(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]); + weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate_linear(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255; + } + + // Compute dots and thresholds + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; + const int aa = highColor.m_c[3] - lowColor.m_c[3]; + + int dots[32]; + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_weights; i++) + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab + weightedColors[i].m_c[3] * aa; + } + else + { + assert(aa == 0); + for (uint32_t i = 0; i < num_weights; i++) + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab; + } + + int thresh[32 - 1]; + for (uint32_t i = 0; i < (num_weights - 1); i++) + thresh[i] = (dots[i] + dots[i + 1] + 1) >> 1; + + uint64_t total_err = 0; + if ((weights[0] | weights[1] | weights[2] | weights[3]) == 1) + { + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int 
da = (int)pE1->m_c[3] - (int)pC->m_c[3]; + + total_err += (dr * dr) + (dg * dg) + (db * db) + (da * da); + if (total_err > best_err_so_far) + break; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + + total_err += (dr * dr) + (dg * dg) + (db * db); + if (total_err > best_err_so_far) + break; + } + } + } + else + { + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int da = (int)pE1->m_c[3] - (int)pC->m_c[3]; + + total_err += weights[0] * (dr * dr) + weights[1] * (dg * dg) + weights[2] * (db * db) + weights[3] * (da * da); + if (total_err > best_err_so_far) + break; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = 
(int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + + total_err += weights[0] * (dr * dr) + weights[1] * (dg * dg) + weights[2] * (db * db); + if (total_err > best_err_so_far) + break; + } + } + } + + return total_err; +} + +} // namespace basisu diff --git a/src/deps/basis-universal/encoder/basisu_bc7enc.h b/src/deps/basis-universal/encoder/basisu_bc7enc.h new file mode 100644 index 0000000000..8d8b7888ca --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_bc7enc.h @@ -0,0 +1,132 @@ +// File: basisu_bc7enc.h +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_enc.h" +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + +#define BC7ENC_MAX_PARTITIONS1 (64) +#define BC7ENC_MAX_UBER_LEVEL (4) + + typedef uint8_t bc7enc_bool; + +#define BC7ENC_TRUE (1) +#define BC7ENC_FALSE (0) + + typedef struct { float m_c[4]; } bc7enc_vec4F; + + extern const float g_bc7_weights1x[2 * 4]; + extern const float g_bc7_weights2x[4 * 4]; + extern const float g_bc7_weights3x[8 * 4]; + extern const float g_bc7_weights4x[16 * 4]; + extern const float g_astc_weights4x[16 * 4]; + extern const float g_astc_weights5x[32 * 4]; + extern const float g_astc_weights_3levelsx[3 * 4]; + + extern basist::astc_quant_bin g_astc_sorted_order_unquant[basist::BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + + struct color_cell_compressor_params + { + uint32_t m_num_pixels; + const basist::color_quad_u8* m_pPixels; + + uint32_t m_num_selector_weights; + const uint32_t* m_pSelector_weights; + + const bc7enc_vec4F* m_pSelector_weightsx; + uint32_t m_comp_bits; + + const uint8_t *m_pForce_selectors; + + // Non-zero m_astc_endpoint_range enables ASTC mode. m_comp_bits and m_has_pbits are always false. We only support 2, 3, or 4 bit weight encodings. + uint32_t m_astc_endpoint_range; + + uint32_t m_weights[4]; + bc7enc_bool m_has_alpha; + bc7enc_bool m_has_pbits; + bc7enc_bool m_endpoints_share_pbit; + bc7enc_bool m_perceptual; + }; + + struct color_cell_compressor_results + { + uint64_t m_best_overall_err; + basist::color_quad_u8 m_low_endpoint; + basist::color_quad_u8 m_high_endpoint; + uint32_t m_pbits[2]; + uint8_t* m_pSelectors; + uint8_t* m_pSelectors_temp; + + // Encoded ASTC indices, if ASTC mode is enabled + basist::color_quad_u8 m_astc_low_endpoint; + basist::color_quad_u8 m_astc_high_endpoint; + }; + + struct bc7enc_compress_block_params + { + // m_max_partitions_mode1 may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. 
The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_max_partitions_mode1; + + // Relative RGBA or YCbCrA weights. + uint32_t m_weights[4]; + + // m_uber_level may range from 0 to BC7ENC_MAX_UBER_LEVEL. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_uber_level; + + // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB. + bc7enc_bool m_perceptual; + + uint32_t m_least_squares_passes; + }; + + uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, const bc7enc_compress_block_params* pComp_params); + + uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeight_table, + uint32_t num_pixels, const basist::color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]); + + inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_FALSE; + p->m_weights[0] = 1; + p->m_weights[1] = 1; + p->m_weights[2] = 1; + p->m_weights[3] = 1; + } + + inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_TRUE; + p->m_weights[0] = 128; + p->m_weights[1] = 64; + p->m_weights[2] = 16; + p->m_weights[3] = 32; + } + + inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params* p) + { + p->m_max_partitions_mode1 = BC7ENC_MAX_PARTITIONS1; + p->m_least_squares_passes = 1; + p->m_uber_level = 0; + bc7enc_compress_block_params_init_perceptual_weights(p); + } + + // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). 
+ void bc7enc_compress_block_init();
+
+} // namespace basisu
diff --git a/src/deps/basis-universal/encoder/basisu_comp.cpp b/src/deps/basis-universal/encoder/basisu_comp.cpp
new file mode 100644
index 0000000000..10f96cec4a
--- /dev/null
+++ b/src/deps/basis-universal/encoder/basisu_comp.cpp
@@ -0,0 +1,2119 @@
+// basisu_comp.cpp
+// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "basisu_comp.h"
+#include "basisu_enc.h"
+#include <unordered_set>
+#include <atomic>
+
+// basisu_transcoder.cpp is where basisu_miniz lives now, we just need the declarations here.
+#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+#include "basisu_miniz.h"
+
+#if !BASISD_SUPPORT_KTX2
+#error BASISD_SUPPORT_KTX2 must be enabled (set to 1).
+#endif
+
+#if BASISD_SUPPORT_KTX2_ZSTD
+#include "../zstd/zstd.h"
+#endif
+
+// Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all)
+#define BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND (0)
+
+// Set to 1 to disable writing all KTX2 key values, triggering the validator bug.
+#define BASISU_DISABLE_KTX2_KEY_VALUES (0) + +using namespace buminiz; + +#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 +#define DEBUG_CROP_TEXTURE_TO_64x64 (0) +#define DEBUG_RESIZE_TEXTURE (0) +#define DEBUG_EXTRACT_SINGLE_BLOCK (0) + +namespace basisu +{ + basis_compressor::basis_compressor() : + m_basis_file_size(0), + m_basis_bits_per_texel(0.0f), + m_total_blocks(0), + m_auto_global_sel_pal(false), + m_any_source_image_has_alpha(false) + { + debug_printf("basis_compressor::basis_compressor\n"); + } + + bool basis_compressor::init(const basis_compressor_params ¶ms) + { + debug_printf("basis_compressor::init\n"); + + m_params = params; + + if (m_params.m_debug) + { + debug_printf("basis_compressor::init:\n"); + +#define PRINT_BOOL_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); + + debug_printf("Has global selector codebook: %i\n", m_params.m_pSel_codebook != nullptr); + + debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i, Source mipmap images: %u\n", + m_params.m_source_images.size(), m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(), m_params.m_source_mipmap_images.size()); + + if (m_params.m_source_mipmap_images.size()) + { + debug_printf("m_source_mipmap_images array sizes:\n"); + for (uint32_t i = 0; i < m_params.m_source_mipmap_images.size(); i++) + debug_printf("%u ", m_params.m_source_mipmap_images[i].size()); + debug_printf("\n"); + } + + PRINT_BOOL_VALUE(m_uastc); + PRINT_BOOL_VALUE(m_y_flip); + PRINT_BOOL_VALUE(m_debug); + 
PRINT_BOOL_VALUE(m_validate); + PRINT_BOOL_VALUE(m_debug_images); + PRINT_BOOL_VALUE(m_global_sel_pal); + PRINT_BOOL_VALUE(m_auto_global_sel_pal); + PRINT_INT_VALUE(m_compression_level); + PRINT_BOOL_VALUE(m_no_hybrid_sel_cb); + PRINT_BOOL_VALUE(m_perceptual); + PRINT_BOOL_VALUE(m_no_endpoint_rdo); + PRINT_BOOL_VALUE(m_no_selector_rdo); + PRINT_BOOL_VALUE(m_read_source_images); + PRINT_BOOL_VALUE(m_write_output_basis_files); + PRINT_BOOL_VALUE(m_compute_stats); + PRINT_BOOL_VALUE(m_check_for_alpha); + PRINT_BOOL_VALUE(m_force_alpha); + debug_printf("swizzle: %d,%d,%d,%d\n", + m_params.m_swizzle[0], + m_params.m_swizzle[1], + m_params.m_swizzle[2], + m_params.m_swizzle[3]); + PRINT_BOOL_VALUE(m_renormalize); + PRINT_BOOL_VALUE(m_multithreading); + PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); + + PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh); + + PRINT_INT_VALUE(m_global_pal_bits); + PRINT_INT_VALUE(m_global_mod_bits); + + PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); + PRINT_FLOAT_VALUE(m_selector_rdo_thresh); + + PRINT_BOOL_VALUE(m_mip_gen); + PRINT_BOOL_VALUE(m_mip_renormalize); + PRINT_BOOL_VALUE(m_mip_wrapping); + PRINT_BOOL_VALUE(m_mip_fast); + PRINT_BOOL_VALUE(m_mip_srgb); + PRINT_FLOAT_VALUE(m_mip_premultiplied); + PRINT_FLOAT_VALUE(m_mip_scale); + PRINT_INT_VALUE(m_mip_smallest_dimension); + debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str()); + + debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters); + debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters); + debug_printf("m_quality_level: %i\n", m_params.m_quality_level); + + debug_printf("m_tex_type: %u\n", m_params.m_tex_type); + debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); + debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 
1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); + debug_printf("m_pack_uastc_flags: 0x%X\n", m_params.m_pack_uastc_flags); + + PRINT_BOOL_VALUE(m_rdo_uastc); + PRINT_FLOAT_VALUE(m_rdo_uastc_quality_scalar); + PRINT_INT_VALUE(m_rdo_uastc_dict_size); + PRINT_FLOAT_VALUE(m_rdo_uastc_max_allowed_rms_increase_ratio); + PRINT_FLOAT_VALUE(m_rdo_uastc_skip_block_rms_thresh); + PRINT_FLOAT_VALUE(m_rdo_uastc_max_smooth_block_error_scale); + PRINT_FLOAT_VALUE(m_rdo_uastc_smooth_block_max_std_dev); + PRINT_BOOL_VALUE(m_rdo_uastc_favor_simpler_modes_in_rdo_mode) + PRINT_BOOL_VALUE(m_rdo_uastc_multithreading); + + PRINT_INT_VALUE(m_resample_width); + PRINT_INT_VALUE(m_resample_height); + PRINT_FLOAT_VALUE(m_resample_factor); + debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 1 : 0); + if (m_params.m_pGlobal_codebooks) + { + debug_printf("Global codebook endpoints: %u selectors: %u\n", m_params.m_pGlobal_codebooks->get_endpoints().size(), m_params.m_pGlobal_codebooks->get_selectors().size()); + } + + PRINT_BOOL_VALUE(m_create_ktx2_file); + + debug_printf("KTX2 UASTC supercompression: %u\n", m_params.m_ktx2_uastc_supercompression); + debug_printf("KTX2 Zstd supercompression level: %i\n", (int)m_params.m_ktx2_zstd_supercompression_level); + debug_printf("KTX2 sRGB transfer func: %u\n", (int)m_params.m_ktx2_srgb_transfer_func); + debug_printf("Total KTX2 key values: %u\n", m_params.m_ktx2_key_values.size()); + for (uint32_t i = 0; i < m_params.m_ktx2_key_values.size(); i++) + { + debug_printf("Key: \"%s\"\n", m_params.m_ktx2_key_values[i].m_key.data()); + debug_printf("Value size: %u\n", m_params.m_ktx2_key_values[i].m_value.size()); + } + +#undef PRINT_BOOL_VALUE +#undef PRINT_INT_VALUE +#undef PRINT_UINT_VALUE +#undef PRINT_FLOAT_VALUE + } + + if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + return true; + } + + basis_compressor::error_code basis_compressor::process() + { + 
debug_printf("basis_compressor::process\n"); + + if (!read_source_images()) + return cECFailedReadingSourceImages; + + if (!validate_texture_type_constraints()) + return cECFailedValidating; + + if (m_params.m_create_ktx2_file) + { + if (!validate_ktx2_constraints()) + return cECFailedValidating; + } + + if (!extract_source_blocks()) + return cECFailedFrontEnd; + + if (m_params.m_uastc) + { + error_code ec = encode_slices_to_uastc(); + if (ec != cECSuccess) + return ec; + } + else + { + if (!process_frontend()) + return cECFailedFrontEnd; + + if (!extract_frontend_texture_data()) + return cECFailedFontendExtract; + + if (!process_backend()) + return cECFailedBackend; + } + + if (!create_basis_file_and_transcode()) + return cECFailedCreateBasisFile; + + if (m_params.m_create_ktx2_file) + { + if (!create_ktx2_file()) + return cECFailedCreateKTX2File; + } + + if (!write_output_files_and_compute_stats()) + return cECFailedWritingOutput; + + return cECSuccess; + } + + basis_compressor::error_code basis_compressor::encode_slices_to_uastc() + { + debug_printf("basis_compressor::encode_slices_to_uastc\n"); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(texture_format::cUASTC4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC4x4; + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& tex = m_uastc_slice_textures[slice_index]; + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const uint32_t 
num_blocks_x = tex.get_blocks_x(); + const uint32_t num_blocks_y = tex.get_blocks_y(); + const uint32_t total_blocks = tex.get_total_blocks(); + const image& source_image = m_slice_images[slice_index]; + + std::atomic total_blocks_processed; + total_blocks_processed = 0; + + const uint32_t N = 256; + for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(total_blocks, block_index_iter + N); + + // FIXME: This sucks, but we're having a stack size related problem with std::function with emscripten. +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, &tex, &total_blocks_processed] + { +#endif + BASISU_NOTE_UNUSED(num_blocks_y); + + uint32_t uastc_flags = m_params.m_pack_uastc_flags; + if ((m_params.m_rdo_uastc) && (m_params.m_rdo_uastc_favor_simpler_modes_in_rdo_mode)) + uastc_flags |= cPackUASTCFavorSimplerModes; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_x = block_index % num_blocks_x; + const uint32_t block_y = block_index / num_blocks_x; + + color_rgba block_pixels[4][4]; + + source_image.extract_block_clamped((color_rgba*)block_pixels, block_x * 4, block_y * 4, 4, 4); + + basist::uastc_block& dest_block = *(basist::uastc_block*)tex.get_block_ptr(block_x, block_y); + + encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags); + + total_blocks_processed++; + + uint32_t val = total_blocks_processed; + if ((val & 16383) == 16383) + { + debug_printf("basis_compressor::encode_slices_to_uastc: %3.1f%% done\n", static_cast(val) * 100.0f / total_blocks); + } + + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } // block_index_iter + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + if (m_params.m_rdo_uastc) + { + uastc_rdo_params rdo_params; + 
rdo_params.m_lambda = m_params.m_rdo_uastc_quality_scalar; + rdo_params.m_max_allowed_rms_increase_ratio = m_params.m_rdo_uastc_max_allowed_rms_increase_ratio; + rdo_params.m_skip_block_rms_thresh = m_params.m_rdo_uastc_skip_block_rms_thresh; + rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_dict_size; + rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_max_smooth_block_error_scale; + rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_smooth_block_max_std_dev; + + bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(), + (const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_flags, m_params.m_rdo_uastc_multithreading ? m_params.m_pJob_pool : nullptr, + (m_params.m_rdo_uastc_multithreading && m_params.m_pJob_pool) ? basisu::minimum(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0); + if (!status) + { + return cECFailedUASTCRDOPostProcess; + } + } + + m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); + memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); + + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); + + } // slice_index + + return cECSuccess; + } + + bool basis_compressor::generate_mipmaps(const image &img, basisu::vector &mips, bool has_alpha) + { + debug_printf("basis_compressor::generate_mipmaps\n"); + + interval_timer tm; + tm.start(); + + uint32_t total_levels = 1; + uint32_t w = img.get_width(), h = img.get_height(); + while (maximum(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) + { + w = maximum(w >> 1U, 1U); + h = maximum(h >> 1U, 1U); + total_levels++; + } + +#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN + // Requires stb_image_resize + stbir_filter filter = STBIR_FILTER_DEFAULT; + if (m_params.m_mip_filter == "box") + filter = STBIR_FILTER_BOX; + else if 
(m_params.m_mip_filter == "triangle") + filter = STBIR_FILTER_TRIANGLE; + else if (m_params.m_mip_filter == "cubic") + filter = STBIR_FILTER_CUBICBSPLINE; + else if (m_params.m_mip_filter == "catmull") + filter = STBIR_FILTER_CATMULLROM; + else if (m_params.m_mip_filter == "mitchell") + filter = STBIR_FILTER_MITCHELL; + + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + image &level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + int result = stbir_resize_uint8_generic( + (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), + (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), + has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, + m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? 
STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + nullptr); + + if (result == 0) + { + error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#else + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + image& level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + const image* pSource_image = &img; + + if (m_params.m_mip_fast) + { + if (level > 1) + pSource_image = &mips[level - 1]; + } + + bool status = image_resample(*pSource_image, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); + if (!status) + { + error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#endif + + if (m_params.m_debug) + debug_printf("Total mipmap generation time: %f secs\n", tm.get_elapsed_secs()); + + return true; + } + + bool basis_compressor::read_source_images() + { + debug_printf("basis_compressor::read_source_images\n"); + + const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size(); + if (!total_source_files) + return false; + + m_stats.resize(0); + m_slice_descs.resize(0); + m_slice_images.resize(0); + + m_total_blocks = 0; + uint32_t total_macroblocks = 0; + + m_any_source_image_has_alpha = false; + + basisu::vector source_images; + basisu::vector source_filenames; + + // First load all source images, and determine if any have an alpha channel. 
+ for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + const char *pSource_filename = ""; + + image file_image; + + if (m_params.m_read_source_images) + { + pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); + + // Load the source image + if (!load_image(pSource_filename, file_image)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + + if (m_params.m_status_output) + { + printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); + } + + // Optionally load another image and put a grayscale version of it into the alpha channel. + if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) + { + const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); + + image alpha_data; + + if (!load_image(pSource_alpha_image, alpha_data)) + { + error_printf("Failed reading source image: %s\n", pSource_alpha_image); + return false; + } + + printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); + + alpha_data.crop(file_image.get_width(), file_image.get_height()); + + for (uint32_t y = 0; y < file_image.get_height(); y++) + for (uint32_t x = 0; x < file_image.get_width(); x++) + file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); + } + } + else + { + file_image = m_params.m_source_images[source_file_index]; + } + + if (m_params.m_renormalize) + file_image.renormalize_normal_map(); + + bool alpha_swizzled = false; + if (m_params.m_swizzle[0] != 0 || + m_params.m_swizzle[1] != 1 || + m_params.m_swizzle[2] != 2 || + m_params.m_swizzle[3] != 3) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image.get_height(); y++) + for (uint32_t x = 0; x < file_image.get_width(); x++) + { + 
const color_rgba &c = file_image(x, y); + file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + alpha_swizzled = m_params.m_swizzle[3] != 3; + } + + bool has_alpha = false; + if (m_params.m_force_alpha || alpha_swizzled) + has_alpha = true; + else if (!m_params.m_check_for_alpha) + file_image.set_alpha(255); + else if (file_image.has_alpha()) + has_alpha = true; + + if (has_alpha) + m_any_source_image_has_alpha = true; + + debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); + + if (m_params.m_y_flip) + file_image.flip_y(); + +#if DEBUG_EXTRACT_SINGLE_BLOCK + image block_image(4, 4); + const uint32_t block_x = 0; + const uint32_t block_y = 0; + block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); + file_image = block_image; +#endif + +#if DEBUG_CROP_TEXTURE_TO_64x64 + file_image.resize(64, 64); +#endif + + if (m_params.m_resample_width > 0 && m_params.m_resample_height > 0) + { + int new_width = basisu::minimum(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. 
+ image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } + else if (m_params.m_resample_factor > 0.0f) + { + int new_width = basisu::minimum(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } + + if ((!file_image.get_width()) || (!file_image.get_height())) + { + error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); + return false; + } + + if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + { + error_printf("basis_compressor::read_source_images: Source image is too large!\n"); + return false; + } + + source_images.push_back(file_image); + source_filenames.push_back(pSource_filename); + } + + // Check if the caller has generated their own mipmaps. + if (m_params.m_source_mipmap_images.size()) + { + // Make sure they've passed us enough mipmap chains. + if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size())) + { + error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n"); + return false; + } + + // Check if any of the user-supplied mipmap levels has alpha. 
+ // We're assuming the user has already preswizzled their mipmap source images. + if (!m_any_source_image_has_alpha) + { + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) + { + const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + if (mip_img.has_alpha()) + { + m_any_source_image_has_alpha = true; + break; + } + } + + if (m_any_source_image_has_alpha) + break; + } + } + } + + debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); + + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + image &file_image = source_images[source_file_index]; + const std::string &source_filename = source_filenames[source_file_index]; + + // Now, for each source image, create the slices corresponding to that image. + basisu::vector slices; + + slices.reserve(32); + + // The first (largest) mipmap level. + slices.push_back(file_image); + + if (m_params.m_source_mipmap_images.size()) + { + // User-provided mipmaps for each layer or image in the texture array. 
+ for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) + { + image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + if (m_params.m_swizzle[0] != 0 || + m_params.m_swizzle[1] != 1 || + m_params.m_swizzle[2] != 2 || + m_params.m_swizzle[3] != 3) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < mip_img.get_height(); y++) + for (uint32_t x = 0; x < mip_img.get_width(); x++) + { + const color_rgba &c = mip_img(x, y); + mip_img(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + } + + slices.push_back(mip_img); + } + } + else if (m_params.m_mip_gen) + { + // Automatically generate mipmaps. + if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha)) + return false; + } + + uint_vec mip_indices(slices.size()); + for (uint32_t i = 0; i < slices.size(); i++) + mip_indices[i] = i; + + if ((m_any_source_image_has_alpha) && (!m_params.m_uastc)) + { + // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. 
+ basisu::vector alpha_slices; + uint_vec new_mip_indices; + + alpha_slices.reserve(slices.size() * 2); + + for (uint32_t i = 0; i < slices.size(); i++) + { + image lvl_rgb(slices[i]); + image lvl_a(lvl_rgb); + + for (uint32_t y = 0; y < lvl_a.get_height(); y++) + { + for (uint32_t x = 0; x < lvl_a.get_width(); x++) + { + uint8_t a = lvl_a(x, y).a; + lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); + } + } + + lvl_rgb.set_alpha(255); + + alpha_slices.push_back(lvl_rgb); + new_mip_indices.push_back(i); + + alpha_slices.push_back(lvl_a); + new_mip_indices.push_back(i); + } + + slices.swap(alpha_slices); + mip_indices.swap(new_mip_indices); + } + + assert(slices.size() == mip_indices.size()); + + for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) + { + image& slice_image = slices[slice_index]; + const uint32_t orig_width = slice_image.get_width(); + const uint32_t orig_height = slice_image.get_height(); + + bool is_alpha_slice = false; + if (m_any_source_image_has_alpha) + { + if (m_params.m_uastc) + { + is_alpha_slice = slice_image.has_alpha(); + } + else + { + is_alpha_slice = (slice_index & 1) != 0; + } + } + + // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. 
+ slice_image.crop_dup_borders(slice_image.get_block_width(4) * 4, slice_image.get_block_height(4) * 4); + + if (m_params.m_debug_images) + { + save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), slice_image); + } + + enlarge_vector(m_stats, 1); + enlarge_vector(m_slice_images, 1); + enlarge_vector(m_slice_descs, 1); + + const uint32_t dest_image_index = (uint32_t)m_stats.size() - 1; + + m_stats[dest_image_index].m_filename = source_filename.c_str(); + m_stats[dest_image_index].m_width = orig_width; + m_stats[dest_image_index].m_height = orig_height; + + m_slice_images[dest_image_index] = slice_image; + + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height()); + + basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; + + slice_desc.m_first_block_index = m_total_blocks; + + slice_desc.m_orig_width = orig_width; + slice_desc.m_orig_height = orig_height; + + slice_desc.m_width = slice_image.get_width(); + slice_desc.m_height = slice_image.get_height(); + + slice_desc.m_num_blocks_x = slice_image.get_block_width(4); + slice_desc.m_num_blocks_y = slice_image.get_block_height(4); + + slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; + slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; + + slice_desc.m_source_file_index = source_file_index; + + slice_desc.m_mip_index = mip_indices[slice_index]; + + slice_desc.m_alpha = is_alpha_slice; + slice_desc.m_iframe = false; + if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) + { + slice_desc.m_iframe = (source_file_index == 0); + } + + m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + total_macroblocks += slice_desc.m_num_macroblocks_x * 
slice_desc.m_num_macroblocks_y; + + } // slice_index + + } // source_file_index + + debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks); + + // Make sure we don't have too many slices + if (m_slice_descs.size() > BASISU_MAX_SLICES) + { + error_printf("Too many slices!\n"); + return false; + } + + // Basic sanity check on the slices + for (uint32_t i = 1; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + // Make sure images are in order + int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; + if (image_delta > 1) + return false; + + // Make sure mipmap levels are in order + if (!image_delta) + { + int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index; + if (level_delta > 1) + return false; + } + } + + if (m_params.m_status_output) + { + printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size()); + } + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + if (m_params.m_status_output) + { + printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", + i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); + } + + if (m_any_source_image_has_alpha) + { + if (!m_params.m_uastc) + { + // For ETC1S, alpha slices must be at odd slice indices. 
+ if (slice_desc.m_alpha) + { + if ((i & 1) == 0) + return false; + + const basisu_backend_slice_desc& prev_slice_desc = m_slice_descs[i - 1]; + + // Make sure previous slice has this image's color data + if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index) + return false; + if (prev_slice_desc.m_alpha) + return false; + if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index) + return false; + if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x) + return false; + if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y) + return false; + } + else if (i & 1) + return false; + } + } + else if (slice_desc.m_alpha) + { + return false; + } + + if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) + return false; + if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) + { + if (!slice_desc.m_iframe) + return false; + } + } + + return true; + } + + // Do some basic validation for 2D arrays, cubemaps, video, and volumes. + bool basis_compressor::validate_texture_type_constraints() + { + debug_printf("basis_compressor::validate_texture_type_constraints\n"); + + // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). + if (m_params.m_tex_type == basist::cBASISTexType2D) + return true; + + uint32_t total_basis_images = 0; + + for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_basis_images = maximum(total_basis_images, slice_desc.m_source_file_index + 1); + } + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + // For cubemaps, validate that the total # of Basis images is a multiple of 6. 
+ if ((total_basis_images % 6) != 0) + { + error_printf("basis_compressor::validate_texture_type_constraints: For cubemaps the total number of input images is not a multiple of 6!\n"); + return false; + } + } + + // Now validate that all the mip0's have the same dimensions, and that each image has the same # of mipmap levels. + uint_vec image_mipmap_levels(total_basis_images); + + int width = -1, height = -1; + for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); + + if (slice_desc.m_mip_index != 0) + continue; + + if (width < 0) + { + width = slice_desc.m_orig_width; + height = slice_desc.m_orig_height; + } + else if ((width != (int)slice_desc.m_orig_width) || (height != (int)slice_desc.m_orig_height)) + { + error_printf("basis_compressor::validate_texture_type_constraints: The source image resolutions are not all equal!\n"); + return false; + } + } + + for (size_t i = 1; i < image_mipmap_levels.size(); i++) + { + if (image_mipmap_levels[0] != image_mipmap_levels[i]) + { + error_printf("basis_compressor::validate_texture_type_constraints: Each image must have the same number of mipmap levels!\n"); + return false; + } + } + + return true; + } + + bool basis_compressor::extract_source_blocks() + { + debug_printf("basis_compressor::extract_source_blocks\n"); + + m_source_blocks.resize(m_total_blocks); + + for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const image& source_image = m_slice_images[slice_index]; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for 
(uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); + } + + return true; + } + + bool basis_compressor::process_frontend() + { + debug_printf("basis_compressor::process_frontend\n"); + +#if 0 + // TODO + basis_etc1_pack_params pack_params; + pack_params.m_quality = cETCQualityMedium; + pack_params.m_perceptual = m_params.m_perceptual; + pack_params.m_use_color4 = false; + + pack_etc1_block_context pack_context; + + std::unordered_set endpoint_hash; + std::unordered_set selector_hash; + + for (uint32_t i = 0; i < m_source_blocks.size(); i++) + { + etc_block blk; + pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context); + + const color_rgba c0(blk.get_block_color(0, false)); + endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16)); + + const color_rgba c1(blk.get_block_color(1, false)); + endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16)); + + selector_hash.insert(blk.get_raw_selector_bits()); + } + + const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size(); + const uint32_t total_unique_selectors = (uint32_t)selector_hash.size(); + + if (m_params.m_debug) + { + debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors); + } +#endif + + const double total_texels = m_total_blocks * 16.0f; + + int endpoint_clusters = m_params.m_max_endpoint_clusters; + int selector_clusters = m_params.m_max_selector_clusters; + + if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters) + { + error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters); + return false; + } + if (selector_clusters > basisu_frontend::cMaxSelectorClusters) + { + error_printf("Too many selector clusters! 
(%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); + return false; + } + + if (m_params.m_quality_level != -1) + { + const float quality = saturate(m_params.m_quality_level / 255.0f); + + const float bits_per_endpoint_cluster = 14.0f; + const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f + int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); + + const float mid = 128.0f / 255.0f; + + float color_endpoint_quality = quality; + + const float endpoint_split_point = 0.5f; + + // In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough. + const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800; + const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192; + + if (color_endpoint_quality <= mid) + { + color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f)); + + max_endpoints = clamp(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE); + max_endpoints = minimum(max_endpoints, m_total_blocks); + + if (max_endpoints < 64) + max_endpoints = 64; + endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + } + else + { + color_endpoint_quality = powf((color_endpoint_quality - mid) / (1.0f - mid), 1.6f); + + max_endpoints = clamp(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE); + max_endpoints = minimum(max_endpoints, m_total_blocks); + + if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE) + max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE; + endpoint_clusters = clamp((uint32_t)(.5f + lerp(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + } + + float bits_per_selector_cluster = m_params.m_global_sel_pal ? 
21.0f : 14.0f; + + const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f + int max_selectors = static_cast((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster); + max_selectors = clamp(max_selectors, 256, basisu_frontend::cMaxSelectorClusters); + max_selectors = minimum(max_selectors, m_total_blocks); + + float color_selector_quality = quality; + //color_selector_quality = powf(color_selector_quality, 1.65f); + color_selector_quality = powf(color_selector_quality, 2.62f); + + if (max_selectors < 96) + max_selectors = 96; + selector_clusters = clamp((uint32_t)(.5f + lerp(96, static_cast(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters); + + debug_printf("Max endpoints: %u, max selectors: %u\n", endpoint_clusters, selector_clusters); + + if (m_params.m_quality_level >= 223) + { + if (!m_params.m_selector_rdo_thresh.was_changed()) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .25f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .25f; + } + } + else if (m_params.m_quality_level >= 192) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .5f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .5f; + } + else if (m_params.m_quality_level >= 160) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .75f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .75f; + } + else if (m_params.m_quality_level >= 129) + { + float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); + + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= lerp(1.0f, .75f, l); + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); + } + } + + m_auto_global_sel_pal = false; + 
if (!m_params.m_global_sel_pal && m_params.m_auto_global_sel_pal) + { + const float bits_per_selector_cluster = 31.0f; + double selector_codebook_bpp_est = (bits_per_selector_cluster * selector_clusters) / total_texels; + debug_printf("selector_codebook_bpp_est: %f\n", selector_codebook_bpp_est); + const float force_global_sel_pal_bpp_threshold = .15f; + if ((total_texels <= 128.0f*128.0f) && (selector_codebook_bpp_est > force_global_sel_pal_bpp_threshold)) + { + m_auto_global_sel_pal = true; + debug_printf("Auto global selector palette enabled\n"); + } + } + + basisu_frontend::params p; + p.m_num_source_blocks = m_total_blocks; + p.m_pSource_blocks = &m_source_blocks[0]; + p.m_max_endpoint_clusters = endpoint_clusters; + p.m_max_selector_clusters = selector_clusters; + p.m_perceptual = m_params.m_perceptual; + p.m_debug_stats = m_params.m_debug; + p.m_debug_images = m_params.m_debug_images; + p.m_compression_level = m_params.m_compression_level; + p.m_tex_type = m_params.m_tex_type; + p.m_multithreaded = m_params.m_multithreading; + p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks; + p.m_validate = m_params.m_validate; + p.m_pJob_pool = m_params.m_pJob_pool; + p.m_pGlobal_codebooks = m_params.m_pGlobal_codebooks; + + if ((m_params.m_global_sel_pal) || (m_auto_global_sel_pal)) + { + p.m_pGlobal_sel_codebook = m_params.m_pSel_codebook; + p.m_num_global_sel_codebook_pal_bits = m_params.m_global_pal_bits; + p.m_num_global_sel_codebook_mod_bits = m_params.m_global_mod_bits; + p.m_use_hybrid_selector_codebooks = !m_params.m_no_hybrid_sel_cb; + p.m_hybrid_codebook_quality_thresh = m_params.m_hybrid_sel_cb_quality_thresh; + } + + if (!m_frontend.init(p)) + { + error_printf("basisu_frontend::init() failed!\n"); + return false; + } + + m_frontend.compress(); + + if (m_params.m_debug_images) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + char filename[1024]; +#ifdef _WIN32 + sprintf_s(filename, 
sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); + +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); + } + } + + return true; + } + + bool basis_compressor::extract_frontend_texture_data() + { + debug_printf("basis_compressor::extract_frontend_texture_data\n"); + + m_frontend_output_textures.resize(m_slice_descs.size()); + m_best_etc1s_images.resize(m_slice_descs.size()); + m_best_etc1s_images_unpacked.resize(m_slice_descs.size()); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + + m_frontend_output_textures[i].init(texture_format::cETC1, width, height); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + +#if 0 + if (m_params.m_debug_images) + { + char filename[1024]; + sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i); + write_etc1_vis_images(m_frontend_output_textures[i], filename); + } +#endif + + m_best_etc1s_images[i].init(texture_format::cETC1, 
width, height); + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + + m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); + } + + return true; + } + + bool basis_compressor::process_backend() + { + debug_printf("basis_compressor::process_backend\n"); + + basisu_backend_params backend_params; + backend_params.m_debug = m_params.m_debug; + backend_params.m_debug_images = m_params.m_debug_images; + backend_params.m_etc1s = true; + backend_params.m_compression_level = m_params.m_compression_level; + + if (!m_params.m_no_endpoint_rdo) + backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; + + if (!m_params.m_no_selector_rdo) + backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; + + backend_params.m_use_global_sel_codebook = (m_frontend.get_params().m_pGlobal_sel_codebook != NULL); + backend_params.m_global_sel_codebook_pal_bits = m_frontend.get_params().m_num_global_sel_codebook_pal_bits; + backend_params.m_global_sel_codebook_mod_bits = m_frontend.get_params().m_num_global_sel_codebook_mod_bits; + backend_params.m_use_hybrid_sel_codebooks = m_frontend.get_params().m_use_hybrid_selector_codebooks; + backend_params.m_used_global_codebooks = m_frontend.get_params().m_pGlobal_codebooks != nullptr; + + m_backend.init(&m_frontend, backend_params, m_slice_descs, m_params.m_pSel_codebook); + uint32_t total_packed_bytes = m_backend.encode(); + + if (!total_packed_bytes) + { + error_printf("basis_compressor::encode() failed!\n"); + return false; + } + + debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes); + + return true; + } + + bool basis_compressor::create_basis_file_and_transcode() + { + 
debug_printf("basis_compressor::create_basis_file_and_transcode\n"); + + const basisu_backend_output& encoded_output = m_params.m_uastc ? m_uastc_backend_output : m_backend.get_output(); + + if (!m_basis_file.init(encoded_output, m_params.m_tex_type, m_params.m_userdata0, m_params.m_userdata1, m_params.m_y_flip, m_params.m_us_per_frame)) + { + error_printf("basis_compressor::create_basis_file_and_transcode: basisu_backend:init() failed!\n"); + return false; + } + + const uint8_vec &comp_data = m_basis_file.get_compressed_data(); + + m_output_basis_file = comp_data; + + interval_timer tm; + tm.start(); + + basist::basisu_transcoder_init(); + + debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); + + // Verify the compressed data by transcoding it to ASTC (or ETC1)/BC7 and validating the CRC's. + basist::basisu_transcoder decoder(m_params.m_pSel_codebook); + if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true)) + { + error_printf("decoder.validate_file_checksums() failed!\n"); + return false; + } + + m_decoded_output_textures.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_bc7.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size()); + + tm.start(); + if (m_params.m_pGlobal_codebooks) + { + decoder.set_global_codebooks(m_params.m_pGlobal_codebooks); + } + + if (!decoder.start_transcoding(&comp_data[0], (uint32_t)comp_data.size())) + { + error_printf("decoder.start_transcoding() failed!\n"); + return false; + } + + double start_transcoding_time = tm.get_elapsed_secs(); + + debug_printf("basisu_compressor::start_transcoding() took %3.3fms\n", start_transcoding_time * 1000.0f); + + uint32_t total_orig_pixels = 0; + uint32_t total_texels = 0; + + double total_time_etc1s_or_astc = 0; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + 
decoded_texture.init(m_params.m_uastc ? texture_format::cASTC4x4 : texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + tm.start(); + + basist::block_format format = m_params.m_uastc ? basist::block_format::cASTC_4x4 : basist::block_format::cETC1; + uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8; + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, format, bytes_per_block)) + { + error_printf("Transcoding failed on slice %u!\n", i); + return false; + } + + total_time_etc1s_or_astc += tm.get_elapsed_secs(); + + if (encoded_output.m_tex_format == basist::basis_tex_format::cETC1S) + { + uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0); + if (image_crc16 != encoded_output.m_slice_image_crcs[i]) + { + error_printf("Decoded image data CRC check failed on slice %u!\n", i); + return false; + } + debug_printf("Decoded image data CRC check succeeded on slice %i\n", i); + } + + m_decoded_output_textures[i] = decoded_texture; + + total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height; + total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height; + } + + double total_time_bc7 = 0; + + if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && + basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S)) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + tm.start(); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * 
m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16)) + { + error_printf("Transcoding failed to BC7 on slice %u!\n", i); + return false; + } + + total_time_bc7 += tm.get_elapsed_secs(); + + m_decoded_output_textures_bc7[i] = decoded_texture; + } + } + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); + + if (m_decoded_output_textures_bc7[i].get_pixel_width()) + m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]); + } + + debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n", m_params.m_uastc ? "ASTC" : "ETC1", total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc); + + if (total_time_bc7 != 0) + debug_printf("Transcoded to BC7 in %3.3fms, %f texels/sec\n", total_time_bc7 * 1000.0f, total_orig_pixels / total_time_bc7); + + debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); + + uint32_t total_orig_texels = 0; + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; + + const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + BASISU_NOTE_UNUSED(total_blocks); + + assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); + } + + m_basis_file_size = (uint32_t)comp_data.size(); + m_basis_bits_per_texel = (comp_data.size() * 8.0f) / total_orig_texels; + + return true; + } + + bool basis_compressor::write_output_files_and_compute_stats() + { + debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); + + const uint8_vec& comp_data = m_params.m_create_ktx2_file ? 
m_output_ktx2_file : m_basis_file.get_compressed_data(); + if (m_params.m_write_output_basis_files) + { + const std::string& output_filename = m_params.m_out_filename; + + if (!write_vec_to_file(output_filename.c_str(), comp_data)) + { + error_printf("Failed writing output data to file \"%s\"\n", output_filename.c_str()); + return false; + } + + if (m_params.m_status_output) + { + printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); + } + } + + size_t comp_size = 0; + if ((m_params.m_compute_stats) && (m_params.m_uastc) && (comp_data.size())) + { + void* pComp_data = tdefl_compress_mem_to_heap(&comp_data[0], comp_data.size(), &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); + size_t decomp_size = 0; + void* pDecomp_data = tinfl_decompress_mem_to_heap(pComp_data, comp_size, &decomp_size, 0); + if ((decomp_size != comp_data.size()) || (memcmp(pDecomp_data, &comp_data[0], decomp_size) != 0)) + { + printf("basis_compressor::create_basis_file_and_transcode:: miniz compression or decompression failed!\n"); + return false; + } + + mz_free(pComp_data); + mz_free(pDecomp_data); + + uint32_t total_texels = 0; + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + total_texels += (m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y) * 16; + + m_basis_bits_per_texel = comp_size * 8.0f / total_texels; + + debug_printf(".basis file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n", + (uint32_t)comp_data.size(), + (uint32_t)comp_size, + m_basis_bits_per_texel); + } + + m_stats.resize(m_slice_descs.size()); + + uint32_t total_orig_texels = 0; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; + + if (m_params.m_compute_stats) + { + printf("Slice: %u\n", slice_index); + + image_stats &s = m_stats[slice_index]; + + // TODO: We used 
to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back. + + image_metrics em; + + // ---- .basis stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); + em.print(".basis RGB Avg: "); + s.m_basis_rgb_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4); + em.print(".basis RGBA Avg: "); + s.m_basis_rgba_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); + em.print(".basis R Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1); + em.print(".basis G Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1); + em.print(".basis B Avg: "); + + if (m_params.m_uastc) + { + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1); + em.print(".basis A Avg: "); + + s.m_basis_a_avg_psnr = em.m_psnr; + } + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); + em.print(".basis 709 Luma: "); + s.m_basis_luma_709_psnr = static_cast(em.m_psnr); + s.m_basis_luma_709_ssim = static_cast(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); + em.print(".basis 601 Luma: "); + s.m_basis_luma_601_psnr = static_cast(em.m_psnr); + + if (m_slice_descs.size() == 1) + { + const uint32_t output_size = comp_size ? 
(uint32_t)comp_size : (uint32_t)comp_data.size(); + debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + } + + if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width()) + { + // ---- BC7 stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3); + em.print("BC7 RGB Avg: "); + s.m_bc7_rgb_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4); + em.print("BC7 RGBA Avg: "); + s.m_bc7_rgba_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1); + em.print("BC7 R Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1); + em.print("BC7 G Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1); + em.print("BC7 B Avg: "); + + if (m_params.m_uastc) + { + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1); + em.print("BC7 A Avg: "); + + s.m_bc7_a_avg_psnr = em.m_psnr; + } + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0); + em.print("BC7 709 Luma: "); + s.m_bc7_luma_709_psnr = static_cast(em.m_psnr); + s.m_bc7_luma_709_ssim = static_cast(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true); + em.print("BC7 601 Luma: "); + s.m_bc7_luma_601_psnr = static_cast(em.m_psnr); + } + + if (!m_params.m_uastc) + { + // ---- Nearly best possible ETC1S stats + em.calc(m_slice_images[slice_index], 
m_best_etc1s_images_unpacked[slice_index], 0, 0); + em.print("Unquantized ETC1S 709 Luma: "); + + s.m_best_etc1s_luma_709_psnr = static_cast(em.m_psnr); + s.m_best_etc1s_luma_709_ssim = static_cast(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true); + em.print("Unquantized ETC1S 601 Luma: "); + + s.m_best_etc1s_luma_601_psnr = static_cast(em.m_psnr); + + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); + em.print("Unquantized ETC1S RGB Avg: "); + + s.m_best_etc1s_rgb_avg_psnr = static_cast(em.m_psnr); + } + } + + std::string out_basename; + if (m_params.m_out_filename.size()) + string_get_filename(m_params.m_out_filename.c_str(), out_basename); + else if (m_params.m_source_filenames.size()) + string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); + + string_remove_extension(out_basename); + out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); + + if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images)) + { + // Write "best" ETC1S debug images + if (!m_params.m_uastc) + { + gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); + best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image); + + image best_etc1s_unpacked; + best_etc1s_gpu_image.unpack(best_etc1s_unpacked); + save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); + } + } + + if (m_params.m_debug_images) + { + // Write decoded ETC1S/ASTC debug images + { + gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]); + decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc); + + image 
temp(m_decoded_output_textures_unpacked[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp); + } + + // Write decoded BC7 debug images + if (m_decoded_output_textures_bc7[slice_index].get_pixel_width()) + { + gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]); + decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7); + + image temp(m_decoded_output_textures_unpacked_bc7[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_bc7.png", temp); + } + } + } + + return true; + } + + // Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file. + bool basis_compressor::validate_ktx2_constraints() + { + uint32_t base_width = 0, base_height = 0; + uint32_t total_layers = 0; + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + if (m_slice_descs[i].m_mip_index == 0) + { + if (!base_width) + { + base_width = m_slice_descs[i].m_orig_width; + base_height = m_slice_descs[i].m_orig_height; + } + else + { + if ((m_slice_descs[i].m_orig_width != base_width) || (m_slice_descs[i].m_orig_height != base_height)) + { + return false; + } + } + + total_layers = maximum(total_layers, m_slice_descs[i].m_source_file_index + 1); + } + } + + basisu::vector total_mips(total_layers); + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + total_mips[m_slice_descs[i].m_source_file_index] = maximum(total_mips[m_slice_descs[i].m_source_file_index], m_slice_descs[i].m_mip_index + 1); + + for (uint32_t i = 1; i < total_layers; i++) + { + if (total_mips[0] != total_mips[i]) + { + return false; + } + } + + return true; + } + + static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 
0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_uastc_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + + void basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) + { + const uint8_t* pDFD; + uint32_t dfd_len; + + if (m_params.m_uastc) + { + if (m_any_source_image_has_alpha) + { + pDFD = g_ktx2_uastc_alpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_alpha_dfd); + } + else + { + pDFD = g_ktx2_uastc_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_nonalpha_dfd); + } + } + else + { + if (m_any_source_image_has_alpha) + { + pDFD = g_ktx2_etc1s_alpha_dfd; + dfd_len = sizeof(g_ktx2_etc1s_alpha_dfd); + } + else + { + pDFD = g_ktx2_etc1s_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd); + } + } + + assert(dfd_len >= 44); + + dfd.resize(dfd_len); + memcpy(dfd.data(), pDFD, dfd_len); + + uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t)); + + dfd_bits &= ~(0xFF << 16); + + if (m_params.m_ktx2_srgb_transfer_func) + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16); + else + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); + + basisu::write_le_dword(dfd.data() + 3 * 
sizeof(uint32_t), dfd_bits); + + if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) + { + uint32_t plane_bits = basisu::read_le_dword(dfd.data() + 5 * sizeof(uint32_t)); + + plane_bits &= ~0xFF; + + basisu::write_le_dword(dfd.data() + 5 * sizeof(uint32_t), plane_bits); + } + + // Fix up the DFD channel(s) + uint32_t dfd_chan0 = basisu::read_le_dword(dfd.data() + 7 * sizeof(uint32_t)); + + if (m_params.m_uastc) + { + dfd_chan0 &= ~(0xF << 24); + + // TODO: Allow the caller to override this + if (m_any_source_image_has_alpha) + dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24); + else + dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGB << 24); + } + + basisu::write_le_dword(dfd.data() + 7 * sizeof(uint32_t), dfd_chan0); + } + + bool basis_compressor::create_ktx2_file() + { + if (m_params.m_uastc) + { + if ((m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_NONE) && (m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_ZSTANDARD)) + return false; + } + + const basisu_backend_output& backend_output = m_backend.get_output(); + + // Determine the width/height, number of array layers, mipmap levels, and the number of faces (1 for 2D, 6 for cubemap). + // This does not support 1D or 3D. 
+ uint32_t base_width = 0, base_height = 0, total_layers = 0, total_levels = 0, total_faces = 1; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + if ((m_slice_descs[i].m_mip_index == 0) && (!base_width)) + { + base_width = m_slice_descs[i].m_orig_width; + base_height = m_slice_descs[i].m_orig_height; + } + + total_layers = maximum(total_layers, m_slice_descs[i].m_source_file_index + 1); + + if (!m_slice_descs[i].m_source_file_index) + total_levels = maximum(total_levels, m_slice_descs[i].m_mip_index + 1); + } + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + assert((total_layers % 6) == 0); + + total_layers /= 6; + assert(total_layers >= 1); + + total_faces = 6; + } + + basist::ktx2_header header; + memset(&header, 0, sizeof(header)); + + memcpy(header.m_identifier, basist::g_ktx2_file_identifier, sizeof(basist::g_ktx2_file_identifier)); + header.m_pixel_width = base_width; + header.m_pixel_height = base_height; + header.m_face_count = total_faces; + header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + header.m_type_size = 1; + header.m_level_count = total_levels; + header.m_layer_count = (total_layers > 1) ? total_layers : 0; + + if (m_params.m_uastc) + { + switch (m_params.m_ktx2_uastc_supercompression) + { + case basist::KTX2_SS_NONE: + { + header.m_supercompression_scheme = basist::KTX2_SS_NONE; + break; + } + case basist::KTX2_SS_ZSTANDARD: + { +#if BASISD_SUPPORT_KTX2_ZSTD + header.m_supercompression_scheme = basist::KTX2_SS_ZSTANDARD; +#else + header.m_supercompression_scheme = basist::KTX2_SS_NONE; +#endif + break; + } + default: assert(0); return false; + } + } + + basisu::vector level_data_bytes(total_levels); + basisu::vector compressed_level_data_bytes(total_levels); + uint_vec slice_level_offsets(m_slice_descs.size()); + + // This will append the texture data in the correct order (for each level: layer, then face). 
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + slice_level_offsets[slice_index] = level_data_bytes[slice_desc.m_mip_index].size(); + + if (m_params.m_uastc) + append_vector(level_data_bytes[slice_desc.m_mip_index], m_uastc_backend_output.m_slice_image_data[slice_index]); + else + append_vector(level_data_bytes[slice_desc.m_mip_index], backend_output.m_slice_image_data[slice_index]); + } + + // UASTC supercompression + if ((m_params.m_uastc) && (header.m_supercompression_scheme == basist::KTX2_SS_ZSTANDARD)) + { +#if BASISD_SUPPORT_KTX2_ZSTD + for (uint32_t level_index = 0; level_index < total_levels; level_index++) + { + compressed_level_data_bytes[level_index].resize(ZSTD_compressBound(level_data_bytes[level_index].size())); + + size_t result = ZSTD_compress(compressed_level_data_bytes[level_index].data(), compressed_level_data_bytes[level_index].size(), + level_data_bytes[level_index].data(), level_data_bytes[level_index].size(), + m_params.m_ktx2_zstd_supercompression_level); + + if (ZSTD_isError(result)) + return false; + + compressed_level_data_bytes[level_index].resize(result); + } +#else + // Can't get here + assert(0); + return false; +#endif + } + else + { + // No supercompression + compressed_level_data_bytes = level_data_bytes; + } + + uint8_vec etc1s_global_data; + + // Create ETC1S global supercompressed data + if (!m_params.m_uastc) + { + basist::ktx2_etc1s_global_data_header etc1s_global_data_header; + clear_obj(etc1s_global_data_header); + + etc1s_global_data_header.m_endpoint_count = backend_output.m_num_endpoints; + etc1s_global_data_header.m_selector_count = backend_output.m_num_selectors; + etc1s_global_data_header.m_endpoints_byte_length = backend_output.m_endpoint_palette.size(); + etc1s_global_data_header.m_selectors_byte_length = backend_output.m_selector_palette.size(); + etc1s_global_data_header.m_tables_byte_length = 
backend_output.m_slice_image_tables.size(); + + basisu::vector etc1s_image_descs(total_levels * total_layers * total_faces); + memset(etc1s_image_descs.data(), 0, etc1s_image_descs.size_in_bytes()); + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + const uint32_t level_index = slice_desc.m_mip_index; + uint32_t layer_index = slice_desc.m_source_file_index; + uint32_t face_index = 0; + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + face_index = layer_index % 6; + layer_index /= 6; + } + + const uint32_t etc1s_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index; + + if (slice_desc.m_alpha) + { + etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_length = backend_output.m_slice_image_data[slice_index].size(); + etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_offset = slice_level_offsets[slice_index]; + } + else + { + if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) + etc1s_image_descs[etc1s_image_index].m_image_flags = !slice_desc.m_iframe ? 
basist::KTX2_IMAGE_IS_P_FRAME : 0; + + etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_length = backend_output.m_slice_image_data[slice_index].size(); + etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index]; + } + } // slice_index + + append_vector(etc1s_global_data, (const uint8_t*)&etc1s_global_data_header, sizeof(etc1s_global_data_header)); + append_vector(etc1s_global_data, (const uint8_t*)etc1s_image_descs.data(), etc1s_image_descs.size_in_bytes()); + append_vector(etc1s_global_data, backend_output.m_endpoint_palette); + append_vector(etc1s_global_data, backend_output.m_selector_palette); + append_vector(etc1s_global_data, backend_output.m_slice_image_tables); + + header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; + } + + // Key values + basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values); + key_values.enlarge(1); + + const char* pKTXwriter = "KTXwriter"; + key_values.back().m_key.resize(strlen(pKTXwriter) + 1); + memcpy(key_values.back().m_key.data(), pKTXwriter, strlen(pKTXwriter) + 1); + + char writer_id[128]; +#ifdef _MSC_VER + sprintf_s(writer_id, sizeof(writer_id), "Basis Universal %s", BASISU_LIB_VERSION_STRING); +#else + snprintf(writer_id, sizeof(writer_id), "Basis Universal %s", BASISU_LIB_VERSION_STRING); +#endif + key_values.back().m_value.resize(strlen(writer_id) + 1); + memcpy(key_values.back().m_value.data(), writer_id, strlen(writer_id) + 1); + + key_values.sort(); + +#if BASISU_DISABLE_KTX2_KEY_VALUES + // HACK HACK - Clear the key values array, which causes no key values to be written (triggering the ktx2check validator bug). 
+ key_values.clear(); +#endif + + uint8_vec key_value_data; + + // DFD + uint8_vec dfd; + get_dfd(dfd, header); + + const uint32_t kvd_file_offset = sizeof(header) + sizeof(basist::ktx2_level_index) * total_levels + dfd.size(); + + for (uint32_t pass = 0; pass < 2; pass++) + { + for (uint32_t i = 0; i < key_values.size(); i++) + { + if (key_values[i].m_key.size() < 2) + return false; + + if (key_values[i].m_key.back() != 0) + return false; + + const uint64_t total_len = (uint64_t)key_values[i].m_key.size() + (uint64_t)key_values[i].m_value.size(); + if (total_len >= UINT32_MAX) + return false; + + packed_uint<4> le_len((uint32_t)total_len); + append_vector(key_value_data, (const uint8_t*)&le_len, sizeof(le_len)); + + append_vector(key_value_data, key_values[i].m_key); + append_vector(key_value_data, key_values[i].m_value); + + const uint32_t ofs = key_value_data.size() & 3; + const uint32_t padding = (4 - ofs) & 3; + for (uint32_t p = 0; p < padding; p++) + key_value_data.push_back(0); + } + + if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) + break; + +#if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND + break; +#endif + + // Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data. + uint32_t kvd_end_file_offset = kvd_file_offset + key_value_data.size(); + uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15; + if (!bytes_needed_to_pad) + { + // We're good. No need to add a dummy key. + break; + } + + assert(!pass); + if (pass) + return false; + + if (bytes_needed_to_pad < 6) + bytes_needed_to_pad += 16; + + printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); + + // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. 
+ // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). + key_values.enlarge(1); + for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++) + key_values.back().m_key.push_back(127); + + key_values.back().m_key.push_back(0); + + key_values.back().m_value.push_back(0); + + key_values.sort(); + + key_value_data.resize(0); + + // Try again + } + + basisu::vector level_index_array(total_levels); + memset(level_index_array.data(), 0, level_index_array.size_in_bytes()); + + m_output_ktx2_file.clear(); + m_output_ktx2_file.reserve(m_output_basis_file.size()); + + // Dummy header + m_output_ktx2_file.resize(sizeof(header)); + + // Level index array + append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes()); + + // DFD + const uint8_t* pDFD = dfd.data(); + uint32_t dfd_len = dfd.size(); + + header.m_dfd_byte_offset = m_output_ktx2_file.size(); + header.m_dfd_byte_length = dfd_len; + append_vector(m_output_ktx2_file, pDFD, dfd_len); + + // Key value data + if (key_value_data.size()) + { + assert(kvd_file_offset == m_output_ktx2_file.size()); + + header.m_kvd_byte_offset = m_output_ktx2_file.size(); + header.m_kvd_byte_length = key_value_data.size(); + append_vector(m_output_ktx2_file, key_value_data); + } + + // Global Supercompressed Data + if (etc1s_global_data.size()) + { + uint32_t ofs = m_output_ktx2_file.size() & 7; + uint32_t padding = (8 - ofs) & 7; + for (uint32_t i = 0; i < padding; i++) + m_output_ktx2_file.push_back(0); + + header.m_sgd_byte_length = etc1s_global_data.size(); + header.m_sgd_byte_offset = m_output_ktx2_file.size(); + + append_vector(m_output_ktx2_file, etc1s_global_data); + } + + // mipPadding + if (header.m_supercompression_scheme == basist::KTX2_SS_NONE) + { + // We currently can't do this or the validator will incorrectly give an error. 
+ uint32_t ofs = m_output_ktx2_file.size() & 15; + uint32_t padding = (16 - ofs) & 15; + + // Make sure we're always aligned here (due to a validator bug). + if (padding) + { + printf("Warning: KTX2 mip level data is not 16-byte aligned. This may trigger a ktx2check validation bug. Writing %u bytes of mipPadding.\n", padding); + } + + for (uint32_t i = 0; i < padding; i++) + m_output_ktx2_file.push_back(0); + } + + // Level data - write the smallest mipmap first. + for (int level = total_levels - 1; level >= 0; level--) + { + level_index_array[level].m_byte_length = compressed_level_data_bytes[level].size(); + if (m_params.m_uastc) + level_index_array[level].m_uncompressed_byte_length = level_data_bytes[level].size(); + + level_index_array[level].m_byte_offset = m_output_ktx2_file.size(); + append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]); + } + + // Write final header + memcpy(m_output_ktx2_file.data(), &header, sizeof(header)); + + // Write final level index array + memcpy(m_output_ktx2_file.data() + sizeof(header), level_index_array.data(), level_index_array.size_in_bytes()); + + debug_printf("Total .ktx2 output file size: %u\n", m_output_ktx2_file.size()); + + return true; + } + +} // namespace basisu diff --git a/src/deps/basis-universal/basisu_comp.h b/src/deps/basis-universal/encoder/basisu_comp.h similarity index 56% rename from src/deps/basis-universal/basisu_comp.h rename to src/deps/basis-universal/encoder/basisu_comp.h index 1c201ddbed..2c3af968f7 100644 --- a/src/deps/basis-universal/basisu_comp.h +++ b/src/deps/basis-universal/encoder/basisu_comp.h @@ -1,5 +1,5 @@ // basisu_comp.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,8 +16,23 @@ #include "basisu_frontend.h" #include "basisu_backend.h" #include "basisu_basis_file.h" -#include "transcoder/basisu_global_selector_palette.h" -#include "transcoder/basisu_transcoder.h" +#include "../transcoder/basisu_global_selector_palette.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_uastc_enc.h" + +#define BASISU_LIB_VERSION 115 +#define BASISU_LIB_VERSION_STRING "1.15" + +#ifndef BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 is undefined +#endif +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #error BASISD_SUPPORT_KTX2_ZSTD is undefined +#endif + +#if !BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 must be enabled when building the encoder. To reduce code size if KTX2 support is not needed, set BASISD_SUPPORT_KTX2_ZSTD to 0 +#endif namespace basisu { @@ -40,6 +55,10 @@ namespace basisu const uint32_t BASISU_MAX_SLICES = 0xFFFFFF; + const int BASISU_RDO_UASTC_DICT_SIZE_DEFAULT = 4096; // 32768; + const int BASISU_RDO_UASTC_DICT_SIZE_MIN = 64; + const int BASISU_RDO_UASTC_DICT_SIZE_MAX = 65536; + struct image_stats { image_stats() @@ -52,43 +71,52 @@ namespace basisu m_filename.clear(); m_width = 0; m_height = 0; - - m_basis_etc1s_rgb_avg_psnr = 0.0f; - m_basis_etc1s_luma_709_psnr = 0.0f; - m_basis_etc1s_luma_601_psnr = 0.0f; - m_basis_etc1s_luma_709_ssim = 0.0f; - - m_basis_bc1_rgb_avg_psnr = 0.0f; - m_basis_bc1_luma_709_psnr = 0.0f; - m_basis_bc1_luma_601_psnr = 0.0f; - m_basis_bc1_luma_709_ssim = 0.0f; - - m_best_rgb_avg_psnr = 0.0f; - m_best_luma_709_psnr = 0.0f; - m_best_luma_601_psnr = 0.0f; - m_best_luma_709_ssim = 0.0f; + + m_basis_rgb_avg_psnr = 0.0f; + m_basis_rgba_avg_psnr = 0.0f; + m_basis_a_avg_psnr = 0.0f; + m_basis_luma_709_psnr = 0.0f; + m_basis_luma_601_psnr = 0.0f; + m_basis_luma_709_ssim = 0.0f; + + m_bc7_rgb_avg_psnr = 0.0f; + m_bc7_rgba_avg_psnr = 0.0f; + m_bc7_a_avg_psnr = 0.0f; + m_bc7_luma_709_psnr = 0.0f; + m_bc7_luma_601_psnr = 0.0f; + m_bc7_luma_709_ssim = 0.0f; + + m_best_etc1s_rgb_avg_psnr = 0.0f; + 
m_best_etc1s_luma_709_psnr = 0.0f; + m_best_etc1s_luma_601_psnr = 0.0f; + m_best_etc1s_luma_709_ssim = 0.0f; } std::string m_filename; uint32_t m_width; uint32_t m_height; - // .basis compressed - float m_basis_etc1s_rgb_avg_psnr; - float m_basis_etc1s_luma_709_psnr; - float m_basis_etc1s_luma_601_psnr; - float m_basis_etc1s_luma_709_ssim; + // .basis compressed (ETC1S or UASTC statistics) + float m_basis_rgb_avg_psnr; + float m_basis_rgba_avg_psnr; + float m_basis_a_avg_psnr; + float m_basis_luma_709_psnr; + float m_basis_luma_601_psnr; + float m_basis_luma_709_ssim; + + // BC7 statistics + float m_bc7_rgb_avg_psnr; + float m_bc7_rgba_avg_psnr; + float m_bc7_a_avg_psnr; + float m_bc7_luma_709_psnr; + float m_bc7_luma_601_psnr; + float m_bc7_luma_709_ssim; - float m_basis_bc1_rgb_avg_psnr; - float m_basis_bc1_luma_709_psnr; - float m_basis_bc1_luma_601_psnr; - float m_basis_bc1_luma_709_ssim; - - // Normal (highest quality) compressed ETC1S - float m_best_rgb_avg_psnr; - float m_best_luma_709_psnr; - float m_best_luma_601_psnr; - float m_best_luma_709_ssim; + // Highest achievable quality ETC1S statistics + float m_best_etc1s_rgb_avg_psnr; + float m_best_etc1s_luma_709_psnr; + float m_best_etc1s_luma_601_psnr; + float m_best_etc1s_luma_709_ssim; }; template @@ -175,18 +203,30 @@ namespace basisu struct basis_compressor_params { basis_compressor_params() : + m_pSel_codebook(NULL), + m_compression_level((int)BASISU_DEFAULT_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_COMPRESSION_LEVEL), + m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), + m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), m_hybrid_sel_cb_quality_thresh(BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH, 0.0f, 1e+10f), m_global_pal_bits(8, 0, ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS), m_global_mod_bits(8, 0, basist::etc1_global_palette_entry_modifier::cTotalBits), - m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), - 
m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), - m_pSel_codebook(NULL), + m_mip_scale(1.0f, .000125f, 4.0f), + m_mip_smallest_dimension(1, 1, 16384), m_max_endpoint_clusters(512), m_max_selector_clusters(512), m_quality_level(-1), - m_mip_scale(1.0f, .000125f, 4.0f), - m_mip_smallest_dimension(1, 1, 16384), - m_compression_level((int)BASISU_DEFAULT_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_COMPRESSION_LEVEL), + m_pack_uastc_flags(cPackUASTCLevelDefault), + m_rdo_uastc_quality_scalar(1.0f, 0.001f, 50.0f), + m_rdo_uastc_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX), + m_rdo_uastc_max_smooth_block_error_scale(UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE, 1.0f, 300.0f), + m_rdo_uastc_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f), + m_rdo_uastc_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f), + m_rdo_uastc_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f), + m_resample_width(0, 1, 16384), + m_resample_height(0, 1, 16384), + m_resample_factor(0.0f, .00125f, 100.0f), + m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE), + m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX), m_pJob_pool(nullptr) { clear(); @@ -196,15 +236,20 @@ namespace basisu { m_pSel_codebook = NULL; + m_uastc.clear(); + m_status_output.clear(); + m_source_filenames.clear(); m_source_alpha_filenames.clear(); m_source_images.clear(); + m_source_mipmap_images.clear(); m_out_filename.clear(); m_y_flip.clear(); m_debug.clear(); + m_validate.clear(); m_debug_images.clear(); m_global_sel_pal.clear(); m_auto_global_sel_pal.clear(); @@ -219,7 +264,11 @@ namespace basisu m_check_for_alpha.clear(); m_force_alpha.clear(); m_multithreading.clear(); - m_seperate_rg_to_color_alpha.clear(); + m_swizzle[0] = 0; + m_swizzle[1] = 1; + m_swizzle[2] = 2; + m_swizzle[3] = 3; + m_renormalize.clear(); 
m_hybrid_sel_cb_quality_thresh.clear(); m_global_pal_bits.clear(); m_global_mod_bits.clear(); @@ -236,6 +285,7 @@ namespace basisu m_mip_premultiplied.clear(); m_mip_renormalize.clear(); m_mip_wrapping.clear(); + m_mip_fast.clear(); m_mip_smallest_dimension.clear(); m_max_endpoint_clusters = 0; @@ -247,30 +297,63 @@ namespace basisu m_userdata1 = 0; m_us_per_frame = 0; + m_pack_uastc_flags = cPackUASTCLevelDefault; + m_rdo_uastc.clear(); + m_rdo_uastc_quality_scalar.clear(); + m_rdo_uastc_max_smooth_block_error_scale.clear(); + m_rdo_uastc_smooth_block_max_std_dev.clear(); + m_rdo_uastc_max_allowed_rms_increase_ratio.clear(); + m_rdo_uastc_skip_block_rms_thresh.clear(); + m_rdo_uastc_favor_simpler_modes_in_rdo_mode.clear(); + m_rdo_uastc_multithreading.clear(); + + m_resample_width.clear(); + m_resample_height.clear(); + m_resample_factor.clear(); + + m_pGlobal_codebooks = nullptr; + + m_create_ktx2_file.clear(); + m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; + m_ktx2_key_values.clear(); + m_ktx2_zstd_supercompression_level.clear(); + m_ktx2_srgb_transfer_func.clear(); + m_pJob_pool = nullptr; } - + // Pointer to the global selector codebook, or nullptr to not use a global selector codebook const basist::etc1_global_selector_codebook *m_pSel_codebook; + // True to generate UASTC .basis file data, otherwise ETC1S. + bool_param m_uastc; + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. // Otherwise, the compressor processes the images in m_source_images. - std::vector m_source_filenames; - std::vector m_source_alpha_filenames; + basisu::vector m_source_filenames; + basisu::vector m_source_alpha_filenames; - std::vector m_source_images; - // TODO: Allow caller to supply their own mipmaps + basisu::vector m_source_images; + + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. 
+ // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. + // The compressor applies the user-provided swizzling (in m_swizzle) to these images. + basisu::vector< basisu::vector > m_source_mipmap_images; // Filename of the output basis file - std::string m_out_filename; + std::string m_out_filename; // The params are done this way so we can detect when the user has explictly changed them. // Flip images across Y axis bool_param m_y_flip; + + // If true, the compressor will print basis status to stdout during compression. + bool_param m_status_output; // Output debug information during compression bool_param m_debug; + bool_param m_validate; // m_debug_images is pretty slow bool_param m_debug_images; @@ -284,7 +367,7 @@ namespace basisu // Frontend/backend codec parameters bool_param m_no_hybrid_sel_cb; - // Use perceptual sRGB colorspace metrics (for normal maps, etc.) + // Use perceptual sRGB colorspace metrics instead of linear bool_param m_perceptual; // Disable selector RDO, for faster compression but larger files @@ -299,7 +382,7 @@ namespace basisu // Write the output basis file to disk using m_out_filename bool_param m_write_output_basis_files; - + // Compute and display image metrics bool_param m_compute_stats; @@ -311,7 +394,9 @@ namespace basisu bool_param m_multithreading; // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels - bool_param m_seperate_rg_to_color_alpha; + char m_swizzle[4]; + + bool_param m_renormalize; bool_param m_disable_hierarchical_endpoint_codebooks; @@ -328,10 +413,11 @@ namespace basisu bool_param m_mip_premultiplied; // not currently supported bool_param m_mip_renormalize; bool_param m_mip_wrapping; + bool_param m_mip_fast; param m_mip_smallest_dimension; // Codebook size (quality) control. - // If m_quality_level != -1, it controls the quality level. 
It ranges from [0,255]. + // If m_quality_level != -1, it controls the quality level. It ranges from [0,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. uint32_t m_max_endpoint_clusters; uint32_t m_max_selector_clusters; @@ -343,6 +429,31 @@ namespace basisu uint32_t m_userdata1; uint32_t m_us_per_frame; + // cPackUASTCLevelDefault, etc. + uint32_t m_pack_uastc_flags; + bool_param m_rdo_uastc; + param m_rdo_uastc_quality_scalar; + param m_rdo_uastc_dict_size; + param m_rdo_uastc_max_smooth_block_error_scale; + param m_rdo_uastc_smooth_block_max_std_dev; + param m_rdo_uastc_max_allowed_rms_increase_ratio; + param m_rdo_uastc_skip_block_rms_thresh; + bool_param m_rdo_uastc_favor_simpler_modes_in_rdo_mode; + bool_param m_rdo_uastc_multithreading; + + param m_resample_width; + param m_resample_height; + param m_resample_factor; + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; + + // KTX2 specific parameters. + // Internally, the compressor always creates a .basis file then it converts that lossless to KTX2. + bool_param m_create_ktx2_file; + basist::ktx2_supercompression m_ktx2_uastc_supercompression; + basist::ktx2_transcoder::key_value_vec m_ktx2_key_values; + param m_ktx2_zstd_supercompression_level; + bool_param m_ktx2_srgb_transfer_func; + job_pool *m_pJob_pool; }; @@ -360,35 +471,41 @@ namespace basisu cECSuccess = 0, cECFailedReadingSourceImages, cECFailedValidating, + cECFailedEncodeUASTC, cECFailedFrontEnd, cECFailedFontendExtract, cECFailedBackend, cECFailedCreateBasisFile, - cECFailedWritingOutput + cECFailedWritingOutput, + cECFailedUASTCRDOPostProcess, + cECFailedCreateKTX2File }; error_code process(); + // The output .basis file will always be valid of process() succeeded. 
const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } - const etc_block_vec &get_output_blocks() const { return m_output_blocks; } + + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. + const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } - const std::vector &get_stats() const { return m_stats; } + const basisu::vector &get_stats() const { return m_stats; } uint32_t get_basis_file_size() const { return m_basis_file_size; } double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } - + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } - + private: basis_compressor_params m_params; - std::vector m_slice_images; + basisu::vector m_slice_images; - std::vector m_stats; + basisu::vector m_stats; uint32_t m_basis_file_size; double m_basis_bits_per_texel; - + basisu_backend_slice_desc_vec m_slice_descs; uint32_t m_total_blocks; @@ -397,33 +514,41 @@ namespace basisu basisu_frontend m_frontend; pixel_block_vec m_source_blocks; - std::vector m_frontend_output_textures; + basisu::vector m_frontend_output_textures; - std::vector m_best_etc1s_images; - std::vector m_best_etc1s_images_unpacked; + basisu::vector m_best_etc1s_images; + basisu::vector m_best_etc1s_images_unpacked; basisu_backend m_backend; basisu_file m_basis_file; - std::vector m_decoded_output_textures; - std::vector m_decoded_output_textures_unpacked; - std::vector m_decoded_output_textures_bc1; - std::vector m_decoded_output_textures_unpacked_bc1; + basisu::vector m_decoded_output_textures; + basisu::vector m_decoded_output_textures_unpacked; + basisu::vector m_decoded_output_textures_bc7; + basisu::vector m_decoded_output_textures_unpacked_bc7; uint8_vec m_output_basis_file; - etc_block_vec m_output_blocks; + uint8_vec m_output_ktx2_file; + + basisu::vector m_uastc_slice_textures; + basisu_backend_output m_uastc_backend_output; bool m_any_source_image_has_alpha; 
bool read_source_images(); + bool extract_source_blocks(); bool process_frontend(); bool extract_frontend_texture_data(); bool process_backend(); bool create_basis_file_and_transcode(); bool write_output_files_and_compute_stats(); - bool generate_mipmaps(const image &img, std::vector &mips, bool has_alpha); + error_code encode_slices_to_uastc(); + bool generate_mipmaps(const image &img, basisu::vector &mips, bool has_alpha); bool validate_texture_type_constraints(); + bool validate_ktx2_constraints(); + void get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr); + bool create_ktx2_file(); }; } // namespace basisu diff --git a/src/deps/basis-universal/basisu_enc.cpp b/src/deps/basis-universal/encoder/basisu_enc.cpp similarity index 58% rename from src/deps/basis-universal/basisu_enc.cpp rename to src/deps/basis-universal/encoder/basisu_enc.cpp index 7057c65cf8..daaf65badc 100644 --- a/src/deps/basis-universal/basisu_enc.cpp +++ b/src/deps/basis-universal/encoder/basisu_enc.cpp @@ -1,5 +1,5 @@ // basisu_enc.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,7 +17,11 @@ #include "basisu_resampler.h" #include "basisu_resampler_filters.h" #include "basisu_etc.h" -#include "transcoder/basisu_transcoder.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_bc7enc.h" +#include "apg_bmp.h" +#include "jpgd.h" +#include #if defined(_WIN32) // For QueryPerformanceCounter/QueryPerformanceFrequency @@ -29,6 +33,9 @@ namespace basisu { uint64_t interval_timer::g_init_ticks, interval_timer::g_freq; double interval_timer::g_timer_freq; +#if BASISU_SUPPORT_SSE + bool g_cpu_supports_sse41; +#endif uint8_t g_hamming_dist[256] = { @@ -50,10 +57,117 @@ namespace basisu 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; + // This is a Public Domain 8x8 font from here: + // https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h + const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = + { + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( ) + { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) + { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (") + { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#) + { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($) + { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%) + { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&) + { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (') + { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (() + { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ()) + { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*) + { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,) + { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.) 
+ { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/) + { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0) + { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1) + { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2) + { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3) + { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00}, // U+0034 (4) + { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5) + { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6) + { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7) + { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8) + { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (;) + { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<) + { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=) + { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>) + { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?) 
+ { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@) + { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A) + { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B) + { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C) + { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F) + { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G) + { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H) + { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I) + { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J) + { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K) + { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L) + { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M) + { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N) + { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O) + { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P) + { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q) + { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R) + { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S) + { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V) + { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W) + { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X) + { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y) + { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z) + { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([) + { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\) + { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // 
U+005D (]) + { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_) + { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`) + { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a) + { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b) + { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c) + { 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d) + { 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e) + { 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g) + { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h) + { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i) + { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j) + { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k) + { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l) + { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m) + { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n) + { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o) + { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q) + { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r) + { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s) + { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v) + { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w) + { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y) + { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z) + { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 
0x00}, // U+007B ({) + { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|) + { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (}) + { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F + }; + // Encoder library initialization (just call once at startup) void basisu_encoder_init() { + detect_sse41(); + basist::basisu_transcoder_init(); + pack_etc1_solid_color_init(); + //uastc_init(); + bc7enc_compress_block_init(); // must be after uastc_init() } void error_printf(const char *pFmt, ...) @@ -81,7 +195,7 @@ namespace basisu { QueryPerformanceFrequency(reinterpret_cast(pTicks)); } -#elif defined(__APPLE__) +#elif defined(__APPLE__) || defined(__OpenBSD__) #include inline void query_counter(timer_ticks* pTicks) { @@ -108,7 +222,7 @@ namespace basisu #else #error TODO #endif - + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_timer_freq) @@ -169,38 +283,142 @@ namespace basisu return ticks * g_timer_freq; } - bool load_png(const char* pFilename, image& img) + const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; + + bool load_bmp(const char* pFilename, image& img) { - std::vector buffer; - unsigned err = lodepng::load_file(buffer, std::string(pFilename)); - if (err) + int w = 0, h = 0; + unsigned int n_chans = 0; + unsigned char* pImage_data = apg_bmp_read(pFilename, &w, &h, &n_chans); + + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) + { + error_printf("Failed loading .BMP image \"%s\"!\n", pFilename); + + if (pImage_data) + apg_bmp_free(pImage_data); + return false; + } + + if (sizeof(void *) == sizeof(uint32_t)) + { + if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) + { + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + + if (pImage_data) + apg_bmp_free(pImage_data); + + return false; + } + } + + img.resize(w, h); + + const uint8_t 
*pSrc = pImage_data; + for (int y = 0; y < h; y++) + { + color_rgba *pDst = &img(0, y); + + for (int x = 0; x < w; x++) + { + pDst->r = pSrc[0]; + pDst->g = pSrc[1]; + pDst->b = pSrc[2]; + pDst->a = (n_chans == 3) ? 255 : pSrc[3]; + + pSrc += n_chans; + ++pDst; + } + } + + apg_bmp_free(pImage_data); - unsigned w = 0, h = 0; + return true; + } + + bool load_tga(const char* pFilename, image& img) + { + int w = 0, h = 0, n_chans = 0; + uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) + { + error_printf("Failed loading .TGA image \"%s\"!\n", pFilename); + + if (pImage_data) + free(pImage_data); + + return false; + } + if (sizeof(void *) == sizeof(uint32_t)) + { + if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) + { + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + + if (pImage_data) + free(pImage_data); + + return false; + } + } + + img.resize(w, h); + + const uint8_t *pSrc = pImage_data; + for (int y = 0; y < h; y++) + { + color_rgba *pDst = &img(0, y); + + for (int x = 0; x < w; x++) + { + pDst->r = pSrc[0]; + pDst->g = pSrc[1]; + pDst->b = pSrc[2]; + pDst->a = (n_chans == 3) ? 255 : pSrc[3]; + + pSrc += n_chans; + ++pDst; + } + } + + free(pImage_data); + + return true; + } + + bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) + { + if (!buf_size) + return false; + + unsigned err = 0, w = 0, h = 0; + + if (sizeof(void*) == sizeof(uint32_t)) { // Inspect the image first on 32-bit builds, to see if the image would require too much memory. lodepng::State state; - err = lodepng_inspect(&w, &h, &state, &buffer[0], buffer.size()); + err = lodepng_inspect(&w, &h, &state, pBuf, buf_size); if ((err != 0) || (!w) || (!h)) return false; const uint32_t exepected_alloc_size = w * h * sizeof(uint32_t); - + // If the file is too large on 32-bit builds then just bail now, to prevent causing a memory exception. 
- const uint32_t MAX_ALLOC_SIZE = 250000000; - if (exepected_alloc_size >= MAX_ALLOC_SIZE) + if (exepected_alloc_size >= MAX_32BIT_ALLOC_SIZE) { - error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", (pFilename != nullptr) ? pFilename : "", w, h); return false; } - + w = h = 0; } - + std::vector out; - err = lodepng::decode(out, w, h, &buffer[0], buffer.size()); + err = lodepng::decode(out, w, h, pBuf, buf_size); if ((err != 0) || (!w) || (!h)) return false; @@ -213,12 +431,62 @@ namespace basisu return true; } + + bool load_png(const char* pFilename, image& img) + { + std::vector buffer; + unsigned err = lodepng::load_file(buffer, std::string(pFilename)); + if (err) + return false; + + + return load_png(buffer.data(), buffer.size(), img, pFilename); + } + + bool load_jpg(const char *pFilename, image& img) + { + int width = 0, height = 0, actual_comps = 0; + uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); + if (!pImage_data) + return false; + + img.init(pImage_data, width, height, 4); + + free(pImage_data); + + return true; + } + + bool load_image(const char* pFilename, image& img) + { + std::string ext(string_get_extension(std::string(pFilename))); + + if (ext.length() == 0) + return false; + + const char *pExt = ext.c_str(); + + if (strcasecmp(pExt, "png") == 0) + return load_png(pFilename, img); + if (strcasecmp(pExt, "bmp") == 0) + return load_bmp(pFilename, img); + if (strcasecmp(pExt, "tga") == 0) + return load_tga(pFilename, img); + if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) ) + return load_jpg(pFilename, img); + + return false; + } - bool save_png(const char* pFilename, const image & img, uint32_t image_save_flags, uint32_t grayscale_comp) + bool save_png(const 
char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) { if (!img.get_total_pixels()) return false; + const uint32_t MAX_PNG_IMAGE_DIM = 32768; + if ((img.get_width() > MAX_PNG_IMAGE_DIM) || (img.get_height() > MAX_PNG_IMAGE_DIM)) + return false; + std::vector out; unsigned err = 0; @@ -231,16 +499,19 @@ namespace basisu for (uint32_t x = 0; x < img.get_width(); x++) *pDst++ = img(x, y)[grayscale_comp]; - err = lodepng::encode(out, (const uint8_t*)& g_pixels[0], img.get_width(), img.get_height(), LCT_GREY, 8); + err = lodepng::encode(out, (const uint8_t*)&g_pixels[0], img.get_width(), img.get_height(), LCT_GREY, 8); } else { bool has_alpha = img.has_alpha(); if ((!has_alpha) || ((image_save_flags & cImageSaveIgnoreAlpha) != 0)) { - uint8_vec rgb_pixels(img.get_width() * 3 * img.get_height()); + const uint64_t total_bytes = (uint64_t)img.get_width() * 3U * (uint64_t)img.get_height(); + if (total_bytes > INT_MAX) + return false; + uint8_vec rgb_pixels(static_cast(total_bytes)); uint8_t *pDst = &rgb_pixels[0]; - + for (uint32_t y = 0; y < img.get_height(); y++) { for (uint32_t x = 0; x < img.get_width(); x++) @@ -302,7 +573,11 @@ namespace basisu } } - data.resize((size_t)filesize); + if (!data.try_resize((size_t)filesize)) + { + fclose(pFile); + return false; + } if (filesize) { @@ -525,7 +800,7 @@ namespace basisu if ((s >= num_syms) || (A[r].m_key < A[s].m_key)) { A[next].m_key = A[r].m_key; - A[r].m_key = static_cast(next); + A[r].m_key = next; ++r; } else @@ -536,13 +811,13 @@ namespace basisu if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key)) { - A[next].m_key = static_cast(A[next].m_key + A[r].m_key); - A[r].m_key = static_cast(next); + A[next].m_key = A[next].m_key + A[r].m_key; + A[r].m_key = next; ++r; } else { - A[next].m_key = static_cast(A[next].m_key + A[s].m_key); + A[next].m_key = A[next].m_key + A[s].m_key; ++s; } } @@ -562,7 +837,7 @@ namespace basisu ; for ( ; num_avail > num_used; --next, --num_avail) - 
A[next].m_key = static_cast(depth); + A[next].m_key = depth; num_avail = 2 * num_used; num_used = 0; @@ -610,6 +885,10 @@ namespace basisu for (i = 0; i < num_syms; i++) { uint32_t freq = pSyms0[i].m_key; + + // We scale all input frequencies to 16-bits. + assert(freq <= UINT16_MAX); + hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } @@ -731,8 +1010,13 @@ namespace basisu else { for (uint32_t i = 0; i < num_syms; i++) + { if (pSym_freq[i]) - sym_freq[i] = static_cast(maximum((pSym_freq[i] * 65534U + (max_freq >> 1)) / max_freq, 1)); + { + uint32_t f = static_cast((static_cast(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq); + sym_freq[i] = static_cast(clamp(f, 1, 65534)); + } + } } return init(num_syms, &sym_freq[0], max_code_size); @@ -1125,10 +1409,10 @@ namespace basisu void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma) { assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); - - const uint32_t width = std::min(a.get_width(), b.get_width()); - const uint32_t height = std::min(a.get_height(), b.get_height()); - + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + double hist[256]; clear_obj(hist); @@ -1159,7 +1443,7 @@ namespace basisu { if (hist[i]) { - m_max = std::max(m_max, (float)i); + m_max = basisu::maximum(m_max, (float)i); double v = i * hist[i]; sum += v; sum2 += i * v; @@ -1171,9 +1455,9 @@ namespace basisu total_values *= (double)clamp(total_chans, 1, 4); m_mean = (float)clamp(sum / total_values, 0.0f, 255.0); - m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, 255.0 * 255.0); + m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); m_rms = (float)sqrt(m_mean_squared); - m_psnr = m_rms ? (float)clamp(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f; + m_psnr = m_rms ? 
(float)clamp(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f; } void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed) @@ -1253,8 +1537,8 @@ namespace basisu } job_pool::job_pool(uint32_t num_threads) : - m_kill_flag(false), - m_num_active_jobs(0) + m_num_active_jobs(0), + m_kill_flag(false) { assert(num_threads >= 1U); @@ -1302,13 +1586,15 @@ namespace basisu std::unique_lock lock(m_mutex); m_queue.emplace_back(std::move(job)); - + const size_t queue_size = m_queue.size(); lock.unlock(); if (queue_size > 1) + { m_has_work.notify_one(); + } } void job_pool::wait_for_all() @@ -1373,4 +1659,479 @@ namespace basisu debug_printf("job_pool::job_thread: exiting\n"); } + // .TGA image loading + #pragma pack(push) + #pragma pack(1) + struct tga_header + { + uint8_t m_id_len; + uint8_t m_cmap; + uint8_t m_type; + packed_uint<2> m_cmap_first; + packed_uint<2> m_cmap_len; + uint8_t m_cmap_bpp; + packed_uint<2> m_x_org; + packed_uint<2> m_y_org; + packed_uint<2> m_width; + packed_uint<2> m_height; + uint8_t m_depth; + uint8_t m_desc; + }; + #pragma pack(pop) + + const uint32_t MAX_TGA_IMAGE_SIZE = 16384; + + enum tga_image_type + { + cITPalettized = 1, + cITRGB = 2, + cITGrayscale = 3 + }; + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans) + { + width = 0; + height = 0; + n_chans = 0; + + if (buf_size <= sizeof(tga_header)) + return nullptr; + + const tga_header &hdr = *reinterpret_cast(pBuf); + + if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE)) + return nullptr; + + if (hdr.m_desc >> 6) + return nullptr; + + // Simple validation + if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) + return nullptr; + + if (hdr.m_cmap) + { + if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) + return nullptr; + + // Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either. 
+ if (hdr.m_cmap_first != 0) + return nullptr; + } + + const bool x_flipped = (hdr.m_desc & 0x10) != 0; + const bool y_flipped = (hdr.m_desc & 0x20) == 0; + + bool rle_flag = false; + int file_image_type = hdr.m_type; + if (file_image_type > 8) + { + file_image_type -= 8; + rle_flag = true; + } + + const tga_image_type image_type = static_cast(file_image_type); + + switch (file_image_type) + { + case cITRGB: + if (hdr.m_depth == 8) + return nullptr; + break; + case cITPalettized: + if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0)) + return nullptr; + break; + case cITGrayscale: + if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0)) + return nullptr; + if ((hdr.m_depth != 8) && (hdr.m_depth != 16)) + return nullptr; + break; + default: + return nullptr; + } + + uint32_t tga_bytes_per_pixel = 0; + + switch (hdr.m_depth) + { + case 32: + tga_bytes_per_pixel = 4; + n_chans = 4; + break; + case 24: + tga_bytes_per_pixel = 3; + n_chans = 3; + break; + case 16: + case 15: + tga_bytes_per_pixel = 2; + // For compatibility with stb_image_write.h + n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3; + break; + case 8: + tga_bytes_per_pixel = 1; + // For palettized RGBA support, which both FreeImage and stb_image support. + n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 4 : 3; + break; + default: + return nullptr; + } + + const uint8_t *pSrc = pBuf + sizeof(tga_header); + uint32_t bytes_remaining = buf_size - sizeof(tga_header); + + if (hdr.m_id_len) + { + if (bytes_remaining < hdr.m_id_len) + return nullptr; + pSrc += hdr.m_id_len; + bytes_remaining += hdr.m_id_len; + } + + color_rgba pal[256]; + for (uint32_t i = 0; i < 256; i++) + pal[i].set(0, 0, 0, 255); + + if ((hdr.m_cmap) && (hdr.m_cmap_len)) + { + if (image_type == cITPalettized) + { + // Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years). 
+ if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) ) + return nullptr; + + if (hdr.m_cmap_bpp == 32) + { + const uint32_t pal_size = hdr.m_cmap_len * 4; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + pal[i].r = pSrc[i * 4 + 2]; + pal[i].g = pSrc[i * 4 + 1]; + pal[i].b = pSrc[i * 4 + 0]; + pal[i].a = pSrc[i * 4 + 3]; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + else if (hdr.m_cmap_bpp == 24) + { + const uint32_t pal_size = hdr.m_cmap_len * 3; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + pal[i].r = pSrc[i * 3 + 2]; + pal[i].g = pSrc[i * 3 + 1]; + pal[i].b = pSrc[i * 3 + 0]; + pal[i].a = 255; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + else + { + const uint32_t pal_size = hdr.m_cmap_len * 2; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8); + + pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31; + pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31; + pal[i].b = ((v & 31) * 255 + 15) / 31; + pal[i].a = 255; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + } + else + { + const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len; + if (bytes_remaining < bytes_to_skip) + return nullptr; + pSrc += bytes_to_skip; + bytes_remaining += bytes_to_skip; + } + } + + width = hdr.m_width; + height = hdr.m_height; + + const uint32_t source_pitch = width * tga_bytes_per_pixel; + const uint32_t dest_pitch = width * n_chans; + + uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); + if (!pImage) + return nullptr; + + std::vector input_line_buf; + if (rle_flag) + input_line_buf.resize(source_pitch); + + int run_type = 0, run_remaining = 0; + uint8_t run_pixel[4]; + memset(run_pixel, 0, sizeof(run_pixel)); + + 
for (int y = 0; y < height; y++) + { + const uint8_t *pLine_data; + + if (rle_flag) + { + int pixels_remaining = width; + uint8_t *pDst = &input_line_buf[0]; + + do + { + if (!run_remaining) + { + if (bytes_remaining < 1) + { + free(pImage); + return nullptr; + } + + int v = *pSrc++; + bytes_remaining--; + + run_type = v & 0x80; + run_remaining = (v & 0x7F) + 1; + + if (run_type) + { + if (bytes_remaining < tga_bytes_per_pixel) + { + free(pImage); + return nullptr; + } + + memcpy(run_pixel, pSrc, tga_bytes_per_pixel); + pSrc += tga_bytes_per_pixel; + bytes_remaining -= tga_bytes_per_pixel; + } + } + + const uint32_t n = basisu::minimum(pixels_remaining, run_remaining); + pixels_remaining -= n; + run_remaining -= n; + + if (run_type) + { + for (uint32_t i = 0; i < n; i++) + for (uint32_t j = 0; j < tga_bytes_per_pixel; j++) + *pDst++ = run_pixel[j]; + } + else + { + const uint32_t bytes_wanted = n * tga_bytes_per_pixel; + + if (bytes_remaining < bytes_wanted) + { + free(pImage); + return nullptr; + } + + memcpy(pDst, pSrc, bytes_wanted); + pDst += bytes_wanted; + + pSrc += bytes_wanted; + bytes_remaining -= bytes_wanted; + } + + } while (pixels_remaining); + + assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel); + + pLine_data = &input_line_buf[0]; + } + else + { + if (bytes_remaining < source_pitch) + { + free(pImage); + return nullptr; + } + + pLine_data = pSrc; + bytes_remaining -= source_pitch; + pSrc += source_pitch; + } + + // Convert to 24bpp RGB or 32bpp RGBA. + uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0); + const int dst_stride = x_flipped ? 
-((int)n_chans) : n_chans; + + switch (hdr.m_depth) + { + case 32: + assert(tga_bytes_per_pixel == 4 && n_chans == 4); + for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride) + { + pDst[0] = pLine_data[2]; + pDst[1] = pLine_data[1]; + pDst[2] = pLine_data[0]; + pDst[3] = pLine_data[3]; + } + break; + case 24: + assert(tga_bytes_per_pixel == 3 && n_chans == 3); + for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride) + { + pDst[0] = pLine_data[2]; + pDst[1] = pLine_data[1]; + pDst[2] = pLine_data[0]; + } + break; + case 16: + case 15: + if (image_type == cITRGB) + { + assert(tga_bytes_per_pixel == 2 && n_chans == 3); + for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) + { + const uint32_t v = pLine_data[0] | (pLine_data[1] << 8); + pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31; + pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31; + pDst[2] = ((v & 31) * 255 + 15) / 31; + } + } + else + { + assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4); + for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) + { + pDst[0] = pLine_data[0]; + pDst[1] = pLine_data[0]; + pDst[2] = pLine_data[0]; + pDst[3] = pLine_data[1]; + } + } + break; + case 8: + assert(tga_bytes_per_pixel == 1); + if (image_type == cITPalettized) + { + if (hdr.m_cmap_bpp == 32) + { + assert(n_chans == 4); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint32_t c = *pLine_data; + pDst[0] = pal[c].r; + pDst[1] = pal[c].g; + pDst[2] = pal[c].b; + pDst[3] = pal[c].a; + } + } + else + { + assert(n_chans == 3); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint32_t c = *pLine_data; + pDst[0] = pal[c].r; + pDst[1] = pal[c].g; + pDst[2] = pal[c].b; + } + } + } + else + { + assert(n_chans == 3); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint8_t c = *pLine_data; + pDst[0] = c; + pDst[1] = c; + pDst[2] = c; + } + } + break; + default: + 
assert(0); + break; + } + } // y + + return pImage; + } + + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans) + { + width = height = n_chans = 0; + + uint8_vec filedata; + if (!read_file_to_vec(pFilename, filedata)) + return nullptr; + + if (!filedata.size() || (filedata.size() > UINT32_MAX)) + return nullptr; + + return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); + } + + void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...) + { + char buf[2048]; + + va_list args; + va_start(args, pFmt); +#ifdef _WIN32 + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + va_end(args); + + const char* p = buf; + + const uint32_t orig_x_ofs = x_ofs; + + while (*p) + { + uint8_t c = *p++; + if ((c < 32) || (c > 127)) + c = '.'; + + const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0]; + + for (uint32_t y = 0; y < 8; y++) + { + uint32_t row_bits = pGlpyh[y]; + for (uint32_t x = 0; x < 8; x++) + { + const uint32_t q = row_bits & (1 << x); + + const color_rgba* pColor = q ? 
&fg : pBG; + if (!pColor) + continue; + + if (alpha_only) + fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); + else + fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); + } + } + + x_ofs += 8 * scale_x; + if ((x_ofs + 8 * scale_x) > m_width) + { + x_ofs = orig_x_ofs; + y_ofs += 8 * scale_y; + } + } + } + } // namespace basisu diff --git a/src/deps/basis-universal/basisu_enc.h b/src/deps/basis-universal/encoder/basisu_enc.h similarity index 87% rename from src/deps/basis-universal/basisu_enc.h rename to src/deps/basis-universal/encoder/basisu_enc.h index c2b9133045..0ce011452d 100644 --- a/src/deps/basis-universal/basisu_enc.h +++ b/src/deps/basis-universal/encoder/basisu_enc.h @@ -1,5 +1,5 @@ // basisu_enc.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" -#include "transcoder/basisu_transcoder_internal.h" +#include "../transcoder/basisu.h" +#include "../transcoder/basisu_transcoder_internal.h" #include #include @@ -22,18 +22,35 @@ #include #include #include +#include #if !defined(_WIN32) || defined(__MINGW32__) #include #endif +// This module is really just a huge grab bag of classes and helper functions needed by the encoder. + +// If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time. +#define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0) + namespace basisu { extern uint8_t g_hamming_dist[256]; + extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; - // Encoder library initialization + // Encoder library initialization. 
+ // This function MUST be called before encoding anything! void basisu_encoder_init(); + // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 + extern void detect_sse41(); + +#if BASISU_SUPPORT_SSE + extern bool g_cpu_supports_sse41; +#else + const bool g_cpu_supports_sse41 = false; +#endif + void error_printf(const char *pFmt, ...); // Helpers @@ -42,7 +59,68 @@ namespace basisu { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - + + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; + } + + inline uint8_t mul_8(uint32_t v, uint32_t a) + { + v = v * a + 128; + return (uint8_t)((v + (v >> 8)) >> 8); + } + + inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64); + uint64_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + // Hashing inline uint32_t 
bitmix32c(uint32_t v) @@ -68,6 +146,16 @@ namespace basisu return v; } + inline uint32_t wang_hash(uint32_t seed) + { + seed = (seed ^ 61) ^ (seed >> 16); + seed *= 9; + seed = seed ^ (seed >> 4); + seed *= 0x27d4eb2d; + seed = seed ^ (seed >> 15); + return seed; + } + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); template @@ -79,6 +167,72 @@ namespace basisu } }; + class running_stat + { + public: + running_stat() : + m_n(0), + m_old_m(0), m_new_m(0), m_old_s(0), m_new_s(0) + { + } + void clear() + { + m_n = 0; + } + void push(double x) + { + m_n++; + if (m_n == 1) + { + m_old_m = m_new_m = x; + m_old_s = 0.0; + m_min = x; + m_max = x; + } + else + { + m_new_m = m_old_m + (x - m_old_m) / m_n; + m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); + m_old_m = m_new_m; + m_old_s = m_new_s; + m_min = basisu::minimum(x, m_min); + m_max = basisu::maximum(x, m_max); + } + } + uint32_t get_num() const + { + return m_n; + } + double get_mean() const + { + return (m_n > 0) ? m_new_m : 0.0; + } + + double get_variance() const + { + return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); + } + + double get_std_dev() const + { + return sqrt(get_variance()); + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + private: + uint32_t m_n; + double m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; + }; + // Linear algebra template @@ -117,7 +271,7 @@ namespace basisu inline vec &set(const vec &other) { uint32_t i; - if (static_cast(&other) == static_cast(this)) + if ((const void *)(&other) == (const void *)(this)) return *this; const uint32_t m = minimum(OtherN, N); for (i = 0; i < m; i++) @@ -357,6 +511,7 @@ namespace basisu BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool); public: + // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. 
job_pool(uint32_t num_threads); ~job_pool(); @@ -369,7 +524,7 @@ namespace basisu private: std::vector m_threads; - std::vector > m_queue; + std::vector > m_queue; std::mutex m_mutex; std::condition_variable m_has_work; @@ -419,7 +574,7 @@ namespace basisu return *this; } }; - + class color_rgba { public: @@ -439,6 +594,25 @@ namespace basisu inline color_rgba() { static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)"); + } + + // Not too hot about this idea. + inline color_rgba(const basist::color32& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } + + color_rgba& operator= (const basist::color32& rhs) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + a = rhs.a; + return *this; } inline color_rgba(int y) @@ -562,11 +736,20 @@ namespace basisu inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } inline int get_luma(bool luma_601) const { return luma_601 ? 
get_601_luma() : get_709_luma(); } + + inline basist::color32 get_color32() const + { + return basist::color32(r, g, b, a); + } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } }; - typedef std::vector color_rgba_vec; + typedef basisu::vector color_rgba_vec; const color_rgba g_black_color(0, 0, 0, 255); + const color_rgba g_black_trans_color(0, 0, 0, 0); const color_rgba g_white_color(255, 255, 255, 255); inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) @@ -594,6 +777,7 @@ namespace basisu { if (perceptual) { +#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; @@ -616,11 +800,61 @@ namespace basisu } return d; +#elif 1 + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) + + ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + + ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); + + if (alpha) + { + int da = (e1.a - e2.a) << 7; + id += ((uint32_t)(da * da) >> 7U); + } + + return id; +#else + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + int64_t delta_l = dr * 27 + dg * 92 + db * 9; + int64_t delta_cr = dr * 128 - delta_l; + int64_t delta_cb = db * 128 - delta_l; + + int64_t id = ((delta_l * delta_l) * 128) + + ((delta_cr * delta_cr) * 26) + + ((delta_cb * delta_cb) * 3); + + if (alpha) + { + int64_t da = (e1.a - e2.a); + id += 
(da * da) * 128; + } + + int d = (id + 8192) >> 14; + + return d; +#endif } else return color_distance(e1, e2, alpha); } + static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) + { + const int dl = a.r - b.r; + const int da = a.a - b.a; + return dl * dl + da * da; + } + // String helpers inline int string_find_right(const std::string& filename, char c) @@ -928,7 +1162,7 @@ namespace basisu float m_priority; }; - std::vector m_heap; + basisu::vector m_heap; uint32_t m_size; // Push down entry at index @@ -960,7 +1194,7 @@ namespace basisu public: typedef TrainingVectorType training_vec_type; typedef std::pair training_vec_with_weight; - typedef std::vector< training_vec_with_weight > array_of_weighted_training_vecs; + typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs; tree_vector_quant() : m_next_codebook_index(0) @@ -980,7 +1214,7 @@ namespace basisu const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; } array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } - void retrieve(std::vector< std::vector > &codebook) const + void retrieve(basisu::vector< basisu::vector > &codebook) const { for (uint32_t i = 0; i < m_nodes.size(); i++) { @@ -993,7 +1227,7 @@ namespace basisu } } - void retrieve(std::vector &codebook) const + void retrieve(basisu::vector &codebook) const { for (uint32_t i = 0; i < m_nodes.size(); i++) { @@ -1006,7 +1240,7 @@ namespace basisu } } - void retrieve(uint32_t max_clusters, std::vector &codebook) const + void retrieve(uint32_t max_clusters, basisu::vector &codebook) const { uint_vec node_stack; node_stack.reserve(512); @@ -1053,7 +1287,7 @@ namespace basisu priority_queue var_heap; var_heap.init(max_size, 0, m_nodes[0].m_var); - std::vector l_children, r_children; + basisu::vector l_children, r_children; // Now split the worst nodes l_children.reserve(m_training_vecs.size() + 1); @@ -1091,7 +1325,7 @@ namespace basisu 
inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { } // vecs is erased - inline void set(const TrainingVectorType &org, uint64_t weight, float var, std::vector &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } + inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } inline bool is_leaf() const { return m_left_index < 0; } @@ -1099,11 +1333,11 @@ namespace basisu uint64_t m_weight; TrainingVectorType m_origin; int32_t m_left_index, m_right_index; - std::vector m_training_vecs; + basisu::vector m_training_vecs; int m_codebook_index; }; - typedef std::vector tsvq_node_vec; + typedef basisu::vector tsvq_node_vec; tsvq_node_vec m_nodes; array_of_weighted_training_vecs m_training_vecs; @@ -1138,7 +1372,7 @@ namespace basisu return root; } - bool split_node(uint32_t node_index, priority_queue &var_heap, std::vector &l_children, std::vector &r_children) + bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector &l_children, basisu::vector &r_children) { TrainingVectorType l_child_org, r_child_org; uint64_t l_weight = 0, r_weight = 0; @@ -1238,7 +1472,7 @@ namespace basisu bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const { - const uint32_t N = TrainingVectorType::num_elements; + //const uint32_t N = TrainingVectorType::num_elements; if (2 == node.m_training_vecs.size()) { @@ -1303,7 +1537,7 @@ namespace basisu if (largest_axis_index < 0) return false; - std::vector keys(node.m_training_vecs.size()); + basisu::vector keys(node.m_training_vecs.size()); for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index]; @@ -1351,8 +1585,8 @@ namespace basisu } bool refine_split(const tsvq_node &node, - 
TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, std::vector &l_children, - TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, std::vector &r_children) const + TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector &l_children, + TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector &r_children) const { l_children.reserve(node.m_training_vecs.size()); r_children.reserve(node.m_training_vecs.size()); @@ -1400,6 +1634,14 @@ namespace basisu if ((!l_weight) || (!r_weight)) { + l_children.resize(0); + new_l_child.set(0.0f); + l_ttsum = 0.0f; + l_weight = 0; + r_children.resize(0); + new_r_child.set(0.0f); + r_ttsum = 0.0f; + r_weight = 0; TrainingVectorType firstVec; for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) { @@ -1426,7 +1668,7 @@ namespace basisu } } - if (!l_weight) + if ((!l_weight) || (!r_weight)) return false; } @@ -1465,8 +1707,8 @@ namespace basisu template bool generate_hierarchical_codebook_threaded_internal(Quantizer& q, uint32_t max_codebook_size, uint32_t max_parent_codebook_size, - std::vector& codebook, - std::vector& parent_codebook, + basisu::vector& codebook, + basisu::vector& parent_codebook, uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool) { codebook.resize(0); @@ -1492,7 +1734,7 @@ namespace basisu if (!q.generate(max_threads)) return false; - std::vector initial_codebook; + basisu::vector initial_codebook; q.retrieve(initial_codebook); @@ -1511,12 +1753,14 @@ namespace basisu bool success_flags[cMaxThreads]; clear_obj(success_flags); - std::vector local_clusters[cMaxThreads]; - std::vector local_parent_clusters[cMaxThreads]; + basisu::vector local_clusters[cMaxThreads]; + basisu::vector local_parent_clusters[cMaxThreads]; for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) { +#ifndef __EMSCRIPTEN__ pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, 
&q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] { +#endif Quantizer& lq = quantizers[thread_iter]; uint_vec& cluster_indices = initial_codebook[thread_iter]; @@ -1557,11 +1801,15 @@ namespace basisu } } +#ifndef __EMSCRIPTEN__ } ); +#endif } // thread_iter +#ifndef __EMSCRIPTEN__ pJob_pool->wait_for_all(); +#endif uint32_t total_clusters = 0, total_parent_clusters = 0; @@ -1597,8 +1845,8 @@ namespace basisu template bool generate_hierarchical_codebook_threaded(Quantizer& q, uint32_t max_codebook_size, uint32_t max_parent_codebook_size, - std::vector& codebook, - std::vector& parent_codebook, + basisu::vector& codebook, + basisu::vector& parent_codebook, uint32_t max_threads, job_pool *pJob_pool) { typedef bit_hasher training_vec_bit_hasher; @@ -1628,7 +1876,7 @@ namespace basisu Quantizer group_quant; typedef typename group_hash::const_iterator group_hash_const_iter; - std::vector unique_vec_iters; + basisu::vector unique_vec_iters; unique_vec_iters.reserve(unique_vecs.size()); for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter) @@ -1643,7 +1891,7 @@ namespace basisu debug_printf("Limit clusterizers: %u\n", limit_clusterizers); - std::vector group_codebook, group_parent_codebook; + basisu::vector group_codebook, group_parent_codebook; bool status = generate_hierarchical_codebook_threaded_internal(group_quant, max_codebook_size, max_parent_codebook_size, group_codebook, @@ -1692,7 +1940,7 @@ namespace basisu class histogram { - std::vector m_hist; + basisu::vector m_hist; public: histogram(uint32_t size = 0) { init(size); } @@ -1753,7 +2001,8 @@ namespace basisu struct sym_freq { - uint16_t m_key, m_sym_index; + uint32_t m_key; + uint16_t m_sym_index; }; sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); @@ -1834,7 +2083,7 @@ namespace basisu { if (m_bit_buffer_size) { - m_total_bits += 8; + m_total_bits += 8 - (m_bit_buffer_size & 7); 
append_byte(static_cast(m_bit_buffer)); m_bit_buffer = 0; @@ -2106,6 +2355,12 @@ namespace basisu resize(w, h, p); } + image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : + m_width(0), m_height(0), m_pitch(0) + { + init(pImage, width, height, comps); + } + image(const image &other) : m_width(0), m_height(0), m_pitch(0) { @@ -2154,6 +2409,47 @@ namespace basisu return *this; } + void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) + { + assert(comps >= 1 && comps <= 4); + + resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const uint8_t *pSrc = &pImage[(x + y * width) * comps]; + color_rgba &dst = (*this)(x, y); + + if (comps == 1) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = 255; + } + else if (comps == 2) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = pSrc[1]; + } + else + { + dst.r = pSrc[0]; + dst.g = pSrc[1]; + dst.b = pSrc[2]; + if (comps == 4) + dst.a = pSrc[3]; + else + dst.a = 255; + } + } + } + } + image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) { for (uint32_t iy = 0; iy < h; iy++) @@ -2162,6 +2458,14 @@ namespace basisu return *this; } + image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped_alpha(x + ix, y + iy, c); + return *this; + } + image &crop_dup_borders(uint32_t w, uint32_t h) { const uint32_t orig_w = m_width, orig_h = m_height; @@ -2251,6 +2555,13 @@ namespace basisu return *this; } + inline image& set_clipped_alpha(int x, int y, const color_rgba& c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y).m_comps[3] = c.m_comps[3]; + return *this; + } + // Very straightforward blit with full clipping. Not fast, but it works. 
image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) { @@ -2375,6 +2686,8 @@ namespace basisu } return *this; } + + void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); private: uint32_t m_width, m_height, m_pitch; // all in pixels @@ -2383,7 +2696,7 @@ namespace basisu // Float images - typedef std::vector vec4F_vec; + typedef basisu::vector vec4F_vec; class imagef { @@ -2634,10 +2947,27 @@ namespace basisu }; // Image saving/loading/resampling - + + bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); bool load_png(const char* pFilename, image& img); inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } + bool load_bmp(const char* pFilename, image& img); + inline bool load_bmp(const std::string &filename, image &img) { return load_bmp(filename.c_str(), img); } + + bool load_tga(const char* pFilename, image& img); + inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } + + bool load_jpg(const char *pFilename, image& img); + inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } + + // Currently loads .BMP, .PNG, or .TGA. 
+ bool load_image(const char* pFilename, image& img); + inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); + enum { cImageSaveGrayscale = 1, @@ -2696,7 +3026,7 @@ namespace basisu template class vector2D { - typedef std::vector TVec; + typedef basisu::vector TVec; uint32_t m_width, m_height; TVec m_values; @@ -2799,7 +3129,7 @@ namespace basisu } void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); - + } // namespace basisu diff --git a/src/deps/basis-universal/encoder/basisu_etc.cpp b/src/deps/basis-universal/encoder/basisu_etc.cpp new file mode 100644 index 0000000000..232e8965b0 --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_etc.cpp @@ -0,0 +1,1593 @@ +// basis_etc.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_etc.h" + +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + +#define BASISU_DEBUG_ETC_ENCODER 0 +#define BASISU_DEBUG_ETC_ENCODER_DEEPER 0 + +namespace basisu +{ + const int8_t g_etc2_eac_tables[16][8] = + { + { -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 }, + { -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 }, + { -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 }, + { -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 } + }; + + const int8_t g_etc2_eac_tables8[16][8] = + { + { -24, -48, -72, -120, 16, 40, 64, 112 }, { -24,-56,-80,-104,16,48,72,96 }, { -16,-40,-64,-104,8,32,56,96 }, { -16,-32,-48,-104,8,24,40,96 }, + { -24,-48,-64,-96,16,40,56,88 }, { -24,-56,-72,-88,16,48,64,80 }, { -32,-56,-64,-88,24,48,56,80 }, { -24,-40,-64,-88,16,32,56,80 }, + { -16,-48,-64,-80,8,40,56,72 }, { -16,-40,-64,-80,8,32,56,72 }, { -16,-32,-64,-80,8,24,56,72 }, { -16,-40,-56,-80,8,32,48,72 }, + { -24,-32,-56,-80,16,24,48,72 }, { -8,-16,-24,-80,0,8,16,72 }, { -32,-48,-64,-72,24,40,56,64 }, { -24,-40,-56,-72,16,32,48,64 } + }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [ diff/inten_table/selector][desired_color ] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. 
+ // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16_t g_etc1_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16_t g_etc1_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 
0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 
0x0410, 0x0901, 0x0633, 0x0725, 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 
0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 
0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 
0xFFFF }, { 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 
0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929, 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 
0xFFFF }, + }; + + static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) + { + const uint32_t limit = diff ? 32 : 16; + BASISU_NOTE_UNUSED(limit); + assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = clamp(c, 0, 255); + return c; + } + + void pack_etc1_solid_color_init() + { + for (uint32_t diff = 0; diff < 2; diff++) + { + const uint32_t limit = diff ? 32 : 16; + + for (uint32_t inten = 0; inten < 8; inten++) + { + for (uint32_t selector = 0; selector < 4; selector++) + { + const uint32_t inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint32_t color = 0; color < 256; color++) + { + uint32_t best_error = UINT32_MAX, best_packed_c = 0; + for (uint32_t packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint32_t err = (uint32_t)labs(v - static_cast(color)); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + assert(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); + } + } + } + } + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. 
+ uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor) + { + assert(g_etc1_inverse_lookup[0][255]); + + static uint32_t s_next_comp[4] = { 1, 2, 0, 1 }; + + uint32_t best_error = UINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint32_t i = 0; i < 3; i++) + { + const uint32_t c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = clamp(pColor[i] + delta, 0, 255); + + const uint16_t* pTable; + if (!c_plus_delta) + pTable = g_etc1_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_etc1_color8_to_etc_block_config_0_255[1]; + else + pTable = g_etc1_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint32_t x = *pTable++; + +#ifdef _DEBUG + const uint32_t diff = x & 1; + const uint32_t inten = (x >> 1) & 7; + const uint32_t selector = (x >> 4) & 3; + const uint32_t p0 = (x >> 8) & 255; + assert(etc1_decode_value(diff, inten, selector, p0) == (uint32_t)c_plus_delta); +#endif + + const uint16_t* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16_t p1 = pInverse_table[c1]; + uint16_t p2 = pInverse_table[c2]; + const uint32_t trial_error = square(c_plus_delta - pColor[i]) + square(p1 >> 8) + square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } + found_perfect_match: + + const uint32_t diff = best_x & 1; + const uint32_t inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint32_t etc1_selector 
= g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; + + const uint32_t best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; + + static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = + { + { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, + { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, + { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, + { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, + { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, + { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, + { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, + { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, + { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, + { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, + { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 
0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, + { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, + { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, + { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, + { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, + { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, + { { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, + { { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, + { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, + { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, + { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, + { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, + { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, + { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, + { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, + { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, + { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, + { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, + { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, + { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, + { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 
1, 0, 0, 7 } }, + { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, + { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } + }; + + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + // [flip][subblock][pixel_index] + const uint32_t g_etc1_pixel_indices[2][2][8] = + { + { + { + 0 + 4 * 0, 0 + 4 * 1, 0 + 4 * 2, 0 + 4 * 3, + 1 + 4 * 0, 1 + 4 * 1, 1 + 4 * 2, 1 + 4 * 3 + }, + { + 2 + 4 * 0, 2 + 4 * 1, 2 + 4 * 2, 2 + 4 * 3, + 3 + 4 * 0, 3 + 4 * 1, 3 + 4 * 2, 3 + 4 * 3 + } + }, + { + { + 0 + 4 * 0, 1 + 4 * 0, 2 + 4 * 0, 3 + 4 * 0, + 0 + 4 * 1, 1 + 4 * 1, 2 + 4 * 1, 3 + 4 * 1 + }, + { + 0 + 4 * 2, 1 + 4 * 2, 2 + 4 * 2, 3 + 4 * 2, + 0 + 4 * 3, 1 + 4 * 3, 2 + 4 * 3, 3 + 4 * 3 + }, + } + }; + + uint16_t etc_block::pack_color5(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + 
bias) / 255U; + } + + r = minimum(r, 31U); + g = minimum(g, 31U); + b = minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 10U)); + } + + color_rgba etc_block::unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color5 & 31U; + uint32_t g = (packed_color5 >> 5U) & 31U; + uint32_t r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + void etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) + { + color_rgba c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba_i16 dc(unpack_delta3(packed_delta3)); + + int b = (packed_color5 & 31U) + dc.b; + int g = ((packed_color5 >> 5U) & 31U) + dc.g; + int r = ((packed_color5 >> 10U) & 31U) + dc.r; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = clamp(r, 0, 31); + g = clamp(g, 0, 31); + b = clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, minimum(alpha, 255U)); + return success; + } + + bool etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16_t etc_block::pack_delta3(const color_rgba_i16& color) + { + return 
pack_delta3(color.r, color.g, color.b); + } + + uint16_t etc_block::pack_delta3(int r, int g, int b) + { + assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); + } + + color_rgba_i16 etc_block::unpack_delta3(uint16_t packed_delta3) + { + int r = (packed_delta3 >> 6) & 7; + int g = (packed_delta3 >> 3) & 7; + int b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + return color_rgba_i16(r, g, b, 255); + } + + void etc_block::unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16_t etc_block::pack_color4(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = minimum(r, 15U); + g = minimum(g, 15U); + b = minimum(b, 15U); + + return static_cast(b | (g << 4U) | (r << 8U)); + } + + color_rgba etc_block::unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color_rgba c(unpack_color4(packed_color4, scaled, 0)); + r 
= c.r; + g = c.g; + b = c.b; + } + + void etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + + return success; + } + + void etc_block::get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = 
pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) + { + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint32_t table_index0 = block.get_inten_table(0); + const uint32_t table_index1 = block.get_inten_table(1); + + color_rgba subblock_colors0[4]; + color_rgba subblock_colors1[4]; + + if (diff_flag) + { + const uint16_t base_color5 = block.get_base5_color(); + const uint16_t delta_color3 = block.get_delta3_color(); + etc_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + return false; + } + else + { + const uint16_t base_color4_0 = block.get_base4_color(0); + etc_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16_t base_color4_1 = block.get_base4_color(1); + etc_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint32_t y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + 
pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint32_t y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint32_t y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return true; + } + + inline int extend_6_to_8(uint32_t n) + { + return (n << 2) | (n >> 4); + } + + inline int extend_7_to_8(uint32_t n) + { + return (n << 1) | (n >> 6); + } + + inline int extend_4_to_8(uint32_t n) + { + return (n << 4) | n; + } + + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + uint64_t total_error = 0; + + if (subblock_index < 0) + { + for (uint32_t i = 0; i < 16; i++) + total_error += color_distance(perceptual, pBlock_pixels[i], 
unpacked_block[i], false); + } + else + { + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + total_error += color_distance(perceptual, pBlock_pixels[idx], unpacked_block[idx], false); + } + } + + return total_error; + } + + void etc_block::get_subblock_pixels(color_rgba* pPixels, int subblock_index) const + { + if (subblock_index < 0) + unpack_etc1(*this, pPixels); + else + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + pPixels[i] = unpacked_block[idx]; + } + } + } + + bool etc1_optimizer::compute() + { + assert(m_pResult->m_pSelectors); + + if (m_pParams->m_pForce_selectors) + { + assert(m_pParams->m_quality >= cETCQualitySlow); + if (m_pParams->m_quality < cETCQualitySlow) + return false; + } + + const uint32_t n = m_pParams->m_num_src_pixels; + + if (m_pParams->m_cluster_fit) + { + if (m_pParams->m_quality == cETCQualityFast) + compute_internal_cluster_fit(4); + else if (m_pParams->m_quality == cETCQualityMedium) + compute_internal_cluster_fit(16); + else if (m_pParams->m_quality == cETCQualitySlow) + compute_internal_cluster_fit(64); + else + compute_internal_cluster_fit(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE); + } + else + compute_internal_neighborhood(m_br, m_bg, m_bb); + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = UINT32_MAX; + return false; + } + + //const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const uint8_t* pSelectors = m_pParams->m_pForce_selectors ? 
m_pParams->m_pForce_selectors : &m_best_solution.m_selectors[0]; + +#if defined(DEBUG) || defined(_DEBUG) + { + // sanity check the returned error + color_rgba block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64_t actual_error = 0; + + bool perceptual; + if (m_pParams->m_quality >= cETCQualityMedium) + perceptual = m_pParams->m_perceptual; + else + perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; + + for (uint32_t i = 0; i < n; i++) + actual_error += color_distance(perceptual, pSrc_pixels[i], block_colors[pSelectors[i]], false); + + assert(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::refine_solution(uint32_t max_refinement_trials) + { + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. 
+ + const uint32_t n = m_pParams->m_num_src_pixels; + + for (uint32_t refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint32_t r = 0; r < n; r++) + { + const uint32_t s = *pSelectors++; + const int yd_temp = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += clamp(base_color.r + yd_temp, 0, 255) - base_color.r; + delta_sum_g += clamp(base_color.g + yd_temp, 0, 255) - base_color.g; + delta_sum_b += clamp(base_color.b + yd_temp, 0, 255) - base_color.b; + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + + const float avg_delta_r_f = static_cast(delta_sum_r) / n; + const float avg_delta_g_f = static_cast(delta_sum_g) / n; + const float avg_delta_b_f = static_cast(delta_sum_b) / n; + const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Refinement trial %u, avg_delta %f %f %f\n", refinement_trial, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); +#endif + + if (!evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution)) + break; + + } // refinement_trial + } + + void etc1_optimizer::compute_internal_neighborhood(int scan_r, int scan_g, int scan_b) + { + if (m_best_solution.m_error == 0) + return; + + //const uint32_t n = m_pParams->m_num_src_pixels; + const int scan_delta_size 
= m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = scan_b + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = scan_g + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = scan_r + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + + if (m_pParams->m_refinement) + { + refine_solution((m_pParams->m_quality == cETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2)); + } + + } // xdi + } // ydi + } // zdi + } + + void etc1_optimizer::compute_internal_cluster_fit(uint32_t total_perms_to_try) + { + if ((!m_best_solution.m_valid) || ((m_br != m_best_solution.m_coords.m_unscaled_color.r) || (m_bg != m_best_solution.m_coords.m_unscaled_color.g) || (m_bb != m_best_solution.m_coords.m_unscaled_color.b))) + { + evaluate_solution(etc1_solution_coordinates(m_br, m_bg, m_bb, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + } + + if ((m_best_solution.m_error == 0) || (!m_best_solution.m_valid)) + return; + + for (uint32_t i = 0; i < total_perms_to_try; i++) + { + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + + const int *pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + + const uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v; + + for (uint32_t q = 0; q < 4; q++) + { + const int yd_temp = pInten_table[q]; + + delta_sum_r += pNum_selectors[q] * (clamp(base_color.r + yd_temp, 0, 255) - base_color.r); + delta_sum_g += pNum_selectors[q] * (clamp(base_color.g + yd_temp, 0, 255) - base_color.g); + delta_sum_b += pNum_selectors[q] * (clamp(base_color.b + yd_temp, 0, 255) - base_color.b); + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + continue; + + const float avg_delta_r_f = static_cast(delta_sum_r) / 8; + const float avg_delta_g_f = static_cast(delta_sum_g) / 8; + const float avg_delta_b_f = static_cast(delta_sum_b) / 8; + + const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Second refinement trial %u, avg_delta %f %f %f\n", i, avg_delta_r_f, avg_delta_g_f, 
avg_delta_b_f); +#endif + + evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + + if (m_best_solution.m_error == 0) + break; + } + } + + void etc1_optimizer::init(const params& params, results& result) + { + m_pParams = ¶ms; + m_pResult = &result; + + const uint32_t n = m_pParams->m_num_src_pixels; + + m_selectors.resize(n); + m_best_selectors.resize(n); + m_temp_selectors.resize(n); + m_trial_solution.m_selectors.resize(n); + m_best_solution.m_selectors.resize(n); + + m_limit = m_pParams->m_use_color4 ? 15 : 31; + + vec3F avg_color(0.0f); + + m_luma.resize(n); + m_sorted_luma_indices.resize(n); + m_sorted_luma.resize(n); + + int min_r = 255, min_g = 255, min_b = 255; + int max_r = 0, max_g = 0, max_b = 0; + + for (uint32_t i = 0; i < n; i++) + { + const color_rgba& c = m_pParams->m_pSrc_pixels[i]; + + min_r = basisu::minimum(min_r, c.r); + min_g = basisu::minimum(min_g, c.g); + min_b = basisu::minimum(min_b, c.b); + + max_r = basisu::maximum(max_r, c.r); + max_g = basisu::maximum(max_g, c.g); + max_b = basisu::maximum(max_b, c.b); + + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast(c.r + c.g + c.b); + m_sorted_luma_indices[i] = i; + } + avg_color /= static_cast(n); + m_avg_color = avg_color; + m_max_comp_spread = basisu::maximum(basisu::maximum(max_r - min_r, max_g - min_g), max_b - min_b); + + m_br = clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Avg block color: %u %u %u\n", m_br, m_bg, m_bb); +#endif + + if (m_pParams->m_quality == cETCQualityFast) + { + indirect_sort(n, &m_sorted_luma_indices[0], &m_luma[0]); + + m_pSorted_luma = &m_sorted_luma[0]; + m_pSorted_luma_indices = &m_sorted_luma_indices[0]; + + for (uint32_t i 
= 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = UINT64_MAX; + + clear_obj(m_solutions_tried); + } + + // Return false if we've probably already tried this solution, true if we have definitely not. + bool etc1_optimizer::check_for_redundant_solution(const etc1_solution_coordinates& coords) + { + // Hash first 3 bytes of color (RGB) + uint32_t kh = hash_hsieh((uint8_t*)&coords.m_unscaled_color.r, 3); + + uint32_t h0 = kh & cSolutionsTriedHashMask; + uint32_t h1 = (kh >> cSolutionsTriedHashBits) & cSolutionsTriedHashMask; + + // Simple Bloom filter lookup with k=2 + if ( ((m_solutions_tried[h0 >> 3] & (1 << (h0 & 7))) != 0) && + ((m_solutions_tried[h1 >> 3] & (1 << (h1 & 7))) != 0) ) + return false; + + m_solutions_tried[h0 >> 3] |= (1 << (h0 & 7)); + m_solutions_tried[h1 >> 3] |= (1 << (h1 & 7)); + + return true; + } + + static uint8_t g_eval_dist_tables[8][256] = + { + // 99% threshold + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 
1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,}, + { 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, + { 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, + { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,} + }; + + bool etc1_optimizer::evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (!check_for_redundant_solution(coords)) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = INT64_MAX; + + const uint8_t *pSelectors_to_use = m_pParams->m_pForce_selectors; + + for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + if (m_pParams->m_quality <= cETCQualityMedium) + { + if 
(!g_eval_dist_tables[inten_table][m_max_comp_spread]) + continue; + } +#if 0 + if (m_pParams->m_quality <= cETCQualityMedium) + { + // For tables 5-7, if the max component spread falls within certain ranges, skip the inten table. Statistically they are extremely unlikely to result in lower error. + if (inten_table == 7) + { + if (m_max_comp_spread < 42) + continue; + } + else if (inten_table == 6) + { + if ((m_max_comp_spread >= 12) && (m_max_comp_spread <= 31)) + continue; + } + else if (inten_table == 5) + { + if ((m_max_comp_spread >= 13) && (m_max_comp_spread <= 21)) + continue; + } + } +#endif + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + } + + uint64_t total_error = 0; + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + + if (!g_cpu_supports_sse41) + { + for (uint32_t c = 0; c < n; c++) + { + const color_rgba& src_pixel = *pSrc_pixels++; + + uint32_t best_selector_index = 0; + uint32_t best_error = 0; + + if (pSelectors_to_use) + { + best_selector_index = pSelectors_to_use[c]; + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[best_selector_index], false); + } + else + { + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[0], false); + + uint32_t trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[1], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[2], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[3], false); + if (trial_error < best_error) + { + best_error = trial_error; + 
best_selector_index = 3; + } + } + + m_temp_selectors[c] = static_cast(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + break; + } + } + else + { +#if BASISU_SUPPORT_SSE + if (pSelectors_to_use) + { + if (m_pParams->m_perceptual) + perceptual_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + else + linear_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + } + else + { + if (m_pParams->m_perceptual) + find_selectors_perceptual_rgb_4_N_sse41((int64_t*)&total_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, trial_solution.m_error); + else + find_selectors_linear_rgb_4_N_sse41((int64_t*)&total_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, trial_solution.m_error); + } +#endif + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (!check_for_redundant_solution(coords)) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution fast: %u %u %u\n", 
coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = UINT64_MAX; + + const bool perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint32_t block_inten[4]; + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_rgba block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
+ // 0 1 2 3 + // 01 12 23 + const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64_t total_error = 0; + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + + if (perceptual) + { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(true, block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(true, block_colors[3], pSrc_pixels[c], false); + } + else + { + if (!g_cpu_supports_sse41) + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color_distance(true, block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color_distance(true, block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + else + { +#if BASISU_SUPPORT_SSE + uint32_t cur_selector = 0, c; + + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + 
while ((y * 2) >= block_inten_midpoints[cur_selector]) + { + if (++cur_selector > 2) + goto done3; + } + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + } + done3: + + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + ++c; + } + + int64_t block_error; + perceptual_distance_rgb_4_N_sse41(&block_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, INT64_MAX); + total_error += block_error; +#endif + } + } + } + else + { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[3], pSrc_pixels[c], false); + } + else + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done2; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done2: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; 
+ total_error += color_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + results.m_selectors.resize(num_pixels); + results.m_selectors_temp.resize(num_pixels); + + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t a = pPixels[i]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + results.m_base = min_alpha; + results.m_table = 13; + results.m_multiplier = 1; + for (uint32_t i = 0; i < num_pixels; i++) + results.m_selectors[i] = 4; + return 0; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + uint64_t best_err = UINT64_MAX; + + for (uint32_t table = 0; table < 16; table++) + { + if ((table_mask & (1U << table)) == 0) + continue; + + const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(lerp((float)min_alpha, 
(float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + const int base_min = clamp255(center - base_search_rad); + const int base_max = clamp255(center + base_search_rad); + + const int mul = (int)roundf(alpha_range / range); + const int mul_low = clamp(mul - mul_search_rad, 1, 15); + const int mul_high = clamp(mul + mul_search_rad, 1, 15); + + for (int base = base_min; base <= base_max; base++) + { + for (int multiplier = mul_low; multiplier <= mul_high; multiplier++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base); + + uint32_t err = iabs(a - v); + if (err < best_s_err) + { + best_s_err = err; + best_s = s; + } + } + + results.m_selectors_temp[i] = static_cast(best_s); + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base; + results.m_multiplier = multiplier; + results.m_table = table; + results.m_selectors.swap(results.m_selectors_temp); + if (!best_err) + return best_err; + } + + } // table + + } // multiplier + + } // base + + return best_err; + } + + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + pack_eac_a8_results results; + pack_eac_a8(results, pPixels, 16, base_search_rad, mul_search_rad, table_mask); + + pBlock->m_base = results.m_base; + pBlock->m_multiplier = results.m_multiplier; + pBlock->m_table = results.m_table; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + pBlock->set_selector(x, y, results.m_selectors[x + y * 4]); + } + +} // namespace basisu diff --git a/src/deps/basis-universal/basisu_etc.h 
b/src/deps/basis-universal/encoder/basisu_etc.h similarity index 86% rename from src/deps/basis-universal/basisu_etc.h rename to src/deps/basis-universal/encoder/basisu_etc.h index a202d01f6e..1e3ece43b8 100644 --- a/src/deps/basis-universal/basisu_etc.h +++ b/src/deps/basis-universal/encoder/basisu_etc.h @@ -1,5 +1,5 @@ // basis_etc.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,9 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_enc.h" -#include namespace basisu { @@ -116,7 +115,7 @@ namespace basisu { assert((ofs + num) <= 64U); assert(num && (num < 32U)); - return (read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + return (uint32_t)(read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); } inline void set_general_bits(uint32_t ofs, uint32_t num, uint32_t bits) @@ -266,6 +265,27 @@ namespace basisu p[-2] |= (msb << byte_bit_ofs); } + // Selector "etc1_val" ranges from 0-3 and is a direct (raw) ETC1 selector. 
+ inline void set_raw_selector(uint32_t x, uint32_t y, uint32_t etc1_val) + { + assert((x | y | etc1_val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t* p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + inline uint32_t get_raw_selector_bits() const { return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24); @@ -622,6 +642,23 @@ namespace basisu return true; } + bool set_block_color5_clamp(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + dr = clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } color_rgba get_selector_color(uint32_t x, uint32_t y, uint32_t s) const { color_rgba block_colors[4]; @@ -720,7 +757,7 @@ namespace basisu } }; - typedef std::vector etc_block_vec; + typedef basisu::vector etc_block_vec; // Returns false if the unpack fails (could be bogus data or ETC2) bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); @@ -844,10 +881,10 @@ namespace basisu bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } const int* pInten_table = g_etc1_inten_tables[m_inten_table]; - pBlock_colors[0].set((uint8_t)(br + pInten_table[0]), (uint8_t)(bg + pInten_table[0]), (uint8_t)(bb + pInten_table[0]), 255); - pBlock_colors[1].set((uint8_t)(br + pInten_table[1]), (uint8_t)(bg + pInten_table[1]), (uint8_t)(bb + pInten_table[1]), 
255); - pBlock_colors[2].set((uint8_t)(br + pInten_table[2]), (uint8_t)(bg + pInten_table[2]), (uint8_t)(bb + pInten_table[2]), 255); - pBlock_colors[3].set((uint8_t)(br + pInten_table[3]), (uint8_t)(bg + pInten_table[3]), (uint8_t)(bb + pInten_table[3]), 255); + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0], 255); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1], 255); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2], 255); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3], 255); } color_rgba m_unscaled_color; @@ -914,9 +951,6 @@ namespace basisu m_refinement = true; m_pForce_selectors = nullptr; - - m_pEval_solution_override = nullptr; - m_pEval_solution_override_data = nullptr; } uint32_t m_num_src_pixels; @@ -932,9 +966,6 @@ namespace basisu bool m_refinement; const uint8_t* m_pForce_selectors; - - evaluate_solution_override_func m_pEval_solution_override; - void *m_pEval_solution_override_data; }; struct results @@ -970,7 +1001,7 @@ namespace basisu } etc1_solution_coordinates m_coords; - std::vector m_selectors; + basisu::vector m_selectors; uint64_t m_error; bool m_valid; @@ -1001,33 +1032,36 @@ namespace basisu vec3F m_avg_color; int m_br, m_bg, m_bb; - std::vector m_luma; - std::vector m_sorted_luma; - std::vector m_sorted_luma_indices; + int m_max_comp_spread; + basisu::vector m_luma; + basisu::vector m_sorted_luma; + basisu::vector m_sorted_luma_indices; const uint32_t* m_pSorted_luma_indices; uint32_t* m_pSorted_luma; - std::vector m_selectors; - std::vector m_best_selectors; + basisu::vector m_selectors; + basisu::vector m_best_selectors; potential_solution m_best_solution; potential_solution m_trial_solution; - std::vector m_temp_selectors; - - std::set m_solutions_tried; + basisu::vector m_temp_selectors; + enum { cSolutionsTriedHashBits = 10, cTotalSolutionsTriedHashSize = 1 << 
cSolutionsTriedHashBits, cSolutionsTriedHashMask = cTotalSolutionsTriedHashSize - 1 }; + uint8_t m_solutions_tried[cTotalSolutionsTriedHashSize / 8]; + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, int &last_inten_table) { first_inten_table = maximum(idx - 1, 0); last_inten_table = minimum(cETC1IntenModifierValues, idx + 1); } - + + bool check_for_redundant_solution(const etc1_solution_coordinates& coords); bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); inline bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { - if (m_pParams->m_quality >= cETCQualitySlow) + if (m_pParams->m_quality >= cETCQualityMedium) return evaluate_solution_slow(coords, trial_solution, pBest_solution); else return evaluate_solution_fast(coords, trial_solution, pBest_solution); @@ -1042,5 +1076,77 @@ namespace basisu { etc1_optimizer m_optimizer; }; + + void pack_etc1_solid_color_init(); + uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); + + // ETC EAC + extern const int8_t g_etc2_eac_tables[16][8]; + extern const int8_t g_etc2_eac_tables8[16][8]; + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + struct eac_a8_block + { + uint16_t m_base : 8; + uint16_t m_table : 4; + uint16_t m_multiplier : 4; + + uint8_t m_selectors[6]; + + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const + { + assert((x < 4) && (y < 4)); + return static_cast((selector_bits >> (45 - (y + x * 4) * 3)) & 7); + } + + inline uint64_t get_selector_bits() const + { + uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | 
((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5]; + return pixels; + } + + inline void set_selector_bits(uint64_t pixels) + { + m_selectors[0] = (uint8_t)(pixels >> 40); + m_selectors[1] = (uint8_t)(pixels >> 32); + m_selectors[2] = (uint8_t)(pixels >> 24); + m_selectors[3] = (uint8_t)(pixels >> 16); + m_selectors[4] = (uint8_t)(pixels >> 8); + m_selectors[5] = (uint8_t)(pixels); + } + + void set_selector(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 8)); + + const uint32_t ofs = 45 - (y + x * 4) * 3; + + uint64_t pixels = get_selector_bits(); + + pixels &= ~(7ULL << ofs); + pixels |= (static_cast(s) << ofs); + + set_selector_bits(pixels); + } + }; + + struct etc2_rgba_block + { + eac_a8_block m_alpha; + etc_block m_rgb; + }; + struct pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + uint8_vec m_selectors; + uint8_vec m_selectors_temp; + }; + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + } // namespace basisu diff --git a/src/deps/basis-universal/basisu_frontend.cpp b/src/deps/basis-universal/encoder/basisu_frontend.cpp similarity index 75% rename from src/deps/basis-universal/basisu_frontend.cpp rename to src/deps/basis-universal/encoder/basisu_frontend.cpp index 6f7a9bf889..324fc8e447 100644 --- a/src/deps/basis-universal/basisu_frontend.cpp +++ b/src/deps/basis-universal/encoder/basisu_frontend.cpp @@ -1,5 +1,5 @@ // basisu_frontend.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,11 +17,16 @@ // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this. // -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_frontend.h" #include #include +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + #define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0) namespace basisu @@ -29,10 +34,11 @@ namespace basisu const uint32_t cMaxCodebookCreationThreads = 8; const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3; - const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3; + //const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3; const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16; - const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE = 16; + const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32; + const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16; // TODO - How to handle internal verifies in the basisu lib static inline void handle_verify_failure(int line) @@ -57,14 +63,14 @@ namespace basisu uint32_t tv = size / sizeof(vec6F_quantizer::training_vec_with_weight); - std::vector v(tv); + basisu::vector v(tv); fread(&v[0], 1, sizeof(v[0]) * tv, pFile); for (uint32_t i = 0; i < tv; i++) m_endpoint_clusterizer.add_training_vec(v[i].first, v[i].second); m_endpoint_clusterizer.generate(16128); - std::vector codebook; + basisu::vector codebook; m_endpoint_clusterizer.retrieve(codebook); printf("Generated %u entries\n", (uint32_t)codebook.size()); @@ -78,6 +84,7 @@ namespace basisu { if 
(!p.m_pGlobal_sel_codebook) { + debug_printf("basisu_frontend::init: No global sel codebook!\n"); assert(0); return false; } @@ -126,13 +133,21 @@ namespace basisu break; } case 2: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = true; + m_use_hierarchical_selector_codebooks = true; + + break; + } + case 3: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = false; m_use_hierarchical_selector_codebooks = false; break; } - case 3: + case 4: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = true; @@ -141,7 +156,7 @@ namespace basisu m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; break; } - case 4: + case 5: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = false; @@ -150,7 +165,8 @@ namespace basisu m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; break; } - case 5: + case 6: + default: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = false; @@ -180,106 +196,113 @@ namespace basisu init_etc1_images(); - init_endpoint_training_vectors(); - - generate_endpoint_clusters(); - - for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++) + if (m_params.m_pGlobal_codebooks) { - BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); - - if (refine_endpoint_step) - { - introduce_new_endpoint_clusters(); - } - - generate_endpoint_codebook(refine_endpoint_step); - - if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) - { - char buf[256]; - snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); - dump_endpoint_clusterization_visualization(buf, false); - } - - bool early_out = false; + init_global_codebooks(); + } + else + { + init_endpoint_training_vectors(); - if (m_endpoint_refinement) + generate_endpoint_clusters(); + + for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < 
m_num_endpoint_codebook_iterations; refine_endpoint_step++) { - //dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png"); - - if (!refine_endpoint_clusterization()) - early_out = true; + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); - if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1)) + if (refine_endpoint_step) { - eliminate_redundant_or_empty_endpoint_clusters(); - generate_endpoint_codebook(refine_endpoint_step); + introduce_new_endpoint_clusters(); } + generate_endpoint_codebook(refine_endpoint_step); + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) { char buf[256]; - snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); - + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); dump_endpoint_clusterization_visualization(buf, false); - snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step); + } + + bool early_out = false; - dump_endpoint_clusterization_visualization(buf, true); + if (m_endpoint_refinement) + { + //dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png"); + + if (!refine_endpoint_clusterization()) + early_out = true; + + if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1)) + { + eliminate_redundant_or_empty_endpoint_clusters(); + generate_endpoint_codebook(refine_endpoint_step); + } + + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) + { + char buf[256]; + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); + + dump_endpoint_clusterization_visualization(buf, false); + snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step); + + dump_endpoint_clusterization_visualization(buf, true); + } } - } - 
eliminate_redundant_or_empty_endpoint_clusters(); + eliminate_redundant_or_empty_endpoint_clusters(); - if (m_params.m_debug_stats) - debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); + if (m_params.m_debug_stats) + debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); - if (early_out) - break; - } + if (early_out) + break; + } - BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); - generate_block_endpoint_clusters(); + generate_block_endpoint_clusters(); - create_initial_packed_texture(); + create_initial_packed_texture(); - generate_selector_clusters(); + generate_selector_clusters(); - if (m_use_hierarchical_selector_codebooks) - compute_selector_clusters_within_each_parent_cluster(); + if (m_use_hierarchical_selector_codebooks) + compute_selector_clusters_within_each_parent_cluster(); - if (m_params.m_compression_level == 0) - { - create_optimized_selector_codebook(0); + if (m_params.m_compression_level == 0) + { + create_optimized_selector_codebook(0); - find_optimal_selector_clusters_for_each_block(); + find_optimal_selector_clusters_for_each_block(); - introduce_special_selector_clusters(); - } - else - { - const uint32_t num_refine_selector_steps = m_params.m_pGlobal_sel_codebook ? 1 : m_num_selector_codebook_iterations; - for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++) + introduce_special_selector_clusters(); + } + else { - create_optimized_selector_codebook(refine_selector_steps); + const uint32_t num_refine_selector_steps = m_params.m_pGlobal_sel_codebook ? 
1 : m_num_selector_codebook_iterations; + for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++) + { + create_optimized_selector_codebook(refine_selector_steps); - find_optimal_selector_clusters_for_each_block(); + find_optimal_selector_clusters_for_each_block(); - introduce_special_selector_clusters(); + introduce_special_selector_clusters(); - if ((m_params.m_compression_level >= 3) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) - { - if (!refine_block_endpoints_given_selectors()) - break; + if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) + { + if (!refine_block_endpoints_given_selectors()) + break; + } } } - } - - optimize_selector_codebook(); + + optimize_selector_codebook(); - if (m_params.m_debug_stats) - debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_indices.size()); + if (m_params.m_debug_stats) + debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size()); + } finalize(); @@ -294,6 +317,259 @@ namespace basisu return true; } + bool basisu_frontend::init_global_codebooks() + { + const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks; + + const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints(); + const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors(); + + m_endpoint_cluster_etc_params.resize(endpoints.size()); + for (uint32_t i = 0; i < endpoints.size(); i++) + { + m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5; + m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5; + + m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255); + m_endpoint_cluster_etc_params[i].m_color_used[0] = true; + 
m_endpoint_cluster_etc_params[i].m_valid = true; + } + + m_optimized_cluster_selectors.resize(selectors.size()); + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y)); + } + + m_block_endpoint_clusters_indices.resize(m_total_blocks); + + m_orig_encoded_blocks.resize(m_total_blocks); + + m_block_selector_cluster_index.resize(m_total_blocks); + +#if 0 + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index] { +#endif + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block& blk = m_etc1_blocks_etc1s[block_index]; + + const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + + etc_block trial_blk; + trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits()); + + const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + if (!cur_err) + break; + } + + } // block_index + + m_block_selector_cluster_index[block_index] = best_index; + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + m_encoded_blocks.resize(m_total_blocks); + for (uint32_t block_index = 0; block_index < m_total_blocks; 
block_index++) + { + const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t selector_index = m_block_selector_cluster_index[block_index]; + + etc_block& blk = m_encoded_blocks[block_index]; + + blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]); + blk.set_flip_bit(true); + blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits()); + } +#endif + + // HACK HACK + const uint32_t NUM_PASSES = 3; + for (uint32_t pass = 0; pass < NUM_PASSES; pass++) + { + debug_printf("init_global_codebooks: pass %u\n", pass); + + const uint32_t N = 128; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] { +#endif + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block& blk = pass ? 
m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index]; + const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits(); + + etc_block trial_blk(blk); + trial_blk.set_raw_selector_bits(blk_raw_selector_bits); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + etc_block best_block(trial_blk); + + for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++) + { + if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0)) + continue; + + trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]); + trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]); + + const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr(); + uint64_t cur_err; + if (!pass) + cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual); + else + cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual); + + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + best_block = trial_blk; + + if (!cur_err) + break; + } + } + + m_block_endpoint_clusters_indices[block_index][0] = best_index; + m_block_endpoint_clusters_indices[block_index][1] = best_index; + + m_orig_encoded_blocks[block_index] = best_block; + + } // block_index + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + m_endpoint_clusters.resize(0); + m_endpoint_clusters.resize(endpoints.size()); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0]; + m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2); + m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1); + } + + m_block_selector_cluster_index.resize(m_total_blocks); + + for (uint32_t block_index_iter = 0; block_index_iter < 
m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index] { +#endif + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + + etc_block trial_blk; + trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]); + trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits()); + + const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + if (!cur_err) + break; + } + + } // block_index + + m_block_selector_cluster_index[block_index] = best_index; + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + m_encoded_blocks.resize(m_total_blocks); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t selector_index = m_block_selector_cluster_index[block_index]; + + etc_block& blk = m_encoded_blocks[block_index]; + + blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]); + blk.set_flip_bit(true); + 
blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits()); + } + + } // pass + + m_selector_cluster_block_indices.resize(selectors.size()); + for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++) + m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index); + + return true; + } + void basisu_frontend::introduce_special_selector_clusters() { debug_printf("introduce_special_selector_clusters\n"); @@ -302,7 +578,7 @@ namespace basisu return; uint32_t total_blocks_relocated = 0; - const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_indices.size(); + const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size(); bool_vec block_relocated_flags(m_total_blocks); @@ -328,7 +604,7 @@ namespace basisu m_optimized_cluster_selectors.push_back(blk); - vector_ensure_element_is_valid(m_selector_cluster_indices, new_selector_cluster_index); + vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index); for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { @@ -357,14 +633,14 @@ namespace basisu // Change the block to use the new cluster m_block_selector_cluster_index[block_index] = new_selector_cluster_index; - m_selector_cluster_indices[new_selector_cluster_index].push_back(block_index); + m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index); block_relocated_flags[block_index] = true; #if 0 - int j = vector_find(m_selector_cluster_indices[old_selector_cluster_index], block_index); + int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index); if (j >= 0) - m_selector_cluster_indices[old_selector_cluster_index].erase(m_selector_cluster_indices[old_selector_cluster_index].begin() + j); + 
m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j); #endif total_blocks_relocated++; @@ -381,7 +657,7 @@ namespace basisu for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++) { - uint_vec& block_indices = m_selector_cluster_indices[selector_cluster_index]; + uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index]; uint32_t dst_ofs = 0; @@ -399,6 +675,7 @@ namespace basisu debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated); } + // This method will change the number and ordering of the selector codebook clusters. void basisu_frontend::optimize_selector_codebook() { debug_printf("optimize_selector_codebook\n"); @@ -436,15 +713,17 @@ namespace basisu new_to_old.push_back(i); } + debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries); + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) { BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries)); m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]]; } - std::vector new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0); + basisu::vector new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0); basist::etc1_global_selector_codebook_entry_id_vec new_optimized_cluster_selector_global_cb_ids(m_optimized_cluster_selector_global_cb_ids.size() ? total_new_entries : 0); - std::vector new_selector_cluster_indices(m_selector_cluster_indices.size() ? total_new_entries : 0); + basisu::vector new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? 
total_new_entries : 0); bool_vec new_selector_cluster_uses_global_cb(m_selector_cluster_uses_global_cb.size() ? total_new_entries : 0); for (uint32_t i = 0; i < total_new_entries; i++) @@ -455,24 +734,40 @@ namespace basisu if (m_optimized_cluster_selector_global_cb_ids.size()) new_optimized_cluster_selector_global_cb_ids[i] = m_optimized_cluster_selector_global_cb_ids[new_to_old[i]]; - if (m_selector_cluster_indices.size()) - new_selector_cluster_indices[i] = m_selector_cluster_indices[new_to_old[i]]; + //if (m_selector_cluster_block_indices.size()) + // new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]]; if (m_selector_cluster_uses_global_cb.size()) new_selector_cluster_uses_global_cb[i] = m_selector_cluster_uses_global_cb[new_to_old[i]]; } + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) + { + new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i); + } + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); m_optimized_cluster_selector_global_cb_ids.swap(new_optimized_cluster_selector_global_cb_ids); - m_selector_cluster_indices.swap(new_selector_cluster_indices); + m_selector_cluster_block_indices.swap(new_selector_cluster_indices); m_selector_cluster_uses_global_cb.swap(new_selector_cluster_uses_global_cb); - + + // This isn't strictly necessary - doing it for completeness/future sanity. 
+ if (m_selector_clusters_within_each_parent_cluster.size()) + { + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) + m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]]; + } + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); } void basisu_frontend::init_etc1_images() { debug_printf("basisu_frontend::init_etc1_images\n"); + + interval_timer tm; + tm.start(); m_etc1_blocks_etc1s.resize(m_total_blocks); @@ -481,8 +776,10 @@ namespace basisu { const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(m_total_blocks, first_index + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -494,6 +791,8 @@ namespace basisu if (m_params.m_compression_level == 0) optimizer_params.m_quality = cETCQualityFast; + else if (m_params.m_compression_level == 1) + optimizer_params.m_quality = cETCQualityMedium; else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) optimizer_params.m_quality = cETCQualityUber; @@ -506,8 +805,9 @@ namespace basisu optimizer_results.m_n = 16; optimizer.init(optimizer_params, optimizer_results); - optimizer.compute(); - + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); + etc_block &blk = m_etc1_blocks_etc1s[block_index]; memset(&blk, 0, sizeof(blk)); @@ -520,10 +820,17 @@ namespace basisu blk.set_selector(x, y, selectors[x + y * 4]); } +#ifndef __EMSCRIPTEN__ } ); +#endif + } - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); } void basisu_frontend::init_endpoint_training_vectors() @@ -540,7 +847,9 @@ namespace basisu 
const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -562,11 +871,15 @@ namespace basisu } // block_index; +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } void basisu_frontend::generate_endpoint_clusters() @@ -649,7 +962,7 @@ namespace basisu for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) { - const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -707,17 +1020,19 @@ namespace basisu const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; assert(cluster_indices.size()); for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { - std::vector cluster_pixels(8); + basisu::vector cluster_pixels(8); const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; @@ -775,10 +1090,15 @@ namespace basisu } } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif + } // cluster_index_iter +#ifndef __EMSCRIPTEN__ 
m_params.m_pJob_pool->wait_for_all(); +#endif vector_sort(m_subblock_endpoint_quant_err_vec); } @@ -837,7 +1157,7 @@ namespace basisu continue; #endif - const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size(); + //const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size(); enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index); enlarge_vector(m_endpoint_cluster_etc_params, 1); @@ -893,18 +1213,20 @@ namespace basisu { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, step ] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; BASISU_FRONTEND_VERIFY(cluster_indices.size()); const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8; - std::vector cluster_pixels(total_pixels); + basisu::vector cluster_pixels(total_pixels); for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -935,20 +1257,21 @@ namespace basisu cluster_optimizer_params.m_use_color4 = false; cluster_optimizer_params.m_perceptual = m_params.m_perceptual; - if (m_params.m_compression_level == 0) + if (m_params.m_compression_level <= 1) cluster_optimizer_params.m_quality = cETCQualityMedium; else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) cluster_optimizer_params.m_quality = cETCQualityUber; etc1_optimizer::results cluster_optimizer_results; - std::vector cluster_selectors(total_pixels); + basisu::vector cluster_selectors(total_pixels); cluster_optimizer_results.m_n = total_pixels; cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; 
optimizer.init(cluster_optimizer_params, cluster_optimizer_results); - optimizer.compute(); + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; @@ -1012,20 +1335,24 @@ namespace basisu } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } bool basisu_frontend::check_etc1s_constraints() const { - std::vector block_clusters(m_total_blocks); + basisu::vector block_clusters(m_total_blocks); for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) { - const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1053,11 +1380,11 @@ namespace basisu if (m_use_hierarchical_endpoint_codebooks) compute_endpoint_clusters_within_each_parent_cluster(); - std::vector block_clusters(m_total_blocks); + basisu::vector block_clusters(m_total_blocks); for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) { - const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1081,19 +1408,19 @@ namespace basisu const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &best_cluster_indices, &block_clusters] { +#endif for (uint32_t block_index = first_index; block_index < 
last_index; block_index++) { - const bool is_flipped = true; - const uint32_t cluster_index = block_clusters[block_index][0]; BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]); - const color_rgba *subblock_pixels = get_source_pixel_block(block_index).get_ptr(); + const color_rgba *pSubblock_pixels = get_source_pixel_block(block_index).get_ptr(); const uint32_t num_subblock_pixels = 16; - uint64_t best_cluster_err = UINT64_MAX; + uint64_t best_cluster_err = INT64_MAX; uint32_t best_cluster_index = 0; const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0; @@ -1116,19 +1443,20 @@ namespace basisu // Can't assign it here - may result in too much error when selector quant occurs if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0]) { - total_err = UINT64_MAX; + total_err = INT64_MAX; goto skip_cluster; } etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten); - + +#if 0 for (uint32_t p = 0; p < num_subblock_pixels; p++) { uint64_t best_err = UINT64_MAX; for (uint32_t r = low_selector; r <= high_selector; r++) { - uint64_t err = color_distance(m_params.m_perceptual, subblock_pixels[p], subblock_colors[r], false); + uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false); best_err = minimum(best_err, err); if (!best_err) break; @@ -1138,6 +1466,64 @@ namespace basisu if (total_err > best_cluster_err) break; } // p +#else + if (m_params.m_perceptual) + { + if (!g_cpu_supports_sse41) + { + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + 
break; + } // p + } + else + { +#if BASISU_SUPPORT_SSE + find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err); +#endif + } + } + else + { + if (!g_cpu_supports_sse41) + { + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p + } + else + { +#if BASISU_SUPPORT_SSE + find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err); +#endif + } + } +#endif skip_cluster: if ((total_err < best_cluster_err) || @@ -1154,14 +1540,18 @@ namespace basisu best_cluster_indices[block_index] = best_cluster_index; } // block_index - + +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif - std::vector > optimized_endpoint_clusters(m_endpoint_clusters.size()); + basisu::vector > optimized_endpoint_clusters(m_endpoint_clusters.size()); uint32_t total_subblocks_reassigned = 0; for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) @@ -1199,8 +1589,8 @@ namespace basisu indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]); - std::vector > new_endpoint_clusters(m_endpoint_clusters.size()); - std::vector new_subblock_etc_params(m_endpoint_clusters.size()); + basisu::vector > new_endpoint_clusters(m_endpoint_clusters.size()); + basisu::vector new_subblock_etc_params(m_endpoint_clusters.size()); for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { @@ -1264,7 +1654,9 @@ namespace basisu const uint32_t first_index = 
block_index_iter; const uint32_t last_index = minimum(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -1288,12 +1680,16 @@ namespace basisu blk.determine_selectors(pSource_pixels, m_params.m_perceptual); } // block_index - + +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif m_orig_encoded_blocks = m_encoded_blocks; } @@ -1302,9 +1698,9 @@ namespace basisu { uint_vec block_selector_cluster_indices(m_total_blocks); - for (int cluster_index = 0; cluster_index < static_cast(m_selector_cluster_indices.size()); cluster_index++) + for (int cluster_index = 0; cluster_index < static_cast(m_selector_cluster_block_indices.size()); cluster_index++) { - const std::vector& cluster_indices = m_selector_cluster_indices[cluster_index]; + const basisu::vector& cluster_indices = m_selector_cluster_block_indices[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1317,7 +1713,7 @@ namespace basisu } // cluster_index m_selector_clusters_within_each_parent_cluster.resize(0); - m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_indices.size()); + m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size()); for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { @@ -1355,7 +1751,9 @@ namespace basisu const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -1382,47 +1780,54 @@ namespace basisu } // block_index +#ifndef 
__EMSCRIPTEN__ } ); +#endif } // block_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif vec16F_clusterizer selector_clusterizer; for (uint32_t i = 0; i < m_total_blocks; i++) selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second); - const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE : 0; + const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT; + const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0; + debug_printf("Using selector parent codebook size %u\n", parent_codebook_size); uint32_t max_threads = 0; max_threads = m_params.m_multithreaded ? minimum(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0; bool status = generate_hierarchical_codebook_threaded(selector_clusterizer, m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? 
parent_codebook_size : 0, - m_selector_cluster_indices, - m_selector_parent_cluster_indices, + m_selector_cluster_block_indices, + m_selector_parent_cluster_block_indices, max_threads, m_params.m_pJob_pool); BASISU_FRONTEND_VERIFY(status); if (m_use_hierarchical_selector_codebooks) { - if (!m_selector_parent_cluster_indices.size()) + if (!m_selector_parent_cluster_block_indices.size()) { - m_selector_parent_cluster_indices.resize(0); - m_selector_parent_cluster_indices.resize(1); + m_selector_parent_cluster_block_indices.resize(0); + m_selector_parent_cluster_block_indices.resize(1); for (uint32_t i = 0; i < m_total_blocks; i++) - m_selector_parent_cluster_indices[0].push_back(i); + m_selector_parent_cluster_block_indices[0].push_back(i); } - BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE <= UINT8_MAX); + BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX); + BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX); m_block_parent_selector_cluster.resize(0); m_block_parent_selector_cluster.resize(m_total_blocks); vector_set_all(m_block_parent_selector_cluster, 0xFF); - for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_indices.size(); parent_cluster_index++) + for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++) { - const uint_vec &cluster = m_selector_parent_cluster_indices[parent_cluster_index]; + const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index]; for (uint32_t j = 0; j < cluster.size(); j++) m_block_parent_selector_cluster[cluster[j]] = static_cast(parent_cluster_index); } @@ -1432,9 +1837,9 @@ namespace basisu } // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong. 
- for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_indices.size(); cluster_index++) + for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++) { - const uint_vec &cluster = m_selector_cluster_indices[cluster_index]; + const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index]; uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) @@ -1452,14 +1857,16 @@ namespace basisu } } - debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_indices.size(), (uint32_t)m_selector_parent_cluster_indices.size()); + debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size()); } void basisu_frontend::create_optimized_selector_codebook(uint32_t iter) { debug_printf("create_optimized_selector_codebook\n"); - const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_indices.size(); + const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size(); + + debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size()); m_optimized_cluster_selectors.resize(total_selector_clusters); @@ -1474,12 +1881,14 @@ namespace basisu { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &total_clusters_processed, &total_selector_clusters] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector &cluster_block_indices = m_selector_cluster_indices[cluster_index]; + const basisu::vector &cluster_block_indices = 
m_selector_cluster_block_indices[cluster_index]; if (!cluster_block_indices.size()) continue; @@ -1528,11 +1937,15 @@ namespace basisu } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } else { @@ -1552,12 +1965,14 @@ namespace basisu { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &uses_hybrid_sel_codebook, &total_clusters_processed, &total_selector_clusters] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector &cluster_block_indices = m_selector_cluster_indices[cluster_index]; + const basisu::vector &cluster_block_indices = m_selector_cluster_block_indices[cluster_index]; if (!cluster_block_indices.size()) continue; @@ -1667,29 +2082,33 @@ namespace basisu } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } // if (m_params.m_pGlobal_sel_codebook) - + if (m_params.m_debug_images) { uint32_t max_selector_cluster_size = 0; - for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) - max_selector_cluster_size = maximum(max_selector_cluster_size, (uint32_t)m_selector_cluster_indices[i].size()); + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + max_selector_cluster_size = maximum(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size()); if ((max_selector_cluster_size * 5) < 32768) { const uint32_t x_spacer_len = 16; - image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_indices.size() * 5); + image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 
5); - for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_indices.size(); selector_cluster_index++) + for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++) { - const std::vector &cluster_block_indices = m_selector_cluster_indices[selector_cluster_index]; + const basisu::vector &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index]; for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) @@ -1717,31 +2136,56 @@ namespace basisu void basisu_frontend::find_optimal_selector_clusters_for_each_block() { debug_printf("find_optimal_selector_clusters_for_each_block\n"); - + + // Sanity checks + BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size()); + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + { + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) + { + BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size()); + } + } + m_block_selector_cluster_index.resize(m_total_blocks); - + if (m_params.m_compression_level == 0) { // Don't do anything, just leave the blocks in their original selector clusters. - for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) { - for (uint32_t j = 0; j < m_selector_cluster_indices[i].size(); j++) - m_block_selector_cluster_index[m_selector_cluster_indices[i][j]] = i; + for (uint32_t j = 0; j < m_selector_cluster_block_indices[i].size(); j++) + m_block_selector_cluster_index[m_selector_cluster_block_indices[i][j]] = i; } } else { - std::vector< std::vector > new_cluster_indices; - + // Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end. 
+ basisu::vector< basisu::vector > new_cluster_indices(m_optimized_cluster_selectors.size()); + // For each block: Determine which quantized selectors best encode that block, given its quantized endpoints. + basisu::vector unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size()); + for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++) + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y); + } + } + } + const uint32_t N = 1024; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(m_total_blocks, first_index + N); - m_params.m_pJob_pool->add_job( [this, first_index, last_index, &new_cluster_indices] { +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job( [this, first_index, last_index, &new_cluster_indices, &unpacked_optimized_cluster_selectors] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -1752,20 +2196,32 @@ namespace basisu color_rgba trial_block_colors[4]; blk.get_block_colors(trial_block_colors, 0); - uint64_t best_cluster_err = UINT64_MAX; + // precompute errors for the i-th block pixel and selector sel: [sel][i] + uint32_t trial_errors[4][16]; + + for (int sel = 0; sel < 4; ++sel) + { + for (int i = 0; i < 16; ++i) + { + trial_errors[sel][i] = color_distance(m_params.m_perceptual, pBlock_pixels[i], trial_block_colors[sel], false); + } + } + + uint64_t best_cluster_err = INT64_MAX; uint32_t best_cluster_index = 0; const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0; const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? 
&m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr; - const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_indices.size(); + const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size(); +#if 0 for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) { const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter; const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; - + uint64_t trial_err = 0; for (int y = 0; y < 4; y++) { @@ -1778,18 +2234,82 @@ namespace basisu goto early_out; } } - + if (trial_err < best_cluster_err) { best_cluster_err = trial_err; best_cluster_index = cluster_index; - if (!best_cluster_err) + if (!best_cluster_err) break; } early_out: ; } +#else + if (m_params.m_perceptual) + { + for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) + { + const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter; + //const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; + + uint64_t trial_err = 0; + + for (int i = 0; i < 16; i++) + { + const uint32_t sel = unpacked_optimized_cluster_selectors[cluster_index * 16 + i]; + + trial_err += trial_errors[sel][i]; + if (trial_err > best_cluster_err) + goto early_out; + } + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (!best_cluster_err) + break; + } + + early_out: + ; + + } // cluster_iter + } + else + { + for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) + { + const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? 
(*pCluster_indices)[cluster_iter] : cluster_iter; + //const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; + + uint64_t trial_err = 0; + + for (int i = 0; i < 16; i++) + { + const uint32_t sel = unpacked_optimized_cluster_selectors[cluster_index * 16 + i]; + + trial_err += trial_errors[sel][i]; + if (trial_err > best_cluster_err) + goto early_out2; + } + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (!best_cluster_err) + break; + } + + early_out2: + ; + + } // cluster_iter + } +#endif blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits()); @@ -1804,17 +2324,21 @@ namespace basisu } // block_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); - - m_selector_cluster_indices.swap(new_cluster_indices); +#endif + + m_selector_cluster_block_indices.swap(new_cluster_indices); } - for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) - vector_sort(m_selector_cluster_indices[i]); + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + vector_sort(m_selector_cluster_block_indices[i]); } // TODO: Remove old ETC1 specific stuff, and thread this. 
@@ -1842,7 +2366,7 @@ namespace basisu const uint_vec &subblocks = subblock_params.m_subblocks; //uint32_t total_pixels = subblock.m_subblocks.size() * 8; - std::vector subblock_colors[2]; // [use_individual_mode] + basisu::vector subblock_colors[2]; // [use_individual_mode] uint8_vec subblock_selectors[2]; uint64_t cur_subblock_err[2] = { 0, 0 }; @@ -1882,7 +2406,7 @@ namespace basisu clear_obj(cluster_optimizer_results); - std::vector cluster_selectors[2]; + basisu::vector cluster_selectors[2]; for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++) { @@ -1938,7 +2462,7 @@ namespace basisu const uint32_t block_index = training_vector_index >> 1; const uint32_t subblock_index = training_vector_index & 1; - const bool is_flipped = true; + //const bool is_flipped = true; etc_block &blk = m_encoded_blocks[block_index]; @@ -2002,7 +2526,7 @@ namespace basisu if (m_params.m_debug_stats) debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); - + return total_subblocks_refined; } @@ -2012,8 +2536,8 @@ namespace basisu uint32_t max_endpoint_cluster_size = 0; - std::vector cluster_sizes(m_endpoint_clusters.size()); - std::vector sorted_cluster_indices(m_endpoint_clusters.size()); + basisu::vector cluster_sizes(m_endpoint_clusters.size()); + basisu::vector sorted_cluster_indices(m_endpoint_clusters.size()); for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { max_endpoint_cluster_size = maximum(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size()); @@ -2100,30 +2624,33 @@ namespace basisu { debug_printf("reoptimize_remapped_endpoints\n"); - std::vector new_endpoint_cluster_block_indices(m_endpoint_clusters.size()); + basisu::vector new_endpoint_cluster_block_indices(m_endpoint_clusters.size()); for (uint32_t i = 0; i < new_block_endpoints.size(); i++) new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i); - 
std::vector cluster_valid(new_endpoint_cluster_block_indices.size()); - std::vector cluster_improved(new_endpoint_cluster_block_indices.size()); + basisu::vector cluster_valid(new_endpoint_cluster_block_indices.size()); + basisu::vector cluster_improved(new_endpoint_cluster_block_indices.size()); const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N) { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] { +#endif + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index]; + const basisu::vector& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index]; if (!cluster_block_indices.size()) continue; const uint32_t total_pixels = (uint32_t)cluster_block_indices.size() * 16; - std::vector cluster_pixels(total_pixels); + basisu::vector cluster_pixels(total_pixels); uint8_vec force_selectors(total_pixels); etc_block blk; @@ -2170,16 +2697,19 @@ namespace basisu if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) cluster_optimizer_params.m_quality = cETCQualityUber; + else + cluster_optimizer_params.m_quality = cETCQualitySlow; etc1_optimizer::results cluster_optimizer_results; - std::vector cluster_selectors(total_pixels); + basisu::vector cluster_selectors(total_pixels); cluster_optimizer_results.m_n = total_pixels; cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; optimizer.init(cluster_optimizer_params, cluster_optimizer_results); - optimizer.compute(); + if (!optimizer.compute()) + 
BASISU_FRONTEND_VERIFY(false); new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; @@ -2198,11 +2728,16 @@ namespace basisu cluster_valid[cluster_index] = true; } // cluster_index + +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif uint32_t total_unused_clusters = 0; uint32_t total_improved_clusters = 0; @@ -2239,7 +2774,7 @@ namespace basisu debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n"); - std::vector new_endpoint_clusters(total_new_endpoint_clusters); + basisu::vector new_endpoint_clusters(total_new_endpoint_clusters); for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++) { diff --git a/src/deps/basis-universal/basisu_frontend.h b/src/deps/basis-universal/encoder/basisu_frontend.h similarity index 89% rename from src/deps/basis-universal/basisu_frontend.h rename to src/deps/basis-universal/encoder/basisu_frontend.h index c3f5d23c71..4ff6d40466 100644 --- a/src/deps/basis-universal/basisu_frontend.h +++ b/src/deps/basis-universal/encoder/basisu_frontend.h @@ -1,5 +1,5 @@ // basisu_frontend.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,7 +17,8 @@ #include "basisu_etc.h" #include "basisu_gpu_texture.h" #include "basisu_global_selector_palette_helpers.h" -#include "transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_transcoder.h" namespace basisu { @@ -34,8 +35,8 @@ namespace basisu uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; } }; - const uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 1; - const uint32_t BASISU_MAX_COMPRESSION_LEVEL = 5; + const uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 2; + const uint32_t BASISU_MAX_COMPRESSION_LEVEL = 6; class basisu_frontend { @@ -72,16 +73,19 @@ namespace basisu m_perceptual(true), m_debug_stats(false), m_debug_images(false), + m_dump_endpoint_clusterization(true), + m_validate(false), + m_multithreaded(false), + m_disable_hierarchical_endpoint_codebooks(false), m_pGlobal_sel_codebook(NULL), m_num_global_sel_codebook_pal_bits(0), m_num_global_sel_codebook_mod_bits(0), m_use_hybrid_selector_codebooks(false), m_hybrid_codebook_quality_thresh(0.0f), - m_validate(false), m_tex_type(basist::cBASISTexType2D), - m_multithreaded(false), - m_disable_hierarchical_endpoint_codebooks(false), + m_pGlobal_codebooks(nullptr), + m_pJob_pool(nullptr) { } @@ -108,6 +112,7 @@ namespace basisu bool m_use_hybrid_selector_codebooks; float m_hybrid_codebook_quality_thresh; basist::basis_texture_type m_tex_type; + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; job_pool *m_pJob_pool; }; @@ -142,7 +147,7 @@ namespace basisu bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } // Selector clusters - uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_indices.size()); } + uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_block_indices.size()); } uint32_t 
get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } @@ -150,7 +155,7 @@ namespace basisu const bool_vec &get_selector_cluster_uses_global_cb_vec() const { return m_selector_cluster_uses_global_cb; } // Returns block indices using each selector cluster - const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_indices[selector_cluster_index]; } + const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); @@ -188,16 +193,16 @@ namespace basisu // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. // Array of block indices for each endpoint cluster - std::vector m_endpoint_clusters; + basisu::vector m_endpoint_clusters; // Array of block indices for each parent endpoint cluster - std::vector m_endpoint_parent_clusters; + basisu::vector m_endpoint_parent_clusters; // Each block's parent cluster index uint8_vec m_block_parent_endpoint_cluster; // Array of endpoint cluster indices for each parent endpoint cluster - std::vector m_endpoint_clusters_within_each_parent_cluster; + basisu::vector m_endpoint_clusters_within_each_parent_cluster; struct endpoint_cluster_etc_params { @@ -267,35 +272,35 @@ namespace basisu } }; - typedef std::vector cluster_subblock_etc_params_vec; + typedef basisu::vector cluster_subblock_etc_params_vec; // Each endpoint cluster's ETC1S parameters cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; // The endpoint cluster index used by each ETC1 subblock. 
- std::vector m_block_endpoint_clusters_indices; + basisu::vector m_block_endpoint_clusters_indices; // The block(s) within each selector cluster // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! - std::vector m_selector_cluster_indices; + basisu::vector m_selector_cluster_block_indices; // The selector bits for each selector cluster. - std::vector m_optimized_cluster_selectors; + basisu::vector m_optimized_cluster_selectors; // The block(s) within each parent selector cluster. - std::vector m_selector_parent_cluster_indices; + basisu::vector m_selector_parent_cluster_block_indices; // Each block's parent selector cluster uint8_vec m_block_parent_selector_cluster; // Array of selector cluster indices for each parent selector cluster - std::vector m_selector_clusters_within_each_parent_cluster; + basisu::vector m_selector_clusters_within_each_parent_cluster; basist::etc1_global_selector_codebook_entry_id_vec m_optimized_cluster_selector_global_cb_ids; bool_vec m_selector_cluster_uses_global_cb; // Each block's selector cluster index - std::vector m_block_selector_cluster_index; + basisu::vector m_block_selector_cluster_index; struct subblock_endpoint_quant_err { @@ -321,13 +326,14 @@ namespace basisu }; // The sorted subblock endpoint quant error for each endpoint cluster - std::vector m_subblock_endpoint_quant_err_vec; + basisu::vector m_subblock_endpoint_quant_err_vec; std::mutex m_lock; //----------------------------------------------------------------------------- void init_etc1_images(); + bool init_global_codebooks(); void init_endpoint_training_vectors(); void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); void generate_endpoint_clusters(); diff --git a/src/deps/basis-universal/basisu_global_selector_palette_helpers.cpp b/src/deps/basis-universal/encoder/basisu_global_selector_palette_helpers.cpp similarity index 100% rename from 
src/deps/basis-universal/basisu_global_selector_palette_helpers.cpp rename to src/deps/basis-universal/encoder/basisu_global_selector_palette_helpers.cpp diff --git a/src/deps/basis-universal/basisu_global_selector_palette_helpers.h b/src/deps/basis-universal/encoder/basisu_global_selector_palette_helpers.h similarity index 92% rename from src/deps/basis-universal/basisu_global_selector_palette_helpers.h rename to src/deps/basis-universal/encoder/basisu_global_selector_palette_helpers.h index 32692c516b..7c35439df8 100644 --- a/src/deps/basis-universal/basisu_global_selector_palette_helpers.h +++ b/src/deps/basis-universal/encoder/basisu_global_selector_palette_helpers.h @@ -14,9 +14,9 @@ // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_etc.h" -#include "transcoder/basisu_global_selector_palette.h" +#include "../transcoder/basisu_global_selector_palette.h" namespace basisu { @@ -36,7 +36,7 @@ namespace basisu void clear() { clear_obj(*this); } }; - typedef std::vector pixel_block_vec; + typedef basisu::vector pixel_block_vec; uint64_t etc1_global_selector_codebook_find_best_entry(const basist::etc1_global_selector_codebook &codebook, uint32_t num_src_pixel_blocks, const pixel_block *pSrc_pixel_blocks, const etc_block *pBlock_endpoints, diff --git a/src/deps/basis-universal/basisu_gpu_texture.cpp b/src/deps/basis-universal/encoder/basisu_gpu_texture.cpp similarity index 71% rename from src/deps/basis-universal/basisu_gpu_texture.cpp rename to src/deps/basis-universal/encoder/basisu_gpu_texture.cpp index 117668c5e2..3f9fb67bdd 100644 --- a/src/deps/basis-universal/basisu_gpu_texture.cpp +++ b/src/deps/basis-universal/encoder/basisu_gpu_texture.cpp @@ -1,5 +1,5 @@ // basisu_gpu_texture.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,38 +16,10 @@ #include "basisu_enc.h" #include "basisu_pvrtc1_4.h" #include "basisu_astc_decomp.h" +#include "basisu_bc7enc.h" namespace basisu { - const int8_t g_etc2_eac_tables[16][8] = - { - { -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 }, - { -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 }, - { -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 }, - { -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 } - }; - - struct eac_a8_block - { - uint16_t m_base : 8; - uint16_t m_table : 4; - uint16_t m_multiplier : 4; - - uint8_t m_selectors[6]; - - inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const - { - assert((x < 4) && (y < 4)); - return static_cast((selector_bits >> (45 - (y + x * 4) * 3)) & 7); - } - - inline uint64_t get_selector_bits() const - { - uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5]; - return pixels; - } - }; - void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels) { static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8"); @@ -123,19 +95,18 @@ namespace basisu bc1_block::unpack_color(l, r0, g0, b0); bc1_block::unpack_color(h, r1, g1, b1); + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + bool used_punchthrough = false; if (l > h) { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); c[2].set_noclamp_rgba((r0 * 2 
+ r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); } else { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); c[3].set_noclamp_rgba(0, 0, 0, 0); used_punchthrough = true; @@ -165,6 +136,142 @@ namespace basisu return used_punchthrough; } + bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); + + const bc1_block *pBlock = static_cast(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + int r0 = (l >> 11) & 31; + int g0 = (l >> 5) & 63; + int b0 = l & 31; + int r1 = (h >> 11) & 31; + int g1 = (h >> 5) & 63; + int b1 = h & 31; + + c[0].b = (uint8_t)((3 * b0 * 22) / 8); + c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4)); + c[0].r = (uint8_t)((3 * r0 * 22) / 8); + c[0].a = 0xFF; + + c[1].r = (uint8_t)((3 * r1 * 22) / 8); + c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4)); + c[1].b = (uint8_t)((3 * b1 * 22) / 8); + c[1].a = 0xFF; + + int gdiff = c[1].g - c[0].g; + + bool used_punchthrough = false; + + if (l > h) + { + c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8); + c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256)); + c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8); + c[2].a = 0xFF; + + c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8); + c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256); + c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8); + c[3].a = 0xFF; + } + else + { + c[2].r = (uint8_t)(((r0 + r1) * 33) / 8); + c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256); + c[2].b = (uint8_t)(((b0 + b1) * 33) / 8); + c[2].a = 0xFF; + + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) 
+ { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } + static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + + bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + const bc1_block *pBlock = static_cast(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + uint32_t r0, g0, b0, r1, g1, b1; + bc1_block::unpack_color(l, r0, g0, b0); + bc1_block::unpack_color(h, r1, g1, b1); + + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + + bool used_punchthrough = false; + + if (l > h) + { + c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + } + else + { + c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, 
pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + struct bc4_block { enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; @@ -292,7 +399,7 @@ namespace basisu if (mode) { - c[1].set(std::max(0, c[0].r - (c[3].r >> 2)), std::max(0, c[0].g - (c[3].g >> 2)), std::max(0, c[0].b - (c[3].b >> 2)), 255); + c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255); c[2] = c[0]; c[0].set(0, 0, 0, 255); } @@ -317,6 +424,191 @@ namespace basisu } } + // BC7 mode 0-7 decompression. + // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. + + static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } + static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + + static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; } + static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; } + static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 
16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; } + static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) + { + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; + } + + bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t PBITS = (mode == 0) ? 6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[6]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? 
bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = 255; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? 
(e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + const uint32_t ENDPOINTS = 2; + const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); + const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + + uint32_t weights[16], a_weights[16]; + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 
1 : 0)); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + color_rgba block_colors[8]; + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; + } + struct bc7_mode_6 { struct @@ -364,9 +656,6 @@ namespace basisu }; }; - static const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - - // The transcoder only outputs mode 6 at the moment, so this is easy. 
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) { static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); @@ -388,7 +677,7 @@ namespace basisu color_rgba vals[16]; for (uint32_t i = 0; i < 16; i++) { - const uint32_t w = g_bc7_weights4[i]; + const uint32_t w = basist::g_bc7_weights4[i]; const uint32_t iw = 64 - w; vals[i].set_noclamp_rgba( (r0 * iw + r1 * w + 32) >> 6, @@ -420,183 +709,37 @@ namespace basisu return true; } - static inline uint32_t get_block_bits(const uint8_t* pBytes, uint32_t bit_ofs, uint32_t bits_wanted) + bool unpack_bc7(const void *pBlock, color_rgba *pPixels) { - assert(bits_wanted < 32); + const uint32_t first_byte = static_cast(pBlock)[0]; - uint32_t v = 0; - uint32_t total_bits = 0; - - while (total_bits < bits_wanted) + for (uint32_t mode = 0; mode <= 7; mode++) { - uint32_t k = pBytes[bit_ofs >> 3]; - k >>= (bit_ofs & 7); - uint32_t num_bits_in_byte = 8 - (bit_ofs & 7); - - v |= (k << total_bits); - total_bits += num_bits_in_byte; - bit_ofs += num_bits_in_byte; - } - - return v & ((1 << bits_wanted) - 1); - } - - struct bc7_mode_5 - { - union - { - struct + if (first_byte & (1U << mode)) { - uint64_t m_mode : 6; - uint64_t m_rot : 2; - - uint64_t m_r0 : 7; - uint64_t m_r1 : 7; - uint64_t m_g0 : 7; - uint64_t m_g1 : 7; - uint64_t m_b0 : 7; - uint64_t m_b1 : 7; - uint64_t m_a0 : 8; - uint64_t m_a1_0 : 6; - - } m_lo; - - uint64_t m_lo_bits; - }; - - union - { - struct - { - uint64_t m_a1_1 : 2; - - // bit 2 - uint64_t m_c00 : 1; - uint64_t m_c10 : 2; - uint64_t m_c20 : 2; - uint64_t m_c30 : 2; - - uint64_t m_c01 : 2; - uint64_t m_c11 : 2; - uint64_t m_c21 : 2; - uint64_t m_c31 : 2; - - uint64_t m_c02 : 2; - uint64_t m_c12 : 2; - uint64_t m_c22 : 2; - uint64_t m_c32 : 2; - - uint64_t m_c03 : 2; - uint64_t m_c13 : 2; - uint64_t m_c23 : 2; - uint64_t m_c33 : 2; - - // bit 33 - uint64_t m_a00 : 1; - uint64_t m_a10 : 2; - uint64_t m_a20 : 2; - uint64_t m_a30 : 2; - - uint64_t m_a01 : 2; - uint64_t m_a11 : 2; - 
uint64_t m_a21 : 2; - uint64_t m_a31 : 2; - - uint64_t m_a02 : 2; - uint64_t m_a12 : 2; - uint64_t m_a22 : 2; - uint64_t m_a32 : 2; - - uint64_t m_a03 : 2; - uint64_t m_a13 : 2; - uint64_t m_a23 : 2; - uint64_t m_a33 : 2; - - } m_hi; - - uint64_t m_hi_bits; - }; - - color_rgba get_low_color() const - { - return color_rgba(cNoClamp, - (int)((m_lo.m_r0 << 1) | (m_lo.m_r0 >> 6)), - (int)((m_lo.m_g0 << 1) | (m_lo.m_g0 >> 6)), - (int)((m_lo.m_b0 << 1) | (m_lo.m_b0 >> 6)), - m_lo.m_a0); - } - - color_rgba get_high_color() const - { - return color_rgba(cNoClamp, - (int)((m_lo.m_r1 << 1) | (m_lo.m_r1 >> 6)), - (int)((m_lo.m_g1 << 1) | (m_lo.m_g1 >> 6)), - (int)((m_lo.m_b1 << 1) | (m_lo.m_b1 >> 6)), - (int)m_lo.m_a1_0 | ((int)m_hi.m_a1_1 << 6)); - } - - void get_block_colors(color_rgba* pColors) const - { - const color_rgba low_color(get_low_color()); - const color_rgba high_color(get_high_color()); - - for (uint32_t i = 0; i < 4; i++) - { - static const uint32_t s_bc7_weights2[4] = { 0, 21, 43, 64 }; - - pColors[i].set_noclamp_rgba( - (low_color.r * (64 - s_bc7_weights2[i]) + high_color.r * s_bc7_weights2[i] + 32) >> 6, - (low_color.g * (64 - s_bc7_weights2[i]) + high_color.g * s_bc7_weights2[i] + 32) >> 6, - (low_color.b * (64 - s_bc7_weights2[i]) + high_color.b * s_bc7_weights2[i] + 32) >> 6, - (low_color.a * (64 - s_bc7_weights2[i]) + high_color.a * s_bc7_weights2[i] + 32) >> 6); + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, pBlock, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, pBlock, pPixels); + case 6: + return unpack_bc7_mode6(pBlock, pPixels); + default: + break; + } } - } - - uint32_t get_selector(uint32_t idx, bool alpha) const - { - const uint32_t size = (idx == 0) ? 1 : 2; - - uint32_t ofs = alpha ? 
97 : 66; - - if (idx) - ofs += 1 + 2 * (idx - 1); - - return get_block_bits(reinterpret_cast(this), ofs, size); - } - }; - - bool unpack_bc7_mode5(const void* pBlock_bits, color_rgba* pPixels) - { - static_assert(sizeof(bc7_mode_5) == 16, "sizeof(bc7_mode_5) == 16"); - - const bc7_mode_5& block = *static_cast(pBlock_bits); - - if (block.m_lo.m_mode != (1 << 5)) - return false; - - color_rgba block_colors[4]; - block.get_block_colors(block_colors); - - const uint32_t rot = block.m_lo.m_rot; - - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t cs = block.get_selector(i, false); - - color_rgba c(block_colors[cs]); - - const uint32_t as = block.get_selector(i, true); - c.a = block_colors[as].a; - - if (rot > 0) - std::swap(c[3], c[rot - 1]); - - pPixels[i] = c; } - return true; + return false; } - + struct fxt1_block { union @@ -903,13 +1046,14 @@ namespace basisu etc2_eac_r11 m_c[2]; }; - static void unpack_etc2_eac_r(const etc2_eac_r11* p, color_rgba* pPixels, uint32_t c) + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c) { - const uint64_t sels = p->get_sels(); + const etc2_eac_r11* pBlock = static_cast(p); + const uint64_t sels = pBlock->get_sels(); - const int base = (int)p->m_base * 8 + 4; - const int mul = p->m_mul ? ((int)p->m_mul * 8) : 1; - const int table = (int)p->m_table; + const int base = (int)pBlock->m_base * 8 + 4; + const int mul = pBlock->m_mul ? 
((int)pBlock->m_mul * 8) : 1; + const int table = (int)pBlock->m_table; for (uint32_t y = 0; y < 4; y++) { @@ -923,7 +1067,8 @@ namespace basisu val = clamp(val, 0, 2047); // Convert to 8-bits with rounding - pPixels[x + y * 4].m_comps[c] = static_cast((val * 255 + 1024) / 2047); + //pPixels[x + y * 4].m_comps[c] = static_cast((val * 255 + 1024) / 2047); + pPixels[x + y * 4].m_comps[c] = static_cast((val * 255 + 1023) / 2047); } // x } // y @@ -939,6 +1084,11 @@ namespace basisu } } + void unpack_uastc(const void* p, color_rgba* pPixels) + { + basist::unpack_uastc(*static_cast(p), (basist::color32 *)pPixels, false); + } + // Unpacks to RGBA, R, RG, or A bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) { @@ -949,6 +1099,16 @@ namespace basisu unpack_bc1(pBlock, pPixels, true); break; } + case texture_format::cBC1_NV: + { + unpack_bc1_nv(pBlock, pPixels, true); + break; + } + case texture_format::cBC1_AMD: + { + unpack_bc1_amd(pBlock, pPixels, true); + break; + } case texture_format::cBC3: { return unpack_bc3(pBlock, pPixels); @@ -966,14 +1126,7 @@ namespace basisu } case texture_format::cBC7: { - // We only support modes 5 and 6. - if (!unpack_bc7_mode5(pBlock, pPixels)) - { - if (!unpack_bc7_mode6(pBlock, pPixels)) - return false; - } - - break; + return unpack_bc7(pBlock, pPixels); } // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color) case texture_format::cETC2_RGB: @@ -1032,6 +1185,11 @@ namespace basisu unpack_etc2_eac_rg(pBlock, pPixels); break; } + case texture_format::cUASTC4x4: + { + unpack_uastc(pBlock, pPixels); + break; + } default: { assert(0); @@ -1113,6 +1271,7 @@ namespace basisu KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02, KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0, KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0, + KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value! 
KTX_ATC_RGB_AMD = 0x8C92, KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE, KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0, @@ -1143,7 +1302,7 @@ namespace basisu }; // Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index] - bool create_ktx_texture_file(uint8_vec &ktx_data, const std::vector& gpu_images, bool cubemap_flag) + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag) { if (!gpu_images.size()) { @@ -1220,6 +1379,8 @@ namespace basisu switch (fmt) { case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: { internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; break; @@ -1305,6 +1466,12 @@ namespace basisu base_internal_fmt = KTX_RG; break; } + case texture_format::cUASTC4x4: + { + internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR; + base_internal_fmt = KTX_RGBA; + break; + } case texture_format::cFXT1_RGB: { internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX; @@ -1378,7 +1545,7 @@ namespace basisu return true; } - bool write_compressed_texture_file(const char* pFilename, const std::vector& g, bool cubemap_flag) + bool write_compressed_texture_file(const char* pFilename, const basisu::vector& g, bool cubemap_flag) { std::string extension(string_tolower(string_get_extension(pFilename))); @@ -1410,12 +1577,12 @@ namespace basisu bool write_compressed_texture_file(const char* pFilename, const gpu_image& g) { - std::vector v; + basisu::vector v; enlarge_vector(v, 1)->push_back(g); return write_compressed_texture_file(pFilename, v, false); } - const uint32_t OUT_FILE_MAGIC = 'TEXC'; + //const uint32_t OUT_FILE_MAGIC = 'TEXC'; struct out_file_header { packed_uint<4> m_magic; @@ -1428,7 +1595,11 @@ namespace basisu bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi) { out_file_header hdr; - hdr.m_magic = OUT_FILE_MAGIC; + //hdr.m_magic = OUT_FILE_MAGIC; + hdr.m_magic.m_bytes[0] = 67; + hdr.m_magic.m_bytes[1] = 88; + hdr.m_magic.m_bytes[2] = 69; + 
hdr.m_magic.m_bytes[3] = 84; hdr.m_pad = 0; hdr.m_width = gi.get_blocks_x() * 8; hdr.m_height = gi.get_blocks_y() * 4; diff --git a/src/deps/basis-universal/basisu_gpu_texture.h b/src/deps/basis-universal/encoder/basisu_gpu_texture.h similarity index 87% rename from src/deps/basis-universal/basisu_gpu_texture.h rename to src/deps/basis-universal/encoder/basisu_gpu_texture.h index 8a49757ca7..619926f5f9 100644 --- a/src/deps/basis-universal/basisu_gpu_texture.h +++ b/src/deps/basis-universal/encoder/basisu_gpu_texture.h @@ -1,5 +1,5 @@ // basisu_gpu_texture.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,13 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_etc.h" namespace basisu { - // GPU texture image - + // GPU texture "image" class gpu_image { public: @@ -115,17 +114,17 @@ namespace basisu uint64_vec m_blocks; }; - typedef std::vector gpu_image_vec; + typedef basisu::vector gpu_image_vec; // KTX file writing - bool create_ktx_texture_file(uint8_vec &ktx_data, const std::vector& gpu_images, bool cubemap_flag); + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag); - bool write_compressed_texture_file(const char *pFilename, const std::vector& g, bool cubemap_flag); + bool write_compressed_texture_file(const char *pFilename, const basisu::vector& g, bool cubemap_flag); inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g) { - std::vector a; + basisu::vector a; a.push_back(g); return write_compressed_texture_file(pFilename, a, false); } @@ -133,22 +132,23 @@ namespace basisu bool 
write_compressed_texture_file(const char *pFilename, const gpu_image &g); bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); - // GPU texture block unpacking + // GPU texture block unpacking void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels); bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha); void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride); bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels); void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels); bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels); - bool unpack_bc7_mode5(const void* pBlock_bits, color_rgba* pPixels); + bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); void unpack_atc(const void* pBlock_bits, color_rgba* pPixels); bool unpack_fxt1(const void* p, color_rgba* pPixels); bool unpack_pvrtc2(const void* p, color_rgba* pPixels); + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c); void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels); - // unpack_block() is only capable of unpacking texture data created by the transcoder. - // For some texture formats (like BC7, or ETC2) it's not a complete implementation. + // unpack_block() is primarily intended to unpack texture data created by the transcoder. + // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation. bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); } // namespace basisu diff --git a/src/deps/basis-universal/encoder/basisu_kernels_sse.cpp b/src/deps/basis-universal/encoder/basisu_kernels_sse.cpp new file mode 100644 index 0000000000..12d2321f20 --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_kernels_sse.cpp @@ -0,0 +1,161 @@ +// basisu_kernels_sse.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_enc.h" + +#if BASISU_SUPPORT_SSE + +#define CPPSPMD_SSE2 0 + +#ifdef _MSC_VER +#include +#endif + +#if !defined(_MSC_VER) + #if __AVX__ || __AVX2__ || __AVX512F__ + #error Please check your compiler options + #endif + + #if CPPSPMD_SSE2 + #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__ + #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file + #endif + #else + #if !__SSE4_1__ || !__SSE3__ || __SSE4_2__ || !__SSSE3__ + #error Please check your compiler options + #endif + #endif +#endif + +#include "cppspmd_sse.h" + +#include "cppspmd_type_aliases.h" + +using namespace basisu; + +#include "basisu_kernels_declares.h" +#include "basisu_kernels_imp.h" + +namespace basisu +{ + +struct cpu_info +{ + cpu_info() { memset(this, 0, sizeof(*this)); } + + bool m_has_fpu; + bool m_has_mmx; + bool m_has_sse; + bool m_has_sse2; + bool m_has_sse3; + bool m_has_ssse3; + bool m_has_sse41; + bool m_has_sse42; + bool m_has_avx; + bool m_has_avx2; + bool m_has_pclmulqdq; +}; + +static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx) +{ + info.m_has_fpu = (edx & (1 << 0)) != 0; + info.m_has_mmx = (edx & (1 << 23)) != 0; + info.m_has_sse = (edx & (1 << 25)) != 0; + info.m_has_sse2 = (edx & (1 << 26)) != 0; + info.m_has_sse3 = (ecx & (1 << 0)) != 0; + info.m_has_ssse3 = (ecx & (1 << 9)) != 0; + info.m_has_sse41 = (ecx & (1 << 19)) != 0; + info.m_has_sse42 = (ecx & (1 << 20)) != 0; 
+ info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0; + info.m_has_avx = (ecx & (1 << 28)) != 0; +} + +static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx) +{ + info.m_has_avx2 = (ebx & (1 << 5)) != 0; +} + +#ifndef _MSC_VER +static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs) +{ + uint32_t ebx = 0, edx = 0; + +#if defined(__PIC__) && defined(__i386__) + __asm__("movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#else + __asm__("cpuid;" : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#endif + + regs[0] = eax; regs[1] = ebx; regs[2] = ecx; regs[3] = edx; +} +#endif + +static void get_cpuinfo(cpu_info &info) +{ + int regs[4]; + +#ifdef _MSC_VER + __cpuid(regs, 0); +#else + do_cpuid(0, 0, (uint32_t *)regs); +#endif + + const uint32_t max_eax = regs[0]; + + if (max_eax >= 1U) + { +#ifdef _MSC_VER + __cpuid(regs, 1); +#else + do_cpuid(1, 0, (uint32_t*)regs); +#endif + extract_x86_flags(info, regs[2], regs[3]); + } + + if (max_eax >= 7U) + { +#ifdef _MSC_VER + __cpuidex(regs, 7, 0); +#else + do_cpuid(7, 0, (uint32_t*)regs); +#endif + + extract_x86_extended_flags(info, regs[1]); + } +} + +void detect_sse41() +{ + cpu_info info; + get_cpuinfo(info); + + // Check for everything from SSE to SSE 4.1 + g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41; +} + +} // namespace basisu +#else // #if BASISU_SUPPORT_SSE +namespace basisu +{ + +void detect_sse41() +{ +} + +} // namespace basisu +#endif // #if BASISU_SUPPORT_SSE + diff --git a/src/deps/basis-universal/encoder/basisu_miniz.h b/src/deps/basis-universal/encoder/basisu_miniz.h new file mode 100644 index 0000000000..8627abe893 --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_miniz.h @@ -0,0 +1,2514 @@ +/* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: 
http://www.ietf.org/rfc/rfc1951.txt + + Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ + + Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be called. +// The current downside is the times written to your archives will be from 1979. +//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. 
+//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) + // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux + #define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) + #include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +namespace buminiz { + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. 
Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Return status codes. MZ_PARAM_ERROR is non-standard. 
+enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s +{ + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. 
+// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. +// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. 
+// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. 
+// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. 
+// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; + #define Z_NULL 0 + #define Z_NO_FLUSH MZ_NO_FLUSH + #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH + #define Z_SYNC_FLUSH MZ_SYNC_FLUSH + #define Z_FULL_FLUSH MZ_FULL_FLUSH + #define Z_FINISH MZ_FINISH + #define Z_BLOCK MZ_BLOCK + #define Z_OK MZ_OK + #define Z_STREAM_END MZ_STREAM_END + #define Z_NEED_DICT MZ_NEED_DICT + #define Z_ERRNO MZ_ERRNO + #define Z_STREAM_ERROR MZ_STREAM_ERROR + #define Z_DATA_ERROR MZ_DATA_ERROR + #define Z_MEM_ERROR MZ_MEM_ERROR + #define Z_BUF_ERROR MZ_BUF_ERROR + #define Z_VERSION_ERROR MZ_VERSION_ERROR + #define Z_PARAM_ERROR MZ_PARAM_ERROR + #define Z_NO_COMPRESSION MZ_NO_COMPRESSION + #define Z_BEST_SPEED MZ_BEST_SPEED + #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION + #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION + #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY + #define Z_FILTERED MZ_FILTERED + #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY + #define Z_RLE MZ_RLE + #define Z_FIXED MZ_FIXED + #define Z_DEFLATED MZ_DEFLATED + #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS + #define alloc_func mz_alloc_func + #define free_func mz_free_func + #define internal_state mz_internal_state + #define z_stream mz_stream + #define deflateInit mz_deflateInit + #define deflateInit2 mz_deflateInit2 + #define deflateReset mz_deflateReset + #define deflate mz_deflate + #define deflateEnd mz_deflateEnd + #define deflateBound mz_deflateBound + #define compress mz_compress + #define compress2 mz_compress2 + #define compressBound mz_compressBound + #define inflateInit mz_inflateInit + #define inflateInit2 mz_inflateInit2 + #define inflate 
mz_inflate + #define inflateEnd mz_inflateEnd + #define uncompress mz_uncompress + #define crc32 mz_crc32 + #define adler32 mz_adler32 + #define MAX_WBITS 15 + #define MAX_MEM_LEVEL 9 + #define zError mz_error + #define ZLIB_VERSION MZ_VERSION + #define ZLIB_VERNUM MZ_VERNUM + #define ZLIB_VER_MAJOR MZ_VER_MAJOR + #define ZLIB_VER_MINOR MZ_VER_MINOR + #define ZLIB_VER_REVISION MZ_VER_REVISION + #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION + #define zlibVersion mz_version + #define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER + #define MZ_MACRO_END while (0, 0) +#else + #define MZ_MACRO_END while (0) +#endif + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. 
+enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. +// Returns 1 on success or 0 on failure. +typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. 
+typedef enum +{ + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +// Internal/private bits follow. 
+enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. 
+// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. 
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. 
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else +enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + +// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. +typedef enum +{ + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum +{ + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. 
+typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. +// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +} // namespace buminiz + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +#include +#include + +namespace buminiz { + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 
1 : -1]; + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC + #define MZ_MALLOC(x) NULL + #define MZ_FREE(x) (void)x, ((void)0) + #define MZ_REALLOC(p, x) NULL +#else + #define MZ_MALLOC(x) malloc(x) + #define MZ_FREE(x) free(x) + #define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) + #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else + #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) + #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER + #define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else + #define MZ_FORCEINLINE inline +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. 
See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; +} + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } +static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } +//static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 
MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) 
+ mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL; + mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL; + + mz_uint64 t = MZ_MAX(a, b); + if (((mz_ulong)t) != t) + t = (mz_ulong)(-1); + + return (mz_ulong)t; +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) 
pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state* pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
+ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do { status = 
result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for ( ; ; ) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else c = *pIn_buf_cur++; } MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a +// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the +// bit buffer contains >=15 bits (deflate's max. Huffman code size). 
+#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \ + } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read +// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully +// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. +// The slow path is only executed at the very end of the input buffer. 
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ + int temp; mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ + } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & 
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while 
(counter) + { + size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if 
((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } 
+ pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if 
(decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. 
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression API's) + +// Purposely making these tables static for faster init and thread safety. 
+static const mz_uint16 s_tdefl_len_sym[256] = { + 257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272, + 273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276, + 277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278, + 279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280, + 281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281, + 282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282, + 283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283, + 284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 }; + +static const mz_uint8 s_tdefl_len_extra[256] = { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 }; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = { + 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13, 
+ 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 }; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = { + 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7 }; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = { + 0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26, + 26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 }; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = { + 0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 }; + +// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. 
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. 
+enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; 
i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) do { \ + mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} 
rle_z_count = 0; } } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { 
TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = 
d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
+ else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) +static 
MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void 
tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. 
+ mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 
*)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? 
s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = 
d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + 
tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? 
*pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, 
NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = 
(tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = out_buf.m_size; return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const 
mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. 
+ static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, 
*pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +} // namespace buminiz + +#endif // MINIZ_HEADER_FILE_ONLY + diff --git a/src/deps/basis-universal/encoder/basisu_pvrtc1_4.cpp b/src/deps/basis-universal/encoder/basisu_pvrtc1_4.cpp new file mode 100644 index 0000000000..596fc197e6 --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_pvrtc1_4.cpp @@ -0,0 +1,564 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_pvrtc1_4.h" + +namespace basisu +{ +#if 0 + static const uint8_t g_pvrtc_5[32] = { 0,8,16,24,33,41,49,57,66,74,82,90,99,107,115,123,132,140,148,156,165,173,181,189,198,206,214,222,231,239,247,255 }; + static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; + static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; + static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; +#endif + + static const uint8_t g_pvrtc_5_nearest[256] = { 0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31 }; + static const uint8_t g_pvrtc_4_nearest[256] = { 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15 }; +#if 0 + static const uint8_t g_pvrtc_3_nearest[256] = { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; + static const uint8_t g_pvrtc_alpha_nearest[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8 }; +#endif + +#if 0 + static const uint8_t g_pvrtc_5_floor[256] = + { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7, + 7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15, + 15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19, + 19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23, + 23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27, + 27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31 + }; + + static const uint8_t g_pvrtc_5_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4, + 
4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8, + 8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12, + 12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16, + 16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20, + 20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24, + 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 + }; + + static const uint8_t g_pvrtc_4_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11, + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15 + }; + + static const uint8_t g_pvrtc_4_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, + 
14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 + }; + + static const uint8_t g_pvrtc_3_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7 + }; + + static const uint8_t g_pvrtc_3_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 + }; + + static const uint8_t g_pvrtc_alpha_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8 + }; + + static const uint8_t g_pvrtc_alpha_ceil[256] = + { + 
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + }; +#endif + + uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) + { + assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); + + uint32_t min_d = width, max_v = y; + if (height < width) + { + min_d = height; + max_v = x; + } + + // Interleave the XY LSB's + uint32_t shift_ofs = 0, swizzled = 0; + for (uint32_t s_bit = 1, d_bit = 1; s_bit < min_d; s_bit <<= 1, d_bit <<= 2, ++shift_ofs) + { + if (y & s_bit) swizzled |= d_bit; + if (x & s_bit) swizzled |= (2 * d_bit); + } + + max_v >>= shift_ofs; + + // OR in the rest of the bits from the largest dimension + swizzled |= (max_v << (2 * shift_ofs)); + + return swizzled; + } + + color_rgba pvrtc4_block::get_endpoint(uint32_t endpoint_index, bool unpack) const + { + assert(endpoint_index < 2); + const uint32_t packed = m_endpoints >> (endpoint_index * 16); + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + if (!endpoint_index) + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = (packed >> 1) & 15; + + if (unpack) + { + b = (b << 1) | (b >> 3); + } + } + else + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + } + + a = unpack ? 
255 : 7; + } + else + { + // translucent 4433 or 4443 + if (!endpoint_index) + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = (packed >> 1) & 7; + + if (unpack) + { + a = (a << 1); + a = (a << 4) | a; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 2) | (b >> 1); + } + } + else + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = packed & 15; + + if (unpack) + { + a = (a << 1); + a = (a << 4) | a; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 1) | (b >> 3); + } + } + } + + if (unpack) + { + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + } + + assert((r < 256) && (g < 256) && (b < 256) && (a < 256)); + + return color_rgba(r, g, b, a); + } + + color_rgba pvrtc4_block::get_endpoint_5554(uint32_t endpoint_index) const + { + assert(endpoint_index < 2); + const uint32_t packed = m_endpoints >> (endpoint_index * 16); + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + if (!endpoint_index) + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = (packed >> 1) & 15; + + b = (b << 1) | (b >> 3); + } + else + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + } + + a = 15; + } + else + { + // translucent 4433 or 4443 + if (!endpoint_index) + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = (packed >> 1) & 7; + + a = a << 1; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 2) | (b >> 1); + } + else + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = packed & 15; + + a = a << 1; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 1) | (b >> 3); + } + } + + assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); + + return color_rgba(r, g, b, a); + } + + bool pvrtc4_image::get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const + { + assert((x < m_width) 
&& (y < m_height)); + + int block_x0 = (static_cast(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + + pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + for (uint32_t c = 0; c < 4; c++) + { + uint32_t m = (pColors[0][c] + pColors[3][c]) / 2; + pColors[1][c] = static_cast(m); + pColors[2][c] = static_cast(m); + } + pColors[2][3] = 0; + return true; + } + + for (uint32_t c = 0; c < 4; c++) + { + pColors[1][c] = static_cast((pColors[0][c] * 5 + pColors[3][c] * 3) / 8); + pColors[2][c] = static_cast((pColors[0][c] * 3 + pColors[3][c] * 5) / 8); + } + + return false; + } + + color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const + { + assert((x < m_width) && (y < m_height)); + + int block_x0 = (static_cast(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, 
block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + return color_rgba((l[0] + h[0]) / 2, (l[1] + h[1]) / 2, (l[2] + h[2]) / 2, (m == 2) ? 0 : (l[3] + h[3]) / 2); + } + else + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + if (m == 2) + return 
color_rgba((l[0] * 3 + h[0] * 5) / 8, (l[1] * 3 + h[1] * 5) / 8, (l[2] * 3 + h[2] * 5) / 8, (l[3] * 3 + h[3] * 5) / 8); + else + return color_rgba((l[0] * 5 + h[0] * 3) / 8, (l[1] * 5 + h[1] * 3) / 8, (l[2] * 5 + h[2] * 3) / 8, (l[3] * 5 + h[3] * 3) / 8); + } + } + + uint64_t pvrtc4_image::local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual) + { + uint64_t initial_error = evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false); + if (!initial_error) + return initial_error; + + vec3F c_avg_orig(0); + + for (int y = 0; y < 7; y++) + { + const uint32_t py = wrap_y(by * 4 + y - 1); + for (uint32_t x = 0; x < 7; x++) + { + const uint32_t px = wrap_x(bx * 4 + x - 1); + + const color_rgba& c = orig_img(px, py); + + c_avg_orig[0] += c[0]; + c_avg_orig[1] += c[1]; + c_avg_orig[2] += c[2]; + } + } + + c_avg_orig *= 1.0f / 49.0f; + + vec3F quant_colors[2]; + quant_colors[0].set(c_avg_orig); + quant_colors[0] -= vec3F(.0125f); + + quant_colors[1].set(c_avg_orig); + quant_colors[1] += vec3F(.0125f); + + float total_weight[2]; + + bool success = true; + + for (uint32_t pass = 0; pass < 4; pass++) + { + vec3F new_colors[2] = { vec3F(0), vec3F(0) }; + memset(total_weight, 0, sizeof(total_weight)); + + static const float s_weights[7][7] = + { + { 1.000000f, 1.637089f, 2.080362f, 2.242640f, 2.080362f, 1.637089f, 1.000000f }, + { 1.637089f, 2.414213f, 3.006572f, 3.242640f, 3.006572f, 2.414213f, 1.637089f }, + { 2.080362f, 3.006572f, 3.828426f, 4.242640f, 3.828426f, 3.006572f, 2.080362f }, + { 2.242640f, 3.242640f, 4.242640f, 5.000000f, 4.242640f, 3.242640f, 2.242640f }, + { 2.080362f, 3.006572f, 3.828426f, 4.242640f, 3.828426f, 3.006572f, 2.080362f }, + { 1.637089f, 2.414213f, 3.006572f, 3.242640f, 3.006572f, 2.414213f, 1.637089f }, + { 1.000000f, 1.637089f, 2.080362f, 2.242640f, 2.080362f, 1.637089f, 1.000000f } + }; + + for (int y = 0; y < 7; y++) + { + const uint32_t py = wrap_y(by * 4 + y - 1); + for (uint32_t x = 0; x < 7; 
x++) + { + const uint32_t px = wrap_x(bx * 4 + x - 1); + + const color_rgba& orig_c = orig_img(px, py); + + vec3F color(orig_c[0], orig_c[1], orig_c[2]); + + uint32_t c = quant_colors[0].squared_distance(color) > quant_colors[1].squared_distance(color); + + const float weight = s_weights[y][x]; + new_colors[c] += color * weight; + + total_weight[c] += weight; + } + } + + if (!total_weight[0] || !total_weight[1]) + success = false; + + quant_colors[0] = new_colors[0] / (float)total_weight[0]; + quant_colors[1] = new_colors[1] / (float)total_weight[1]; + } + + if (!success) + { + quant_colors[0] = c_avg_orig; + quant_colors[1] = c_avg_orig; + } + + vec4F colors[2] = { quant_colors[0], quant_colors[1] }; + + colors[0] += vec3F(.5f); + colors[1] += vec3F(.5f); + color_rgba color_0((int)colors[0][0], (int)colors[0][1], (int)colors[0][2], 0); + color_rgba color_1((int)colors[1][0], (int)colors[1][1], (int)colors[1][2], 0); + + pvrtc4_block cur_blocks[3][3]; + + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + cur_blocks[x + 1][y + 1] = m_blocks(block_x, block_y); + } + } + + color_rgba l1(0), h1(0); + + l1[0] = g_pvrtc_5_nearest[color_0[0]]; + h1[0] = g_pvrtc_5_nearest[color_1[0]]; + + l1[1] = g_pvrtc_5_nearest[color_0[1]]; + h1[1] = g_pvrtc_5_nearest[color_1[1]]; + + l1[2] = g_pvrtc_4_nearest[color_0[2]]; + h1[2] = g_pvrtc_5_nearest[color_0[2]]; + + l1[3] = 0; + h1[3] = 0; + + m_blocks(bx, by).set_endpoint_raw(0, l1, true); + m_blocks(bx, by).set_endpoint_raw(1, h1, true); + + uint64_t e03_err_0 = remap_pixels_influenced_by_endpoint(bx, by, orig_img, perceptual, false); + + pvrtc4_block blocks0[3][3]; + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + blocks0[x + 1][y + 1] = m_blocks(block_x, block_y); + } + } + + l1[0] = 
g_pvrtc_5_nearest[color_1[0]]; + h1[0] = g_pvrtc_5_nearest[color_0[0]]; + + l1[1] = g_pvrtc_5_nearest[color_1[1]]; + h1[1] = g_pvrtc_5_nearest[color_0[1]]; + + l1[2] = g_pvrtc_4_nearest[color_1[2]]; + h1[2] = g_pvrtc_5_nearest[color_0[2]]; + + l1[3] = 0; + h1[3] = 0; + + m_blocks(bx, by).set_endpoint_raw(0, l1, true); + m_blocks(bx, by).set_endpoint_raw(1, h1, true); + + uint64_t e03_err_1 = remap_pixels_influenced_by_endpoint(bx, by, orig_img, perceptual, false); + + if (initial_error < basisu::minimum(e03_err_0, e03_err_1)) + { + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + m_blocks(block_x, block_y) = cur_blocks[x + 1][y + 1]; + } + } + return initial_error; + } + else if (e03_err_0 < e03_err_1) + { + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + m_blocks(block_x, block_y) = blocks0[x + 1][y + 1]; + } + } + assert(e03_err_0 == evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false)); + return e03_err_0; + } + + assert(e03_err_1 == evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false)); + return e03_err_1; + } + +} // basisu diff --git a/src/deps/basis-universal/basisu_pvrtc1_4.h b/src/deps/basis-universal/encoder/basisu_pvrtc1_4.h similarity index 77% rename from src/deps/basis-universal/basisu_pvrtc1_4.h rename to src/deps/basis-universal/encoder/basisu_pvrtc1_4.h index 80b4413351..db6985a439 100644 --- a/src/deps/basis-universal/basisu_pvrtc1_4.h +++ b/src/deps/basis-universal/encoder/basisu_pvrtc1_4.h @@ -1,5 +1,5 @@ // basisu_pvrtc1_4.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -87,6 +87,14 @@ namespace basisu return (m_modulation >> ((y * 4 + x) * 2)) & 3; } + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 4)); + uint32_t n = (y * 4 + x) * 2; + m_modulation = (m_modulation & (~(3 << n))) | (s << n); + assert(get_modulation(x, y) == s); + } + // Scaled by 8 inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const { @@ -107,7 +115,7 @@ namespace basisu } // opaque endpoints: 554, 555 - // transparent endpoints: 3443 or 3444 + // transparent endpoints: 3443, 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color_rgba& c, bool opaque_endpoint) { assert(endpoint_index < 2); @@ -352,7 +360,93 @@ namespace basisu return result; } - + + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).set_modulation(x & 3, y & 3, s); + } + + inline uint64_t map_pixel(uint32_t x, uint32_t y, const color_rgba& c, bool perceptual, bool alpha_is_significant, bool record = true) + { + color_rgba v[4]; + get_interpolated_colors(x, y, v); + + uint64_t best_dist = color_distance(perceptual, c, v[0], alpha_is_significant); + uint32_t best_v = 0; + for (uint32_t i = 1; i < 4; i++) + { + uint64_t dist = color_distance(perceptual, c, v[i], alpha_is_significant); + if (dist < best_dist) + { + best_dist = dist; + best_v = i; + } + } + + if (record) + set_modulation(x, y, best_v); + + return best_dist; + } + + inline uint64_t remap_pixels_influenced_by_endpoint(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant) + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += map_pixel(x, y, orig_img(x, y), perceptual, alpha_is_significant); + } + } + + return total_error; 
+ } + + inline uint64_t evaluate_1x1_endpoint_error(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant, uint64_t threshold_error = 0) const + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += color_distance(perceptual, get_pixel(x, y), orig_img(x, y), alpha_is_significant); + + if ((threshold_error) && (total_error >= threshold_error)) + return total_error; + } + } + + return total_error; + } + + uint64_t local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual); + + inline uint64_t map_all_pixels(const image& img, bool perceptual, bool alpha_is_significant) + { + assert(m_width == img.get_width()); + assert(m_height == img.get_height()); + + uint64_t total_error = 0; + for (uint32_t y = 0; y < img.get_height(); y++) + for (uint32_t x = 0; x < img.get_width(); x++) + total_error += map_pixel(x, y, img(x, y), perceptual, alpha_is_significant); + + return total_error; + } + + public: uint32_t m_width, m_height; pvrtc4_block_vector2D m_blocks; uint32_t m_block_width, m_block_height; diff --git a/src/deps/basis-universal/basisu_resample_filters.cpp b/src/deps/basis-universal/encoder/basisu_resample_filters.cpp similarity index 87% rename from src/deps/basis-universal/basisu_resample_filters.cpp rename to src/deps/basis-universal/encoder/basisu_resample_filters.cpp index d0b2fd77bb..597cb3f618 100644 --- a/src/deps/basis-universal/basisu_resample_filters.cpp +++ b/src/deps/basis-universal/encoder/basisu_resample_filters.cpp @@ -1,5 +1,5 @@ // basisu_resampler_filters.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -283,7 +283,7 @@ namespace basisu return sum; } - static const float KAISER_ALPHA = 4.0; + //static const float KAISER_ALPHA = 4.0; static double kaiser(double alpha, double half_width, double x) { const double ratio = (x / half_width); @@ -310,10 +310,22 @@ namespace basisu const resample_filter g_resample_filters[] = { - { "box", box_filter, BOX_FILTER_SUPPORT }, { "tent", tent_filter, TENT_FILTER_SUPPORT }, { "bell", bell_filter, BELL_SUPPORT }, { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, - { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, { "blackman", blackman_filter, BLACKMAN_SUPPORT }, { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, - { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, { "kaiser", kaiser_filter, KAISER_SUPPORT }, { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, - { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, + { "box", box_filter, BOX_FILTER_SUPPORT }, + { "tent", tent_filter, TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BELL_SUPPORT }, + { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, + { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { 
"quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, }; const int g_num_resample_filters = BASISU_ARRAY_SIZE(g_resample_filters); diff --git a/src/deps/basis-universal/basisu_resampler.cpp b/src/deps/basis-universal/encoder/basisu_resampler.cpp similarity index 99% rename from src/deps/basis-universal/basisu_resampler.cpp rename to src/deps/basis-universal/encoder/basisu_resampler.cpp index e193ce83ff..f4cedf0031 100644 --- a/src/deps/basis-universal/basisu_resampler.cpp +++ b/src/deps/basis-universal/encoder/basisu_resampler.cpp @@ -15,14 +15,6 @@ #include "basisu_resampler.h" #include "basisu_resampler_filters.h" -#ifndef max -#define max(a, b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef min -#define min(a, b) (((a) < (b)) ? (a) : (b)) -#endif - #define RESAMPLER_DEBUG 0 namespace basisu diff --git a/src/deps/basis-universal/basisu_resampler.h b/src/deps/basis-universal/encoder/basisu_resampler.h similarity index 99% rename from src/deps/basis-universal/basisu_resampler.h rename to src/deps/basis-universal/encoder/basisu_resampler.h index c3f2e05c25..dc0978caeb 100644 --- a/src/deps/basis-universal/basisu_resampler.h +++ b/src/deps/basis-universal/encoder/basisu_resampler.h @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #define BASISU_RESAMPLER_DEBUG_OPS (0) #define BASISU_RESAMPLER_DEFAULT_FILTER "lanczos4" diff --git a/src/deps/basis-universal/basisu_resampler_filters.h b/src/deps/basis-universal/encoder/basisu_resampler_filters.h similarity index 96% rename from src/deps/basis-universal/basisu_resampler_filters.h rename to src/deps/basis-universal/encoder/basisu_resampler_filters.h index 5659c5fe86..0ebb51c334 100644 --- a/src/deps/basis-universal/basisu_resampler_filters.h +++ b/src/deps/basis-universal/encoder/basisu_resampler_filters.h @@ -14,7 +14,7 @@ // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" namespace basisu { diff --git a/src/deps/basis-universal/basisu_ssim.cpp b/src/deps/basis-universal/encoder/basisu_ssim.cpp similarity index 100% rename from src/deps/basis-universal/basisu_ssim.cpp rename to src/deps/basis-universal/encoder/basisu_ssim.cpp diff --git a/src/deps/basis-universal/basisu_ssim.h b/src/deps/basis-universal/encoder/basisu_ssim.h similarity index 100% rename from src/deps/basis-universal/basisu_ssim.h rename to src/deps/basis-universal/encoder/basisu_ssim.h diff --git a/src/deps/basis-universal/encoder/basisu_uastc_enc.cpp b/src/deps/basis-universal/encoder/basisu_uastc_enc.cpp new file mode 100644 index 0000000000..ca2b325693 --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_uastc_enc.cpp @@ -0,0 +1,4189 @@ +// basisu_uastc_enc.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_uastc_enc.h" +#include "basisu_astc_decomp.h" +#include "basisu_gpu_texture.h" +#include "basisu_bc7enc.h" + +#ifdef _DEBUG +// When BASISU_VALIDATE_UASTC_ENC is 1, we pack and unpack to/from UASTC and ASTC, then validate that each codec returns the exact same results. This is slower. +#define BASISU_VALIDATE_UASTC_ENC 1 +#endif + +#define BASISU_SUPPORT_FORCE_MODE 0 + +using namespace basist; + +namespace basisu +{ + const uint32_t MAX_ENCODE_RESULTS = 512; + +#if BASISU_VALIDATE_UASTC_ENC + static void validate_func(bool condition, int line) + { + if (!condition) + { + fprintf(stderr, "basisu_uastc_enc: Internal validation failed on line %u!\n", line); + } + } + + #define VALIDATE(c) validate_func(c, __LINE__); +#else + #define VALIDATE(c) +#endif + + enum dxt_constants + { + cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, + cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U, + }; + + struct dxt1_block + { + enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; + + uint8_t m_low_color[cTotalEndpointBytes]; + uint8_t m_high_color[cTotalEndpointBytes]; + uint8_t m_selectors[cTotalSelectorBytes]; + + inline void clear() { basisu::clear_obj(*this); } + + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } + inline void set_low_color(uint16_t c) { m_low_color[0] = 
static_cast(c & 0xFF); m_low_color[1] = static_cast((c >> 8) & 0xFF); } + inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast(c & 0xFF); m_high_color[1] = static_cast((c >> 8) & 0xFF); } + inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits))& cDXT1SelectorMask; } + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + + static uint16_t pack_color(const color_rgba& color, bool scaled, uint32_t bias = 127U) + { + uint32_t r = color.r, g = color.g, b = color.b; + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + return static_cast(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U)); + } + + static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast(b | (g << 5U) | (r << 11U)); } + }; + +#define UASTC_WRITE_MODE_DESCS 0 + + static inline void uastc_write_bits(uint8_t* pBuf, uint32_t& bit_offset, uint64_t code, uint32_t codesize, const char* pDesc) + { + (void)pDesc; + +#if UASTC_WRITE_MODE_DESCS + if (pDesc) + printf("%s: %u %u\n", pDesc, bit_offset, codesize); +#endif + + assert((codesize == 64) || (code < (1ULL << codesize))); + + while (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_write = basisu::minimum(codesize, 8 - byte_bit_offset); + + pBuf[bit_offset >> 3] |= (code << byte_bit_offset); + + code >>= bits_to_write; + codesize -= bits_to_write; + bit_offset += bits_to_write; + } + } + + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1) + { + if 
((g_uastc_mode_has_alpha[result.m_uastc_mode]) && (result.m_uastc_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + assert(etc_eac_a8_blk.m_multiplier >= 1); + } + + uint8_t buf[32]; + memset(buf, 0, sizeof(buf)); + + uint32_t block_bit_offset = 0; + +#if UASTC_WRITE_MODE_DESCS + printf("**** Mode: %u\n", result.m_uastc_mode); +#endif + + uastc_write_bits(buf, block_bit_offset, g_uastc_mode_huff_codes[result.m_uastc_mode][0], g_uastc_mode_huff_codes[result.m_uastc_mode][1], "mode"); + + if (result.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.r, 8, "R"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.g, 8, "G"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.b, 8, "B"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.a, 8, "A"); + + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_selector(0, 0), 2, "ETC1S"); + + uint32_t r, g, b; + if (etc1_blk.get_diff_bit()) + etc_block::unpack_color5(r, g, b, etc1_blk.get_base5_color(), false); + else + etc_block::unpack_color4(r, g, b, etc1_blk.get_base4_color(0), false); + + uastc_write_bits(buf, block_bit_offset, r, 5, "ETC1R"); + uastc_write_bits(buf, block_bit_offset, g, 5, "ETC1G"); + uastc_write_bits(buf, block_bit_offset, b, 5, "ETC1B"); + + memcpy(&blk, buf, sizeof(blk)); + return; + } + + if (g_uastc_mode_has_bc1_hint0[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, bc1_hint0, 1, "BC1H0"); + else + { + assert(bc1_hint0 == false); + } + + if (g_uastc_mode_has_bc1_hint1[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, bc1_hint1, 1, "BC1H1"); + else + { + assert(bc1_hint1 == false); + } + + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_flip_bit(), 1, "ETC1F"); + uastc_write_bits(buf, block_bit_offset, 
etc1_blk.get_diff_bit(), 1, "ETC1D"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I0"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(1), 3, "ETC1I1"); + + if (g_uastc_mode_has_etc1_bias[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, etc1_bias, 5, "ETC1BIAS"); + else + { + assert(etc1_bias == 0); + } + + if (g_uastc_mode_has_alpha[result.m_uastc_mode]) + { + const uint32_t etc2_hints = etc_eac_a8_blk.m_table | (etc_eac_a8_blk.m_multiplier << 4); + + assert(etc2_hints > 0 && etc2_hints <= 0xFF); + uastc_write_bits(buf, block_bit_offset, etc2_hints, 8, "ETC2TM"); + } + + uint32_t subsets = 1; + switch (result.m_uastc_mode) + { + case 2: + case 4: + case 7: + case 9: + case 16: + uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 5, "PAT"); + subsets = 2; + break; + case 3: + uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 4, "PAT"); + subsets = 3; + break; + default: + break; + } + +#ifdef _DEBUG + uint32_t part_seed = 0; + switch (result.m_uastc_mode) + { + case 2: + case 4: + case 9: + case 16: + part_seed = g_astc_bc7_common_partitions2[result.m_common_pattern].m_astc; + break; + case 3: + part_seed = g_astc_bc7_common_partitions3[result.m_common_pattern].m_astc; + break; + case 7: + part_seed = g_bc7_3_astc2_common_partitions[result.m_common_pattern].m_astc2; + break; + default: + break; + } +#endif + + uint32_t total_planes = 1; + switch (result.m_uastc_mode) + { + case 6: + case 11: + case 13: + uastc_write_bits(buf, block_bit_offset, result.m_astc.m_ccs, 2, "COMPSEL"); + total_planes = 2; + break; + case 17: + // CCS field is always 3 for dual plane LA. 
+ assert(result.m_astc.m_ccs == 3); + total_planes = 2; + break; + default: + break; + } + + uint8_t weights[32]; + memcpy(weights, result.m_astc.m_weights, 16 * total_planes); + + uint8_t endpoints[18]; + memcpy(endpoints, result.m_astc.m_endpoints, sizeof(endpoints)); + + const uint32_t total_comps = g_uastc_mode_comps[result.m_uastc_mode]; + + // LLAA + // LLAA LLAA + // LLAA LLAA LLAA + // RRGGBB + // RRGGBB RRGGBB + // RRGGBB RRGGBB RRGGBB + // RRGGBBAA + // RRGGBBAA RRGGBBAA + + const uint32_t weight_bits = g_uastc_mode_weight_bits[result.m_uastc_mode]; + + const uint8_t* pPartition_pattern; + const uint8_t* pSubset_anchor_indices = basist::get_anchor_indices(subsets, result.m_uastc_mode, result.m_common_pattern, pPartition_pattern); + + for (uint32_t plane_index = 0; plane_index < total_planes; plane_index++) + { + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + const uint32_t anchor_index = pSubset_anchor_indices[subset_index]; + +#ifdef _DEBUG + if (subsets >= 2) + { + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t part_index = astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true); + if (part_index == subset_index) + { + assert(anchor_index == i); + break; + } + } + } + else + { + assert(!anchor_index); + } +#endif + + // Check anchor weight's MSB - if it's set then invert this subset's weights and swap the endpoints + if (weights[anchor_index * total_planes + plane_index] & (1 << (weight_bits - 1))) + { + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t part_index = pPartition_pattern[i]; + +#ifdef _DEBUG + if (subsets >= 2) + { + assert(part_index == (uint32_t)astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true)); + } + else + { + assert(!part_index); + } +#endif + + if (part_index == subset_index) + weights[i * total_planes + plane_index] = ((1 << weight_bits) - 1) - weights[i * total_planes + plane_index]; + } + + if (total_planes == 2) + { + for (int c = 0; c < 
(int)total_comps; c++) + { + const uint32_t comp_plane = (total_comps == 2) ? c : ((c == result.m_astc.m_ccs) ? 1 : 0); + + if (comp_plane == plane_index) + std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]); + } + } + else + { + for (uint32_t c = 0; c < total_comps; c++) + std::swap(endpoints[subset_index * total_comps * 2 + c * 2 + 0], endpoints[subset_index * total_comps * 2 + c * 2 + 1]); + } + } + } // subset_index + } // plane_index + + const uint32_t total_values = total_comps * 2 * subsets; + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[result.m_uastc_mode]; + + uint32_t bit_values[18]; + uint32_t tq_values[8]; + uint32_t total_tq_values = 0; + uint32_t tq_accum = 0; + uint32_t tq_mul = 1; + + const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0]; + const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1]; + const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2]; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t val = endpoints[i]; + + uint32_t bits = val & ((1 << ep_bits) - 1); + uint32_t tq = val >> ep_bits; + + bit_values[i] = bits; + + if (ep_trits) + { + assert(tq < 3); + tq_accum += tq * tq_mul; + tq_mul *= 3; + if (tq_mul == 243) + { + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + else if (ep_quints) + { + assert(tq < 5); + tq_accum += tq * tq_mul; + tq_mul *= 5; + if (tq_mul == 125) + { + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + } + + uint32_t total_endpoint_bits = 0; + + for (uint32_t i = 0; i < total_tq_values; i++) + { + const uint32_t num_bits = ep_trits ? 
8 : 7; + uastc_write_bits(buf, block_bit_offset, tq_values[i], num_bits, "ETQ"); + total_endpoint_bits += num_bits; + } + + if (tq_mul > 1) + { + uint32_t num_bits; + if (ep_trits) + { + if (tq_mul == 3) + num_bits = 2; + else if (tq_mul == 9) + num_bits = 4; + else if (tq_mul == 27) + num_bits = 5; + else //if (tq_mul == 81) + num_bits = 7; + } + else + { + if (tq_mul == 5) + num_bits = 3; + else //if (tq_mul == 25) + num_bits = 5; + } + uastc_write_bits(buf, block_bit_offset, tq_accum, num_bits, "ETQ"); + total_endpoint_bits += num_bits; + } + + for (uint32_t i = 0; i < total_values; i++) + { + uastc_write_bits(buf, block_bit_offset, bit_values[i], ep_bits, "EBITS"); + total_endpoint_bits += ep_bits; + } + +#if UASTC_WRITE_MODE_DESCS + uint32_t weight_start = block_bit_offset; +#endif + + uint32_t total_weight_bits = 0; + const uint32_t plane_shift = (total_planes == 2) ? 1 : 0; + for (uint32_t i = 0; i < 16 * total_planes; i++) + { + uint32_t numbits = weight_bits; + for (uint32_t s = 0; s < subsets; s++) + { + if (pSubset_anchor_indices[s] == (i >> plane_shift)) + { + numbits--; + break; + } + } + + uastc_write_bits(buf, block_bit_offset, weights[i], numbits, nullptr); + + total_weight_bits += numbits; + } + +#if UASTC_WRITE_MODE_DESCS + printf("WEIGHTS: %u %u\n", weight_start, total_weight_bits); +#endif + + assert(block_bit_offset <= 128); + memcpy(&blk, buf, sizeof(blk)); + +#if UASTC_WRITE_MODE_DESCS + printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits); +#endif + } + + // MODE 0 + // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB + // 18. 
DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB + static void astc_mode0_or_18(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, const uint8_t *pForce_selectors = nullptr) + { + const uint32_t endpoint_range = (mode == 18) ? 11 : 19; + const uint32_t weight_range = (mode == 18) ? 11 : 8; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = (mode == 18) ? 32 : 16; + ccell_params.m_pSelector_weights = (mode == 18) ? g_astc_weights5 : g_astc_weights4; + ccell_params.m_pSelector_weightsx = (mode == 18) ? (const bc7enc_vec4F*)g_astc_weights5x : (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_pForce_selectors = pForce_selectors; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range;// (mode == 18) ? 
11 : 8; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 8; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + if (pForce_selectors == nullptr) + { + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = ((mode == 18) ? 
31 : 15) - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 1 + // 1-subset, 2-bit indices, 8-bit endpoints, BC7 mode 3 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) MODE3 or MODE5 RGB + static void astc_mode1(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 4; + ccell_params.m_pSelector_weights = g_bc7_weights2; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params.m_astc_endpoint_range = 20; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 8; + + astc_results.m_endpoints[0] = 
ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + const uint32_t range = 20; + + bool invert = false; + + int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 1; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + static uint32_t estimate_partition2(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], const uint32_t weights[4]) + { + assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64); + + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + + 
for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16]; + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + return best_common_pattern; + } + + // MODE 2 + // 2-subset, 3-bit indices, 4-bit endpoints, BC7 mode 1 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 8 (16) MODE1 + static void astc_mode2(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + + if (estimate_partition) + { + const uint32_t weights[4] = { 1, 1, 1, 1 }; + first_common_pattern = estimate_partition2(8, 3, g_bc7_weights3, block, weights); + last_common_pattern = first_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t 
x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 8; + ccell_params[part].m_pSelector_weights = g_bc7_weights3; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params[part].m_astc_endpoint_range = 8; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + { + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 5; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t p0 = 0; + uint32_t p1 = 1; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(p0, p1); + + astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + 
astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + const uint32_t range = 8; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert[0] = true; + } + + astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5 + 
6]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]); + std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]); + std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 2; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + } + + } // common_pattern + } + + // MODE 3 + // 3-subsets, 2-bit indices, [0,11] endpoints, BC7 mode 2 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 3, CEM: 8 (RGB Direct ), EndpointRange: 7 (12) MODE2 + static void astc_mode3(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS3; + + if (estimate_partition) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS3; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7; + + const uint8_t* pPartition = 
&g_bc7_partition3[bc7_pattern * 16]; + + color_quad_u8 subset_colors[3][16]; + uint32_t subset_total_colors[3] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 3) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + first_common_pattern = best_common_pattern; + last_common_pattern = best_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t endpoint_range = 7; + + const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7; + + color_rgba part_pixels[3][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[3] = { 0, 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[bc7_part]; + part_pixels[bc7_part][num_part_pixels[bc7_part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[3]; + color_cell_compressor_results ccell_results[3]; + uint8_t ccell_result_selectors[3][16]; + uint8_t ccell_result_selectors_temp[3][16]; + + uint64_t total_part_err = 0; + for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++) + { + memset(&ccell_params[bc7_part], 0, sizeof(ccell_params[bc7_part])); + + ccell_params[bc7_part].m_num_pixels = num_part_pixels[bc7_part]; + ccell_params[bc7_part].m_pPixels = (color_quad_u8*)&part_pixels[bc7_part][0]; + ccell_params[bc7_part].m_num_selector_weights = 4; + 
ccell_params[bc7_part].m_pSelector_weights = g_bc7_weights2; + ccell_params[bc7_part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[bc7_part].m_astc_endpoint_range = endpoint_range; + ccell_params[bc7_part].m_weights[0] = 1; + ccell_params[bc7_part].m_weights[1] = 1; + ccell_params[bc7_part].m_weights[2] = 1; + ccell_params[bc7_part].m_weights[3] = 1; + + memset(&ccell_results[bc7_part], 0, sizeof(ccell_results[bc7_part])); + ccell_results[bc7_part].m_pSelectors = &ccell_result_selectors[bc7_part][0]; + ccell_results[bc7_part].m_pSelectors_temp = &ccell_result_selectors_temp[bc7_part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[bc7_part], &ccell_results[bc7_part], &comp_params); + total_part_err += part_err; + } // part + + { + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 3; + astc_results.m_partition_seed = g_astc_bc7_common_partitions3[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t astc_to_bc7_part[3]; // converts ASTC to BC7 partition index + const uint32_t perm = g_astc_bc7_common_partitions3[common_pattern].m_astc_to_bc7_perm; + astc_to_bc7_part[0] = g_astc_to_bc7_partition_index_perm_tables[perm][0]; + astc_to_bc7_part[1] = g_astc_to_bc7_partition_index_perm_tables[perm][1]; + astc_to_bc7_part[2] = g_astc_to_bc7_partition_index_perm_tables[perm][2]; + + bool invert_astc_part[3] = { false, false, false }; + + for (uint32_t astc_part = 0; astc_part < 3; astc_part++) + { + uint8_t* pEndpoints = &astc_results.m_endpoints[6 * astc_part]; + + pEndpoints[0] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[0]; + pEndpoints[1] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[0]; + pEndpoints[2] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[1]; + pEndpoints[3] = 
ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[1]; + pEndpoints[4] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[2]; + pEndpoints[5] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[2]; + + int s0 = g_astc_unquant[endpoint_range][pEndpoints[0]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[2]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][pEndpoints[1]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[3]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(pEndpoints[0], pEndpoints[1]); + std::swap(pEndpoints[2], pEndpoints[3]); + std::swap(pEndpoints[4], pEndpoints[5]); + invert_astc_part[astc_part] = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = 0; + for (uint32_t i = 0; i < 3; i++) + { + if (astc_to_bc7_part[i] == bc7_part) + { + astc_part = i; + break; + } + } + + if (invert_astc_part[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 3; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } + + } // common_pattern + } + + // MODE 4 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE3 + static void astc_mode4(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + 
//const uint32_t weight_range = 2; + const uint32_t endpoint_range = 12; + + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + + if (estimate_partition) + { + const uint32_t weights[4] = { 1, 1, 1, 1 }; + first_common_pattern = estimate_partition2(4, 3, g_bc7_weights2, block, weights); + last_common_pattern = first_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 4; + ccell_params[part].m_pSelector_weights = g_bc7_weights2; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[part].m_astc_endpoint_range = endpoint_range; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + 
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t p0 = 0; + uint32_t p1 = 1; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(p0, p1); + + astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert[0] = true; + } + + astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + 
astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]); + std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]); + std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 4; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } // common_pattern + } + + // 
MODE 5 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset) + static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 5; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 8; + ccell_params.m_pSelector_weights = g_bc7_weights3; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = false; + blk.m_weight_range = weight_range; + + blk.m_ccs = 0; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 8; + + blk.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + int s0 = 
g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + blk.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + blk.m_weights[x + y * 4] = 7 - blk.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 5; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 6 + // DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 18 (160) BC7 MODE5 + static void astc_mode6(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + for (uint32_t rot_comp = 0; rot_comp < 3; rot_comp++) + { + const uint32_t weight_range = 2; + const uint32_t endpoint_range = 18; + + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = 255; + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + 
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 4; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights2; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params); + + color_cell_compressor_params ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 4; + ccell_params_a.m_pSelector_weights = g_bc7_weights2; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &comp_params) / 3; + + uint64_t total_err = part_err_rgb + part_err_a; + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + 
blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 8; + + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 3 - rgb_index; + a_index = 3 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 6; + pResults[total_results].m_common_pattern = 
0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE 7 - 2 subset ASTC, 3 subset BC7 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE2 + static void astc_mode7(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; + + if (estimate_partition) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; common_pattern++) + { + const uint8_t* pPartition = &g_bc7_3_astc2_patterns2[common_pattern][0]; + +#ifdef _DEBUG + const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2; + const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73; + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); + assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); + assert(astc_part == pPartition[x + y * 4]); + } + } +#endif + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, 
subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + first_common_pattern = best_common_pattern; + last_common_pattern = best_common_pattern + 1; + } + + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = 12; + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2; + const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73; + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); +#ifdef _DEBUG + assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); +#endif + + part_pixel_index[y][x] = num_part_pixels[astc_part]; + part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 4; + ccell_params[part].m_pSelector_weights = g_bc7_weights2; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + 
ccell_params[part].m_astc_endpoint_range = endpoint_range; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = false; + blk.m_weight_range = 2; + + blk.m_ccs = 0; + blk.m_subsets = 2; + blk.m_partition_seed = astc_pattern; + blk.m_cem = 8; + + const uint32_t p0 = 0; + const uint32_t p1 = 1; + + blk.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert[0] = true; + } + + blk.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + 
blk.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(blk.m_endpoints[0 + 6], blk.m_endpoints[1 + 6]); + std::swap(blk.m_endpoints[2 + 6], blk.m_endpoints[3 + 6]); + std::swap(blk.m_endpoints[4 + 6], blk.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); + + blk.m_weights[x + y * 4] = ccell_result_selectors[astc_part][part_pixel_index[y][x]]; + + if (invert[astc_part]) + blk.m_weights[x + y * 4] = 3 - blk.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 7; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } // common_pattern + } + + static void estimate_partition2_list(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], uint32_t* pParts, uint32_t max_parts, const uint32_t weights[4]) + { + assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 
64); + + const uint32_t MAX_PARTS = 8; + assert(max_parts <= MAX_PARTS); + + uint64_t part_error[MAX_PARTS]; + memset(part_error, 0xFF, sizeof(part_error)); + memset(pParts, 0, sizeof(pParts[0]) * max_parts); + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16]; + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; subset < 2; subset++) + total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], UINT64_MAX, weights); + + for (int i = 0; i < (int)max_parts; i++) + { + if (total_subset_err < part_error[i]) + { + for (int j = max_parts - 1; j > i; --j) + { + pParts[j] = pParts[j - 1]; + part_error[j] = part_error[j - 1]; + } + + pParts[i] = common_pattern; + part_error[i] = total_subset_err; + + break; + } + } + } + +#ifdef _DEBUG + for (uint32_t i = 0; i < max_parts - 1; i++) + { + assert(part_error[i] <= part_error[i + 1]); + } +#endif + } + + // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7 + // 16. 
DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7 + static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size) + { + assert(mode == 9 || mode == 16); + + const color_rgba* pBlock = &source_block[0][0]; + + color_rgba temp_block[16]; + if (mode == 16) + { + for (uint32_t i = 0; i < 16; i++) + { + if (mode == 16) + { + assert(pBlock[i].r == pBlock[i].g); + assert(pBlock[i].r == pBlock[i].b); + } + + const uint32_t l = pBlock[i].r; + const uint32_t a = pBlock[i].a; + + // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally. + temp_block[i].set_noclamp_rgba(l, 0, 0, a); + } + + pBlock = temp_block; + } + + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = (mode == 16) ? 20 : 8; + + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + bool use_part_list = false; + + const uint32_t MAX_PARTS = 8; + uint32_t parts[MAX_PARTS]; + + if (estimate_partition_list_size == 1) + { + first_common_pattern = estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights); + last_common_pattern = first_common_pattern + 1; + } + else if (estimate_partition_list_size > 0) + { + assert(estimate_partition_list_size <= MAX_PARTS); + estimate_partition_list_size = basisu::minimum(estimate_partition_list_size, MAX_PARTS); + + estimate_partition2_list(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, parts, estimate_partition_list_size, weights); + + first_common_pattern = 0; + last_common_pattern = estimate_partition_list_size; + use_part_list = true; + +#ifdef _DEBUG + assert(parts[0] == estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights)); +#endif + } + + for (uint32_t common_pattern_iter = 
first_common_pattern; common_pattern_iter < last_common_pattern; common_pattern_iter++) + { + const uint32_t common_pattern = use_part_list ? parts[common_pattern_iter] : common_pattern_iter; + + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = pBlock[y * 4 + x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_err = 0; + for (uint32_t subset = 0; subset < 2; subset++) + { + memset(&ccell_params[subset], 0, sizeof(ccell_params[subset])); + + ccell_params[subset].m_num_pixels = num_part_pixels[subset]; + ccell_params[subset].m_pPixels = (color_quad_u8*)&part_pixels[subset][0]; + ccell_params[subset].m_num_selector_weights = 4; + ccell_params[subset].m_pSelector_weights = g_bc7_weights2; + ccell_params[subset].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[subset].m_astc_endpoint_range = endpoint_range; + ccell_params[subset].m_weights[0] = weights[0]; + ccell_params[subset].m_weights[1] = weights[1]; + ccell_params[subset].m_weights[2] = weights[2]; + ccell_params[subset].m_weights[3] = weights[3]; + ccell_params[subset].m_has_alpha = true; + + memset(&ccell_results[subset], 0, sizeof(ccell_results[subset])); + ccell_results[subset].m_pSelectors = &ccell_result_selectors[subset][0]; + ccell_results[subset].m_pSelectors_temp = &ccell_result_selectors_temp[subset][0]; + + uint64_t subset_err = color_cell_compression(255, &ccell_params[subset], &ccell_results[subset], &comp_params); + + if 
(mode == 16) + { + color_rgba colors[4]; + for (uint32_t c = 0; c < 4; c++) + { + colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + colors[3].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + } + + for (uint32_t i = 1; i < 4 - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[3].m_comps[c], g_bc7_weights2[i], false); + + for (uint32_t p = 0; p < ccell_params[subset].m_num_pixels; p++) + { + color_rgba orig_pix(part_pixels[subset][p]); + orig_pix.g = orig_pix.r; + orig_pix.b = orig_pix.r; + total_err += color_distance_la(orig_pix, colors[ccell_result_selectors[subset][p]]); + } + } + else + { + total_err += subset_err; + } + } // subset + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = (mode == 16) ? 
4 : 12; + + uint32_t part[2] = { 0, 1 }; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(part[0], part[1]); + + bool invert[2] = { false, false }; + + for (uint32_t p = 0; p < 2; p++) + { + if (mode == 16) + { + astc_results.m_endpoints[p * 4 + 0] = ccell_results[part[p]].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[p * 4 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[0]; + + astc_results.m_endpoints[p * 4 + 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[p * 4 + 3] = ccell_results[part[p]].m_astc_high_endpoint.m_c[3]; + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + astc_results.m_endpoints[p * 8 + c * 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[c]; + astc_results.m_endpoints[p * 8 + c * 2 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[c]; + } + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 0]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 2]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 4]].m_unquant; + + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 1]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 3]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 5]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[p * 8 + 0], astc_results.m_endpoints[p * 8 + 1]); + std::swap(astc_results.m_endpoints[p * 8 + 2], astc_results.m_endpoints[p * 8 + 3]); + std::swap(astc_results.m_endpoints[p * 8 + 4], astc_results.m_endpoints[p * 8 + 5]); + std::swap(astc_results.m_endpoints[p * 8 + 6], astc_results.m_endpoints[p * 8 + 7]); + invert[p] = true; + } + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = 
ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + + } // common_pattern + } + + // MODE 10 + // DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 13 (48) MODE6 + static void astc_mode10(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 8; + const uint32_t endpoint_range = 13; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 16; + ccell_params.m_pSelector_weights = g_astc_weights4; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + 
// ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 12; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 15 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < 
MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 10; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // 11. DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct), EndpointRange: 13 (48) MODE5 + // 17. DualPlane: 1, WeightRange : 2 (4), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE5 + static void astc_mode11_or_17(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + assert((mode == 11) || (mode == 17)); + + const uint32_t weight_range = 2; + const uint32_t endpoint_range = (mode == 17) ? 20 : 13; + + bc7enc_compress_block_params local_comp_params(comp_params); + local_comp_params.m_perceptual = false; + local_comp_params.m_weights[0] = 1; + local_comp_params.m_weights[1] = 1; + local_comp_params.m_weights[2] = 1; + local_comp_params.m_weights[3] = 1; + + const uint32_t last_rot_comp = (mode == 17) ? 
1 : 4; + + for (uint32_t rot_comp = 0; rot_comp < last_rot_comp; rot_comp++) + { + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + if (mode == 17) + { + assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[1]); + assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[2]); + + block_a[i].m_c[0] = block_rgb[i].m_c[3]; + block_a[i].m_c[1] = block_rgb[i].m_c[3]; + block_a[i].m_c[2] = block_rgb[i].m_c[3]; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[1] = block_rgb[i].m_c[0]; + block_rgb[i].m_c[2] = block_rgb[i].m_c[0]; + block_rgb[i].m_c[3] = 255; + } + else + { + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3]; + block_rgb[i].m_c[3] = 255; + } + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 4; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights2; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params); + + color_cell_compressor_params 
ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 4; + ccell_params_a.m_pSelector_weights = g_bc7_weights2; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3; + + uint64_t total_err = (mode == 17) ? ((part_err_rgb / 3) + part_err_a) : (part_err_rgb + part_err_a); + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = (mode == 17) ? 3 : rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = (mode == 17) ? 
4 : 12; + + bool invert = false; + + if (mode == 17) + { + assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[1]); + assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[2]); + + assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[1]); + assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[2]); + + blk.m_endpoints[0] = ccell_results_rgb.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results_rgb.m_astc_high_endpoint.m_c[0]; + + blk.m_endpoints[2] = ccell_results_a.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[3] = ccell_results_a.m_astc_high_endpoint.m_c[0]; + } + else + { + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? 
ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + if (rot_comp == 3) + { + blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0]; + } + else + { + blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp]; + blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp]; + } + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + std::swap(blk.m_endpoints[6], blk.m_endpoints[7]); + invert = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 3 - rgb_index; + a_index = 3 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE 12 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 19 (192) MODE6 + static void astc_mode12(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& 
total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 5; + const uint32_t endpoint_range = 19; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 8; + ccell_params.m_pSelector_weights = g_bc7_weights3; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 12; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[7] = 
ccell_results.m_astc_high_endpoint.m_c[3]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 12; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // 13. 
DualPlane: 1, WeightRange: 0 (2), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE5 + static void astc_mode13(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + bc7enc_compress_block_params local_comp_params(comp_params); + local_comp_params.m_perceptual = false; + local_comp_params.m_weights[0] = 1; + local_comp_params.m_weights[1] = 1; + local_comp_params.m_weights[2] = 1; + local_comp_params.m_weights[3] = 1; + + for (uint32_t rot_comp = 0; rot_comp < 4; rot_comp++) + { + const uint32_t weight_range = 0; + const uint32_t endpoint_range = 20; + + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3]; + block_rgb[i].m_c[3] = 255; + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 2; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights1; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + 
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params); + + color_cell_compressor_params ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 2; + ccell_params_a.m_pSelector_weights = g_bc7_weights1; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3; + + uint64_t total_err = part_err_rgb + part_err_a; + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 12; + + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? 
ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + if (rot_comp == 3) + { + blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0]; + } + else + { + blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp]; + blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp]; + } + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + std::swap(blk.m_endpoints[6], blk.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 1 - rgb_index; + a_index = 1 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 13; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE14 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE6 + static void astc_mode14(const color_rgba block[4][4], uastc_encode_results* 
pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 2; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 4; + ccell_params.m_pSelector_weights = g_bc7_weights2; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 12; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3]; + 
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 14; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 15 + // DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE6 + static void astc_mode15(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 8; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + color_rgba temp_block[16]; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t l = ((const 
color_rgba*)block)[i].r; + const uint32_t a = ((const color_rgba*)block)[i].a; + + // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally. + temp_block[i].set_noclamp_rgba(l, 0, 0, a); + } + + ccell_params.m_num_pixels = 16; + //ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_pPixels = (color_quad_u8*)temp_block; + ccell_params.m_num_selector_weights = 16; + ccell_params.m_pSelector_weights = g_astc_weights4; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 4; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[3]; + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + color_rgba colors[16]; + for (uint32_t c = 0; c < 4; c++) + { + colors[0].m_comps[c] = 
g_astc_unquant[endpoint_range][ccell_results.m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + colors[15].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + } + + for (uint32_t i = 1; i < 16 - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[15].m_comps[c], g_astc_weights4[i], false); + + uint64_t total_err = 0; + for (uint32_t p = 0; p < 16; p++) + total_err += color_distance_la(((const color_rgba*)block)[p], colors[ccell_result_selectors[p]]); + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 15; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } + + static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err) + { + uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int dr = (int)block[y][x].m_comps[0] - (int)decoded_block[y][x].m_comps[0]; + const int dg = (int)block[y][x].m_comps[1] - (int)decoded_block[y][x].m_comps[1]; + const int db = (int)block[y][x].m_comps[2] - (int)decoded_block[y][x].m_comps[2]; + const int da = (int)block[y][x].m_comps[3] - (int)decoded_block[y][x].m_comps[3]; + + total_err_r += dr * dr; + total_err_g += dg * dg; + total_err_b += db * db; + total_err_a += da * da; + } + } + + total_la_err = total_err_r + total_err_a; + total_rgb_err = total_err_r + total_err_g + total_err_b; + total_rgba_err = total_rgb_err + total_err_a; + } + + static void compute_bc1_hints(bool &bc1_hint0, bool &bc1_hint1, const uastc_encode_results &best_results, const color_rgba block[4][4], const 
color_rgba decoded_uastc_block[4][4]) + { + const uint32_t best_mode = best_results.m_uastc_mode; + const bool perceptual = false; + + bc1_hint0 = false; + bc1_hint1 = false; + + if (best_mode == UASTC_MODE_INDEX_SOLID_COLOR) + return; + + if (!g_uastc_mode_has_bc1_hint0[best_mode] && !g_uastc_mode_has_bc1_hint1[best_mode]) + return; + + color_rgba tblock_bc1[4][4]; + dxt1_block tbc1_block[8]; + basist::encode_bc1(tbc1_block, (const uint8_t*)&decoded_uastc_block[0][0], 0); + unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0]); + + color_rgba tblock_hint0_bc1[4][4]; + color_rgba tblock_hint1_bc1[4][4]; + + etc_block etc1_blk; + memset(&etc1_blk, 0, sizeof(etc1_blk)); + + eac_a8_block etc2_blk; + memset(&etc2_blk, 0, sizeof(etc2_blk)); + etc2_blk.m_multiplier = 1; + + // Pack to UASTC, then unpack, because the endpoints may be swapped. + + uastc_block temp_ublock; + pack_uastc(temp_ublock, best_results, etc1_blk, 0, etc2_blk, false, false); + + unpacked_uastc_block temp_ublock_unpacked; + unpack_uastc(temp_ublock, temp_ublock_unpacked, false); + + unpacked_uastc_block ublock; + memset(&ublock, 0, sizeof(ublock)); + ublock.m_mode = best_results.m_uastc_mode; + ublock.m_common_pattern = best_results.m_common_pattern; + ublock.m_astc = temp_ublock_unpacked.m_astc; + + dxt1_block b; + + // HINT1 + if (!g_uastc_mode_has_bc1_hint1[best_mode]) + { + memset(tblock_hint1_bc1, 0, sizeof(tblock_hint1_bc1)); + } + else + { + transcode_uastc_to_bc1_hint1(ublock, (color32 (*)[4]) decoded_uastc_block, &b, false); + + unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0]); + } + + // HINT0 + if (!g_uastc_mode_has_bc1_hint0[best_mode]) + { + memset(tblock_hint0_bc1, 0, sizeof(tblock_hint0_bc1)); + } + else + { + transcode_uastc_to_bc1_hint0(ublock, &b); + + unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]); + } + + // Compute block errors + uint64_t total_t_err = 0, total_hint0_err = 0, total_hint1_err = 0; + for (uint32_t y = 0; y < 4; y++) + { + 
for (uint32_t x = 0; x < 4; x++) + { + total_t_err += color_distance(perceptual, block[y][x], tblock_bc1[y][x], false); + total_hint0_err += color_distance(perceptual, block[y][x], tblock_hint0_bc1[y][x], false); + total_hint1_err += color_distance(perceptual, block[y][x], tblock_hint1_bc1[y][x], false); + } + } + + const float t_err = sqrtf((float)total_t_err); + const float t_err_hint0 = sqrtf((float)total_hint0_err); + const float t_err_hint1 = sqrtf((float)total_hint1_err); + + const float err_thresh0 = 1.075f; + const float err_thresh1 = 1.075f; + + if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0)) + bc1_hint0 = true; + + if ((g_uastc_mode_has_bc1_hint1[best_mode]) && (t_err_hint1 <= t_err * err_thresh1)) + bc1_hint1 = true; + } + + struct ycbcr + { + int32_t m_y; + int32_t m_cb; + int32_t m_cr; + }; + + static inline void rgb_to_y_cb_cr(const color_rgba& c, ycbcr& dst) + { + const int y = c.r * 54 + c.g * 183 + c.b * 19; + dst.m_y = y; + dst.m_cb = (c.b << 8) - y; + dst.m_cr = (c.r << 8) - y; + } + + static inline uint64_t color_diff(const ycbcr& a, const ycbcr& b) + { + const int y_delta = a.m_y - b.m_y; + const int cb_delta = a.m_cb - b.m_cb; + const int cr_delta = a.m_cr - b.m_cr; + return ((int64_t)y_delta * y_delta * 4) + ((int64_t)cr_delta * cr_delta) + ((int64_t)cb_delta * cb_delta); + } + + static inline int gray_distance2(const color_rgba& c, int r, int g, int b) + { + int gray_dist = (((int)c[0] - r) + ((int)c[1] - g) + ((int)c[2] - b) + 1) / 3; + + int gray_point_r = clamp255(r + gray_dist); + int gray_point_g = clamp255(g + gray_dist); + int gray_point_b = clamp255(b + gray_dist); + + int dist_to_gray_point_r = c[0] - gray_point_r; + int dist_to_gray_point_g = c[1] - gray_point_g; + int dist_to_gray_point_b = c[2] - gray_point_b; + + return (dist_to_gray_point_r * dist_to_gray_point_r) + (dist_to_gray_point_g * dist_to_gray_point_g) + (dist_to_gray_point_b * dist_to_gray_point_b); + } + + static bool 
pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels) + { + int sums[3][2][2]; + +#define GET_XY(x, y, c) pSrc_pixels[(x) + ((y) * 4)][c] + + for (uint32_t c = 0; c < 3; c++) + { + sums[c][0][0] = GET_XY(0, 0, c) + GET_XY(0, 1, c) + GET_XY(1, 0, c) + GET_XY(1, 1, c); + sums[c][1][0] = GET_XY(2, 0, c) + GET_XY(2, 1, c) + GET_XY(3, 0, c) + GET_XY(3, 1, c); + sums[c][0][1] = GET_XY(0, 2, c) + GET_XY(0, 3, c) + GET_XY(1, 2, c) + GET_XY(1, 3, c); + sums[c][1][1] = GET_XY(2, 2, c) + GET_XY(2, 3, c) + GET_XY(3, 2, c) + GET_XY(3, 3, c); + } + + int upper_avg[3], lower_avg[3], left_avg[3], right_avg[3]; + for (uint32_t c = 0; c < 3; c++) + { + upper_avg[c] = (sums[c][0][0] + sums[c][1][0] + 4) / 8; + lower_avg[c] = (sums[c][0][1] + sums[c][1][1] + 4) / 8; + left_avg[c] = (sums[c][0][0] + sums[c][0][1] + 4) / 8; + right_avg[c] = (sums[c][1][0] + sums[c][1][1] + 4) / 8; + } + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a[0], a[1], a[2]) + + int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + int upper_lower_sum = upper_gray_dist + lower_gray_dist; + int left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + static void compute_etc1_hints(etc_block& best_etc1_blk, uint32_t& best_etc1_bias, const uastc_encode_results& best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4], int level, uint32_t flags) + { + best_etc1_bias = 0; + + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_etc1_block_solid_color(best_etc1_blk, &best_results.m_solid_color.m_comps[0]); + return; + } + + const bool faster_etc1 = (flags 
& cPackUASTCETC1FasterHints) != 0; + const bool fastest_etc1 = (flags & cPackUASTCETC1FastestHints) != 0; + + const bool has_bias = g_uastc_mode_has_etc1_bias[best_results.m_uastc_mode]; + + // 0 should be at the top, but we need 13 first because it represents bias (0,0,0). + const uint8_t s_sorted_bias_modes[32] = { 13, 0, 22, 29, 27, 12, 26, 9, 30, 31, 8, 10, 25, 2, 23, 5, 15, 7, 3, 11, 6, 17, 28, 18, 1, 19, 20, 21, 24, 4, 14, 16 }; + + uint32_t last_bias = 1; + bool use_faster_bias_mode_table = false; + const bool flip_estimate = (level <= cPackUASTCLevelFaster) || (faster_etc1) || (fastest_etc1); + if (has_bias) + { + switch (level) + { + case cPackUASTCLevelFastest: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 1 : 2); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelFaster: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 3 : 5); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelDefault: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 10 : 20); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelSlower: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 
16 : 32); + use_faster_bias_mode_table = true; + break; + } + default: + { + last_bias = 32; + break; + } + } + } + + memset(&best_etc1_blk, 0, sizeof(best_etc1_blk)); + uint64_t best_err = UINT64_MAX; + + etc_block trial_block; + memset(&trial_block, 0, sizeof(trial_block)); + + ycbcr block_ycbcr[4][4], decoded_uastc_block_ycbcr[4][4]; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + rgb_to_y_cb_cr(block[y][x], block_ycbcr[y][x]); + rgb_to_y_cb_cr(decoded_uastc_block[y][x], decoded_uastc_block_ycbcr[y][x]); + } + } + + uint32_t first_flip = 0, last_flip = 2; + uint32_t first_individ = 0, last_individ = 2; + + if (flags & cPackUASTCETC1DisableFlipAndIndividual) + { + last_flip = 1; + last_individ = 1; + } + else if (flip_estimate) + { + if (pack_etc1_estimate_flipped(&decoded_uastc_block[0][0])) + first_flip = 1; + last_flip = first_flip + 1; + } + + for (uint32_t flip = first_flip; flip < last_flip; flip++) + { + trial_block.set_flip_bit(flip != 0); + + for (uint32_t individ = first_individ; individ < last_individ; individ++) + { + const uint32_t mul = individ ? 
15 : 31; + + trial_block.set_diff_bit(individ == 0); + + color_rgba unbiased_block_colors[2]; + + int min_r[2] = { 255, 255 }, min_g[2] = { 255, 255 }, min_b[2] = { 255, 255 }, max_r[2] = { 0, 0 }, max_g[2] = { 0, 0 }, max_b[2] = { 0, 0 }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint32_t avg_color[3]; + memset(avg_color, 0, sizeof(avg_color)); + + for (uint32_t j = 0; j < 8; j++) + { + const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j]; + const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x]; + + avg_color[0] += p.r; + avg_color[1] += p.g; + avg_color[2] += p.b; + + min_r[subset] = basisu::minimum(min_r[subset], p.r); + min_g[subset] = basisu::minimum(min_g[subset], p.g); + min_b[subset] = basisu::minimum(min_b[subset], p.b); + + max_r[subset] = basisu::maximum(max_r[subset], p.r); + max_g[subset] = basisu::maximum(max_g[subset], p.g); + max_b[subset] = basisu::maximum(max_b[subset], p.b); + } // j + + unbiased_block_colors[subset][0] = (uint8_t)((avg_color[0] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][3] = 0; + + } // subset + + for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++) + { + const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter; + + color_rgba block_colors[2]; + for (uint32_t subset = 0; subset < 2; subset++) + block_colors[subset] = has_bias ? 
apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset]; + + if (individ) + trial_block.set_block_color4(block_colors[0], block_colors[1]); + else + trial_block.set_block_color5_clamp(block_colors[0], block_colors[1]); + + uint32_t range[2]; + for (uint32_t subset = 0; subset < 2; subset++) + { + const color_rgba base_c(trial_block.get_block_color(subset, true)); + + const int pos_r = iabs(max_r[subset] - base_c.r); + const int neg_r = iabs(base_c.r - min_r[subset]); + + const int pos_g = iabs(max_g[subset] - base_c.g); + const int neg_g = iabs(base_c.g - min_g[subset]); + + const int pos_b = iabs(max_b[subset] - base_c.b); + const int neg_b = iabs(base_c.b - min_b[subset]); + + range[subset] = maximum(maximum(pos_r, neg_r, pos_g, neg_g), pos_b, neg_b); + } + + uint32_t best_inten_table[2] = { 0, 0 }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint64_t best_subset_err = UINT64_MAX; + + const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 
4 : 2)); + + for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++) + { + trial_block.set_inten_table(subset, inten_table); + + color_rgba color_table[4]; + trial_block.get_block_colors(color_table, subset); + + ycbcr color_table_ycbcr[4]; + for (uint32_t i = 0; i < 4; i++) + rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]); + + uint64_t total_error = 0; + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][0]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][1]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][2]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][3]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + if (total_error >= best_subset_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 0]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 1]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), 
color_diff(color_table_ycbcr[3], c)); + } + } + if (total_error >= best_subset_err) + break; + } + + if (total_error < best_subset_err) + { + best_subset_err = total_error; + best_inten_table[subset] = inten_table; + } + + } // inten_table + + } // subset + + trial_block.set_inten_table(0, best_inten_table[0]); + trial_block.set_inten_table(1, best_inten_table[1]); + + // Compute error against the ORIGINAL block. + uint64_t err = 0; + + for (uint32_t subset = 0; subset < 2; subset++) + { + color_rgba color_table[4]; + trial_block.get_block_colors(color_table, subset); + + ycbcr color_table_ycbcr[4]; + for (uint32_t i = 0; i < 4; i++) + rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]); + + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][x]; + const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3); + + const uint32_t best_index = (uint32_t)best_index_err & 3; + err += color_diff(block_ycbcr[subset * 2 + y][x], color_table_ycbcr[best_index]); + } + if (err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + x]; + const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3); + + const uint32_t best_index = (uint32_t)best_index_err & 3; + err += color_diff(block_ycbcr[y][subset * 2 + x], color_table_ycbcr[best_index]); + } + if (err >= best_err) + break; + } + } + + } // subset + + if (err < best_err) + { + best_err = err; + + best_etc1_blk = trial_block; + best_etc1_bias = bias; + } + + } // bias_iter + + } 
// individ + + } // flip + } + + struct uastc_pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + }; + + static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + assert(num_pixels <= 16); + + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t a = pPixels[i]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + results.m_base = min_alpha; + results.m_table = 13; + results.m_multiplier = 1; + return 0; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + uint64_t best_err = UINT64_MAX; + + for (uint32_t table = 0; table < 16; table++) + { + if ((table_mask & (1U << table)) == 0) + continue; + + const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + const int base_min = clamp255(center - base_search_rad); + const int base_max = clamp255(center + base_search_rad); + + const int mul = (int)roundf(alpha_range / range); + const int mul_low = clamp(mul - mul_search_rad, 1, 15); + const int mul_high = clamp(mul + mul_search_rad, 1, 15); + + for (int base = base_min; base <= base_max; base++) + { + for (int multiplier = mul_low; multiplier <= mul_high; multiplier++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + //uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base); + + uint32_t err = iabs(a - v); + if (err < best_s_err) + { + best_s_err = err; + 
//best_s = s; + } + } + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base; + results.m_multiplier = multiplier; + results.m_table = table; + if (!best_err) + return best_err; + } + + } // multiplier + + } // base + + } // table + + return best_err; + } + + const int32_t DEFAULT_BC7_ERROR_WEIGHT = 50; + const float UASTC_ERROR_THRESH = 1.3f; + + // TODO: This is a quick hack to favor certain modes when we know we'll be followed up with an RDO postprocess. + static inline float get_uastc_mode_weight(uint32_t mode) + { + const float FAVORED_MODE_WEIGHT = .8f; + + switch (mode) + { + case 0: + case 10: + return FAVORED_MODE_WEIGHT; + default: + break; + } + + return 1.0f; + } + + void encode_uastc(const uint8_t* pRGBAPixels, uastc_block& output_block, uint32_t flags) + { +// printf("encode_uastc: \n"); +// for (int i = 0; i < 16; i++) +// printf("[%u %u %u %u] ", pRGBAPixels[i * 4 + 0], pRGBAPixels[i * 4 + 1], pRGBAPixels[i * 4 + 2], pRGBAPixels[i * 4 + 3]); +// printf("\n"); + + const color_rgba(*block)[4] = reinterpret_cast(pRGBAPixels); + + bool solid_color = true, has_alpha = false, is_la = true; + + const color_rgba first_color(block[0][0]); + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + if (block[y][x].a < 255) + has_alpha = true; + + if (block[y][x] != first_color) + solid_color = false; + + if ((block[y][x].r != block[y][x].g) || (block[y][x].r != block[y][x].b)) + is_la = false; + } + } + + if (solid_color) + { + // Solid color blocks are so common that we handle them specially and as quickly as we can. 
+ uastc_encode_results solid_results; + solid_results.m_uastc_mode = UASTC_MODE_INDEX_SOLID_COLOR; + solid_results.m_astc_err = 0; + solid_results.m_common_pattern = 0; + solid_results.m_solid_color = first_color; + memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc)); + + etc_block etc1_blk; + uint32_t etc1_bias = 0; + + pack_etc1_block_solid_color(etc1_blk, &first_color.m_comps[0]); + + eac_a8_block eac_a8_blk; + eac_a8_blk.m_table = 0; + eac_a8_blk.m_multiplier = 1; + + pack_uastc(output_block, solid_results, etc1_blk, etc1_bias, eac_a8_blk, false, false); + +// printf(" Solid\n"); + + return; + } + + int level = flags & 7; + const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0; + const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); + //const bool etc1_perceptual = true; + + uastc_encode_results results[MAX_ENCODE_RESULTS]; + + level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); + + // Set all options to slowest, then configure from there depending on the selected level. 
+ uint32_t mode_mask = UINT32_MAX; + uint32_t uber_level = 6; + bool estimate_partition = false; + bool always_try_alpha_modes = true; + uint32_t eac_a8_mul_search_rad = 3; + uint32_t eac_a8_table_mask = UINT32_MAX; + uint32_t least_squares_passes = 2; + bool bc1_hints = true; + bool only_use_la_on_transparent_blocks = false; + + switch (level) + { + case cPackUASTCLevelFastest: + { + mode_mask = (1 << 0) | (1 << 8) | + (1 << 11) | (1 << 12) | + (1 << 15); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + uber_level = 0; + least_squares_passes = 1; + bc1_hints = false; + estimate_partition = true; + only_use_la_on_transparent_blocks = true; + break; + } + case cPackUASTCLevelFaster: + { + mode_mask = (1 << 0) | (1 << 4) | (1 << 6) | (1 << 8) | + (1 << 9) | (1 << 11) | (1 << 12) | + (1 << 15) | (1 << 17); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + uber_level = 0; + least_squares_passes = 1; + estimate_partition = true; + break; + } + case cPackUASTCLevelDefault: + { + mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) | + (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) | + (1 << 15) | (1 << 16) | (1 << 17); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 1; + eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13); + uber_level = 1; + least_squares_passes = 1; + estimate_partition = true; + break; + } + case cPackUASTCLevelSlower: + { + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 2; + uber_level = 3; + estimate_partition = true; + break; + } + case cPackUASTCLevelVerySlow: + { + break; + } + } + +#if BASISU_SUPPORT_FORCE_MODE + static int force_mode = -1; + force_mode = (force_mode + 1) % TOTAL_UASTC_MODES; + mode_mask = UINT32_MAX; + always_try_alpha_modes = true; + 
only_use_la_on_transparent_blocks = false; +#endif + + // HACK HACK + //mode_mask &= ~(1 << 18); + //mode_mask = (1 << 18)| (1 << 10); + + uint32_t total_results = 0; + + if (only_use_la_on_transparent_blocks) + { + if ((is_la) && (!has_alpha)) + is_la = false; + } + + const bool try_alpha_modes = has_alpha || always_try_alpha_modes; + + bc7enc_compress_block_params comp_params; + memset(&comp_params, 0, sizeof(comp_params)); + comp_params.m_max_partitions_mode1 = 64; + comp_params.m_least_squares_passes = least_squares_passes; + comp_params.m_weights[0] = 1; + comp_params.m_weights[1] = 1; + comp_params.m_weights[2] = 1; + comp_params.m_weights[3] = 1; + comp_params.m_uber_level = uber_level; + + if (is_la) + { + if (mode_mask & (1U << 15)) + astc_mode15(block, results, total_results, comp_params); + + if (mode_mask & (1U << 16)) + astc_mode9_or_16(16, block, results, total_results, comp_params, estimate_partition ? 4 : 0); + + if (mode_mask & (1U << 17)) + astc_mode11_or_17(17, block, results, total_results, comp_params); + } + + if (!has_alpha) + { + if (mode_mask & (1U << 0)) + astc_mode0_or_18(0, block, results, total_results, comp_params); + + if (mode_mask & (1U << 1)) + astc_mode1(block, results, total_results, comp_params); + + if (mode_mask & (1U << 2)) + astc_mode2(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 3)) + astc_mode3(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 4)) + astc_mode4(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 5)) + astc_mode5(block, results, total_results, comp_params); + + if (mode_mask & (1U << 6)) + astc_mode6(block, results, total_results, comp_params); + + if (mode_mask & (1U << 7)) + astc_mode7(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 18)) + astc_mode0_or_18(18, block, results, total_results, comp_params); + } + + if (try_alpha_modes) + { 
+ if (mode_mask & (1U << 9)) + astc_mode9_or_16(9, block, results, total_results, comp_params, estimate_partition ? 4 : 0); + + if (mode_mask & (1U << 10)) + astc_mode10(block, results, total_results, comp_params); + + if (mode_mask & (1U << 11)) + astc_mode11_or_17(11, block, results, total_results, comp_params); + + if (mode_mask & (1U << 12)) + astc_mode12(block, results, total_results, comp_params); + + if (mode_mask & (1U << 13)) + astc_mode13(block, results, total_results, comp_params); + + if (mode_mask & (1U << 14)) + astc_mode14(block, results, total_results, comp_params); + } + + assert(total_results); + + // Fix up the errors so we consistently have LA, RGB, or RGBA error. + for (uint32_t i = 0; i < total_results; i++) + { + uastc_encode_results& r = results[i]; + if (!is_la) + { + if (g_uastc_mode_is_la[r.m_uastc_mode]) + { + color_rgba unpacked_block[16]; + unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false); + + uint64_t total_err = 0; + for (uint32_t j = 0; j < 16; j++) + total_err += color_distance(unpacked_block[j], ((const color_rgba*)block)[j], true); + + r.m_astc_err = total_err; + } + } + else + { + if (!g_uastc_mode_is_la[r.m_uastc_mode]) + { + color_rgba unpacked_block[16]; + unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false); + + uint64_t total_err = 0; + for (uint32_t j = 0; j < 16; j++) + total_err += color_distance_la(unpacked_block[j], ((const color_rgba*)block)[j]); + + r.m_astc_err = total_err; + } + } + } + + unpacked_uastc_block unpacked_ublock; + memset(&unpacked_ublock, 0, sizeof(unpacked_ublock)); + + uint64_t total_overall_err[MAX_ENCODE_RESULTS]; + float uastc_err_f[MAX_ENCODE_RESULTS]; + double best_uastc_err_f = 1e+20f; + + int best_index = -1; + + if (total_results == 1) + { + best_index = 0; + } + else + { + const uint32_t bc7_err_weight = favor_bc7_error ? 
100 : ((favor_uastc_error ? 0 : DEFAULT_BC7_ERROR_WEIGHT)); + const uint32_t uastc_err_weight = favor_bc7_error ? 0 : 100; + + // Find best overall results, balancing UASTC and UASTC->BC7 error. + // We purposely allow UASTC error to increase a little, if doing so lowers the BC7 error. + for (uint32_t i = 0; i < total_results; i++) + { +#if BASISU_SUPPORT_FORCE_MODE + if (results[i].m_uastc_mode == force_mode) + { + best_index = i; + break; + } +#endif + + unpacked_ublock.m_mode = results[i].m_uastc_mode; + unpacked_ublock.m_astc = results[i].m_astc; + unpacked_ublock.m_common_pattern = results[i].m_common_pattern; + unpacked_ublock.m_solid_color = results[i].m_solid_color.get_color32(); + + color_rgba decoded_uastc_block[4][4]; + bool success = unpack_uastc(results[i].m_uastc_mode, results[i].m_common_pattern, results[i].m_solid_color.get_color32(), results[i].m_astc, (basist::color32 *)&decoded_uastc_block[0][0], false); + (void)success; + VALIDATE(success); + + uint64_t total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err; + compute_block_error(block, decoded_uastc_block, total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err); + + // Validate the computed error, or we'll go mad if it's inaccurate. 
+ if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + VALIDATE(total_uastc_rgba_err == 0); + } + else if (is_la) + { + VALIDATE(total_uastc_la_err == results[i].m_astc_err); + } + else if (g_uastc_mode_has_alpha[results[i].m_uastc_mode]) + { + VALIDATE(total_uastc_rgba_err == results[i].m_astc_err); + } + else + { + VALIDATE(total_uastc_rgb_err == results[i].m_astc_err); + } + + // Transcode to BC7 + bc7_optimization_results bc7_results; + transcode_uastc_to_bc7(unpacked_ublock, bc7_results); + + bc7_block bc7_data; + encode_bc7_block(&bc7_data, &bc7_results); + + color_rgba decoded_bc7_block[4][4]; + unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0]); + + // Compute BC7 error + uint64_t total_bc7_la_err, total_bc7_rgb_err, total_bc7_rgba_err; + compute_block_error(block, decoded_bc7_block, total_bc7_rgb_err, total_bc7_rgba_err, total_bc7_la_err); + + if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + VALIDATE(total_bc7_rgba_err == 0); + + best_index = i; + break; + } + + uint64_t total_uastc_err = 0, total_bc7_err = 0; + if (is_la) + { + total_bc7_err = total_bc7_la_err; + total_uastc_err = total_uastc_la_err; + } + else if (has_alpha) + { + total_bc7_err = total_bc7_rgba_err; + total_uastc_err = total_uastc_rgba_err; + } + else + { + total_bc7_err = total_bc7_rgb_err; + total_uastc_err = total_uastc_rgb_err; + } + + total_overall_err[i] = ((total_bc7_err * bc7_err_weight) / 100) + ((total_uastc_err * uastc_err_weight) / 100); + if (!total_overall_err[i]) + { + best_index = i; + break; + } + + uastc_err_f[i] = sqrtf((float)total_uastc_err); + + if (uastc_err_f[i] < best_uastc_err_f) + { + best_uastc_err_f = uastc_err_f[i]; + } + + } // total_results + + if (best_index < 0) + { + uint64_t best_err = UINT64_MAX; + + if ((best_uastc_err_f == 0.0f) || (favor_bc7_error)) + { + for (uint32_t i = 0; i < total_results; i++) + { + // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression. 
+ const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f; + + const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight); + if (w < best_err) + { + best_err = w; + best_index = i; + if (!best_err) + break; + } + } // i + } + else + { + // Scan the UASTC results, and consider all results within a window that has the best UASTC+BC7 error. + for (uint32_t i = 0; i < total_results; i++) + { + double err_delta = uastc_err_f[i] / best_uastc_err_f; + + if (err_delta <= UASTC_ERROR_THRESH) + { + // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression. + const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f; + + const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight); + if (w < best_err) + { + best_err = w; + best_index = i; + if (!best_err) + break; + } + } + } // i + } + } + } + + const uastc_encode_results& best_results = results[best_index]; + const uint32_t best_mode = best_results.m_uastc_mode; + const astc_block_desc& best_astc_results = best_results.m_astc; + + color_rgba decoded_uastc_block[4][4]; + bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false); + (void)success; + VALIDATE(success); + +#if BASISU_VALIDATE_UASTC_ENC + // Make sure that the UASTC block unpacks to the same exact pixels as the ASTC block does, using two different decoders. + { + // Round trip to packed UASTC and back, then decode to pixels. 
+ etc_block etc1_blk; + memset(&etc1_blk, 0, sizeof(etc1_blk)); + eac_a8_block etc_eac_a8_blk; + memset(&etc_eac_a8_blk, 0, sizeof(etc_eac_a8_blk)); + etc_eac_a8_blk.m_multiplier = 1; + + basist::uastc_block temp_block; + pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false); + + basist::color32 temp_block_unpacked[4][4]; + success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); + VALIDATE(success); + + // Now round trip to packed ASTC and back, then decode to pixels. + uint32_t astc_data[4]; + + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color); + else + { + success = pack_astc_block(astc_data, &best_astc_results, best_results.m_uastc_mode); + VALIDATE(success); + } + + color_rgba decoded_astc_block[4][4]; + success = basisu_astc::astc::decompress((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4); + VALIDATE(success); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]); + + VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r); + VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g); + VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b); + VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a); + } + } + } +#endif + + // Compute BC1 hints + bool bc1_hint0 = false, bc1_hint1 = false; + if (bc1_hints) + compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block); + + eac_a8_block eac_a8_blk; + if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + // Compute ETC2 hints + uint8_t decoded_uastc_block_alpha[16]; + for (uint32_t i = 0; i < 16; i++) + decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a; + + uastc_pack_eac_a8_results eac8_a8_results; + 
memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); + uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); + + // All we care about for hinting is the table and multiplier. + eac_a8_blk.m_table = eac8_a8_results.m_table; + eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; + } + else + { + memset(&eac_a8_blk, 0, sizeof(eac_a8_blk)); + } + + // Compute ETC1 hints + etc_block etc1_blk; + uint32_t etc1_bias = 0; + compute_etc1_hints(etc1_blk, etc1_bias, best_results, block, decoded_uastc_block, level, flags); + + // Finally, pack the UASTC block with its hints and we're done. + pack_uastc(output_block, best_results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1); + +// printf(" Packed: "); +// for (int i = 0; i < 16; i++) +// printf("%X ", output_block.m_bytes[i]); +// printf("\n"); + } + + static bool uastc_recompute_hints(basist::uastc_block* pBlock, const color_rgba* pBlock_pixels, uint32_t flags, const unpacked_uastc_block *pUnpacked_blk) + { + unpacked_uastc_block unpacked_blk; + + if (pUnpacked_blk) + unpacked_blk = *pUnpacked_blk; + else + { + if (!unpack_uastc(*pBlock, unpacked_blk, false, true)) + return false; + } + color_rgba decoded_uastc_block[4][4]; + if (!unpack_uastc(unpacked_blk, (basist::color32 *)decoded_uastc_block, false)) + return false; + uastc_encode_results results; + results.m_uastc_mode = unpacked_blk.m_mode; + results.m_common_pattern = unpacked_blk.m_common_pattern; + results.m_astc = unpacked_blk.m_astc; + results.m_solid_color = unpacked_blk.m_solid_color; + results.m_astc_err = 0; + bool bc1_hints = true; + uint32_t eac_a8_mul_search_rad = 3; + uint32_t eac_a8_table_mask = UINT32_MAX; + const uint32_t level = flags & cPackUASTCLevelMask; + switch (level) + { + case cPackUASTCLevelFastest: + { + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + bc1_hints = false; + break; + } + case cPackUASTCLevelFaster: + { + 
eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + break; + } + case cPackUASTCLevelDefault: + { + eac_a8_mul_search_rad = 1; + eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13); + break; + } + case cPackUASTCLevelSlower: + { + eac_a8_mul_search_rad = 2; + break; + } + case cPackUASTCLevelVerySlow: + { + break; + } + } + bool bc1_hint0 = false, bc1_hint1 = false; + if (bc1_hints) + compute_bc1_hints(bc1_hint0, bc1_hint1, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block); + const uint32_t best_mode = unpacked_blk.m_mode; + eac_a8_block eac_a8_blk; + if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + uint8_t decoded_uastc_block_alpha[16]; + for (uint32_t i = 0; i < 16; i++) + decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a; + uastc_pack_eac_a8_results eac8_a8_results; + memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); + uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); + eac_a8_blk.m_table = eac8_a8_results.m_table; + eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; + } + else + { + memset(&eac_a8_blk, 0, sizeof(eac_a8_blk)); + } + etc_block etc1_blk; + uint32_t etc1_bias = 0; + compute_etc1_hints(etc1_blk, etc1_bias, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block, level, flags); + pack_uastc(*pBlock, results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1); + return true; + } + + static const uint8_t g_uastc_mode_selector_bits[TOTAL_UASTC_MODES][2] = + { + { 65, 63 }, { 69, 31 }, { 73, 46 }, { 89, 29 }, + { 89, 30 }, { 68, 47 }, { 66, 62 }, { 89, 30 }, + { 0, 0 }, { 97, 30 }, { 65, 63 }, { 66, 62 }, + { 81, 47 }, { 94, 30 }, { 92, 31 }, { 62, 63 }, + { 98, 30 }, { 61, 62 }, { 49, 79 } + }; + + static inline uint32_t set_block_bits(uint8_t* pBytes, uint64_t val, uint32_t num_bits, uint32_t 
cur_ofs) + { + assert(num_bits <= 64); + assert((num_bits == 64) || (val < (1ULL << num_bits))); + uint64_t mask = (num_bits == 64) ? UINT64_MAX : ((1ULL << num_bits) - 1); + while (num_bits) + { + const uint32_t n = basisu::minimum(8U - (cur_ofs & 7U), num_bits); + pBytes[cur_ofs >> 3] &= ~static_cast(mask << (cur_ofs & 7U)); + pBytes[cur_ofs >> 3] |= static_cast(val << (cur_ofs & 7U)); + val >>= n; + mask >>= n; + num_bits -= n; + cur_ofs += n; + } + return cur_ofs; + } + + static const uint8_t g_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static const uint8_t g_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + + static inline uint32_t compute_match_cost_estimate(uint32_t dist) + { + uint32_t len_cost = 7; + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[basisu::minimum(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; + } + + struct selector_bitsequence + { + uint64_t m_sel; + uint32_t m_ofs; + selector_bitsequence() { } + selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs) { } + bool operator== (const selector_bitsequence& other) const + { + return (m_ofs == other.m_ofs) && (m_sel == other.m_sel); + } + + bool operator< (const selector_bitsequence& other) const + { + if (m_ofs < other.m_ofs) + return true; + else if (m_ofs == other.m_ofs) + return m_sel < other.m_sel; + + return false; + } + }; + + struct selector_bitsequence_hash + { + std::size_t operator()(selector_bitsequence const& s) const noexcept + { + return static_cast(hash_hsieh((uint8_t *)&s, sizeof(s)) ^ s.m_sel); + } + }; + + class tracked_stat + { + public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += 
(uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; + }; + + static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, + uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth) + { + debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index); + + const int total_blocks_to_check = basisu::maximum(1U, params.m_lz_dict_size / sizeof(basist::uastc_block)); + const bool perceptual = false; + + std::unordered_map selector_history; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const basist::uastc_block& blk = pBlocks[block_index]; + const color_rgba* pPixels = &pBlock_pixels[16 * block_index]; + + unpacked_uastc_block unpacked_blk; + if (!unpack_uastc(blk, unpacked_blk, false, true)) + return false; + + const uint32_t block_mode = unpacked_blk.m_mode; + if (block_mode == UASTC_MODE_INDEX_SOLID_COLOR) + continue; + + tracked_stat r_stats, g_stats, b_stats, a_stats; + + for (uint32_t i = 0; i < 16; i++) + { + r_stats.update(pPixels[i].r); + g_stats.update(pPixels[i].g); + b_stats.update(pPixels[i].b); + a_stats.update(pPixels[i].a); + } + + const float max_std_dev = basisu::maximum(basisu::maximum(basisu::maximum(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev()); + + float yl = clamp(max_std_dev / 
params.m_max_smooth_block_std_dev, 0.0f, 1.0f); + yl = yl * yl; + const float smooth_block_error_scale = lerp(params.m_smooth_block_max_error_scale, 1.0f, yl); + if (smooth_block_error_scale > 1.0f) + total_smooth++; + + color_rgba decoded_uastc_block[4][4]; + if (!unpack_uastc(unpacked_blk, (basist::color32*)decoded_uastc_block, false)) + return false; + + uint64_t uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_uastc_block)[i], true); + + // Transcode to BC7 + bc7_optimization_results b7_results; + if (!transcode_uastc_to_bc7(unpacked_blk, b7_results)) + return false; + + basist::bc7_block b7_block; + basist::encode_bc7_block(&b7_block, &b7_results); + + color_rgba decoded_b7_blk[4][4]; + unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]); + + uint64_t bc7_err = 0; + for (uint32_t i = 0; i < 16; i++) + bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true); + + uint64_t cur_err = (uastc_err + bc7_err) / 2; + + // Divide by 16*4 to compute RMS error + const float cur_ms_err = (float)cur_err * (1.0f / 64.0f); + const float cur_rms_err = sqrt(cur_ms_err); + + const uint32_t first_sel_bit = g_uastc_mode_selector_bits[block_mode][0]; + const uint32_t total_sel_bits = g_uastc_mode_selector_bits[block_mode][1]; + assert(first_sel_bit + total_sel_bits <= 128); + assert(total_sel_bits > 0); + + uint32_t cur_bit_offset = first_sel_bit; + uint64_t cur_sel_bits = read_bits((const uint8_t*)&blk, cur_bit_offset, basisu::minimum(64U, total_sel_bits)); + + if (cur_rms_err >= params.m_skip_block_rms_thresh) + { + auto cur_search_res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, cur_sel_bits), block_index)); + + // Block already has too much error, so don't mess with it. 
+ if (!cur_search_res.second) + (*cur_search_res.first).second = block_index; + + total_skipped++; + continue; + } + + int cur_bits; + auto cur_find_res = selector_history.find(selector_bitsequence(first_sel_bit, cur_sel_bits)); + if (cur_find_res == selector_history.end()) + { + // Wasn't found - wildly estimate literal cost + //cur_bits = (total_sel_bits * 5) / 4; + cur_bits = (total_sel_bits * params.m_lz_literal_cost) / 100; + } + else + { + // Was found - wildly estimate match cost + uint32_t match_block_index = cur_find_res->second; + const int block_dist_in_bytes = (block_index - match_block_index) * 16; + cur_bits = compute_match_cost_estimate(block_dist_in_bytes); + } + + int first_block_to_check = basisu::maximum(first_index, block_index - total_blocks_to_check); + int last_block_to_check = block_index - 1; + + basist::uastc_block best_block(blk); + uint32_t best_block_index = block_index; + + float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda; + + // Now scan through previous blocks, insert their selector bit patterns into the current block, and find + // selector bit patterns which don't increase the overall block error too much. + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const basist::uastc_block& prev_blk = pBlocks[prev_block_index]; + + uint32_t bit_offset = first_sel_bit; + uint64_t sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, basisu::minimum(64U, total_sel_bits)); + + int match_block_index = prev_block_index; + auto res = selector_history.find(selector_bitsequence(first_sel_bit, sel_bits)); + if (res != selector_history.end()) + match_block_index = res->second; + // Have we already checked this bit pattern? If so then skip this block. 
+ if (match_block_index > prev_block_index) + continue; + + unpacked_uastc_block unpacked_prev_blk; + if (!unpack_uastc(prev_blk, unpacked_prev_blk, false, true)) + return false; + + basist::uastc_block trial_blk(blk); + + set_block_bits((uint8_t*)&trial_blk, sel_bits, basisu::minimum(64U, total_sel_bits), first_sel_bit); + + if (total_sel_bits > 64) + { + sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, total_sel_bits - 64U); + + set_block_bits((uint8_t*)&trial_blk, sel_bits, total_sel_bits - 64U, first_sel_bit + basisu::minimum(64U, total_sel_bits)); + } + + unpacked_uastc_block unpacked_trial_blk; + if (!unpack_uastc(trial_blk, unpacked_trial_blk, false, true)) + continue; + + color_rgba decoded_trial_uastc_block[4][4]; + if (!unpack_uastc(unpacked_trial_blk, (basist::color32*)decoded_trial_uastc_block, false)) + continue; + + uint64_t trial_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true); + + // Transcode trial to BC7, compute error + bc7_optimization_results trial_b7_results; + if (!transcode_uastc_to_bc7(unpacked_trial_blk, trial_b7_results)) + return false; + + basist::bc7_block trial_b7_block; + basist::encode_bc7_block(&trial_b7_block, &trial_b7_results); + + color_rgba decoded_trial_b7_blk[4][4]; + unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0]); + + uint64_t trial_bc7_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_b7_blk)[i], true); + + uint64_t trial_err = (trial_uastc_err + trial_bc7_err) / 2; + + const float trial_ms_err = (float)trial_err * (1.0f / 64.0f); + const float trial_rms_err = sqrtf(trial_ms_err); + + if (trial_rms_err > cur_rms_err * params.m_max_allowed_rms_increase_ratio) + continue; + + const int block_dist_in_bytes = (block_index - match_block_index) * 16; + const int match_bits = 
compute_match_cost_estimate(block_dist_in_bytes); + + float t = trial_ms_err * smooth_block_error_scale + match_bits * params.m_lambda; + if (t < best_t) + { + best_t = t; + best_block_index = prev_block_index; + + best_block = trial_blk; + } + + } // prev_block_index + + if (best_block_index != block_index) + { + total_modified++; + + unpacked_uastc_block unpacked_best_blk; + if (!unpack_uastc(best_block, unpacked_best_blk, false, false)) + return false; + + if ((params.m_endpoint_refinement) && (block_mode == 0)) + { + // Attempt to refine mode 0 block's endpoints, using the new selectors. This doesn't help much, but it does help. + // TODO: We could do this with the other modes too. + color_rgba decoded_best_uastc_block[4][4]; + if (!unpack_uastc(unpacked_best_blk, (basist::color32*)decoded_best_uastc_block, false)) + return false; + + // Compute the block's current error (with the modified selectors). + uint64_t best_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + best_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_best_uastc_block)[i], true); + + bc7enc_compress_block_params comp_params; + memset(&comp_params, 0, sizeof(comp_params)); + comp_params.m_max_partitions_mode1 = 64; + comp_params.m_least_squares_passes = 1; + comp_params.m_weights[0] = 1; + comp_params.m_weights[1] = 1; + comp_params.m_weights[2] = 1; + comp_params.m_weights[3] = 1; + comp_params.m_uber_level = 0; + + uastc_encode_results results; + uint32_t total_results = 0; + astc_mode0_or_18(0, (color_rgba(*)[4])pPixels, &results, total_results, comp_params, unpacked_best_blk.m_astc.m_weights); + assert(total_results == 1); + + // See if the overall error has actually gone done. 
+ + color_rgba decoded_trial_uastc_block[4][4]; + bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false); + assert(success); + + BASISU_NOTE_UNUSED(success); + + uint64_t trial_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true); + + if (trial_uastc_err < best_uastc_err) + { + // The error went down, so accept the new endpoints. + + // Ensure the selectors haven't changed, otherwise we'll invalidate the LZ matches. + for (uint32_t i = 0; i < 16; i++) + assert(unpacked_best_blk.m_astc.m_weights[i] == results.m_astc.m_weights[i]); + + unpacked_best_blk.m_astc = results.m_astc; + + total_refined++; + } + } // if ((params.m_endpoint_refinement) && (block_mode == 0)) + + // The selectors have changed, so go recompute the block hints. + if (!uastc_recompute_hints(&best_block, pPixels, flags, &unpacked_best_blk)) + return false; + + // Write the modified block + pBlocks[block_index] = best_block; + + } // if (best_block_index != block_index) + + { + uint32_t bit_offset = first_sel_bit; + uint64_t sel_bits = read_bits((const uint8_t*)&best_block, bit_offset, basisu::minimum(64U, total_sel_bits)); + + auto res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, sel_bits), block_index)); + if (!res.second) + (*res.first).second = block_index; + } + + } // block_index + + return true; + } + + // This function implements a basic form of rate distortion optimization (RDO) for UASTC. + // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation. + // There's A LOT that can be done better in here, but it's a start. + // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses. 
+ bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, job_pool* pJob_pool, uint32_t total_jobs) + { + assert(params.m_max_allowed_rms_increase_ratio > 1.0f); + assert(params.m_lz_dict_size > 0); + assert(params.m_lambda > 0.0f); + + uint32_t total_skipped = 0, total_modified = 0, total_refined = 0, total_smooth = 0; + + uint32_t blocks_per_job = total_jobs ? (num_blocks / total_jobs) : 0; + + std::mutex stat_mutex; + + bool status = false; + + if ((!pJob_pool) || (total_jobs <= 1) || (blocks_per_job <= 8)) + { + status = uastc_rdo_blocks(0, num_blocks, pBlocks, pBlock_pixels, params, flags, total_skipped, total_refined, total_modified, total_smooth); + } + else + { + bool all_succeeded = true; + + for (uint32_t block_index_iter = 0; block_index_iter < num_blocks; block_index_iter += blocks_per_job) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(num_blocks, block_index_iter + blocks_per_job); + +#ifndef __EMSCRIPTEN__ + pJob_pool->add_job([first_index, last_index, pBlocks, pBlock_pixels, ¶ms, flags, &total_skipped, &total_modified, &total_refined, &total_smooth, &all_succeeded, &stat_mutex] { +#endif + + uint32_t job_skipped = 0, job_modified = 0, job_refined = 0, job_smooth = 0; + + bool status = uastc_rdo_blocks(first_index, last_index, pBlocks, pBlock_pixels, params, flags, job_skipped, job_refined, job_modified, job_smooth); + + { + std::lock_guard lck(stat_mutex); + + all_succeeded = all_succeeded && status; + total_skipped += job_skipped; + total_modified += job_modified; + total_refined += job_refined; + total_smooth += job_smooth; + } + +#ifndef __EMSCRIPTEN__ + } + ); +#endif + + } // block_index_iter + +#ifndef __EMSCRIPTEN__ + pJob_pool->wait_for_all(); +#endif + + status = all_succeeded; + } + + debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", 
total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks); + + return status; + } +} // namespace basisu + + + + + diff --git a/src/deps/basis-universal/encoder/basisu_uastc_enc.h b/src/deps/basis-universal/encoder/basisu_uastc_enc.h new file mode 100644 index 0000000000..ba39a558b3 --- /dev/null +++ b/src/deps/basis-universal/encoder/basisu_uastc_enc.h @@ -0,0 +1,140 @@ +// basisu_uastc_enc.h +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_etc.h" + +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + const uint32_t TOTAL_PACK_UASTC_LEVELS = 5; + + enum + { + // Fastest is the lowest quality, although it's still substantially higher quality vs. BC1/ETC1. It supports 5 modes. + // The output may be somewhat blocky because this setting doesn't support 2/3-subset UASTC modes, but it should be less blocky vs. BC1/ETC1. + // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. + // Transcoded ETC1 quality will be lower because it only considers 2 hints out of 32. + // Avg. 43.45 dB + cPackUASTCLevelFastest = 0, + + // Faster is ~3x slower than fastest. It supports 9 modes. + // Avg. 46.49 dB + cPackUASTCLevelFaster = 1, + + // Default is ~5.5x slower than fastest. It supports 14 modes. + // Avg.
47.47 dB + cPackUASTCLevelDefault = 2, + + // Slower is ~14.5x slower than fastest. It supports all 18 modes. + // Avg. 48.01 dB + cPackUASTCLevelSlower = 3, + + // VerySlow is ~200x slower than fastest. + // The best quality the codec is capable of, but you'll need to be patient or have a lot of cores. + // Avg. 48.24 dB + cPackUASTCLevelVerySlow = 4, + + cPackUASTCLevelMask = 0xF, + + // By default the encoder tries to strike a balance between UASTC and transcoded BC7 quality. + // These flags allow you to favor only optimizing for lowest UASTC error, or lowest BC7 error. + cPackUASTCFavorUASTCError = 8, + cPackUASTCFavorBC7Error = 16, + + cPackUASTCETC1FasterHints = 64, + cPackUASTCETC1FastestHints = 128, + cPackUASTCETC1DisableFlipAndIndividual = 256, + + // Favor UASTC modes 0 and 10 more than the others (this is experimental, it's useful for RDO compression) + cPackUASTCFavorSimplerModes = 512, + }; + + // pRGBAPixels: Pointer to source 4x4 block of RGBA pixels (R first in memory). + // block: Reference to destination UASTC block. + // level: Controls compression speed vs. performance tradeoff. + void encode_uastc(const uint8_t* pRGBAPixels, basist::uastc_block& output_block, uint32_t flags = cPackUASTCLevelDefault); + + struct uastc_encode_results + { + uint32_t m_uastc_mode; + uint32_t m_common_pattern; + basist::astc_block_desc m_astc; + color_rgba m_solid_color; + uint64_t m_astc_err; + }; + + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1); + + const uint32_t UASCT_RDO_DEFAULT_LZ_DICT_SIZE = 4096; + + const float UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO = 10.0f; + const float UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH = 8.0f; + + // The RDO encoder computes a smoothness factor, from [0,1], for each block. 
To do this it computes each block's maximum component variance, then it divides this by this factor and clamps the result. + Larger values will result in more blocks being protected from too much distortion. + const float UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV = 18.0f; + + // The RDO encoder can artificially boost the error of smooth blocks, in order to suppress distortions on smooth areas of the texture. + // The encoder will use this value as the maximum error scale to use on smooth blocks. The larger this value, the better smooth blocks will look. Set to 1.0 to disable this completely. + const float UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE = 10.0f; + + struct uastc_rdo_params + { + uastc_rdo_params() + { + clear(); + } + + void clear() + { + m_lz_dict_size = UASCT_RDO_DEFAULT_LZ_DICT_SIZE; + m_lambda = 0.5f; + m_max_allowed_rms_increase_ratio = UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO; + m_skip_block_rms_thresh = UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH; + m_endpoint_refinement = true; + m_lz_literal_cost = 100; + + m_max_smooth_block_std_dev = UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV; + m_smooth_block_max_error_scale = UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE; + } + + // m_lz_dict_size: Size of LZ dictionary to simulate in bytes. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. + uint32_t m_lz_dict_size; + + // m_lambda: The post-processor tries to reduce distortion+rate*lambda (rate is approximate LZ bits and distortion is scaled MS error). + // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. + float m_lambda; + + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc.
+ float m_max_allowed_rms_increase_ratio; + + // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. + float m_skip_block_rms_thresh; + + // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. + bool m_endpoint_refinement; + + float m_max_smooth_block_std_dev; + float m_smooth_block_max_error_scale; + + uint32_t m_lz_literal_cost; + }; + + // num_blocks, pBlocks: Number of blocks and pointer to UASTC blocks to process. + // pBlock_pixels: Pointer to an array of 4x4 blocks containing the original texture pixels. This is NOT a raster image, but a pointer to individual 4x4 blocks. + // flags: Pass in the same flags used to encode the UASTC blocks. The flags are used to reencode the transcode hints in the same way. + bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params ¶ms, uint32_t flags = cPackUASTCLevelDefault, job_pool* pJob_pool = nullptr, uint32_t total_jobs = 0); +} // namespace basisu diff --git a/src/deps/basis-universal/encoder/jpgd.cpp b/src/deps/basis-universal/encoder/jpgd.cpp new file mode 100644 index 0000000000..460834409d --- /dev/null +++ b/src/deps/basis-universal/encoder/jpgd.cpp @@ -0,0 +1,3241 @@ +// jpgd.cpp - C++ class for JPEG decompression. Written by Richard Geldreich between 1994-2020. +// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. +// Supports box and linear chroma upsampling. +// +// Released under two licenses. You are free to choose which license you want: +// License 1: +// Public Domain +// +// License 2: +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Alex Evans: Linear memory allocator (taken from jpge.h). +// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings +// v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings. +// +#ifdef _MSC_VER +#ifndef BASISU_NO_ITERATOR_DEBUG_LEVEL +#if defined(_DEBUG) || defined(DEBUG) +#define _ITERATOR_DEBUG_LEVEL 1 +#define _SECURE_SCL 1 +#else +#define _SECURE_SCL 0 +#define _ITERATOR_DEBUG_LEVEL 0 +#endif +#endif +#endif + +#include "jpgd.h" +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#endif + +#define JPGD_TRUE (1) +#define JPGD_FALSE (0) + +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b)) + +namespace jpgd { + + static inline void* jpgd_malloc(size_t nSize) { return malloc(nSize); } + static inline void jpgd_free(void* p) { free(p); } + + // DCT coefficients are stored in this sequence. 
+ static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + + enum JPEG_MARKER + { + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 + }; + + enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define SCALEDONE ((int32)1) + +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ + +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) + +#define MULTIPLY(var, cnst) ((var) * (cnst)) + +#define CLAMP(i) 
((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) + + static inline int left_shifti(int val, uint32_t bits) + { + return static_cast(static_cast(val) << bits); + } + + // Compiler creates a fast path 1D IDCT for X non-zero columns + template + struct Row + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + // ACCESS_COL() will be optimized at compile time to either an array access, or 0. Good compilers will then optimize out muls against 0. +#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) + + const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = left_shifti(ACCESS_COL(0) + ACCESS_COL(4), CONST_BITS); + const int tmp1 = left_shifti(ACCESS_COL(0) - ACCESS_COL(4), CONST_BITS); + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, -FIX_0_899976223); + const int az2 = MULTIPLY(bz2, -FIX_2_562915447); + const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS - PASS1_BITS); + pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS - PASS1_BITS); + pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS - PASS1_BITS); + pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS - PASS1_BITS); + 
pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS - PASS1_BITS); + pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS - PASS1_BITS); + pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS - PASS1_BITS); + pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS - PASS1_BITS); + } + }; + + template <> + struct Row<0> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + (void)pTemp; + (void)pSrc; + } + }; + + template <> + struct Row<1> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + const int dcval = left_shifti(pSrc[0], PASS1_BITS); + + pTemp[0] = dcval; + pTemp[1] = dcval; + pTemp[2] = dcval; + pTemp[3] = dcval; + pTemp[4] = dcval; + pTemp[5] = dcval; + pTemp[6] = dcval; + pTemp[7] = dcval; + } + }; + + // Compiler creates a fast path 1D IDCT for X non-zero rows + template + struct Col + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. +#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0) + + const int z2 = ACCESS_ROW(2); + const int z3 = ACCESS_ROW(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = left_shifti(ACCESS_ROW(0) + ACCESS_ROW(4), CONST_BITS); + const int tmp1 = left_shifti(ACCESS_ROW(0) - ACCESS_ROW(4), CONST_BITS); + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, -FIX_0_899976223); + const int az2 = MULTIPLY(bz2, -FIX_2_562915447); + const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; + + const int btmp0 = 
MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 0] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 7] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 1] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 6] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 2] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 5] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 3] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 4] = (uint8)CLAMP(i); + } + }; + + template <> + struct Col<1> + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3); + const uint8 dcval_clamped = (uint8)CLAMP(dcval); + pDst_ptr[0 * 8] = dcval_clamped; + pDst_ptr[1 * 8] = dcval_clamped; + pDst_ptr[2 * 8] = dcval_clamped; + pDst_ptr[3 * 8] = dcval_clamped; + pDst_ptr[4 * 8] = dcval_clamped; + pDst_ptr[5 * 8] = dcval_clamped; + pDst_ptr[6 * 8] = dcval_clamped; + pDst_ptr[7 * 8] = dcval_clamped; + } + }; + + static const uint8 s_idct_row_table[] = + { + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 
6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, + }; + + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + }; + + // Scalar "fast pathing" IDCT. 
+ static void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag) + { + assert(block_max_zag >= 1); + assert(block_max_zag <= 64); + + if (block_max_zag <= 1) + { + int k = ((pSrc_ptr[0] + 4) >> 3) + 128; + k = CLAMP(k); + k = k | (k << 8); + k = k | (k << 16); + + for (int i = 8; i > 0; i--) + { + *(int*)&pDst_ptr[0] = k; + *(int*)&pDst_ptr[4] = k; + pDst_ptr += 8; + } + return; + } + + int temp[64]; + + const jpgd_block_t* pSrc = pSrc_ptr; + int* pTemp = temp; + + const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; + int i; + for (i = 8; i > 0; i--, pRow_tab++) + { + switch (*pRow_tab) + { + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; + } + + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + + const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; + for (i = 8; i > 0; i--) + { + switch (nonzero_rows) + { + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; + } + + pTemp++; + pDst_ptr++; + } + } + + // Retrieve one character from the input stream. + inline uint jpeg_decoder::get_char() + { + // Any bytes remaining in buffer? + if (!m_in_buf_left) + { + // Try to get more bytes. + prep_in_buffer(); + // Still nothing to get? 
+ if (!m_in_buf_left) + { + // Pad the end of the stream with 0xFF 0xD9 (EOI marker) + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Same as previous method, except can indicate if the character is a pad character or not. + inline uint jpeg_decoder::get_char(bool* pPadding_flag) + { + if (!m_in_buf_left) + { + prep_in_buffer(); + if (!m_in_buf_left) + { + *pPadding_flag = true; + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + *pPadding_flag = false; + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Inserts a previously retrieved character back into the input buffer. + inline void jpeg_decoder::stuff_char(uint8 q) + { + // This could write before the input buffer, but we've placed another array there. + *(--m_pIn_buf_ofs) = q; + m_in_buf_left++; + } + + // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. + inline uint8 jpeg_decoder::get_octet() + { + bool padding_flag; + int c = get_char(&padding_flag); + + if (c == 0xFF) + { + if (padding_flag) + return 0xFF; + + c = get_char(&padding_flag); + if (padding_flag) + { + stuff_char(0xFF); + return 0xFF; + } + + if (c == 0x00) + return 0xFF; + else + { + stuff_char(static_cast(c)); + stuff_char(0xFF); + return 0xFF; + } + } + + return static_cast(c); + } + + // Retrieves a variable number of bits from the input stream. Does not recognize markers. 
+ inline uint jpeg_decoder::get_bits(int num_bits) + { + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + uint c1 = get_char(); + uint c2 = get_char(); + m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + assert(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered. + inline uint jpeg_decoder::get_bits_no_markers(int num_bits) + { + if (!num_bits) + return 0; + + assert(num_bits <= 16); + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) + { + uint c1 = get_octet(); + uint c2 = get_octet(); + m_bit_buf |= (c1 << 8) | c2; + } + else + { + m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; + m_in_buf_left -= 2; + m_pIn_buf_ofs += 2; + } + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + assert(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables* pH) + { + if (!pH) + stop_decoding(JPGD_DECODE_ERROR); + + int symbol; + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) + { + // Decode more bits, use a tree traversal to find symbol. + int ofs = 23; + do + { + unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1)); + + // This should never happen, but to be safe I'm turning these asserts into a run-time check. 
+ if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0)) + stop_decoding(JPGD_DECODE_ERROR); + + symbol = pH->tree[idx]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + } + else + { + assert(symbol < JPGD_HUFF_CODE_SIZE_MAX_LENGTH); + get_bits_no_markers(pH->code_size[symbol]); + } + + return symbol; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables* pH, int& extra_bits) + { + int symbol; + + if (!pH) + stop_decoding(JPGD_DECODE_ERROR); + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) + { + // Use a tree traversal to find symbol. + int ofs = 23; + do + { + unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1)); + + // This should never happen, but to be safe I'm turning these asserts into a run-time check. + if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0)) + stop_decoding(JPGD_DECODE_ERROR); + + symbol = pH->tree[idx]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + + extra_bits = get_bits_no_markers(symbol & 0xF); + } + else + { + if (symbol & 0x8000) + { + //get_bits_no_markers((symbol >> 8) & 31); + assert(((symbol >> 8) & 31) <= 15); + get_bits_no_markers((symbol >> 8) & 15); + extra_bits = symbol >> 16; + } + else + { + int code_size = (symbol >> 8) & 31; + int num_extra_bits = symbol & 0xF; + int bits = code_size + num_extra_bits; + + if (bits <= 16) + extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); + else + { + get_bits_no_markers(code_size); + extra_bits = get_bits_no_markers(num_extra_bits); + } + } + + symbol &= 0xFF; + } + + return symbol; + } + + // Tables and macro used to fully decode the DPCM differences. 
+ static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; + static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 }; + //static const int s_extend_mask[] = { 0, (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 8), (1 << 9), (1 << 10), (1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16) }; + +#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x)) + + // Unconditionally frees all allocated m_blocks. + void jpeg_decoder::free_all_blocks() + { + m_pStream = nullptr; + for (mem_block* b = m_pMem_blocks; b; ) + { + mem_block* n = b->m_pNext; + jpgd_free(b); + b = n; + } + m_pMem_blocks = nullptr; + } + + // This method handles all errors. It will never return. + // It could easily be changed to use C++ exceptions. 
+ JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status) + { + m_error_code = status; + free_all_blocks(); + longjmp(m_jmp_state, status); + } + + void* jpeg_decoder::alloc(size_t nSize, bool zero) + { + nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; + char* rv = nullptr; + for (mem_block* b = m_pMem_blocks; b; b = b->m_pNext) + { + if ((b->m_used_count + nSize) <= b->m_size) + { + rv = b->m_data + b->m_used_count; + b->m_used_count += nSize; + break; + } + } + if (!rv) + { + int capacity = JPGD_MAX(32768 - 256, ((int)nSize + 2047) & ~2047); + mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity); + if (!b) + { + stop_decoding(JPGD_NOTENOUGHMEM); + } + + b->m_pNext = m_pMem_blocks; + m_pMem_blocks = b; + b->m_used_count = nSize; + b->m_size = capacity; + rv = b->m_data; + } + if (zero) memset(rv, 0, nSize); + return rv; + } + + void jpeg_decoder::word_clear(void* p, uint16 c, uint n) + { + uint8* pD = (uint8*)p; + const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF; + while (n) + { + pD[0] = l; + pD[1] = h; + pD += 2; + n--; + } + } + + // Refill the input buffer. + // This method will sit in a loop until (A) the buffer is full or (B) + // the stream's read() method reports and end of file condition. + void jpeg_decoder::prep_in_buffer() + { + m_in_buf_left = 0; + m_pIn_buf_ofs = m_in_buf; + + if (m_eof_flag) + return; + + do + { + int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag); + if (bytes_read == -1) + stop_decoding(JPGD_STREAM_READ); + + m_in_buf_left += bytes_read; + } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag)); + + m_total_bytes_read += m_in_buf_left; + + // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid). + // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.) 
+ word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64); + } + + // Read a Huffman code table. + void jpeg_decoder::read_dht_marker() + { + int i, index, count; + uint8 huff_num[17]; + uint8 huff_val[256]; + + uint num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= 2; + + while (num_left) + { + index = get_bits(8); + + huff_num[0] = 0; + + count = 0; + + for (i = 1; i <= 16; i++) + { + huff_num[i] = static_cast(get_bits(8)); + count += huff_num[i]; + } + + if (count > 255) + stop_decoding(JPGD_BAD_DHT_COUNTS); + + bool symbol_present[256]; + memset(symbol_present, 0, sizeof(symbol_present)); + + for (i = 0; i < count; i++) + { + const int s = get_bits(8); + + // Check for obviously bogus tables. + if (symbol_present[s]) + stop_decoding(JPGD_BAD_DHT_COUNTS); + + huff_val[i] = static_cast(s); + symbol_present[s] = true; + } + + i = 1 + 16 + count; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= i; + + if ((index & 0x10) > 0x10) + stop_decoding(JPGD_BAD_DHT_INDEX); + + index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1); + + if (index >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_BAD_DHT_INDEX); + + if (!m_huff_num[index]) + m_huff_num[index] = (uint8*)alloc(17); + + if (!m_huff_val[index]) + m_huff_val[index] = (uint8*)alloc(256); + + m_huff_ac[index] = (index & 0x10) != 0; + memcpy(m_huff_num[index], huff_num, 17); + memcpy(m_huff_val[index], huff_val, 256); + } + } + + // Read a quantization table. 
+ void jpeg_decoder::read_dqt_marker() + { + int n, i, prec; + uint num_left; + uint temp; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DQT_MARKER); + + num_left -= 2; + + while (num_left) + { + n = get_bits(8); + prec = n >> 4; + n &= 0x0F; + + if (n >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_BAD_DQT_TABLE); + + if (!m_quant[n]) + m_quant[n] = (jpgd_quant_t*)alloc(64 * sizeof(jpgd_quant_t)); + + // read quantization entries, in zag order + for (i = 0; i < 64; i++) + { + temp = get_bits(8); + + if (prec) + temp = (temp << 8) + get_bits(8); + + m_quant[n][i] = static_cast(temp); + } + + i = 64 + 1; + + if (prec) + i += 64; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DQT_LENGTH); + + num_left -= i; + } + } + + // Read the start of frame (SOF) marker. + void jpeg_decoder::read_sof_marker() + { + int i; + uint num_left; + + num_left = get_bits(16); + + /* precision: sorry, only 8-bit precision is supported */ + if (get_bits(8) != 8) + stop_decoding(JPGD_BAD_PRECISION); + + m_image_y_size = get_bits(16); + + if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) + stop_decoding(JPGD_BAD_HEIGHT); + + m_image_x_size = get_bits(16); + + if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) + stop_decoding(JPGD_BAD_WIDTH); + + m_comps_in_frame = get_bits(8); + + if (m_comps_in_frame > JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_TOO_MANY_COMPONENTS); + + if (num_left != (uint)(m_comps_in_frame * 3 + 8)) + stop_decoding(JPGD_BAD_SOF_LENGTH); + + for (i = 0; i < m_comps_in_frame; i++) + { + m_comp_ident[i] = get_bits(8); + m_comp_h_samp[i] = get_bits(4); + m_comp_v_samp[i] = get_bits(4); + + if (!m_comp_h_samp[i] || !m_comp_v_samp[i] || (m_comp_h_samp[i] > 2) || (m_comp_v_samp[i] > 2)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_comp_quant[i] = get_bits(8); + if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + } + } + + // Used to skip unrecognized markers. 
+ void jpeg_decoder::skip_variable_marker() + { + uint num_left; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_VARIABLE_MARKER); + + num_left -= 2; + + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Read a define restart interval (DRI) marker. + void jpeg_decoder::read_dri_marker() + { + if (get_bits(16) != 4) + stop_decoding(JPGD_BAD_DRI_LENGTH); + + m_restart_interval = get_bits(16); + } + + // Read a start of scan (SOS) marker. + void jpeg_decoder::read_sos_marker() + { + uint num_left; + int i, ci, n, c, cc; + + num_left = get_bits(16); + + n = get_bits(8); + + m_comps_in_scan = n; + + num_left -= 3; + + if ((num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN)) + stop_decoding(JPGD_BAD_SOS_LENGTH); + + for (i = 0; i < n; i++) + { + cc = get_bits(8); + c = get_bits(8); + num_left -= 2; + + for (ci = 0; ci < m_comps_in_frame; ci++) + if (cc == m_comp_ident[ci]) + break; + + if (ci >= m_comps_in_frame) + stop_decoding(JPGD_BAD_SOS_COMP_ID); + + if (ci >= JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_DECODE_ERROR); + + m_comp_list[i] = ci; + + m_comp_dc_tab[ci] = (c >> 4) & 15; + m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1); + + if (m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + if (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + } + + m_spectral_start = get_bits(8); + m_spectral_end = get_bits(8); + m_successive_high = get_bits(4); + m_successive_low = get_bits(4); + + if (!m_progressive_flag) + { + m_spectral_start = 0; + m_spectral_end = 63; + } + + num_left -= 3; + + /* read past whatever is num_left */ + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Finds the next marker. 
+ int jpeg_decoder::next_marker() + { + uint c, bytes; + + bytes = 0; + + do + { + do + { + bytes++; + c = get_bits(8); + } while (c != 0xFF); + + do + { + c = get_bits(8); + } while (c == 0xFF); + + } while (c == 0); + + // If bytes > 0 here, there where extra bytes before the marker (not good). + + return c; + } + + // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is + // encountered. + int jpeg_decoder::process_markers() + { + int c; + + for (; ; ) + { + c = next_marker(); + + switch (c) + { + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: + // case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } + } + } + } + + // Finds the start of image (SOI) marker. 
+ void jpeg_decoder::locate_soi_marker() + { + uint lastchar, thischar; + uint bytesleft; + + lastchar = get_bits(8); + + thischar = get_bits(8); + + /* ok if it's a normal JPEG file without a special header */ + + if ((lastchar == 0xFF) && (thischar == M_SOI)) + return; + + bytesleft = 4096; + + for (; ; ) + { + if (--bytesleft == 0) + stop_decoding(JPGD_NOT_JPEG); + + lastchar = thischar; + + thischar = get_bits(8); + + if (lastchar == 0xFF) + { + if (thischar == M_SOI) + break; + else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end + stop_decoding(JPGD_NOT_JPEG); + } + } + + // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. + thischar = (m_bit_buf >> 24) & 0xFF; + + if (thischar != 0xFF) + stop_decoding(JPGD_NOT_JPEG); + } + + // Find a start of frame (SOF) marker. + void jpeg_decoder::locate_sof_marker() + { + locate_soi_marker(); + + int c = process_markers(); + + switch (c) + { + case M_SOF2: + { + m_progressive_flag = JPGD_TRUE; + read_sof_marker(); + break; + } + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } + } + } + + // Find a start of scan (SOS) marker. + int jpeg_decoder::locate_sos_marker() + { + int c; + + c = process_markers(); + + if (c == M_EOI) + return JPGD_FALSE; + else if (c != M_SOS) + stop_decoding(JPGD_UNEXPECTED_MARKER); + + read_sos_marker(); + + return JPGD_TRUE; + } + + // Reset everything to default/uninitialized state. 
+ void jpeg_decoder::init(jpeg_decoder_stream* pStream, uint32_t flags) + { + m_flags = flags; + m_pMem_blocks = nullptr; + m_error_code = JPGD_SUCCESS; + m_ready_flag = false; + m_image_x_size = m_image_y_size = 0; + m_pStream = pStream; + m_progressive_flag = JPGD_FALSE; + + memset(m_huff_ac, 0, sizeof(m_huff_ac)); + memset(m_huff_num, 0, sizeof(m_huff_num)); + memset(m_huff_val, 0, sizeof(m_huff_val)); + memset(m_quant, 0, sizeof(m_quant)); + + m_scan_type = 0; + m_comps_in_frame = 0; + + memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); + memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); + memset(m_comp_quant, 0, sizeof(m_comp_quant)); + memset(m_comp_ident, 0, sizeof(m_comp_ident)); + memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); + memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); + + m_comps_in_scan = 0; + memset(m_comp_list, 0, sizeof(m_comp_list)); + memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); + memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); + + m_spectral_start = 0; + m_spectral_end = 0; + m_successive_low = 0; + m_successive_high = 0; + m_max_mcu_x_size = 0; + m_max_mcu_y_size = 0; + m_blocks_per_mcu = 0; + m_max_blocks_per_row = 0; + m_mcus_per_row = 0; + m_mcus_per_col = 0; + + memset(m_mcu_org, 0, sizeof(m_mcu_org)); + + m_total_lines_left = 0; + m_mcu_lines_left = 0; + m_num_buffered_scanlines = 0; + m_real_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_pixel = 0; + + memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); + + memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); + memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_eob_run = 0; + + m_pIn_buf_ofs = m_in_buf; + m_in_buf_left = 0; + m_eof_flag = false; + m_tem_flag = 0; + + memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); + memset(m_in_buf, 0, sizeof(m_in_buf)); + memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); + + m_restart_interval = 0; + m_restarts_left = 0; + m_next_restart_num = 0; + 
+ m_max_mcus_per_row = 0; + m_max_blocks_per_mcu = 0; + m_max_mcus_per_col = 0; + + memset(m_last_dc_val, 0, sizeof(m_last_dc_val)); + m_pMCU_coefficients = nullptr; + m_pSample_buf = nullptr; + m_pSample_buf_prev = nullptr; + m_sample_buf_prev_valid = false; + + m_total_bytes_read = 0; + + m_pScan_line_0 = nullptr; + m_pScan_line_1 = nullptr; + + // Ready the input buffer. + prep_in_buffer(); + + // Prime the bit buffer. + m_bits_left = 16; + m_bit_buf = 0; + + get_bits(16); + get_bits(16); + + for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) + m_mcu_block_max_zag[i] = 64; + } + +#define SCALEBITS 16 +#define ONE_HALF ((int) 1 << (SCALEBITS-1)) +#define FIX(x) ((int) ((x) * (1L<> SCALEBITS; + m_cbb[i] = (FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS; + m_crg[i] = (-FIX(0.71414f)) * k; + m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF; + } + } + + // This method throws back into the stream any bytes that where read + // into the bit buffer during initial marker scanning. + void jpeg_decoder::fix_in_buffer() + { + // In case any 0xFF's where pulled into the buffer during marker scanning. + assert((m_bits_left & 7) == 0); + + if (m_bits_left == 16) + stuff_char((uint8)(m_bit_buf & 0xFF)); + + if (m_bits_left >= 8) + stuff_char((uint8)((m_bit_buf >> 8) & 0xFF)); + + stuff_char((uint8)((m_bit_buf >> 16) & 0xFF)); + stuff_char((uint8)((m_bit_buf >> 24) & 0xFF)); + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + void jpeg_decoder::transform_mcu(int mcu_row) + { + jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; + if (mcu_row * m_blocks_per_mcu >= m_max_blocks_per_row) + stop_decoding(JPGD_DECODE_ERROR); + + uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); + pSrc_ptr += 64; + pDst_ptr += 64; + } + } + + // Loads and dequantizes the next row of (already decoded) coefficients. 
+ // Progressive images only. + void jpeg_decoder::load_next_row() + { + int i; + jpgd_block_t* p; + jpgd_quant_t* q; + int mcu_row, mcu_block, row_block = 0; + int component_num, component_id; + int block_x_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + q = m_quant[m_comp_quant[component_id]]; + + p = m_pMCU_coefficients + 64 * mcu_block; + + jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + p[0] = pDC[0]; + memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t)); + + for (i = 63; i > 0; i--) + if (p[g_ZAG[i]]) + break; + + m_mcu_block_max_zag[mcu_block] = i + 1; + + for (; i >= 0; i--) + if (p[g_ZAG[i]]) + p[g_ZAG[i]] = static_cast(p[g_ZAG[i]] * q[i]); + + row_block++; + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + transform_mcu(mcu_row); + } + + if (m_comps_in_scan == 1) + m_block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + + m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } + + // Restart interval processing. 
+ void jpeg_decoder::process_restart() + { + int i; + int c = 0; + + // Align to a byte boundry + // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers! + //get_bits_no_markers(m_bits_left & 7); + + // Let's scan a little bit to find the marker, but not _too_ far. + // 1536 is a "fudge factor" that determines how much to scan. + for (i = 1536; i > 0; i--) + if (get_char() == 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + for (; i > 0; i--) + if ((c = get_char()) != 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Is it the expected marker? If not, something bad happened. + if (c != (m_next_restart_num + M_RST0)) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Reset each component's DC prediction values. + memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + m_restarts_left = m_restart_interval; + + m_next_restart_num = (m_next_restart_num + 1) & 7; + + // Get the bit buffer going again... + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + static inline int dequantize_ac(int c, int q) { c *= q; return c; } + + // Decodes and dequantizes the next row of coefficients. 
+ void jpeg_decoder::decode_next_row() + { + int row_block = 0; + + for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + jpgd_block_t* p = m_pMCU_coefficients; + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) + { + int component_id = m_mcu_org[mcu_block]; + if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + jpgd_quant_t* q = m_quant[m_comp_quant[component_id]]; + + int r, s; + s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r); + if (s >= 16) + stop_decoding(JPGD_DECODE_ERROR); + + s = JPGD_HUFF_EXTEND(r, s); + + m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); + + p[0] = static_cast(s * q[0]); + + int prev_num_set = m_mcu_block_max_zag[mcu_block]; + + huff_tables* pH = m_pHuff_tabs[m_comp_ac_tab[component_id]]; + + int k; + for (k = 1; k < 64; k++) + { + int extra_bits; + s = huff_decode(pH, extra_bits); + + r = s >> 4; + s &= 15; + + if (s) + { + if (r) + { + if ((k + r) > 63) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(r, prev_num_set - k); + int kt = k; + while (n--) + p[g_ZAG[kt++]] = 0; + } + + k += r; + } + + s = JPGD_HUFF_EXTEND(extra_bits, s); + + if (k >= 64) + stop_decoding(JPGD_DECODE_ERROR); + + p[g_ZAG[k]] = static_cast(dequantize_ac(s, q[k])); //s * q[k]; + } + else + { + if (r == 15) + { + if ((k + 16) > 64) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(16, prev_num_set - k); + int kt = k; + while (n--) + { + if (kt > 63) + stop_decoding(JPGD_DECODE_ERROR); + p[g_ZAG[kt++]] = 0; + } + } + + k += 16 - 1; // - 1 because the loop counter is k + + if (p[g_ZAG[k & 63]] != 0) + stop_decoding(JPGD_DECODE_ERROR); + } + else + break; + } + } + + if (k < prev_num_set) + { + int kt = k; + while (kt < prev_num_set) + p[g_ZAG[kt++]] = 0; + } + + m_mcu_block_max_zag[mcu_block] = k; + + 
row_block++; + } + + transform_mcu(mcu_row); + + m_restarts_left--; + } + } + + // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB + void jpeg_decoder::H1V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d = m_pScan_line_0; + uint8* s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int y = s[j]; + int cb = s[64 + j]; + int cr = s[128 + j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + + s += 64 * 3; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* y = m_pSample_buf + row * 8; + uint8* c = m_pSample_buf + 2 * 64 + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 4; j++) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j << 1]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[(j << 1) + 1]; + d0[4] = clamp(yy + rc); + d0[5] = clamp(yy + gc); + d0[6] = clamp(yy + bc); + d0[7] = 255; + + d0 += 8; + + c++; + } + y += 64; + } + + y += 64 * 4 - 64 * 2; + c += 64 * 4 - 8; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 4; + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + + const int half_image_x_size = (m_image_x_size >> 1) - 1; + const int row_x8 = row * 8; + + for (int x = 0; x < m_image_x_size; x++) + { + int y = m_pSample_buf[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 
64 : 0) + (x & 7) + row_x8)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7) + row_x8 + 128; + int cb0 = m_pSample_buf[check_sample_buf_ofs(a)]; + int cr0 = m_pSample_buf[check_sample_buf_ofs(a + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7) + row_x8 + 128; + int cb1 = m_pSample_buf[check_sample_buf_ofs(b)]; + int cr1 = m_pSample_buf[check_sample_buf_ofs(b + 64)]; + + int w0 = (x & 1) ? 3 : 1; + int w1 = (x & 1) ? 1 : 3; + + int cb = (cb0 * w0 + cb1 * w1 + 2) >> 2; + int cr = (cr0 * w0 + cr1 * w1 + 2) >> 2; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y + rc); + d0[1] = clamp(y + gc); + d0[2] = clamp(y + bc); + d0[3] = 255; + + d0 += 4; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* d1 = m_pScan_line_1; + uint8* y; + uint8* c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64 * 1 + (row & 7) * 8; + + c = m_pSample_buf + 64 * 2 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int cb = c[0 + j]; + int cr = c[64 + j]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[8 + j]; + d1[0] = clamp(yy + rc); + d1[1] = clamp(yy + gc); + d1[2] = clamp(yy + bc); + d1[3] = 255; + + d0 += 4; + d1 += 4; + } + + y += 64 * 4; + c += 64 * 4; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 4; + int y = m_image_y_size - m_total_lines_left; + int row = y & 15; + + const int half_image_y_size = (m_image_y_size >> 
1) - 1; + + uint8* d0 = m_pScan_line_0; + + const int w0 = (row & 1) ? 3 : 1; + const int w1 = (row & 1) ? 1 : 3; + + int c_y0 = (y - 1) >> 1; + int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size); + + const uint8_t* p_YSamples = m_pSample_buf; + const uint8_t* p_C0Samples = m_pSample_buf; + if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1)) + { + assert(y > 0); + assert(m_sample_buf_prev_valid); + + if ((row & 15) == 15) + p_YSamples = m_pSample_buf_prev; + + p_C0Samples = m_pSample_buf_prev; + } + + const int y_sample_base_ofs = ((row & 8) ? 64 : 0) + (row & 7) * 8; + const int y0_base = (c_y0 & 7) * 8 + 128; + const int y1_base = (c_y1 & 7) * 8 + 128; + + for (int x = 0; x < m_image_x_size; x++) + { + const int base_ofs = (x >> 3) * BLOCKS_PER_MCU * 64 + (x & 7); + + int y_sample = p_YSamples[check_sample_buf_ofs(base_ofs + y_sample_base_ofs)]; + + int a = base_ofs + y0_base; + int cb0_sample = p_C0Samples[check_sample_buf_ofs(a)]; + int cr0_sample = p_C0Samples[check_sample_buf_ofs(a + 64)]; + + int b = base_ofs + y1_base; + int cb1_sample = m_pSample_buf[check_sample_buf_ofs(b)]; + int cr1_sample = m_pSample_buf[check_sample_buf_ofs(b + 64)]; + + int cb = (cb0_sample * w0 + cb1_sample * w1 + 2) >> 2; + int cr = (cr0_sample * w0 + cr1_sample * w1 + 2) >> 2; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample + rc); + d0[1] = clamp(y_sample + gc); + d0[2] = clamp(y_sample + bc); + d0[3] = 255; + + d0 += 4; + } + } + + // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB + void jpeg_decoder::H2V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* d1 = m_pScan_line_1; + uint8* y; + uint8* c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64 * 2 + (row & 7) * 8; + + c = m_pSample_buf + 64 * 4 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; 
l++) + { + for (int j = 0; j < 8; j += 2) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[j + 1]; + d0[4] = clamp(yy + rc); + d0[5] = clamp(yy + gc); + d0[6] = clamp(yy + bc); + d0[7] = 255; + + yy = y[j + 8]; + d1[0] = clamp(yy + rc); + d1[1] = clamp(yy + gc); + d1[2] = clamp(yy + bc); + d1[3] = 255; + + yy = y[j + 8 + 1]; + d1[4] = clamp(yy + rc); + d1[5] = clamp(yy + gc); + d1[6] = clamp(yy + bc); + d1[7] = 255; + + d0 += 8; + d1 += 8; + + c++; + } + y += 64; + } + + y += 64 * 6 - 64 * 2; + c += 64 * 6 - 8; + } + } + + uint32_t jpeg_decoder::H2V2ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 6; + int y = m_image_y_size - m_total_lines_left; + int row = y & 15; + + const int half_image_y_size = (m_image_y_size >> 1) - 1; + + uint8* d0 = m_pScan_line_0; + + int c_y0 = (y - 1) >> 1; + int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size); + + const uint8_t* p_YSamples = m_pSample_buf; + const uint8_t* p_C0Samples = m_pSample_buf; + if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1)) + { + assert(y > 0); + assert(m_sample_buf_prev_valid); + + if ((row & 15) == 15) + p_YSamples = m_pSample_buf_prev; + + p_C0Samples = m_pSample_buf_prev; + } + + const int y_sample_base_ofs = ((row & 8) ? 
128 : 0) + (row & 7) * 8; + const int y0_base = (c_y0 & 7) * 8 + 256; + const int y1_base = (c_y1 & 7) * 8 + 256; + + const int half_image_x_size = (m_image_x_size >> 1) - 1; + + static const uint8_t s_muls[2][2][4] = + { + { { 1, 3, 3, 9 }, { 3, 9, 1, 3 }, }, + { { 3, 1, 9, 3 }, { 9, 3, 3, 1 } } + }; + + if (((row & 15) >= 1) && ((row & 15) <= 14)) + { + assert((row & 1) == 1); + assert(((y + 1 - 1) >> 1) == c_y0); + + assert(p_YSamples == m_pSample_buf); + assert(p_C0Samples == m_pSample_buf); + + uint8* d1 = m_pScan_line_1; + const int y_sample_base_ofs1 = (((row + 1) & 8) ? 128 : 0) + ((row + 1) & 7) * 8; + + for (int x = 0; x < m_image_x_size; x++) + { + int k = (x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7); + int y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)]; + int y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7); + int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)]; + int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)]; + + int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)]; + int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7); + int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)]; + int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)]; + + int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)]; + int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)]; + + { + const uint8_t* pMuls = &s_muls[row & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + 
int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample0 + rc); + d0[1] = clamp(y_sample0 + gc); + d0[2] = clamp(y_sample0 + bc); + d0[3] = 255; + + d0 += 4; + } + + { + const uint8_t* pMuls = &s_muls[(row + 1) & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d1[0] = clamp(y_sample1 + rc); + d1[1] = clamp(y_sample1 + gc); + d1[2] = clamp(y_sample1 + bc); + d1[3] = 255; + + d1 += 4; + } + + if (((x & 1) == 1) && (x < m_image_x_size - 1)) + { + const int nx = x + 1; + assert(c_x0 == (nx - 1) >> 1); + + k = (nx >> 4) * BLOCKS_PER_MCU * 64 + ((nx & 8) ? 64 : 0) + (nx & 7); + y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)]; + y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)]; + + { + const uint8_t* pMuls = &s_muls[row & 1][nx & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample0 + rc); + d0[1] = clamp(y_sample0 + gc); + d0[2] = clamp(y_sample0 + bc); + d0[3] = 255; + + d0 += 4; + } + + { + const uint8_t* pMuls = &s_muls[(row + 1) & 1][nx & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = 
m_cbb[cb]; + + d1[0] = clamp(y_sample1 + rc); + d1[1] = clamp(y_sample1 + gc); + d1[2] = clamp(y_sample1 + bc); + d1[3] = 255; + + d1 += 4; + } + + ++x; + } + } + + return 2; + } + else + { + for (int x = 0; x < m_image_x_size; x++) + { + int y_sample = p_YSamples[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + y_sample_base_ofs)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7); + int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)]; + int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)]; + + int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)]; + int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7); + int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)]; + int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)]; + + int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)]; + int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)]; + + const uint8_t* pMuls = &s_muls[row & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample + rc); + d0[1] = clamp(y_sample + gc); + d0[2] = clamp(y_sample + bc); + d0[3] = 255; + + d0 += 4; + } + + return 1; + } + } + + // Y (1 block per MCU) to 8-bit grayscale + void jpeg_decoder::gray_convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d = m_pScan_line_0; + uint8* s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + *(uint*)d = *(uint*)s; + 
*(uint*)(&d[4]) = *(uint*)(&s[4]); + + s += 64; + d += 8; + } + } + + // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. + void jpeg_decoder::find_eoi() + { + if (!m_progressive_flag) + { + // Attempt to read the EOI marker. + //get_bits_no_markers(m_bits_left & 7); + + // Prime the bit buffer + m_bits_left = 16; + get_bits(16); + get_bits(16); + + // The next marker _should_ be EOI + process_markers(); + } + + m_total_bytes_read -= m_in_buf_left; + } + + int jpeg_decoder::decode_next_mcu_row() + { + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)); + if (chroma_y_filtering) + { + std::swap(m_pSample_buf, m_pSample_buf_prev); + + m_sample_buf_prev_valid = true; + } + + if (m_progressive_flag) + load_next_row(); + else + decode_next_row(); + + // Find the EOI marker if that was the last row. + if (m_total_lines_left <= m_max_mcu_y_size) + find_eoi(); + + m_mcu_lines_left = m_max_mcu_y_size; + return 0; + } + + int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) + { + if ((m_error_code) || (!m_ready_flag)) + return JPGD_FAILED; + + if (m_total_lines_left == 0) + return JPGD_DONE; + + const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)); + + bool get_another_mcu_row = false; + bool got_mcu_early = false; + if (chroma_y_filtering) + { + if (m_total_lines_left == m_image_y_size) + get_another_mcu_row = true; + else if ((m_mcu_lines_left == 1) && (m_total_lines_left > 1)) + { + get_another_mcu_row = true; + got_mcu_early = true; + } + } + else + { + get_another_mcu_row = (m_mcu_lines_left == 0); + } + + if (get_another_mcu_row) + { + int status = decode_next_mcu_row(); + if (status != 0) + return status; + } + + switch (m_scan_type) + { + case JPGD_YH2V2: + { + if (m_flags & 
cFlagLinearChromaFiltering) + { + if (m_num_buffered_scanlines == 1) + { + *pScan_line = m_pScan_line_1; + } + else if (m_num_buffered_scanlines == 0) + { + m_num_buffered_scanlines = H2V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + + m_num_buffered_scanlines--; + } + else + { + if ((m_mcu_lines_left & 1) == 0) + { + H2V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + } + + break; + } + case JPGD_YH2V1: + { + if (m_flags & cFlagLinearChromaFiltering) + H2V1ConvertFiltered(); + else + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if (chroma_y_filtering) + { + H1V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + else + { + if ((m_mcu_lines_left & 1) == 0) + { + H1V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + } + + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; + + break; + } + } + + *pScan_line_len = m_real_dest_bytes_per_scan_line; + + if (!got_mcu_early) + { + m_mcu_lines_left--; + } + + m_total_lines_left--; + + return JPGD_SUCCESS; + } + + // Creates the tables needed for efficient Huffman decoding. 
+ void jpeg_decoder::make_huff_table(int index, huff_tables* pH) + { + int p, i, l, si; + uint8 huffsize[258]; + uint huffcode[258]; + uint code; + uint subtree; + int code_size; + int lastp; + int nextfreeentry; + int currententry; + + pH->ac_table = m_huff_ac[index] != 0; + + p = 0; + + for (l = 1; l <= 16; l++) + { + for (i = 1; i <= m_huff_num[index][l]; i++) + { + if (p >= 257) + stop_decoding(JPGD_DECODE_ERROR); + huffsize[p++] = static_cast(l); + } + } + + assert(p < 258); + huffsize[p] = 0; + + lastp = p; + + code = 0; + si = huffsize[0]; + p = 0; + + while (huffsize[p]) + { + while (huffsize[p] == si) + { + if (p >= 257) + stop_decoding(JPGD_DECODE_ERROR); + huffcode[p++] = code; + code++; + } + + code <<= 1; + si++; + } + + memset(pH->look_up, 0, sizeof(pH->look_up)); + memset(pH->look_up2, 0, sizeof(pH->look_up2)); + memset(pH->tree, 0, sizeof(pH->tree)); + memset(pH->code_size, 0, sizeof(pH->code_size)); + + nextfreeentry = -1; + + p = 0; + + while (p < lastp) + { + i = m_huff_val[index][p]; + + code = huffcode[p]; + code_size = huffsize[p]; + + assert(i < JPGD_HUFF_CODE_SIZE_MAX_LENGTH); + pH->code_size[i] = static_cast(code_size); + + if (code_size <= 8) + { + code <<= (8 - code_size); + + for (l = 1 << (8 - code_size); l > 0; l--) + { + if (code >= 256) + stop_decoding(JPGD_DECODE_ERROR); + + pH->look_up[code] = i; + + bool has_extrabits = false; + int extra_bits = 0; + int num_extra_bits = i & 15; + + int bits_to_fetch = code_size; + if (num_extra_bits) + { + int total_codesize = code_size + num_extra_bits; + if (total_codesize <= 8) + { + has_extrabits = true; + extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize)); + + if (extra_bits > 0x7FFF) + stop_decoding(JPGD_DECODE_ERROR); + + bits_to_fetch += num_extra_bits; + } + } + + if (!has_extrabits) + pH->look_up2[code] = i | (bits_to_fetch << 8); + else + pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); + + code++; + } + } + else + { + subtree = (code 
>> (code_size - 8)) & 0xFF; + + currententry = pH->look_up[subtree]; + + if (currententry == 0) + { + pH->look_up[subtree] = currententry = nextfreeentry; + pH->look_up2[subtree] = currententry = nextfreeentry; + + nextfreeentry -= 2; + } + + code <<= (16 - (code_size - 8)); + + for (l = code_size; l > 9; l--) + { + if ((code & 0x8000) == 0) + currententry--; + + unsigned int idx = -currententry - 1; + + if (idx >= JPGD_HUFF_TREE_MAX_LENGTH) + stop_decoding(JPGD_DECODE_ERROR); + + if (pH->tree[idx] == 0) + { + pH->tree[idx] = nextfreeentry; + + currententry = nextfreeentry; + + nextfreeentry -= 2; + } + else + { + currententry = pH->tree[idx]; + } + + code <<= 1; + } + + if ((code & 0x8000) == 0) + currententry--; + + if ((-currententry - 1) >= JPGD_HUFF_TREE_MAX_LENGTH) + stop_decoding(JPGD_DECODE_ERROR); + + pH->tree[-currententry - 1] = i; + } + + p++; + } + } + + // Verifies the quantization tables needed for this scan are available. + void jpeg_decoder::check_quant_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr) + stop_decoding(JPGD_UNDEFINED_QUANT_TABLE); + } + + // Verifies that all the Huffman tables needed for this scan are available. + void jpeg_decoder::check_huff_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + { + if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + + if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + } + + for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) + if (m_huff_num[i]) + { + if (!m_pHuff_tabs[i]) + m_pHuff_tabs[i] = (huff_tables*)alloc(sizeof(huff_tables)); + + make_huff_table(i, m_pHuff_tabs[i]); + } + } + + // Determines the component order inside each MCU. + // Also calcs how many MCU's are on each row, etc. 
+ bool jpeg_decoder::calc_mcu_block_order() + { + int component_num, component_id; + int max_h_samp = 0, max_v_samp = 0; + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + if (m_comp_h_samp[component_id] > max_h_samp) + max_h_samp = m_comp_h_samp[component_id]; + + if (m_comp_v_samp[component_id] > max_v_samp) + max_v_samp = m_comp_v_samp[component_id]; + } + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8; + m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8; + } + + if (m_comps_in_scan == 1) + { + m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]]; + m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]]; + } + else + { + m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp; + m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp; + } + + if (m_comps_in_scan == 1) + { + m_mcu_org[0] = m_comp_list[0]; + + m_blocks_per_mcu = 1; + } + else + { + m_blocks_per_mcu = 0; + + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + int num_blocks; + + component_id = m_comp_list[component_num]; + + num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id]; + + while (num_blocks--) + m_mcu_org[m_blocks_per_mcu++] = component_id; + } + } + + if (m_blocks_per_mcu > m_max_blocks_per_mcu) + return false; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + int comp_id = m_mcu_org[mcu_block]; + if (comp_id >= JPGD_MAX_QUANT_TABLES) + return false; + } + + return true; + } + + // Starts a new scan. 
+ int jpeg_decoder::init_scan() + { + if (!locate_sos_marker()) + return JPGD_FALSE; + + if (!calc_mcu_block_order()) + return JPGD_FALSE; + + check_huff_tables(); + + check_quant_tables(); + + memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + if (m_restart_interval) + { + m_restarts_left = m_restart_interval; + m_next_restart_num = 0; + } + + fix_in_buffer(); + + return JPGD_TRUE; + } + + // Starts a frame. Determines if the number of components or sampling factors + // are supported. + void jpeg_decoder::init_frame() + { + int i; + + if (m_comps_in_frame == 1) + { + if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_scan_type = JPGD_GRAYSCALE; + m_max_blocks_per_mcu = 1; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if (m_comps_in_frame == 3) + { + if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || + ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1))) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH1V1; + + m_max_blocks_per_mcu = 3; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH2V1; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH1V2; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 16; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH2V2; + m_max_blocks_per_mcu = 6; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 16; + } + else + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + } + else + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size; + m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / 
m_max_mcu_y_size; + + // These values are for the *destination* pixels: after conversion. + if (m_scan_type == JPGD_GRAYSCALE) + m_dest_bytes_per_pixel = 1; + else + m_dest_bytes_per_pixel = 4; + + m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; + + m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel); + + // Initialize two scan line buffers. + m_pScan_line_0 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); + if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) + m_pScan_line_1 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); + + m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu; + + // Should never happen + if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) + stop_decoding(JPGD_DECODE_ERROR); + + // Allocate the coefficient buffer, enough for one MCU + m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t)); + + for (i = 0; i < m_max_blocks_per_mcu; i++) + m_mcu_block_max_zag[i] = 64; + + m_pSample_buf = (uint8*)alloc(m_max_blocks_per_row * 64); + m_pSample_buf_prev = (uint8*)alloc(m_max_blocks_per_row * 64); + + m_total_lines_left = m_image_y_size; + + m_mcu_lines_left = 0; + + create_look_ups(); + } + + // The coeff_buf series of methods originally stored the coefficients + // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache + // was used to make this process more efficient. Now, we can store the entire + // thing in RAM. 
+ jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) + { + coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf)); + + cb->block_num_x = block_num_x; + cb->block_num_y = block_num_y; + cb->block_len_x = block_len_x; + cb->block_len_y = block_len_y; + cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t); + cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true); + return cb; + } + + inline jpgd_block_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y) + { + if ((block_x >= cb->block_num_x) || (block_y >= cb->block_num_y)) + stop_decoding(JPGD_DECODE_ERROR); + + return (jpgd_block_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x)); + } + + // The following methods decode the various types of m_blocks encountered + // in progressively encoded images. + void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int s, r; + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) + { + if (s >= 16) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + } + + pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); + + p[0] = static_cast(s << pD->m_successive_low); + } + + void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + if (pD->get_bits_no_markers(1)) + { + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + p[0] |= (1 << pD->m_successive_low); + } + } + + void jpeg_decoder::decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int k, s, r; + + if (pD->m_eob_run) + { + pD->m_eob_run--; + return; + } + + jpgd_block_t* p = 
pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + + for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) + { + unsigned int idx = pD->m_comp_ac_tab[component_id]; + if (idx >= JPGD_MAX_HUFF_TABLES) + pD->stop_decoding(JPGD_DECODE_ERROR); + + s = pD->huff_decode(pD->m_pHuff_tabs[idx]); + + r = s >> 4; + s &= 15; + + if (s) + { + if ((k += r) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + + p[g_ZAG[k]] = static_cast(s << pD->m_successive_low); + } + else + { + if (r == 15) + { + if ((k += 15) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + } + else + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + pD->m_eob_run--; + + break; + } + } + } + } + + void jpeg_decoder::decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int s, k, r; + + int p1 = 1 << pD->m_successive_low; + + //int m1 = (-1) << pD->m_successive_low; + int m1 = static_cast((UINT32_MAX << pD->m_successive_low)); + + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + if (pD->m_spectral_end > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + k = pD->m_spectral_start; + + if (pD->m_eob_run == 0) + { + for (; k <= pD->m_spectral_end; k++) + { + unsigned int idx = pD->m_comp_ac_tab[component_id]; + if (idx >= JPGD_MAX_HUFF_TABLES) + pD->stop_decoding(JPGD_DECODE_ERROR); + + s = pD->huff_decode(pD->m_pHuff_tabs[idx]); + + r = s >> 4; + s &= 15; + + if (s) + { + if (s != 1) + pD->stop_decoding(JPGD_DECODE_ERROR); + + if (pD->get_bits_no_markers(1)) + s = p1; + else + s = m1; + } + else + { + if (r != 15) + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + break; + } + } + + do + { + jpgd_block_t* this_coef = p + g_ZAG[k & 63]; + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = 
static_cast(*this_coef + p1); + else + *this_coef = static_cast(*this_coef + m1); + } + } + } + else + { + if (--r < 0) + break; + } + + k++; + + } while (k <= pD->m_spectral_end); + + if ((s) && (k < 64)) + { + p[g_ZAG[k]] = static_cast(s); + } + } + } + + if (pD->m_eob_run > 0) + { + for (; k <= pD->m_spectral_end; k++) + { + jpgd_block_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = static_cast(*this_coef + p1); + else + *this_coef = static_cast(*this_coef + m1); + } + } + } + } + + pD->m_eob_run--; + } + } + + // Decode a scan in a progressively encoded image. + void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func) + { + int mcu_row, mcu_col, mcu_block; + int block_x_mcu[JPGD_MAX_COMPONENTS], block_y_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_y_mcu, 0, sizeof(block_y_mcu)); + + for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) + { + int component_num, component_id; + + memset(block_x_mcu, 0, sizeof(block_x_mcu)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + + decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, block_y_mcu[component_id] + block_y_mcu_ofs); + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + m_restarts_left--; + } + + if (m_comps_in_scan == 1) + block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; 
component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } + } + + // Decode a progressively encoded image. + void jpeg_decoder::init_progressive() + { + int i; + + if (m_comps_in_frame == 4) + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + // Allocate the coefficient buffers. + for (i = 0; i < m_comps_in_frame; i++) + { + m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1); + m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8); + } + + // See https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf + uint32_t total_scans = 0; + const uint32_t MAX_SCANS_TO_PROCESS = 1000; + + for (; ; ) + { + int dc_only_scan, refinement_scan; + pDecode_block_func decode_block_func; + + if (!init_scan()) + break; + + dc_only_scan = (m_spectral_start == 0); + refinement_scan = (m_successive_high != 0); + + if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if (dc_only_scan) + { + if (m_spectral_end) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + } + else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) + stop_decoding(JPGD_BAD_SOS_SUCCESSIVE); + + if (dc_only_scan) + { + if (refinement_scan) + decode_block_func = decode_block_dc_refine; + else + decode_block_func = decode_block_dc_first; + } + else + { + if (refinement_scan) + decode_block_func = decode_block_ac_refine; + else + decode_block_func = decode_block_ac_first; + } + + decode_scan(decode_block_func); + + m_bits_left = 16; + get_bits(16); + get_bits(16); + + total_scans++; + if (total_scans > MAX_SCANS_TO_PROCESS) + stop_decoding(JPGD_TOO_MANY_SCANS); + } + + m_comps_in_scan = 
m_comps_in_frame; + + for (i = 0; i < m_comps_in_frame; i++) + m_comp_list[i] = i; + + if (!calc_mcu_block_order()) + stop_decoding(JPGD_DECODE_ERROR); + } + + void jpeg_decoder::init_sequential() + { + if (!init_scan()) + stop_decoding(JPGD_UNEXPECTED_MARKER); + } + + void jpeg_decoder::decode_start() + { + init_frame(); + + if (m_progressive_flag) + init_progressive(); + else + init_sequential(); + } + + void jpeg_decoder::decode_init(jpeg_decoder_stream* pStream, uint32_t flags) + { + init(pStream, flags); + locate_sof_marker(); + } + + jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags) + { + if (setjmp(m_jmp_state)) + return; + decode_init(pStream, flags); + } + + int jpeg_decoder::begin_decoding() + { + if (m_ready_flag) + return JPGD_SUCCESS; + + if (m_error_code) + return JPGD_FAILED; + + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + decode_start(); + + m_ready_flag = true; + + return JPGD_SUCCESS; + } + + jpeg_decoder::~jpeg_decoder() + { + free_all_blocks(); + } + + jpeg_decoder_file_stream::jpeg_decoder_file_stream() + { + m_pFile = nullptr; + m_eof_flag = false; + m_error_flag = false; + } + + void jpeg_decoder_file_stream::close() + { + if (m_pFile) + { + fclose(m_pFile); + m_pFile = nullptr; + } + + m_eof_flag = false; + m_error_flag = false; + } + + jpeg_decoder_file_stream::~jpeg_decoder_file_stream() + { + close(); + } + + bool jpeg_decoder_file_stream::open(const char* Pfilename) + { + close(); + + m_eof_flag = false; + m_error_flag = false; + +#if defined(_MSC_VER) + m_pFile = nullptr; + fopen_s(&m_pFile, Pfilename, "rb"); +#else + m_pFile = fopen(Pfilename, "rb"); +#endif + return m_pFile != nullptr; + } + + int jpeg_decoder_file_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) + { + if (!m_pFile) + return -1; + + if (m_eof_flag) + { + *pEOF_flag = true; + return 0; + } + + if (m_error_flag) + return -1; + + int bytes_read = static_cast(fread(pBuf, 1, max_bytes_to_read, m_pFile)); + if (bytes_read < 
max_bytes_to_read) + { + if (ferror(m_pFile)) + { + m_error_flag = true; + return -1; + } + + m_eof_flag = true; + *pEOF_flag = true; + } + + return bytes_read; + } + + bool jpeg_decoder_mem_stream::open(const uint8* pSrc_data, uint size) + { + close(); + m_pSrc_data = pSrc_data; + m_ofs = 0; + m_size = size; + return true; + } + + int jpeg_decoder_mem_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) + { + *pEOF_flag = false; + + if (!m_pSrc_data) + return -1; + + uint bytes_remaining = m_size - m_ofs; + if ((uint)max_bytes_to_read > bytes_remaining) + { + max_bytes_to_read = bytes_remaining; + *pEOF_flag = true; + } + + memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read); + m_ofs += max_bytes_to_read; + + return max_bytes_to_read; + } + + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + if (!actual_comps) + return nullptr; + *actual_comps = 0; + + if ((!pStream) || (!width) || (!height) || (!req_comps)) + return nullptr; + + if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) + return nullptr; + + jpeg_decoder decoder(pStream, flags); + if (decoder.get_error_code() != JPGD_SUCCESS) + return nullptr; + + const int image_width = decoder.get_width(), image_height = decoder.get_height(); + *width = image_width; + *height = image_height; + *actual_comps = decoder.get_num_components(); + + if (decoder.begin_decoding() != JPGD_SUCCESS) + return nullptr; + + const int dst_bpl = image_width * req_comps; + + uint8* pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height); + if (!pImage_data) + return nullptr; + + for (int y = 0; y < image_height; y++) + { + const uint8* pScan_line; + uint scan_line_len; + if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) + { + jpgd_free(pImage_data); + return nullptr; + } + + uint8* pDst = pImage_data + y * dst_bpl; + + if (((req_comps == 1) && (decoder.get_num_components() 
== 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) + memcpy(pDst, pScan_line, dst_bpl); + else if (decoder.get_num_components() == 1) + { + if (req_comps == 3) + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst += 3; + } + } + else + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst[3] = 255; + pDst += 4; + } + } + } + else if (decoder.get_num_components() == 3) + { + if (req_comps == 1) + { + const int YR = 19595, YG = 38470, YB = 7471; + for (int x = 0; x < image_width; x++) + { + int r = pScan_line[x * 4 + 0]; + int g = pScan_line[x * 4 + 1]; + int b = pScan_line[x * 4 + 2]; + *pDst++ = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + } + } + else + { + for (int x = 0; x < image_width; x++) + { + pDst[0] = pScan_line[x * 4 + 0]; + pDst[1] = pScan_line[x * 4 + 1]; + pDst[2] = pScan_line[x * 4 + 2]; + pDst += 3; + } + } + } + } + + return pImage_data; + } + + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size); + return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps, flags); + } + + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + jpgd::jpeg_decoder_file_stream file_stream; + if (!file_stream.open(pSrc_filename)) + return nullptr; + return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps, flags); + } + +} // namespace jpgd diff --git a/src/deps/basis-universal/encoder/jpgd.h b/src/deps/basis-universal/encoder/jpgd.h new file mode 100644 index 0000000000..86a7814cae --- /dev/null +++ 
b/src/deps/basis-universal/encoder/jpgd.h @@ -0,0 +1,347 @@ +// jpgd.h - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +#ifndef JPEG_DECODER_H +#define JPEG_DECODER_H + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#define JPGD_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define JPGD_NORETURN __attribute__ ((noreturn)) +#else +#define JPGD_NORETURN +#endif + +#define JPGD_HUFF_TREE_MAX_LENGTH 512 +#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256 + +namespace jpgd +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef signed int int32; + + // Loads a JPEG image from a memory buffer or a file. + // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). + // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). + // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. + // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + // Success/failure error codes. 
+ enum jpgd_status + { + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS + }; + + // Input stream interface. + // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. + // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. + // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. + // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. + class jpeg_decoder_stream + { + public: + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } + + // The read() method is called when the internal input buffer is empty. + // Parameters: + // pBuf - input buffer + // max_bytes_to_read - maximum bytes that can be written to pBuf + // pEOF_flag - set this to true if at end of stream (no more bytes remaining) + // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). 
+ // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0; + }; + + // stdio FILE stream class. + class jpeg_decoder_file_stream : public jpeg_decoder_stream + { + jpeg_decoder_file_stream(const jpeg_decoder_file_stream&); + jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&); + + FILE* m_pFile; + bool m_eof_flag, m_error_flag; + + public: + jpeg_decoder_file_stream(); + virtual ~jpeg_decoder_file_stream(); + + bool open(const char* Pfilename); + void close(); + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Memory stream class. + class jpeg_decoder_mem_stream : public jpeg_decoder_stream + { + const uint8* m_pSrc_data; + uint m_ofs, m_size; + + public: + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } + + virtual ~jpeg_decoder_mem_stream() { } + + bool open(const uint8* pSrc_data, uint size); + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Loads JPEG file from a jpeg_decoder_stream. + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + enum + { + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768 + }; + + typedef int16 jpgd_quant_t; + typedef int16 jpgd_block_t; + + class jpeg_decoder + { + public: + enum + { + cFlagLinearChromaFiltering = 1 + }; + + // Call get_error_code() after constructing to determine if the stream is valid or not. 
You may call the get_width(), get_height(), etc. + // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. + jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = cFlagLinearChromaFiltering); + + ~jpeg_decoder(); + + // Call this method after constructing the object to begin decompression. + // If JPGD_SUCCESS is returned you may then call decode() on each scanline. + + int begin_decoding(); + + // Returns the next scan line. + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). + // Returns JPGD_SUCCESS if a scan line has been returned. + // Returns JPGD_DONE if all scan lines have been returned. + // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. + int decode(const void** pScan_line, uint* pScan_line_len); + + inline jpgd_status get_error_code() const { return m_error_code; } + + inline int get_width() const { return m_image_x_size; } + inline int get_height() const { return m_image_y_size; } + + inline int get_num_components() const { return m_comps_in_frame; } + + inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } + inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } + + // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). 
+ inline int get_total_bytes_read() const { return m_total_bytes_read; } + + private: + jpeg_decoder(const jpeg_decoder&); + jpeg_decoder& operator =(const jpeg_decoder&); + + typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int); + + struct huff_tables + { + bool ac_table; + uint look_up[256]; + uint look_up2[256]; + uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH]; + uint tree[JPGD_HUFF_TREE_MAX_LENGTH]; + }; + + struct coeff_buf + { + uint8* pData; + int block_num_x, block_num_y; + int block_len_x, block_len_y; + int block_size; + }; + + struct mem_block + { + mem_block* m_pNext; + size_t m_used_count; + size_t m_size; + char m_data[1]; + }; + + jmp_buf m_jmp_state; + uint32_t m_flags; + mem_block* m_pMem_blocks; + int m_image_x_size; + int m_image_y_size; + jpeg_decoder_stream* m_pStream; + + int m_progressive_flag; + + uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; + uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size + uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size + jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables + int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) + int m_comps_in_frame; // # of components in frame + int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor + int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor + int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector + int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID + int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; + int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int 
m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. Y size in pixels + int m_blocks_per_mcu; + int m_max_blocks_per_row; + int m_mcus_per_row, m_mcus_per_col; + int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU + int m_num_buffered_scanlines; + int m_real_dest_bytes_per_scan_line; + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; + coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; + coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; + int m_eob_run; + int m_block_y_mcu[JPGD_MAX_COMPONENTS]; + uint8* m_pIn_buf_ofs; + int m_in_buf_left; + int m_tem_flag; + + uint8 m_in_buf_pad_start[64]; + uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; + uint8 m_in_buf_pad_end[64]; + + int m_bits_left; + uint m_bit_buf; + int m_restart_interval; + int m_restarts_left; + int m_next_restart_num; + int m_max_mcus_per_row; + int m_max_blocks_per_mcu; + + int m_max_mcus_per_col; + uint m_last_dc_val[JPGD_MAX_COMPONENTS]; + jpgd_block_t* m_pMCU_coefficients; + int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; + uint8* m_pSample_buf; + uint8* m_pSample_buf_prev; + int m_crr[256]; + int m_cbb[256]; + int m_crg[256]; + int m_cbg[256]; + uint8* m_pScan_line_0; + uint8* m_pScan_line_1; + jpgd_status m_error_code; + int m_total_bytes_read; + + bool m_ready_flag; + bool m_eof_flag; + bool m_sample_buf_prev_valid; + + inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; } + void free_all_blocks(); + JPGD_NORETURN void stop_decoding(jpgd_status status); + void* alloc(size_t n, bool zero = false); + void 
word_clear(void* p, uint16 c, uint n); + void prep_in_buffer(); + void read_dht_marker(); + void read_dqt_marker(); + void read_sof_marker(); + void skip_variable_marker(); + void read_dri_marker(); + void read_sos_marker(); + int next_marker(); + int process_markers(); + void locate_soi_marker(); + void locate_sof_marker(); + int locate_sos_marker(); + void init(jpeg_decoder_stream* pStream, uint32_t flags); + void create_look_ups(); + void fix_in_buffer(); + void transform_mcu(int mcu_row); + coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); + inline jpgd_block_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y); + void load_next_row(); + void decode_next_row(); + void make_huff_table(int index, huff_tables* pH); + void check_quant_tables(); + void check_huff_tables(); + bool calc_mcu_block_order(); + int init_scan(); + void init_frame(); + void process_restart(); + void decode_scan(pDecode_block_func decode_block_func); + void init_progressive(); + void init_sequential(); + void decode_start(); + void decode_init(jpeg_decoder_stream* pStream, uint32_t flags); + void H2V2Convert(); + uint32_t H2V2ConvertFiltered(); + void H2V1Convert(); + void H2V1ConvertFiltered(); + void H1V2Convert(); + void H1V2ConvertFiltered(); + void H1V1Convert(); + void gray_convert(); + void find_eoi(); + inline uint get_char(); + inline uint get_char(bool* pPadding_flag); + inline void stuff_char(uint8 q); + inline uint8 get_octet(); + inline uint get_bits(int num_bits); + inline uint get_bits_no_markers(int numbits); + inline int huff_decode(huff_tables* pH); + inline int huff_decode(huff_tables* pH, int& extrabits); + + // Clamps a value between 0-255. 
+ static inline uint8 clamp(int i) + { + if (static_cast(i) > 255) + i = (((~i) >> 31) & 0xFF); + return static_cast(i); + } + int decode_next_mcu_row(); + + static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + }; + +} // namespace jpgd + +#endif // JPEG_DECODER_H diff --git a/src/deps/basis-universal/lodepng.cpp b/src/deps/basis-universal/encoder/lodepng.cpp similarity index 99% rename from src/deps/basis-universal/lodepng.cpp rename to src/deps/basis-universal/encoder/lodepng.cpp index cf964d0555..63adcf49b6 100644 --- a/src/deps/basis-universal/lodepng.cpp +++ b/src/deps/basis-universal/encoder/lodepng.cpp @@ -29,6 +29,7 @@ Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for */ #ifdef _MSC_VER +#define _CRT_SECURE_NO_DEPRECATE #pragma warning (disable : 4201) #ifndef BASISU_NO_ITERATOR_DEBUG_LEVEL @@ -200,6 +201,7 @@ static void uivector_init(uivector* p) { /*returns 1 if success, 0 if failure ==> nothing done*/ static unsigned uivector_push_back(uivector* p, unsigned c) { if(!uivector_resize(p, p->size + 1)) return 0; + if (!p->data) return 0; p->data[p->size - 1] = c; return 1; } diff --git a/src/deps/basis-universal/lodepng.h b/src/deps/basis-universal/encoder/lodepng.h similarity index 100% rename from src/deps/basis-universal/lodepng.h rename to src/deps/basis-universal/encoder/lodepng.h diff --git a/src/deps/basis-universal/transcoder/basisu.h b/src/deps/basis-universal/transcoder/basisu.h index 25600a69bf..f33baf67c8 100644 --- a/src/deps/basis-universal/transcoder/basisu.h +++ b/src/deps/basis-universal/transcoder/basisu.h @@ -1,5 +1,5 @@ // basisu.h -// Copyright (C) 2019 Binomial LLC. 
All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,10 +41,6 @@ #endif #endif // defined(_DEBUG) || defined(DEBUG) - #ifndef NOMINMAX - #define NOMINMAX - #endif - #endif // BASISU_NO_ITERATOR_DEBUG_LEVEL #endif // _MSC_VER @@ -63,10 +59,11 @@ #include #include #include -#include #include #include +#include "basisu_containers.h" + #ifdef max #undef max #endif @@ -79,20 +76,20 @@ #define strcasecmp _stricmp #endif -// Set to one to enable debug printf()'s when any errors occur, for development/debugging. -#ifndef BASISU_DEVEL_MESSAGES -#define BASISU_DEVEL_MESSAGES 0 +// Set to one to enable debug printf()'s when any errors occur, for development/debugging. Especially useful for WebGL development. +#ifndef BASISU_FORCE_DEVEL_MESSAGES +#define BASISU_FORCE_DEVEL_MESSAGES 0 #endif #define BASISU_NOTE_UNUSED(x) (void)(x) #define BASISU_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(x) x(const x &) = delete; x& operator= (const x &) = delete; #define BASISU_ASSUME(x) static_assert(x, #x); -#define BASISU_OFFSETOF(s, m) (uint32_t)(intptr_t)(&((s *)(0))->m) +#define BASISU_OFFSETOF(s, m) offsetof(s, m) #define BASISU_STRINGIZE(x) #x #define BASISU_STRINGIZE2(x) BASISU_STRINGIZE(x) -#if BASISU_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES #define BASISU_DEVEL_ERROR(...) do { basisu::debug_printf(__VA_ARGS__); } while(0) #else #define BASISU_DEVEL_ERROR(...) 
@@ -108,26 +105,43 @@ namespace basisu const char BASISU_PATH_SEPERATOR_CHAR = '/'; #endif - typedef std::vector uint8_vec; - typedef std::vector int16_vec; - typedef std::vector uint16_vec; - typedef std::vector uint_vec; - typedef std::vector uint64_vec; - typedef std::vector int_vec; - typedef std::vector bool_vec; + typedef basisu::vector uint8_vec; + typedef basisu::vector int16_vec; + typedef basisu::vector uint16_vec; + typedef basisu::vector uint_vec; + typedef basisu::vector uint64_vec; + typedef basisu::vector int_vec; + typedef basisu::vector bool_vec; void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); + template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } template inline S maximum(S a, S b) { return (a > b) ? a : b; } template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } template inline S minimum(S a, S b) { return (a < b) ? a : b; } template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } + template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + + inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + inline float saturate(float value) { return clampf(value, 0, 1.0f); } + inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } + inline uint32_t minimumu(uint32_t a, uint32_t b) { return (a < b) ? a : b; } + inline int32_t minimumi(int32_t a, int32_t b) { return (a < b) ? a : b; } + inline float minimumf(float a, float b) { return (a < b) ? a : b; } + inline uint8_t maximumub(uint8_t a, uint8_t b) { return (a > b) ? a : b; } + inline uint32_t maximumu(uint32_t a, uint32_t b) { return (a > b) ? 
a : b; } + inline int32_t maximumi(int32_t a, int32_t b) { return (a > b) ? a : b; } + inline float maximumf(float a, float b) { return (a > b) ? a : b; } + inline int squarei(int i) { return i * i; } + inline float squaref(float i) { return i * i; } + template inline T square(T a) { return a * a; } template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } @@ -137,12 +151,10 @@ namespace basisu template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } - template inline S square(S val) { return val * val; } - inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } inline bool is_pow2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } - template inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); return v; } + template inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } template inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; } inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; } @@ -244,27 +256,92 @@ namespace basisu if ((ha <= lb) || (la >= hb)) return false; return true; } + + static inline uint32_t read_le_dword(const uint8_t *pBytes) + { + return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]); + } + + static inline void write_le_dword(uint8_t* pBytes, uint32_t val) + { + pBytes[0] = (uint8_t)val; + pBytes[1] = (uint8_t)(val >> 8U); + pBytes[2] = (uint8_t)(val >> 16U); + pBytes[3] = (uint8_t)(val >> 24U); + } - // Always little endian 2-4 byte unsigned int + // Always little endian 1-8 byte unsigned int template struct packed_uint { uint8_t m_bytes[NumBytes]; - inline packed_uint() { 
static_assert(NumBytes <= 4, "NumBytes <= 4"); } - inline packed_uint(uint32_t v) { *this = v; } + inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } + inline packed_uint(uint64_t v) { *this = v; } inline packed_uint(const packed_uint& other) { *this = other; } + + inline packed_uint& operator= (uint64_t v) + { + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; + } - inline packed_uint& operator= (uint32_t v) { for (uint32_t i = 0; i < NumBytes; i++) m_bytes[i] = static_cast(v >> (i * 8)); return *this; } + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + return *this; + } inline operator uint32_t() const { switch (NumBytes) { - case 1: return m_bytes[0]; - case 2: return (m_bytes[1] << 8U) | m_bytes[0]; - case 3: return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | (m_bytes[0]); - default: return (m_bytes[3] << 24U) | (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | (m_bytes[0]); + case 1: + { + return m_bytes[0]; + } + case 2: + { + return (m_bytes[1] << 8U) | m_bytes[0]; + } + case 3: + { + return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; + } + case 4: + { + return read_le_dword(m_bytes); + } + case 5: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + case 6: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + case 7: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + case 8: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = read_le_dword(m_bytes + 4); + return static_cast(l) | (static_cast(h) << 32U); + } + default: + { + assert(0); + return 0; + } } } }; @@ -278,7 +355,7 @@ namespace basisu enum { 
cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, - cHuffmanFastLookupBits = 10, cHuffmanFastLookupSize = 1 << cHuffmanFastLookupBits, + cHuffmanFastLookupBits = 10, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs @@ -308,15 +385,15 @@ namespace basisu // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) - cETC2_RGB, // ETC2 color block - cETC2_RGBA, // ETC2 alpha block followed by ETC2 color block + cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) + cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 - cBC3, // DXT5 (DXT5A block followed by a DXT1 block) + cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A - cBC5, // 3DC/DXN (two DXT5A blocks) + cBC5, // 3DC/DXN (two BC4/DXT5A blocks) cBC7, - cASTC4x4, + cASTC4x4, // LDR only cPVRTC1_4_RGB, cPVRTC1_4_RGBA, cATC_RGB, @@ -325,6 +402,9 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, + cUASTC4x4, + cBC1_NV, + cBC1_AMD, // Uncompressed/raw pixels cRGBA32, @@ -343,6 +423,8 @@ namespace basisu case texture_format::cETC2_RGB: case texture_format::cETC2_ALPHA: case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: case texture_format::cBC4: case texture_format::cPVRTC1_4_RGB: case texture_format::cPVRTC1_4_RGBA: diff --git a/src/deps/basis-universal/transcoder/basisu_containers.h b/src/deps/basis-universal/transcoder/basisu_containers.h new file mode 100644 index 0000000000..1ca4bab307 --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_containers.h @@ -0,0 +1,1908 @@ +// basisu_containers.h +#pragma once +#include +#include +#include +#include +#include + +#if defined(__linux__) && !defined(ANDROID) +// Only for malloc_usable_size() in basisu_containers_impl.h +#include +#define HAS_MALLOC_USABLE_SIZE 1 +#endif + 
+#ifdef _MSC_VER +#define BASISU_FORCE_INLINE __forceinline +#else +#define BASISU_FORCE_INLINE inline +#endif + +namespace basisu +{ + enum { cInvalidIndex = -1 }; + + namespace helpers + { + inline bool is_power_of_2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_power_of_2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + template const T& minimum(const T& a, const T& b) { return (b < a) ? b : a; } + template const T& maximum(const T& a, const T& b) { return (a < b) ? b : a; } + + inline uint32_t floor_log2i(uint32_t v) + { + uint32_t l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint32_t next_pow2(uint32_t val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint64_t next_pow2(uint64_t val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + } // namespace helpers + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + inline void construct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T; + } + + template + inline void construct_array(T* p, size_t n, const U& init) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { + (void)p; + p->~T(); + } + + template inline void destruct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + p->~T(); + } + + template struct int_traits { enum { cMin = INT32_MIN, cMax = INT32_MAX, cSigned = true }; }; + + template<> struct int_traits { enum { cMin = INT8_MIN, cMax = INT8_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = INT16_MIN, cMax = INT16_MAX, cSigned = true }; }; + template<> struct 
int_traits { enum { cMin = INT32_MIN, cMax = INT32_MAX, cSigned = true }; }; + + template<> struct int_traits { enum { cMin = 0, cMax = UINT8_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = UINT16_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = UINT32_MAX, cSigned = false }; }; + + template + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { basisu::construct(p); } + static inline void construct(T* p, const T& init) { basisu::construct(p, init); } + static inline void construct_array(T* p, size_t n) { basisu::construct_array(p, n); } + static inline void destruct(T* p) { basisu::destruct(p); } + static inline void destruct_array(T* p, size_t n) { basisu::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, size_t n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { p; } + static inline void destruct_array(T** p, size_t n) { p, n; } + }; + +#define BASISU_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, size_t n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { p; } \ + static inline void destruct_array(X* p, size_t n) { p, n; } }; + + BASISU_DEFINE_BUILT_IN_TYPE(bool) + BASISU_DEFINE_BUILT_IN_TYPE(char) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned char) + BASISU_DEFINE_BUILT_IN_TYPE(short) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned short) + BASISU_DEFINE_BUILT_IN_TYPE(int) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned int) + BASISU_DEFINE_BUILT_IN_TYPE(long) + 
BASISU_DEFINE_BUILT_IN_TYPE(unsigned long) +#ifdef __GNUC__ + BASISU_DEFINE_BUILT_IN_TYPE(long long) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned long long) +#else + BASISU_DEFINE_BUILT_IN_TYPE(__int64) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned __int64) +#endif + BASISU_DEFINE_BUILT_IN_TYPE(float) + BASISU_DEFINE_BUILT_IN_TYPE(double) + BASISU_DEFINE_BUILT_IN_TYPE(long double) + +#undef BASISU_DEFINE_BUILT_IN_TYPE + + template + struct bitwise_movable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + template + struct bitwise_copyable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable { enum { cFlag = true }; }; + +#define BASISU_IS_POD(T) __is_pod(T) + +#define BASISU_IS_SCALAR_TYPE(T) (scalar_type::cFlag) + +#if defined(__GNUC__) && __GNUC__<5 + #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#else + #define BASISU_IS_TRIVIALLY_COPYABLE(...) 
std::is_trivially_copyable<__VA_ARGS__>::value +#endif + +// TODO: clean this up +#define BASISU_IS_BITWISE_COPYABLE(T) (BASISU_IS_SCALAR_TYPE(T) || BASISU_IS_POD(T) || BASISU_IS_TRIVIALLY_COPYABLE(T) || (bitwise_copyable::cFlag)) + +#define BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) (BASISU_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) + +#define BASISU_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T))) + + typedef char(&yes_t)[1]; + typedef char(&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + + struct empty_type { }; + + BASISU_DEFINE_BITWISE_COPYABLE(empty_type); + BASISU_DEFINE_BITWISE_MOVABLE(empty_type); + + template struct rel_ops + { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + struct elemental_vector + { + void* m_p; + uint32_t m_size; + uint32_t m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint32_t num); + + bool increase_capacity(uint32_t min_new_capacity, bool grow_hint, uint32_t element_size, object_mover pRelocate, bool nofail); + }; + + template + class vector : public rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + m_size(0), + m_capacity(0) + { + } + + inline vector(uint32_t n, const T& init) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + 
construct_array(m_p, n, init); + m_size = n; + } + + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = other.m_size; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32_t i = m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + } + + inline explicit vector(size_t size) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + resize(size); + } + + inline ~vector() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + free(m_p); + } + } + + inline vector& operator= (const vector& other) + { + if (this == &other) + return *this; + + if (m_capacity >= other.m_size) + resize(0); + else + { + clear(); + increase_capacity(other.m_size, false); + } + + if (BASISU_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32_t i = other.m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return *this; + } + + BASISU_FORCE_INLINE const T* begin() const { return m_p; } + BASISU_FORCE_INLINE T* begin() { return m_p; } + + BASISU_FORCE_INLINE const T* end() const { return m_p + m_size; } + BASISU_FORCE_INLINE T* end() { return m_p + m_size; } + + BASISU_FORCE_INLINE bool empty() const { return !m_size; } + BASISU_FORCE_INLINE uint32_t size() const { return m_size; } + BASISU_FORCE_INLINE uint32_t size_in_bytes() const { return m_size * sizeof(T); } + BASISU_FORCE_INLINE uint32_t capacity() const { return m_capacity; } + + // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. 
+ //BASISU_FORCE_INLINE const T& operator[] (uint32_t i) const { assert(i < m_size); return m_p[i]; } + //BASISU_FORCE_INLINE T& operator[] (uint32_t i) { assert(i < m_size); return m_p[i]; } + + BASISU_FORCE_INLINE const T& operator[] (size_t i) const { assert(i < m_size); return m_p[i]; } + BASISU_FORCE_INLINE T& operator[] (size_t i) { assert(i < m_size); return m_p[i]; } + + // at() always includes range checking, even in final builds, unlike operator []. + // The first element is returned if the index is out of range. + BASISU_FORCE_INLINE const T& at(size_t i) const { assert(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } + BASISU_FORCE_INLINE T& at(size_t i) { assert(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } + + BASISU_FORCE_INLINE const T& front() const { assert(m_size); return m_p[0]; } + BASISU_FORCE_INLINE T& front() { assert(m_size); return m_p[0]; } + + BASISU_FORCE_INLINE const T& back() const { assert(m_size); return m_p[m_size - 1]; } + BASISU_FORCE_INLINE T& back() { assert(m_size); return m_p[m_size - 1]; } + + BASISU_FORCE_INLINE const T* get_ptr() const { return m_p; } + BASISU_FORCE_INLINE T* get_ptr() { return m_p; } + + BASISU_FORCE_INLINE const T* data() const { return m_p; } + BASISU_FORCE_INLINE T* data() { return m_p; } + + // clear() sets the container to empty, then frees the allocated block. 
+ inline void clear() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void clear_no_destruction() + { + if (m_p) + { + free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void reserve(size_t new_capacity_size_t) + { + if (new_capacity_size_t > UINT32_MAX) + { + assert(0); + return; + } + + uint32_t new_capacity = (uint32_t)new_capacity_size_t; + + if (new_capacity > m_capacity) + increase_capacity(new_capacity, false); + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false); + tmp = *this; + swap(tmp); + } + } + + inline bool try_reserve(size_t new_capacity_size_t) + { + if (new_capacity_size_t > UINT32_MAX) + { + assert(0); + return false; + } + + uint32_t new_capacity = (uint32_t)new_capacity_size_t; + + if (new_capacity > m_capacity) + { + if (!increase_capacity(new_capacity, false)) + return false; + } + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false); + tmp = *this; + swap(tmp); + } + + return true; + } + + // resize(0) sets the container to empty, but does not free the allocated block. 
+ inline void resize(size_t new_size_size_t, bool grow_hint = false) + { + if (new_size_size_t > UINT32_MAX) + { + assert(0); + return; + } + + uint32_t new_size = (uint32_t)new_size_size_t; + + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint); + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + } + + inline bool try_resize(size_t new_size_size_t, bool grow_hint = false) + { + if (new_size_size_t > UINT32_MAX) + { + assert(0); + return false; + } + + uint32_t new_size = (uint32_t)new_size_size_t; + + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). + // Otherwise it blows away the allocated block. 
See http://www.codercorner.com/blog/?p=494 + inline void reset() + { + if (m_size >= (m_capacity >> 1)) + resize(0); + else + clear(); + } + + inline T* enlarge(uint32_t i) + { + uint32_t cur_size = m_size; + resize(cur_size + i, true); + return get_ptr() + cur_size; + } + + inline T* try_enlarge(uint32_t i) + { + uint32_t cur_size = m_size; + if (!try_resize(cur_size + i, true)) + return NULL; + return get_ptr() + cur_size; + } + + BASISU_FORCE_INLINE void push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + increase_capacity(m_size + 1, true); + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + inline bool try_push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void push_back_value(T obj) + { + if (m_size >= m_capacity) + increase_capacity(m_size + 1, true); + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + inline void pop_back() + { + assert(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline void insert(uint32_t index, const T* p, uint32_t n) + { + assert(index <= m_size); + if (!n) + return; + + const uint32_t orig_size = m_size; + resize(m_size + n, true); + + const uint32_t num_to_move = orig_size - index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction. 
+ memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + } + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + for (uint32_t i = 0; i < num_to_move; i++) + { + assert((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + } + + T* pDst = m_p + index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction. + memcpy(pDst, p, sizeof(T) * n); + } + else + { + for (uint32_t i = 0; i < n; i++) + { + assert((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + } + + inline void insert(T* p, const T& obj) + { + int64_t ofs = p - begin(); + if ((ofs < 0) || (ofs > UINT32_MAX)) + { + assert(0); + return; + } + + insert((uint32_t)ofs, &obj, 1); + } + + // push_front() isn't going to be very fast - it's only here for usability. + inline void push_front(const T& obj) + { + insert(0, &obj, 1); + } + + vector& append(const vector& other) + { + if (other.m_size) + insert(m_size, &other[0], other.m_size); + return *this; + } + + vector& append(const T* p, uint32_t n) + { + if (n) + insert(m_size, p, n); + return *this; + } + + inline void erase(uint32_t start, uint32_t n) + { + assert((start + n) <= m_size); + if ((start + n) > m_size) + return; + + if (!n) + return; + + const uint32_t num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + { + // This test is overly cautious. + if ((!BASISU_IS_BITWISE_COPYABLE(T)) || (BASISU_HAS_DESTRUCTOR(T))) + { + // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. + // First destroy the erased objects. + scalar_type::destruct_array(pDst, n); + } + + // Copy "down" the objects to preserve, filling in the empty slots. 
+ memmove(pDst, pSrc, num_to_move * sizeof(T)); + } + else + { + // Type is not bitwise copyable or movable. + // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. + T* pDst_end = pDst + num_to_move; + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + scalar_type::destruct_array(pDst_end, n); + } + + m_size -= n; + } + + inline void erase(uint32_t index) + { + erase(index, 1); + } + + inline void erase(T* p) + { + assert((p >= m_p) && (p < (m_p + m_size))); + erase(static_cast(p - m_p)); + } + + inline void erase(T *pFirst, T *pEnd) + { + assert(pFirst <= pEnd); + assert(pFirst >= begin() && pFirst <= end()); + assert(pEnd >= begin() && pEnd <= end()); + + int64_t ofs = pFirst - begin(); + if ((ofs < 0) || (ofs > UINT32_MAX)) + { + assert(0); + return; + } + + int64_t n = pEnd - pFirst; + if ((n < 0) || (n > UINT32_MAX)) + { + assert(0); + return; + } + + erase((uint32_t)ofs, (uint32_t)n); + } + + void erase_unordered(uint32_t index) + { + assert(index < m_size); + + if ((index + 1) < m_size) + (*this)[index] = back(); + + pop_back(); + } + + inline bool operator== (const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint32_t i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const uint32_t min_size = helpers::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + inline void swap(vector& other) + { + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, 
other.m_capacity); + } + + inline void sort() + { + std::sort(begin(), end()); + } + + inline void unique() + { + if (!empty()) + { + sort(); + + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + uint32_t j = m_size >> 1; + for (uint32_t i = 0; i < j; i++) + std::swap(m_p[i], m_p[m_size - 1 - i]); + } + + inline int find(const T& key) const + { + const T* p = m_p; + const T* p_end = m_p + m_size; + + uint32_t index = 0; + + while (p != p_end) + { + if (key == *p) + return index; + + p++; + index++; + } + + return cInvalidIndex; + } + + inline int find_sorted(const T& key) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. + int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for (; ; ) + { + assert(i >= 0 && i < (int)m_size); + const T* pKey_i = m_p + i; + int cmp = key < *pKey_i; +#if defined(_DEBUG) || defined(DEBUG) + int cmp2 = *pKey_i < key; + assert((cmp != cmp2) || (key == *pKey_i)); +#endif + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + + assert(i >= 0 && i < (int)m_size); + pKey_i = m_p + i; + cmp = key < *pKey_i; +#if defined(_DEBUG) || defined(DEBUG) + cmp2 = *pKey_i < key; + assert((cmp != cmp2) || (key == *pKey_i)); +#endif + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + } + } + + return cInvalidIndex; + } + + template + inline int find_sorted(const T& key, Q less_than) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. 
+ int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for (; ; ) + { + assert(i >= 0 && i < (int)m_size); + const T* pKey_i = m_p + i; + int cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + + assert(i >= 0 && i < (int)m_size); + pKey_i = m_p + i; + cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + } + } + + return cInvalidIndex; + } + + inline uint32_t count_occurences(const T& key) const + { + uint32_t c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + c++; + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type::cFlag)) + memset(m_p, *reinterpret_cast(&o), m_size); + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + *pDst++ = o; + } + } + + // Caller assumes ownership of the heap block associated with the container. Container is cleared. + inline void* assume_ownership() + { + T* p = m_p; + m_p = NULL; + m_size = 0; + m_capacity = 0; + return p; + } + + // Caller is granting ownership of the indicated heap block. + // Block must have size constructed elements, and have enough room for capacity elements. + inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity) + { + // To to prevent the caller from obviously shooting themselves in the foot. + if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) + { + // Can grant ownership of a block inside the container itself! 
+ assert(0); + return false; + } + + if (size > capacity) + { + assert(0); + return false; + } + + if (!p) + { + if (capacity) + { + assert(0); + return false; + } + } + else if (!capacity) + { + assert(0); + return false; + } + + clear(); + m_p = p; + m_size = size; + m_capacity = capacity; + return true; + } + + private: + T* m_p; + uint32_t m_size; + uint32_t m_capacity; + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint32_t num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + // placement new + new (static_cast(pDst)) T(*pSrc); + pSrc->~T(); + ++pSrc; + ++pDst; + } + } + + inline bool increase_capacity(uint32_t min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? NULL : object_mover, nofail); + } + }; + + template struct bitwise_movable< vector > { enum { cFlag = true }; }; + + // Hash map + + template + struct hasher + { + inline size_t operator() (const T& key) const { return static_cast(key); } + }; + + template + struct equal_to + { + inline bool operator()(const T& a, const T& b) const { return a == b; } + }; + + // Important: The Hasher and Equals objects must be bitwise movable! 
+ template, typename Equals = equal_to > + class hash_map + { + public: + class iterator; + class const_iterator; + + private: + friend class iterator; + friend class const_iterator; + + enum state + { + cStateInvalid = 0, + cStateValid = 1 + }; + + enum + { + cMinHashSize = 4U + }; + + public: + typedef hash_map hash_map_type; + typedef std::pair value_type; + typedef Key key_type; + typedef Value referent_type; + typedef Hasher hasher_type; + typedef Equals equals_type; + + hash_map() : + m_hash_shift(32), m_num_valid(0), m_grow_threshold(0) + { + } + + hash_map(const hash_map& other) : + m_values(other.m_values), + m_hash_shift(other.m_hash_shift), + m_hasher(other.m_hasher), + m_equals(other.m_equals), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold) + { + } + + hash_map& operator= (const hash_map& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = other.m_values; + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + m_grow_threshold = other.m_grow_threshold; + m_hasher = other.m_hasher; + m_equals = other.m_equals; + + return *this; + } + + inline ~hash_map() + { + clear(); + } + + const Equals& get_equals() const { return m_equals; } + Equals& get_equals() { return m_equals; } + + void set_equals(const Equals& equals) { m_equals = equals; } + + const Hasher& get_hasher() const { return m_hasher; } + Hasher& get_hasher() { return m_hasher; } + + void set_hasher(const Hasher& hasher) { m_hasher = hasher; } + + inline void clear() + { + if (!m_values.empty()) + { + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint32_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_values.clear_no_destruction(); + + m_hash_shift = 32; + m_num_valid = 0; + m_grow_threshold = 0; + } + } + + 
inline void reset() + { + if (!m_num_valid) + return; + + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint32_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + else if (sizeof(node) <= 32) + { + memset(&m_values[0], 0, m_values.size_in_bytes()); + } + else + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint32_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_num_valid = 0; + } + + inline uint32_t size() + { + return m_num_valid; + } + + inline uint32_t get_table_size() + { + return m_values.size(); + } + + inline bool empty() + { + return !m_num_valid; + } + + inline void reserve(uint32_t new_capacity) + { + uint64_t new_hash_size = helpers::maximum(1U, new_capacity); + + new_hash_size = new_hash_size * 2ULL; + + if (!helpers::is_power_of_2(new_hash_size)) + new_hash_size = helpers::next_pow2(new_hash_size); + + new_hash_size = helpers::maximum(cMinHashSize, new_hash_size); + + new_hash_size = helpers::minimum(0x80000000UL, new_hash_size); + + if (new_hash_size > m_values.size()) + rehash((uint32_t)new_hash_size); + } + + class iterator + { + friend class hash_map; + friend class hash_map::const_iterator; + + public: + inline iterator() : m_pTable(NULL), m_index(0) { } + inline iterator(hash_map_type& table, uint32_t index) : m_pTable(&table), m_index(index) { } + inline iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline iterator operator++(int) + { + iterator 
result(*this); + ++*this; + return result; + } + + // pre-increment + inline iterator& operator++() + { + probe(); + return *this; + } + + inline value_type& operator*() const { return *get_cur(); } + inline value_type* operator->() const { return get_cur(); } + + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + + private: + hash_map_type* m_pTable; + uint32_t m_index; + + inline value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + class const_iterator + { + friend class hash_map; + friend class hash_map::iterator; + + public: + inline const_iterator() : m_pTable(NULL), m_index(0) { } + inline const_iterator(const hash_map_type& table, uint32_t index) : m_pTable(&table), m_index(index) { } + inline const_iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + inline const_iterator(const const_iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline const_iterator& operator= (const const_iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + inline const_iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline const_iterator operator++(int) + { + const_iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline const_iterator& operator++() + { + probe(); + 
return *this; + } + + inline const value_type& operator*() const { return *get_cur(); } + inline const value_type* operator->() const { return get_cur(); } + + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + + private: + const hash_map_type* m_pTable; + uint32_t m_index; + + inline const value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + inline const_iterator begin() const + { + if (!m_num_valid) + return end(); + + return const_iterator(*this, find_next(UINT32_MAX)); + } + + inline const_iterator end() const + { + return const_iterator(*this, m_values.size()); + } + + inline iterator begin() + { + if (!m_num_valid) + return end(); + + return iterator(*this, find_next(UINT32_MAX)); + } + + inline iterator end() + { + return iterator(*this, m_values.size()); + } + + // insert_result.first will always point to inserted key/value (or the already existing key/value). + // insert_resutt.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). + typedef std::pair insert_result; + + inline insert_result insert(const Key& k, const Value& v = Value()) + { + insert_result result; + if (!insert_no_grow(result, k, v)) + { + grow(); + + // This must succeed. 
+ if (!insert_no_grow(result, k, v)) + { + fprintf(stderr, "insert() failed"); + abort(); + } + } + + return result; + } + + inline insert_result insert(const value_type& v) + { + return insert(v.first, v.second); + } + + inline const_iterator find(const Key& k) const + { + return const_iterator(*this, find_index(k)); + } + + inline iterator find(const Key& k) + { + return iterator(*this, find_index(k)); + } + + inline bool erase(const Key& k) + { + uint32_t i = find_index(k); + + if (i >= m_values.size()) + return false; + + node* pDst = &get_node(i); + destruct_value_type(pDst); + pDst->state = cStateInvalid; + + m_num_valid--; + + for (; ; ) + { + uint32_t r, j = i; + + node* pSrc = pDst; + + do + { + if (!i) + { + i = m_values.size() - 1; + pSrc = &get_node(i); + } + else + { + i--; + pSrc--; + } + + if (!pSrc->state) + return true; + + r = hash_key(pSrc->first); + + } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + + move_node(pDst, pSrc); + + pDst = pSrc; + } + } + + inline void swap(hash_map_type& other) + { + m_values.swap(other.m_values); + std::swap(m_hash_shift, other.m_hash_shift); + std::swap(m_num_valid, other.m_num_valid); + std::swap(m_grow_threshold, other.m_grow_threshold); + std::swap(m_hasher, other.m_hasher); + std::swap(m_equals, other.m_equals); + } + + private: + struct node : public value_type + { + uint8_t state; + }; + + static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &k, sizeof(Key)); + else + scalar_type::construct(&pDst->first, k); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &v, sizeof(Value)); + else + scalar_type::construct(&pDst->second, v); + } + + static inline void construct_value_type(value_type* pDst, const value_type* pSrc) + { + if ((BASISU_IS_BITWISE_COPYABLE(Key)) && (BASISU_IS_BITWISE_COPYABLE(Value))) + { + memcpy(pDst, pSrc, sizeof(value_type)); + } + else + { + if 
(BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + scalar_type::construct(&pDst->first, pSrc->first); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + scalar_type::construct(&pDst->second, pSrc->second); + } + } + + static inline void destruct_value_type(value_type* p) + { + scalar_type::destruct(&p->first); + scalar_type::destruct(&p->second); + } + + // Moves *pSrc to *pDst efficiently. + // pDst should NOT be constructed on entry. + static inline void move_node(node* pDst, node* pSrc, bool update_src_state = true) + { + assert(!pDst->state); + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + { + memcpy(pDst, pSrc, sizeof(node)); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + { + scalar_type::construct(&pDst->first, pSrc->first); + scalar_type::destruct(&pSrc->first); + } + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + { + scalar_type::construct(&pDst->second, pSrc->second); + scalar_type::destruct(&pSrc->second); + } + + pDst->state = cStateValid; + } + + if (update_src_state) + pSrc->state = cStateInvalid; + } + + struct raw_node + { + inline raw_node() + { + node* p = reinterpret_cast(this); + p->state = cStateInvalid; + } + + inline ~raw_node() + { + node* p = reinterpret_cast(this); + if (p->state) + hash_map_type::destruct_value_type(p); + } + + inline raw_node(const raw_node& other) + { + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&other); + + if (pSrc->state) + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + else + pDst->state = cStateInvalid; + } + + inline raw_node& operator= (const raw_node& rhs) + { + if (this == &rhs) + return *this; + + node* pDst = reinterpret_cast(this); + const 
node* pSrc = reinterpret_cast(&rhs); + + if (pSrc->state) + { + if (pDst->state) + { + pDst->first = pSrc->first; + pDst->second = pSrc->second; + } + else + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + } + else if (pDst->state) + { + hash_map_type::destruct_value_type(pDst); + pDst->state = cStateInvalid; + } + + return *this; + } + + uint8_t m_bits[sizeof(node)]; + }; + + typedef basisu::vector node_vector; + + node_vector m_values; + uint32_t m_hash_shift; + + Hasher m_hasher; + Equals m_equals; + + uint32_t m_num_valid; + + uint32_t m_grow_threshold; + + inline uint32_t hash_key(const Key& k) const + { + assert((1U << (32U - m_hash_shift)) == m_values.size()); + + uint32_t hash = static_cast(m_hasher(k)); + + // Fibonacci hashing + hash = (2654435769U * hash) >> m_hash_shift; + + assert(hash < m_values.size()); + return hash; + } + + inline const node& get_node(uint32_t index) const + { + return *reinterpret_cast(&m_values[index]); + } + + inline node& get_node(uint32_t index) + { + return *reinterpret_cast(&m_values[index]); + } + + inline state get_node_state(uint32_t index) const + { + return static_cast(get_node(index).state); + } + + inline void set_node_state(uint32_t index, bool valid) + { + get_node(index).state = valid; + } + + inline void grow() + { + uint64_t n = m_values.size() * 3ULL; // was * 2 + + if (!helpers::is_power_of_2(n)) + n = helpers::next_pow2(n); + + if (n > 0x80000000UL) + n = 0x80000000UL; + + rehash(helpers::maximum(cMinHashSize, (uint32_t)n)); + } + + inline void rehash(uint32_t new_hash_size) + { + assert(new_hash_size >= m_num_valid); + assert(helpers::is_power_of_2(new_hash_size)); + + if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size())) + return; + + hash_map new_map; + new_map.m_values.resize(new_hash_size); + new_map.m_hash_shift = 32U - helpers::floor_log2i(new_hash_size); + assert(new_hash_size == (1U << (32U - new_map.m_hash_shift))); + new_map.m_grow_threshold 
= UINT_MAX; + + node* pNode = reinterpret_cast(m_values.begin()); + node* pNode_end = pNode + m_values.size(); + + while (pNode != pNode_end) + { + if (pNode->state) + { + new_map.move_into(pNode); + + if (new_map.m_num_valid == m_num_valid) + break; + } + + pNode++; + } + + new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U; + + m_values.clear_no_destruction(); + m_hash_shift = 32; + + swap(new_map); + } + + inline uint32_t find_next(uint32_t index) const + { + index++; + + if (index >= m_values.size()) + return index; + + const node* pNode = &get_node(index); + + for (; ; ) + { + if (pNode->state) + break; + + if (++index >= m_values.size()) + break; + + pNode++; + } + + return index; + } + + inline uint32_t find_index(const Key& k) const + { + if (m_num_valid) + { + uint32_t index = hash_key(k); + const node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + return index; + + const uint32_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (index == orig_index) + break; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + return index; + } + } + } + + return m_values.size(); + } + + inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v = Value()) + { + if (!m_values.size()) + return false; + + uint32_t index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const uint32_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } 
+ } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + construct_value_type(pNode, k, v); + + pNode->state = cStateValid; + + m_num_valid++; + assert(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + inline void move_into(node* pNode) + { + uint32_t index = hash_key(pNode->first); + node* pDst_node = &get_node(index); + + if (pDst_node->state) + { + const uint32_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pDst_node = &get_node(index); + } + else + { + index--; + pDst_node--; + } + + if (index == orig_index) + { + assert(false); + return; + } + + if (!pDst_node->state) + break; + } + } + + move_node(pDst_node, pNode, false); + + m_num_valid++; + } + }; + + template + struct bitwise_movable< hash_map > { enum { cFlag = true }; }; + +#if BASISU_HASHMAP_TEST + extern void hash_map_test(); +#endif + +} // namespace basisu + +namespace std +{ + template + inline void swap(basisu::vector& a, basisu::vector& b) + { + a.swap(b); + } + + template + inline void swap(basisu::hash_map& a, basisu::hash_map& b) + { + a.swap(b); + } + +} // namespace std diff --git a/src/deps/basis-universal/transcoder/basisu_containers_impl.h b/src/deps/basis-universal/transcoder/basisu_containers_impl.h new file mode 100644 index 0000000000..6555171419 --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_containers_impl.h @@ -0,0 +1,311 @@ +// basisu_containers_impl.h +// Do not include directly + +#ifdef _MSC_VER +#pragma warning (disable:4127) // warning C4127: conditional expression is constant +#endif + +namespace basisu +{ + bool elemental_vector::increase_capacity(uint32_t min_new_capacity, bool grow_hint, uint32_t element_size, object_mover pMover, bool nofail) + { + assert(m_size <= m_capacity); + + if (sizeof(void *) == sizeof(uint64_t)) + assert(min_new_capacity < (0x400000000ULL / element_size)); + else + assert(min_new_capacity < 
(0x7FFF0000U / element_size)); + + if (m_capacity >= min_new_capacity) + return true; + + size_t new_capacity = min_new_capacity; + if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity))) + { + new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity); + + assert(new_capacity && (new_capacity > m_capacity)); + + if (new_capacity < min_new_capacity) + { + if (nofail) + return false; + fprintf(stderr, "vector too large\n"); + abort(); + } + } + + const size_t desired_size = element_size * new_capacity; + size_t actual_size = 0; + if (!pMover) + { + void* new_p = realloc(m_p, desired_size); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; +#ifdef _MSC_VER + sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size); +#else + sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size); +#endif + fprintf(stderr, "%s", buf); + abort(); + } + +#ifdef _MSC_VER + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + m_p = new_p; + } + else + { + void* new_p = malloc(desired_size); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; +#ifdef _MSC_VER + sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size); +#else + sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size); +#endif + fprintf(stderr, "%s", buf); + abort(); + } + +#ifdef _MSC_VER + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = static_cast(new_capacity); + + return true; + } + +#if BASISU_HASHMAP_TEST + +#define HASHMAP_TEST_VERIFY(c) do { 
if (!(c)) handle_hashmap_test_verify_failure(__LINE__); } while(0) + + static void handle_hashmap_test_verify_failure(int line) + { + fprintf(stderr, "HASHMAP_TEST_VERIFY() faild on line %i\n", line); + abort(); + } + + class counted_obj + { + public: + counted_obj(uint32_t v = 0) : + m_val(v) + { + m_count++; + } + + counted_obj(const counted_obj& obj) : + m_val(obj.m_val) + { + m_count++; + } + + ~counted_obj() + { + assert(m_count > 0); + m_count--; + } + + static uint32_t m_count; + + uint32_t m_val; + + operator size_t() const { return m_val; } + + bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; } + bool operator== (const uint32_t rhs) const { return m_val == rhs; } + + }; + + uint32_t counted_obj::m_count; + + static uint32_t urand32() + { + uint32_t a = rand(); + uint32_t b = rand() << 15; + uint32_t c = rand() << (32 - 15); + return a ^ b ^ c; + } + + static int irand32(int l, int h) + { + assert(l < h); + if (l >= h) + return l; + + uint32_t range = static_cast(h - l); + + uint32_t rnd = urand32(); + + uint32_t rnd_range = static_cast((((uint64_t)range) * ((uint64_t)rnd)) >> 32U); + + int result = l + rnd_range; + assert((result >= l) && (result < h)); + return result; + } + + void hash_map_test() + { + { + basisu::hash_map k; + basisu::hash_map l; + std::swap(k, l); + + k.begin(); + k.end(); + k.clear(); + k.empty(); + k.erase(0); + k.insert(0, 1); + k.find(0); + k.get_equals(); + k.get_hasher(); + k.get_table_size(); + k.reset(); + k.reserve(1); + k = l; + k.set_equals(l.get_equals()); + k.set_hasher(l.get_hasher()); + k.get_table_size(); + } + + uint32_t seed = 0; + for (; ; ) + { + seed++; + + typedef basisu::hash_map my_hash_map; + my_hash_map m; + + const uint32_t n = irand32(0, 100000); + + printf("%u\n", n); + + srand(seed); // r1.seed(seed); + + basisu::vector q; + + uint32_t count = 0; + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::insert_result res = 
m.insert(counted_obj(v), counted_obj(v ^ 0xdeadbeef)); + if (res.second) + { + count++; + q.push_back(v); + } + } + + HASHMAP_TEST_VERIFY(m.size() == count); + + srand(seed); + + my_hash_map cm(m); + m.clear(); + m = cm; + cm.reset(); + + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == v); + HASHMAP_TEST_VERIFY(it->second == (v ^ 0xdeadbeef)); + } + + for (uint32_t t = 0; t < 2; t++) + { + const uint32_t nd = irand32(1, q.size() + 1); + for (uint32_t i = 0; i < nd; i++) + { + uint32_t p = irand32(0, q.size()); + + int k = q[p]; + if (k >= 0) + { + q[p] = -k - 1; + + bool s = m.erase(counted_obj(k)); + HASHMAP_TEST_VERIFY(s); + } + } + + typedef basisu::hash_map uint_hash_set; + uint_hash_set s; + + for (uint32_t i = 0; i < q.size(); i++) + { + int v = q[i]; + + if (v >= 0) + { + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == (uint32_t)v); + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)v ^ 0xdeadbeef)); + + s.insert(v); + } + else + { + my_hash_map::const_iterator it = m.find(counted_obj(-v - 1)); + HASHMAP_TEST_VERIFY(it == m.end()); + } + } + + uint32_t found_count = 0; + for (my_hash_map::const_iterator it = m.begin(); it != m.end(); ++it) + { + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)it->first ^ 0xdeadbeef)); + + uint_hash_set::const_iterator fit(s.find((uint32_t)it->first)); + HASHMAP_TEST_VERIFY(fit != s.end()); + + HASHMAP_TEST_VERIFY(fit->first == it->first); + + found_count++; + } + + HASHMAP_TEST_VERIFY(found_count == s.size()); + } + + HASHMAP_TEST_VERIFY(counted_obj::m_count == m.size() * 2); + } + } + +#endif // BASISU_HASHMAP_TEST + +} // namespace basisu diff --git a/src/deps/basis-universal/transcoder/basisu_file_headers.h b/src/deps/basis-universal/transcoder/basisu_file_headers.h index 
c90b3f3af0..4316d738e6 100644 --- a/src/deps/basis-universal/transcoder/basisu_file_headers.h +++ b/src/deps/basis-universal/transcoder/basisu_file_headers.h @@ -1,5 +1,5 @@ // basis_file_headers.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,11 @@ namespace basist // Slice desc header flags enum basis_slice_desc_flags { - cSliceDescFlagsIsAlphaData = 1, - cSliceDescFlagsFrameIsIFrame = 2 // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) + cSliceDescFlagsHasAlpha = 1, + + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) + // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. + cSliceDescFlagsFrameIsIFrame = 2 }; #pragma pack(push) @@ -38,7 +41,7 @@ namespace basist basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. 
- basisu::packed_uint<4> m_file_ofs; // Offset from the header to the start of the slice's data + basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes basisu::packed_uint<2> m_slice_data_crc16; // The CRC16 of the compressed slice data, for extra-paranoid use cases @@ -47,9 +50,21 @@ namespace basist // File header files enum basis_header_flags { - cBASISHeaderFlagETC1S = 1, // Always set for basis universal files - cBASISHeaderFlagYFlipped = 2, // Set if the texture had to be Y flipped before encoding - cBASISHeaderFlagHasAlphaSlices = 4 // True if the odd slices contain alpha data + // Always set for ETC1S files. Not set for UASTC files. + cBASISHeaderFlagETC1S = 1, + + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. + cBASISHeaderFlagYFlipped = 2, + + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. + // In reality, we have no idea if the texture data is actually linear or sRGB. This is the m_perceptual parameter passed to the compressor. + cBASISHeaderFlagSRGB = 16, }; // The image type field attempts to describe how to interpret the image data in a Basis file. 
@@ -71,6 +86,12 @@ namespace basist cBASISMaxUSPerFrame = 0xFFFFFF }; + enum class basis_tex_format + { + cETC1S = 0, + cUASTC4x4 = 1 + }; + struct basis_file_header { enum @@ -82,16 +103,16 @@ namespace basist basisu::packed_uint<2> m_sig; // 2 byte file signature basisu::packed_uint<2> m_ver; // Baseline file version basisu::packed_uint<2> m_header_size; // Header size in bytes, sizeof(basis_file_header) - basisu::packed_uint<2> m_header_crc16; // crc16 of the remaining header data + basisu::packed_uint<2> m_header_crc16; // CRC16 of the remaining header data basisu::packed_uint<4> m_data_size; // The total size of all data after the header basisu::packed_uint<2> m_data_crc16; // The CRC16 of all data after the header - basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha basis files) + basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) basisu::packed_uint<3> m_total_images; // The total # of images - basisu::packed_uint<1> m_format; // enum basist::block_format + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type basisu::packed_uint<3> m_us_per_frame; // Framerate of video, in microseconds per frame @@ -101,11 +122,11 @@ namespace basist basisu::packed_uint<4> m_userdata1; // For client use basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook - basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the header + basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes basisu::packed_uint<2> m_total_selectors; // The number of 
selectors in the endpoint codebook - basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the header + basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes basisu::packed_uint<4> m_tables_file_ofs; // The file offset of the compressed Huffman codelength tables, for decompressing slices diff --git a/src/deps/basis-universal/transcoder/basisu_global_selector_cb.h b/src/deps/basis-universal/transcoder/basisu_global_selector_cb.h index 695b0b3b97..8ab5098898 100644 --- a/src/deps/basis-universal/transcoder/basisu_global_selector_cb.h +++ b/src/deps/basis-universal/transcoder/basisu_global_selector_cb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/deps/basis-universal/transcoder/basisu_global_selector_palette.h b/src/deps/basis-universal/transcoder/basisu_global_selector_palette.h index b0260541c3..8bedf94710 100644 --- a/src/deps/basis-universal/transcoder/basisu_global_selector_palette.h +++ b/src/deps/basis-universal/transcoder/basisu_global_selector_palette.h @@ -1,5 +1,7 @@ // basisu_global_selector_palette.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// TODO: NONE of this is used in .basis/.ktx2 files. It will be deleted soon. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -609,7 +611,7 @@ namespace basist uint8_t m_selectors[16]; }; - typedef std::vector etc1_selector_palette_entry_vec; + typedef basisu::vector etc1_selector_palette_entry_vec; extern const uint32_t g_global_selector_cb[]; extern const uint32_t g_global_selector_cb_size; @@ -628,7 +630,7 @@ namespace basist void set(uint32_t palette_index, const etc1_global_palette_entry_modifier &modifier) { m_palette_index = palette_index; m_modifier = modifier; } }; - typedef std::vector etc1_global_selector_codebook_entry_id_vec; + typedef basisu::vector etc1_global_selector_codebook_entry_id_vec; class etc1_global_selector_codebook { diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder.cpp b/src/deps/basis-universal/transcoder/basisu_transcoder.cpp index 79a0560e54..0b3733385d 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder.cpp +++ b/src/deps/basis-universal/transcoder/basisu_transcoder.cpp @@ -1,5 +1,5 @@ // basisu_transcoder.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,77 +15,88 @@ #include "basisu_transcoder.h" #include -#include +#include "basisu_containers_impl.h" + +#ifndef BASISD_IS_BIG_ENDIAN +// TODO: This doesn't work on OSX. How can this be so difficult? +//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN) +// #define BASISD_IS_BIG_ENDIAN (1) +//#else + #define BASISD_IS_BIG_ENDIAN (0) +//#endif +#endif + +#ifndef BASISD_USE_UNALIGNED_WORD_READS + #ifdef __EMSCRIPTEN__ + // Can't use unaligned loads/stores with WebAssembly. 
+ #define BASISD_USE_UNALIGNED_WORD_READS (0) + #elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__) + #define BASISD_USE_UNALIGNED_WORD_READS (1) + #else + #define BASISD_USE_UNALIGNED_WORD_READS (0) + #endif +#endif -// The supported .basis file header version. Keep in sync with BASIS_FILE_VERSION. #define BASISD_SUPPORTED_BASIS_VERSION (0x13) +#ifndef BASISD_SUPPORT_KTX2 + #error Must have defined BASISD_SUPPORT_KTX2 +#endif + +#ifndef BASISD_SUPPORT_KTX2_ZSTD +#error Must have defined BASISD_SUPPORT_KTX2_ZSTD +#endif + // Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data. #ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS -#define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0 + #define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0 #endif #ifndef BASISD_SUPPORT_DXT1 -#define BASISD_SUPPORT_DXT1 1 + #define BASISD_SUPPORT_DXT1 1 #endif #ifndef BASISD_SUPPORT_DXT5A -#define BASISD_SUPPORT_DXT5A 1 + #define BASISD_SUPPORT_DXT5A 1 #endif // Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript) #if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7 - #ifndef BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - #define BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY 0 - #endif #ifndef BASISD_SUPPORT_BC7_MODE5 - #define BASISD_SUPPORT_BC7_MODE5 0 + #define BASISD_SUPPORT_BC7_MODE5 0 #endif #endif // !BASISD_SUPPORT_BC7 -// BC7 mode 6 opaque only is the highest quality (compared to ETC1), but the tables are massive. -// For web/mobile use you probably should disable this. -#ifndef BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY -#define BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY 1 -#endif - -// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory than BC1.
#ifndef BASISD_SUPPORT_BC7_MODE5 -#define BASISD_SUPPORT_BC7_MODE5 1 + #define BASISD_SUPPORT_BC7_MODE5 1 #endif #ifndef BASISD_SUPPORT_PVRTC1 -#define BASISD_SUPPORT_PVRTC1 1 + #define BASISD_SUPPORT_PVRTC1 1 #endif #ifndef BASISD_SUPPORT_ETC2_EAC_A8 -#define BASISD_SUPPORT_ETC2_EAC_A8 1 + #define BASISD_SUPPORT_ETC2_EAC_A8 1 +#endif + +// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files. +#ifndef BASISD_SUPPORT_UASTC + #define BASISD_SUPPORT_UASTC 1 #endif #ifndef BASISD_SUPPORT_ASTC -#define BASISD_SUPPORT_ASTC 1 + #define BASISD_SUPPORT_ASTC 1 #endif // Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support. #ifndef BASISD_SUPPORT_ATC -#define BASISD_SUPPORT_ATC 1 + #define BASISD_SUPPORT_ATC 1 #endif // Support for ETC2 EAC R11 and ETC2 EAC RG11 #ifndef BASISD_SUPPORT_ETC2_EAC_RG11 -#define BASISD_SUPPORT_ETC2_EAC_RG11 1 -#endif - -#if BASISD_SUPPORT_PVRTC2 -#if !BASISD_SUPPORT_ATC -#error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1 -#endif -#endif - -#if BASISD_SUPPORT_ATC -#if !BASISD_SUPPORT_DXT5A -#error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1 -#endif + #define BASISD_SUPPORT_ETC2_EAC_RG11 1 #endif // If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large. 
@@ -101,14 +112,25 @@ #endif #ifndef BASISD_SUPPORT_FXT1 -#define BASISD_SUPPORT_FXT1 1 + #define BASISD_SUPPORT_FXT1 1 #endif #ifndef BASISD_SUPPORT_PVRTC2 -#define BASISD_SUPPORT_PVRTC2 1 + #define BASISD_SUPPORT_PVRTC2 1 +#endif + +#if BASISD_SUPPORT_PVRTC2 + #if !BASISD_SUPPORT_ATC + #error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1 + #endif +#endif + +#if BASISD_SUPPORT_ATC + #if !BASISD_SUPPORT_DXT5A + #error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1 + #endif #endif -#define BASISD_WRITE_NEW_BC7_TABLES 0 #define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0 #define BASISD_WRITE_NEW_DXT1_TABLES 0 #define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0 @@ -117,7 +139,16 @@ #define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0 #ifndef BASISD_ENABLE_DEBUG_FLAGS -#define BASISD_ENABLE_DEBUG_FLAGS 0 + #define BASISD_ENABLE_DEBUG_FLAGS 0 +#endif + +// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header. +#if BASISD_SUPPORT_KTX2 + // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded. + #if BASISD_SUPPORT_KTX2_ZSTD + // We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError() + #include "../zstd/zstd.h" + #endif #endif namespace basisu @@ -131,7 +162,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) 
{ -#if BASISU_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -146,9 +177,6 @@ namespace basisu namespace basist { -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY -#include "basisu_transcoder_tables_bc7_m6.inc" -#endif #if BASISD_ENABLE_DEBUG_FLAGS static uint32_t g_debug_flags = 0; @@ -165,16 +193,28 @@ namespace basist void set_debug_flags(uint32_t f) { - (void)f; + BASISU_NOTE_UNUSED(f); #if BASISD_ENABLE_DEBUG_FLAGS g_debug_flags = f; #endif } + + inline uint16_t byteswap_uint16(uint16_t v) + { + return static_cast((v >> 8) | (v << 8)); + } + + static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } + static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + static inline float saturate(float value) { return clampf(value, 0, 1.0f); } + + static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + uint16_t crc16(const void* r, size_t size, uint16_t crc) { crc = ~crc; - const uint8_t* p = reinterpret_cast(r); + const uint8_t* p = static_cast(r); for (; size; --size) { const uint16_t q = *p++ ^ (crc >> 8); @@ -279,6 +319,9 @@ namespace basist DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1); DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16); DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16); + + //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; @@ -522,11 +565,39 @@ namespace basist return static_cast(b | (g << 5U) | (r << 10U)); } + inline uint16_t get_base4_color(uint32_t 
idx) const + { + uint32_t r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + inline color32 get_base5_color_unscaled() const { return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255); } + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -534,6 +605,38 @@ namespace basist return (m_bytes[3] >> ofs) & 7; } + inline uint16_t get_delta3_color() const + { + const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const + { + color32 b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true, 255); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + 
pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + static uint16_t pack_color4(const color32& color, bool scaled, uint32_t bias = 127U) { return pack_color4(color.r, color.g, color.b, scaled, bias); @@ -592,7 +695,17 @@ namespace basist return static_cast(b | (g << 3) | (r << 6)); } - static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha = 255) + static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) { uint32_t b = packed_color5 & 31U; uint32_t g = (packed_color5 >> 5U) & 31U; @@ -605,7 +718,9 @@ namespace basist r = (r << 3U) | (r >> 2U); } - return color32(r, g, b, alpha); + assert(alpha <= 255); + + return color32(cNoClamp, r, g, b, alpha); } static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) @@ -615,6 +730,64 @@ namespace basist g = c.g; b = c.b; } + + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + int dr, dg, db; + unpack_delta3(dr, dg, db, packed_delta3); + + int r = ((packed_color5 >> 10U) & 31U) + dr; + int g = ((packed_color5 >> 5U) & 31U) + dg; + int b = (packed_color5 & 31U) + db; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = basisu::clamp(r, 0, 31); + g = basisu::clamp(g, 0, 31); + b = basisu::clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r 
<< 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U)); + return success; + } + + static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U)); + } + + static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color32 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx) { @@ -823,197 +996,6 @@ namespace basist uint32_t m_high; }; -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - static dxt_selector_range g_etc1_to_bc7_selector_ranges[] = - { - { 0, 0 }, - { 1, 1 }, - { 2, 2 }, - { 3, 3 }, - - { 0, 3 }, - - { 1, 3 }, - { 0, 2 }, - - { 1, 2 }, - - { 2, 3 }, - { 0, 1 }, - }; - const uint32_t NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_selector_ranges) / sizeof(g_etc1_to_bc7_selector_ranges[0]); - - static uint32_t g_etc1_to_bc7_m6_selector_range_index[4][4]; - - static const uint8_t g_etc1_to_bc7_selector_mappings[][4] = - { -#if 1 - { 5 * 0, 5 * 0, 5 * 0, 5 * 0 }, - { 5 * 0, 5 * 0, 5 * 0, 5 * 1 }, - { 5 * 0, 5 * 0, 5 * 0, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 0, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 3, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 2 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 3, 5 * 3 }, - { 5 * 0, 5 * 2, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 2, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 2, 5 * 3, 
5 * 3 }, - { 5 * 0, 5 * 3, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 1, 5 * 1, 5 * 1 }, - { 5 * 1, 5 * 1, 5 * 1, 5 * 2 }, - { 5 * 1, 5 * 1, 5 * 1, 5 * 3 }, - { 5 * 1, 5 * 1, 5 * 2, 5 * 2 }, - { 5 * 1, 5 * 1, 5 * 2, 5 * 3 }, - { 5 * 1, 5 * 1, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 2, 5 * 2, 5 * 2 }, - { 5 * 1, 5 * 2, 5 * 2, 5 * 3 }, - { 5 * 1, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 3, 5 * 3, 5 * 3 }, - { 5 * 2, 5 * 2, 5 * 2, 5 * 2 }, - { 5 * 2, 5 * 2, 5 * 2, 5 * 3 }, - { 5 * 2, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 2, 5 * 3, 5 * 3, 5 * 3 }, - { 5 * 3, 5 * 3, 5 * 3, 5 * 3 }, - - { 0, 1, 2, 3 }, - { 0, 0, 1, 1 }, - { 0, 0, 0, 1 }, - { 0, 2, 4, 6 }, - { 0, 3, 6, 9 }, - { 0, 4, 8, 12 }, - - { 0, 4, 9, 15 }, - { 0, 6, 11, 15 }, - - { 1, 2, 3, 4 }, - { 1, 3, 5, 7 }, - - { 1, 8, 8, 14 }, -#else - { 5 * 0, 5 * 0, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 2, 5 * 2, 5 * 2 }, -#endif - { 5 * 1, 5 * 2, 5 * 3, 5 * 3 }, - { 8, 8, 8, 8 }, - }; - const uint32_t NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS = sizeof(g_etc1_to_bc7_selector_mappings) / sizeof(g_etc1_to_bc7_selector_mappings[0]); - - static uint8_t g_etc1_to_bc7_selector_mappings_inv[NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS][4]; - - // encoding from LSB to MSB: low8, high8, error16, size is [32*8][NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES][NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS] - extern const uint32_t* g_etc1_to_bc7_m6_table[]; - - const uint16_t s_bptc_table_aWeight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - -#if BASISD_WRITE_NEW_BC7_TABLES - static void create_etc1_to_bc7_m6_conversion_table() - { - FILE* pFile = NULL; - - pFile = fopen("basisu_decoder_tables_bc7_m6.inc", "w"); - - for (int inten = 0; inten < 8; inten++) - { - for (uint32_t g = 0; g < 32; g++) - { - color32 block_colors[4]; - 
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); - - fprintf(pFile, "static const uint32_t g_etc1_to_bc7_m6_table%u[] = {\n", g + inten * 32); - uint32_t n = 0; - - for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES; sr++) - { - const uint32_t low_selector = g_etc1_to_bc7_selector_ranges[sr].m_low; - const uint32_t high_selector = g_etc1_to_bc7_selector_ranges[sr].m_high; - - for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS; m++) - { - uint32_t best_lo = 0; - uint32_t best_hi = 0; - uint64_t best_err = UINT64_MAX; - - for (uint32_t hi = 0; hi <= 127; hi++) - { - for (uint32_t lo = 0; lo <= 127; lo++) - { - uint32_t bc7_block_colors[16]; - - bc7_block_colors[0] = lo << 1; - bc7_block_colors[15] = (hi << 1) | 1; - - for (uint32_t i = 1; i < 15; i++) - bc7_block_colors[i] = (bc7_block_colors[0] * (64 - s_bptc_table_aWeight4[i]) + bc7_block_colors[15] * s_bptc_table_aWeight4[i] + 32) >> 6; - - uint64_t total_err = 0; - - for (uint32_t s = low_selector; s <= high_selector; s++) - { - int err = (int)block_colors[s].g - (int)bc7_block_colors[g_etc1_to_bc7_selector_mappings[m][s]]; - - total_err += err * err; - } - - if (total_err < best_err) - { - best_err = total_err; - best_lo = lo; - best_hi = hi; - } - } // lo - - } // hi - - best_err = basisu::minimum(best_err, 0xFFFF); - - const uint32_t index = (g + inten * 32) * (NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS) + (sr * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS) + m; - - uint32_t v = best_err | (best_lo << 18) | (best_hi << 25); - - fprintf(pFile, "0x%X,", v); - n++; - if ((n & 31) == 31) - fprintf(pFile, "\n"); - - } // m - } // sr - - fprintf(pFile, "};\n"); - - } // g - } // inten - - fprintf(pFile, "const uint32_t *g_etc1_to_bc7_m6_table[] = {\n"); - - for (uint32_t i = 0; i < 32 * 8; i++) - { - fprintf(pFile, "g_etc1_to_bc7_m6_table%u, ", i); - if ((i & 15) == 15) - fprintf(pFile, "\n"); 
- } - - fprintf(pFile, "};\n"); - fclose(pFile); - } -#endif -#endif - struct etc1_to_dxt1_56_solution { uint8_t m_lo; @@ -1064,7 +1046,9 @@ namespace basist static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_dxt1_5.inc" }; +#endif // BASISD_SUPPORT_DXT1 +#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC // First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt. struct bc1_match_entry { @@ -1089,14 +1073,15 @@ namespace basist if (sel == 1) { // Selector 1 - e = abs(((hi_e * 2 + lo_e) / 3) - i) + ((abs(hi_e - lo_e) >> 5)); + e = basisu::iabs(((hi_e * 2 + lo_e) / 3) - i); + e += (basisu::iabs(hi_e - lo_e) * 3) / 100; } else { assert(sel == 0); // Selector 0 - e = abs(hi_e - i); + e = basisu::iabs(hi_e - i); } if (e < lowest_e) @@ -1111,7 +1096,7 @@ namespace basist } // lo } } -#endif // BASISD_SUPPORT_DXT1 +#endif #if BASISD_WRITE_NEW_DXT1_TABLES static void create_etc1_to_dxt1_5_conversion_table() @@ -1268,7 +1253,8 @@ namespace basist } #endif -#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + +#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 static const int8_t g_eac_modifier_table[16][8] = { { -3, -6, -9, -15, 2, 5, 8, 14 }, @@ -1344,6 +1330,9 @@ namespace basist } }; +#endif // #if BASISD_SUPPORT_UASTC BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + +#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 static const dxt_selector_range s_etc2_eac_selector_ranges[] = { { 0, 3 }, @@ -1372,8 +1361,8 @@ namespace basist uint32_t m_base; uint32_t m_table; uint32_t m_multiplier; - std::vector m_selectors; - std::vector m_selectors_temp; + basisu::vector m_selectors; + basisu::vector m_selectors_temp; }; static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels) @@ -1769,8 +1758,8 @@ 
namespace basist uint32_t m_base; uint32_t m_table; uint32_t m_multiplier; - std::vector m_selectors; - std::vector m_selectors_temp; + basisu::vector m_selectors; + basisu::vector m_selectors_temp; }; static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels) @@ -1924,14 +1913,28 @@ namespace basist #if BASISD_SUPPORT_PVRTC2 static void transcoder_init_pvrtc2(); #endif + +#if BASISD_SUPPORT_UASTC + void uastc_init(); +#endif + + static bool g_transcoder_initialized; // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. void basisu_transcoder_init() { - static bool s_initialized; - if (s_initialized) + if (g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; + } + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + +#if BASISD_SUPPORT_UASTC + uastc_init(); +#endif #if BASISD_SUPPORT_ASTC transcoder_init_astc(); @@ -1943,11 +1946,6 @@ namespace basist exit(0); #endif -#if BASISD_WRITE_NEW_BC7_TABLES - create_etc1_to_bc7_m6_conversion_table(); - exit(0); -#endif - #if BASISD_WRITE_NEW_BC7_MODE5_TABLES create_etc1_to_bc7_m5_color_conversion_table(); create_etc1_to_bc7_m5_alpha_conversion_table(); @@ -1975,7 +1973,7 @@ namespace basist exit(0); #endif -#if BASISD_SUPPORT_DXT1 +#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC uint8_t bc1_expand5[32]; for (int i = 0; i < 32; i++) bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); @@ -1988,6 +1986,17 @@ namespace basist prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1); prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0); +#if 0 + for (uint32_t i = 0; i < 256; i++) + { + printf("%u %u %u\n", i, (i * 63 + 127) / 255, 
g_bc1_match6_equals_0[i].m_hi); + } + exit(0); +#endif + +#endif + +#if BASISD_SUPPORT_DXT1 for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++) { uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low; @@ -2023,19 +2032,6 @@ namespace basist } #endif -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES; i++) - { - uint32_t l = g_etc1_to_bc7_selector_ranges[i].m_low; - uint32_t h = g_etc1_to_bc7_selector_ranges[i].m_high; - g_etc1_to_bc7_m6_selector_range_index[l][h] = i; - } - - for (uint32_t sm = 0; sm < NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS; sm++) - for (uint32_t j = 0; j < 4; j++) - g_etc1_to_bc7_selector_mappings_inv[sm][j] = 15 - g_etc1_to_bc7_selector_mappings[sm][j]; -#endif - #if BASISD_SUPPORT_BC7_MODE5 transcoder_init_bc7_mode5(); #endif @@ -2048,7 +2044,7 @@ namespace basist transcoder_init_pvrtc2(); #endif - s_initialized = true; + g_transcoder_initialized = true; } #if BASISD_SUPPORT_DXT1 @@ -2780,7 +2776,7 @@ namespace basist // PVRTC -#if BASISD_SUPPORT_PVRTC1 +#if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC static const uint16_t g_pvrtc_swizzle_table[256] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, @@ -3304,6 +3300,7 @@ namespace basist } }; +#if 0 static const uint8_t g_pvrtc_bilinear_weights[16][4] = { { 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 }, @@ -3311,6 +3308,7 @@ namespace basist { 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 }, { 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 }, }; +#endif struct pvrtc1_temp_block { @@ -3402,7 +3400,9 @@ namespace basist color32 c(get_endpoint_8888(endpoints, endpoint_index)); return c.r + c.g + c.b + c.a; } +#endif +#if BASISD_SUPPORT_PVRTC1 // TODO: Support decoding a non-pow2 ETC1S 
texture into the next larger pow2 PVRTC texture. static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) { @@ -3411,7 +3411,7 @@ namespace basist const uint32_t x_bits = basisu::total_bits(x_mask); const uint32_t y_bits = basisu::total_bits(y_mask); const uint32_t min_bits = basisu::minimum(x_bits, y_bits); - const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; uint32_t block_index = 0; @@ -3592,7 +3592,7 @@ namespace basist const uint32_t x_bits = basisu::total_bits(x_mask); const uint32_t y_bits = basisu::total_bits(y_mask); const uint32_t min_bits = basisu::minimum(x_bits, y_bits); - const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; uint32_t block_index = 0; @@ -3763,267 +3763,16 @@ namespace basist } #endif // BASISD_SUPPORT_PVRTC1 -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - struct bc7_mode_6 +#if BASISD_SUPPORT_BC7_MODE5 + static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] = { - struct - { - uint64_t m_mode : 7; - uint64_t m_r0 : 7; - uint64_t m_r1 : 7; - uint64_t m_g0 : 7; - uint64_t m_g1 : 7; - uint64_t m_b0 : 7; - uint64_t m_b1 : 7; - uint64_t m_a0 : 7; - uint64_t m_a1 : 7; - uint64_t m_p0 : 1; - } m_lo; - - union - { - struct - { - uint64_t m_p1 : 1; - uint64_t m_s00 : 3; - uint64_t m_s10 : 4; - uint64_t m_s20 : 4; - uint64_t m_s30 : 4; - - uint64_t m_s01 : 4; - uint64_t m_s11 : 4; - uint64_t m_s21 : 4; - uint64_t m_s31 : 4; - - uint64_t m_s02 : 4; - uint64_t m_s12 : 4; - uint64_t m_s22 : 4; - uint64_t m_s32 : 4; - - uint64_t m_s03 : 4; - uint64_t m_s13 : 4; - uint64_t m_s23 : 4; - uint64_t m_s33 : 4; - - } m_hi; - - uint64_t m_hi_bits; - }; - }; - - static void 
convert_etc1s_to_bc7_m6(bc7_mode_6* pDst_block, const endpoint *pEndpoint, const selector* pSelector) - { -#if !BASISD_WRITE_NEW_BC7_TABLES - const uint32_t low_selector = pSelector->m_lo_selector; - const uint32_t high_selector = pSelector->m_hi_selector; - - const uint32_t base_color_r = pEndpoint->m_color5.r; - const uint32_t base_color_g = pEndpoint->m_color5.g; - const uint32_t base_color_b = pEndpoint->m_color5.b; - const uint32_t inten_table = pEndpoint->m_inten5; - - if (pSelector->m_num_unique_selectors <= 2) - { - // Only two unique selectors so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks. - pDst_block->m_lo.m_mode = 64; - - pDst_block->m_lo.m_a0 = 127; - pDst_block->m_lo.m_a1 = 127; - - color32 block_colors[4]; - - decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table); - - const uint32_t r0 = block_colors[low_selector].r; - const uint32_t g0 = block_colors[low_selector].g; - const uint32_t b0 = block_colors[low_selector].b; - const uint32_t low_bits0 = (r0 & 1) + (g0 & 1) + (b0 & 1); - uint32_t p0 = low_bits0 >= 2; - - const uint32_t r1 = block_colors[high_selector].r; - const uint32_t g1 = block_colors[high_selector].g; - const uint32_t b1 = block_colors[high_selector].b; - const uint32_t low_bits1 = (r1 & 1) + (g1 & 1) + (b1 & 1); - uint32_t p1 = low_bits1 >= 2; - - pDst_block->m_lo.m_r0 = r0 >> 1; - pDst_block->m_lo.m_g0 = g0 >> 1; - pDst_block->m_lo.m_b0 = b0 >> 1; - pDst_block->m_lo.m_p0 = p0; - - pDst_block->m_lo.m_r1 = r1 >> 1; - pDst_block->m_lo.m_g1 = g1 >> 1; - pDst_block->m_lo.m_b1 = b1 >> 1; - - uint32_t output_low_selector = 0; - uint32_t output_bit_offset = 1; - uint64_t output_hi_bits = p1; - - for (uint32_t y = 0; y < 4; y++) - { - for (uint32_t x = 0; x < 4; x++) - { - uint32_t s = pSelector->get_selector(x, y); - uint32_t os = (s == low_selector) ? 
output_low_selector : (15 ^ output_low_selector); - - uint32_t num_bits = 4; - - if ((x | y) == 0) - { - if (os & 8) - { - pDst_block->m_lo.m_r0 = r1 >> 1; - pDst_block->m_lo.m_g0 = g1 >> 1; - pDst_block->m_lo.m_b0 = b1 >> 1; - pDst_block->m_lo.m_p0 = p1; - - pDst_block->m_lo.m_r1 = r0 >> 1; - pDst_block->m_lo.m_g1 = g0 >> 1; - pDst_block->m_lo.m_b1 = b0 >> 1; - - output_hi_bits &= ~1ULL; - output_hi_bits |= p0; - std::swap(p0, p1); - - output_low_selector = 15; - os = 0; - } - - num_bits = 3; - } - - output_hi_bits |= (static_cast(os) << output_bit_offset); - output_bit_offset += num_bits; - } - } - - pDst_block->m_hi_bits = output_hi_bits; - - assert(pDst_block->m_hi.m_p1 == p1); - - return; - } - - uint32_t selector_range_table = g_etc1_to_bc7_m6_selector_range_index[low_selector][high_selector]; - - const uint32_t* pTable_r = g_etc1_to_bc7_m6_table[base_color_r + inten_table * 32] + (selector_range_table * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS); - const uint32_t* pTable_g = g_etc1_to_bc7_m6_table[base_color_g + inten_table * 32] + (selector_range_table * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS); - const uint32_t* pTable_b = g_etc1_to_bc7_m6_table[base_color_b + inten_table * 32] + (selector_range_table * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS); - -#if 1 - assert(NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS == 48); - - uint32_t best_err0 = UINT_MAX, best_err1 = UINT_MAX; - -#define DO_ITER2(idx) \ - { \ - uint32_t v0 = ((pTable_r[(idx)+0] + pTable_g[(idx)+0] + pTable_b[(idx)+0]) << 14) | ((idx) + 0); if (v0 < best_err0) best_err0 = v0; \ - uint32_t v1 = ((pTable_r[(idx)+1] + pTable_g[(idx)+1] + pTable_b[(idx)+1]) << 14) | ((idx) + 1); if (v1 < best_err1) best_err1 = v1; \ - } -#define DO_ITER4(idx) DO_ITER2(idx); DO_ITER2((idx) + 2); -#define DO_ITER8(idx) DO_ITER4(idx); DO_ITER4((idx) + 4); -#define DO_ITER16(idx) DO_ITER8(idx); DO_ITER8((idx) + 8); - - DO_ITER16(0); - DO_ITER16(16); - DO_ITER16(32); -#undef DO_ITER2 -#undef DO_ITER4 -#undef DO_ITER8 -#undef DO_ITER16 - - 
uint32_t best_err = basisu::minimum(best_err0, best_err1); - uint32_t best_mapping = best_err & 0xFF; - //best_err >>= 14; -#else - uint32_t best_err = UINT_MAX; - uint32_t best_mapping = 0; - assert((NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS % 2) == 0); - for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS; m += 2) - { -#define DO_ITER(idx) { uint32_t total_err = (pTable_r[idx] + pTable_g[idx] + pTable_b[idx]) & 0x3FFFF; if (total_err < best_err) { best_err = total_err; best_mapping = idx; } } - DO_ITER(m); - DO_ITER(m + 1); -#undef DO_ITER - } -#endif - - pDst_block->m_lo.m_mode = 64; - - pDst_block->m_lo.m_a0 = 127; - pDst_block->m_lo.m_a1 = 127; - - uint64_t v = 0; - const uint8_t* pSelectors_xlat; - - if (g_etc1_to_bc7_selector_mappings[best_mapping][pSelector->get_selector(0, 0)] & 8) - { - pDst_block->m_lo.m_r1 = (pTable_r[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_g1 = (pTable_g[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_b1 = (pTable_b[best_mapping] >> 18) & 0x7F; - - pDst_block->m_lo.m_r0 = (pTable_r[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_g0 = (pTable_g[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_b0 = (pTable_b[best_mapping] >> 25) & 0x7F; - - pDst_block->m_lo.m_p0 = 1; - pDst_block->m_hi.m_p1 = 0; - - v = 0; - pSelectors_xlat = &g_etc1_to_bc7_selector_mappings_inv[best_mapping][0]; - } - else - { - pDst_block->m_lo.m_r0 = (pTable_r[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_g0 = (pTable_g[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_b0 = (pTable_b[best_mapping] >> 18) & 0x7F; - - pDst_block->m_lo.m_r1 = (pTable_r[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_g1 = (pTable_g[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_b1 = (pTable_b[best_mapping] >> 25) & 0x7F; - - pDst_block->m_lo.m_p0 = 0; - pDst_block->m_hi.m_p1 = 1; - - v = 1; - pSelectors_xlat = &g_etc1_to_bc7_selector_mappings[best_mapping][0]; - } - - uint64_t v1 = 0, v2 = 0, v3 = 0; - -#define DO_X(x, s0, s1, s2, s3) { \ - v |= 
((uint64_t)pSelectors_xlat[(pSelector->m_selectors[0] >> ((x) * 2)) & 3] << (s0)); \ - v1 |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[1] >> ((x) * 2)) & 3] << (s1)); \ - v2 |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[2] >> ((x) * 2)) & 3] << (s2)); \ - v3 |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[3] >> ((x) * 2)) & 3] << (s3)); } - - // 1 4 8 12 - // 16 20 24 28 - // 32 36 40 44 - // 48 52 56 60 - - DO_X(0, 1, 16, 32, 48); - DO_X(1, 4, 20, 36, 52); - DO_X(2, 8, 24, 40, 56); - DO_X(3, 12, 28, 44, 60); -#undef DO_X - - pDst_block->m_hi_bits = v | v1 | v2 | v3; -#endif - - } -#endif // BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - -#if BASISD_SUPPORT_BC7_MODE5 - static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] = - { - { 0, 3 }, - { 1, 3 }, - { 0, 2 }, - { 1, 2 }, - { 2, 3 }, - { 0, 1 }, - }; + { 0, 3 }, + { 1, 3 }, + { 0, 2 }, + { 1, 2 }, + { 2, 3 }, + { 0, 1 }, + }; const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); @@ -4085,7 +3834,7 @@ namespace basist assert(num_bits < 32); assert(val < (1ULL << num_bits)); - uint32_t mask = (1 << num_bits) - 1; + uint32_t mask = static_cast((1ULL << num_bits) - 1); while (num_bits) { @@ -4425,9 +4174,11 @@ namespace basist { bc7_mode_5* pDst_block = static_cast(pDst); + // First ensure the block is cleared to all 0's static_cast(pDst)[0] = 0; static_cast(pDst)[1] = 0; + // Set alpha to 255 pDst_block->m_lo.m_mode = 1 << 5; pDst_block->m_lo.m_a0 = 255; pDst_block->m_lo.m_a1_0 = 63; @@ -4690,7 +4441,11 @@ namespace basist set_block_bits((uint8_t*)pDst, output_bits, 31, 97); } #endif // BASISD_SUPPORT_BC7_MODE5 - + +#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC + static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; +#endif + #if BASISD_SUPPORT_ETC2_EAC_A8 static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* 
pSelector) { @@ -4712,8 +4467,7 @@ namespace basist pDst_block->m_multiplier = 1; // selectors are all 4's - static const uint8_t s_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; - memcpy(pDst_block->m_selectors, s_etc2_eac_a8_sel4, sizeof(s_etc2_eac_a8_sel4)); + memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; } @@ -5325,31 +5079,30 @@ namespace basist #endif -#if BASISD_SUPPORT_ASTC - struct astc_block_params - { - // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; - uint8_t m_weights[32]; - }; - +#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC // Table encodes 5 trits to 8 output bits. 3^5 entries. // Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding - static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, - 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, - 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, - 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, - 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, + 
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, + 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, + 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, + 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, + 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, 191, 223, 124, 125, 126 }; + // Extracts bits [low,high] + static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) + { + return (bits >> low) & ((1 << (high - low + 1)) - 1); + } + // Writes bits to output in an endian safe way - static inline void astc_set_bits(uint32_t *pOutput, int &bit_pos, uint32_t value, int total_bits) + static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits) { uint8_t* pBytes = reinterpret_cast(pOutput); - + while (total_bits) { - const uint32_t bits_to_write = std::min(total_bits, 8 - (bit_pos & 7)); + const uint32_t bits_to_write = basisu::minimum(total_bits, 8 - (bit_pos & 7)); pBytes[bit_pos >> 3] |= static_cast(value << (bit_pos & 7)); @@ -5359,14 +5112,8 @@ namespace basist } } - // Extracts bits [low,high] - static inline uint32_t 
astc_extract_bits(uint32_t bits, int low, int high) - { - return (bits >> low) & ((1 << (high - low + 1)) - 1); - } - // Encodes 5 values to output, usable for any range that uses trits and bits - static void astc_encode_trits(uint32_t *pOutput, const uint8_t *pValues, int& bit_pos, int n) + static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) { // First extract the trits and the bits from the 5 input values int trits = 0, bits[5]; @@ -5374,9 +5121,9 @@ namespace basist for (int i = 0; i < 5; i++) { static const int s_muls[5] = { 1, 3, 9, 27, 81 }; - + const int t = pValues[i] >> n; - + trits += t * s_muls[i]; bits[i] = pValues[i] & bit_mask; } @@ -5386,14 +5133,23 @@ namespace basist assert(trits < 243); const int T = g_astc_trit_encode[trits]; - + // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); - astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); } +#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC +#if BASISD_SUPPORT_ASTC + struct astc_block_params + { + // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) + uint8_t m_endpoints[10]; + uint8_t m_weights[32]; + }; + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. 
// Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. @@ -6255,10 +6011,13 @@ namespace basist static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_pvrtc2_45.inc" }; - + +#if 0 static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_pvrtc2_alpha_33.inc" }; +#endif + #endif static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { @@ -7167,10 +6926,7 @@ namespace basist } typedef struct { float c[4]; } vec4F; - - static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } - static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } - static inline float saturate(float value) { return clampf(value, 0, 1.0f); } + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -7188,7 +6944,7 @@ namespace basist } static inline int sq(int x) { return x * x; } - + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. 
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it. @@ -7461,7 +7217,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -7488,7 +7244,15 @@ namespace basist minColor = vec4F_saturate(&c0); maxColor = vec4F_saturate(&c1); if (minColor.c[3] > maxColor.c[3]) - std::swap(minColor, maxColor); + { + // VS 2019 release Code Generator issue + //std::swap(minColor, maxColor); + + float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3]; + minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3]; + minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3]; + maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d; + } } else { @@ -7648,7 +7412,7 @@ namespace basist uint32_t m = (le * 5 + he * 3) / 8; - int err = labs((int)v - (int)m); + int err = (int)labs((int)v - (int)m); if (err < lowest_err) { lowest_err = err; @@ -7671,7 +7435,7 @@ namespace basist uint32_t le = (l << 1); le = (le << 4) | le; - int err = labs((int)v - (int)le); + int err = (int)labs((int)v - (int)le); if (err < lowest_err) { lowest_err = err; @@ -7693,7 +7457,7 @@ namespace basist uint32_t he = (h << 1) | 1; he = (he << 4) | he; - int err = labs((int)v - (int)he); + int err = (int)labs((int)v - (int)he); if (err < lowest_err) { lowest_err = err; @@ -7722,7 +7486,7 @@ namespace basist uint32_t m = (le * 5 + he * 3) / 8; - int err = labs((int)v - (int)m); + int err = (int)labs((int)v - (int)m); if (err < lowest_err) { lowest_err = err; @@ -7752,7 +7516,7 @@ namespace basist uint32_t m = (le * 5 + he * 3) / 8; - int err = 
labs((int)v - (int)m); + int err = (int)labs((int)v - (int)m); if (err < lowest_err) { lowest_err = err; @@ -7768,59 +7532,65 @@ namespace basist } #endif // BASISD_SUPPORT_PVRTC2 - basisu_lowlevel_transcoder::basisu_lowlevel_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : + basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : + m_pGlobal_codebook(nullptr), m_pGlobal_sel_codebook(pGlobal_sel_codebook), m_selector_history_buf_size(0) { } - bool basisu_lowlevel_transcoder::decode_palettes( + bool basisu_lowlevel_etc1s_transcoder::decode_palettes( uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size) { + if (m_pGlobal_codebook) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n"); + return false; + } bitwise_decoder sym_codec; huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model; if (!sym_codec.init(pEndpoints_data, endpoints_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 0\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n"); return false; } if (!sym_codec.read_huffman_table(color5_delta_model0)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 1\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n"); return false; } if (!sym_codec.read_huffman_table(color5_delta_model1)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 1a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n"); return false; } if (!sym_codec.read_huffman_table(color5_delta_model2)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 2a\n"); + 
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n"); return false; } if (!sym_codec.read_huffman_table(inten_delta_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 2b\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n"); return false; } if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 2b\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n"); return false; } const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0; - m_endpoints.resize(num_endpoints); + m_local_endpoints.resize(num_endpoints); color32 prev_color5(16, 16, 16, 0); uint32_t prev_inten = 0; @@ -7828,8 +7598,8 @@ namespace basist for (uint32_t i = 0; i < num_endpoints; i++) { uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model); - m_endpoints[i].m_inten5 = static_cast((inten_delta + prev_inten) & 7); - prev_inten = m_endpoints[i].m_inten5; + m_local_endpoints[i].m_inten5 = static_cast((inten_delta + prev_inten) & 7); + prev_inten = m_local_endpoints[i].m_inten5; for (uint32_t c = 0; c < (endpoints_are_grayscale ? 
1U : 3U); c++) { @@ -7843,25 +7613,25 @@ namespace basist int v = (prev_color5[c] + delta) & 31; - m_endpoints[i].m_color5[c] = static_cast(v); + m_local_endpoints[i].m_color5[c] = static_cast(v); prev_color5[c] = static_cast(v); } if (endpoints_are_grayscale) { - m_endpoints[i].m_color5[1] = m_endpoints[i].m_color5[0]; - m_endpoints[i].m_color5[2] = m_endpoints[i].m_color5[0]; + m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0]; + m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0]; } } sym_codec.stop(); - m_selectors.resize(num_selectors); + m_local_selectors.resize(num_selectors); if (!sym_codec.init(pSelectors_data, selectors_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 5\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); return false; } @@ -7880,12 +7650,12 @@ namespace basist { if (!sym_codec.read_huffman_table(mod_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 6\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 6\n"); return false; } if (!mod_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 6a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 6a\n"); return false; } } @@ -7902,7 +7672,7 @@ namespace basist if (pal_index >= m_pGlobal_sel_codebook->size()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 7z\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 7z\n"); return false; } @@ -7911,9 +7681,9 @@ namespace basist // TODO: Optimize this for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) - m_selectors[i].set_selector(x, y, e[x + y * 4]); + m_local_selectors[i].set_selector(x, y, e[x + y * 4]); - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); } } else @@ -7928,12 +7698,12 @@ namespace basist 
basist::huffman_decoding_table uses_global_cb_bitflags_model; if (!sym_codec.read_huffman_table(uses_global_cb_bitflags_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 7\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 7\n"); return false; } if (!uses_global_cb_bitflags_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 7a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 7a\n"); return false; } @@ -7942,12 +7712,12 @@ namespace basist { if (!sym_codec.read_huffman_table(global_mod_indices_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 8\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 8\n"); return false; } if (!global_mod_indices_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 8a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 8a\n"); return false; } } @@ -7975,7 +7745,7 @@ namespace basist if (pal_index >= m_pGlobal_sel_codebook->size()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 8b\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 8b\n"); return false; } @@ -7983,7 +7753,7 @@ namespace basist for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) - m_selectors[q].set_selector(x, y, e[x + y * 4]); + m_local_selectors[q].set_selector(x, y, e[x + y * 4]); } else { @@ -7992,11 +7762,11 @@ namespace basist uint32_t cur_byte = sym_codec.get_bits(8); for (uint32_t k = 0; k < 4; k++) - m_selectors[q].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[q].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } } - m_selectors[q].init_flags(); + m_local_selectors[q].init_flags(); } } else @@ -8012,23 +7782,23 @@ namespace basist uint32_t cur_byte = sym_codec.get_bits(8); for (uint32_t k = 0; k < 4; k++) - 
m_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); } } else { if (!sym_codec.read_huffman_table(delta_selector_pal_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 10\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10\n"); return false; } if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid())) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 10a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n"); return false; } @@ -8044,9 +7814,9 @@ namespace basist prev_bytes[j] = static_cast(cur_byte); for (uint32_t k = 0; k < 4; k++) - m_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); continue; } @@ -8058,9 +7828,9 @@ namespace basist prev_bytes[j] = static_cast(cur_byte); for (uint32_t k = 0; k < 4; k++) - m_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); } } } @@ -8071,60 +7841,60 @@ namespace basist return true; } - bool basisu_lowlevel_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size) + bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size) { basist::bitwise_decoder sym_codec; if (!sym_codec.init(pTable_data, table_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 0\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n"); return false; } if (!sym_codec.read_huffman_table(m_endpoint_pred_model)) { - 
BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 1\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n"); return false; } if (m_endpoint_pred_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 1a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n"); return false; } if (!sym_codec.read_huffman_table(m_delta_endpoint_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 2\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n"); return false; } if (m_delta_endpoint_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 2a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n"); return false; } if (!sym_codec.read_huffman_table(m_selector_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 3\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n"); return false; } if (m_selector_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 3a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n"); return false; } if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 4\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n"); return false; } if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 4a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n"); return false; } @@ -8135,27 +7905,37 @@ namespace basist return true; } - bool basisu_lowlevel_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t 
num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels, + bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels) { - (void)transcode_alpha; - (void)pAlpha_blocks; + // 'pDst_blocks' unused when disabling *all* hardware transcode options + // (and 'bc1_allow_threecolor_blocks' when disabling DXT) + BASISU_NOTE_UNUSED(pDst_blocks); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(transcode_alpha); + BASISU_NOTE_UNUSED(pAlpha_blocks); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } if (!pState) pState = &m_def_state; - const bool is_video = (header.m_tex_type == cBASISTexTypeVideoFrames); const uint32_t total_blocks = num_blocks_x * num_blocks_y; if (!output_row_pitch_in_blocks_or_pixels) { if (basis_block_format_is_uncompressed(fmt)) - output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width; + output_row_pitch_in_blocks_or_pixels = orig_width; else { if (fmt == block_format::cFXT1_RGB) - output_row_pitch_in_blocks_or_pixels = (slice_desc.m_orig_width + 7) / 8; + output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8; else 
output_row_pitch_in_blocks_or_pixels = num_blocks_x; } @@ -8164,23 +7944,23 @@ namespace basist if (basis_block_format_is_uncompressed(fmt)) { if (!output_rows_in_pixels) - output_rows_in_pixels = slice_desc.m_orig_height; + output_rows_in_pixels = orig_height; } - std::vector* pPrev_frame_indices = nullptr; + basisu::vector* pPrev_frame_indices = nullptr; if (is_video) { // TODO: Add check to make sure the caller hasn't tried skipping past p-frames - const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsIsAlphaData) != 0; - const uint32_t level_index = slice_desc.m_level_index; + //const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + //const uint32_t level_index = slice_desc.m_level_index; if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: unsupported level_index\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n"); return false; } - pPrev_frame_indices = &pState->m_prev_frame_indices[alpha_flag][level_index]; + pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index]; if (pPrev_frame_indices->size() < total_blocks) pPrev_frame_indices->resize(total_blocks); } @@ -8189,14 +7969,12 @@ namespace basist if (!sym_codec.init(pImage_data, image_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: sym_codec.init failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n"); return false; } approx_move_to_front selector_history_buf(m_selector_history_buf_size); - - const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)m_selectors.size(); - const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; + uint32_t cur_selector_rle_count = 0; decoder_etc_block block; @@ -8212,7 +7990,7 @@ namespace basist pPVRTC_work_mem = 
malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t))); if (!pPVRTC_work_mem) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: malloc failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n"); return false; } pPVRTC_endpoints = (uint32_t*) & ((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y]; @@ -8228,6 +8006,16 @@ namespace basist int prev_endpoint_pred_sym = 0; int endpoint_pred_repeat_count = 0; uint32_t prev_endpoint_index = 0; + const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints; + const selector_vec& selectors = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_selectors : m_local_selectors; + if (!endpoints.size() || !selectors.size()) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n"); + return false; + } + + const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size(); + const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { @@ -8279,7 +8067,7 @@ namespace basist // Left if (!block_x) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (0)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8292,7 +8080,7 @@ namespace basist // Upper if (!block_y) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (1)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8314,7 +8102,7 @@ namespace basist // Upper left if ((!block_x) || (!block_y)) { - 
BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (2)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8329,8 +8117,8 @@ namespace basist const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model); endpoint_index = delta_sym + prev_endpoint_index; - if (endpoint_index >= m_endpoints.size()) - endpoint_index -= (int)m_endpoints.size(); + if (endpoint_index >= endpoints.size()) + endpoint_index -= (int)endpoints.size(); } pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index; @@ -8345,7 +8133,7 @@ namespace basist { cur_selector_rle_count--; - selector_sym = (int)m_selectors.size(); + selector_sym = (int)selectors.size(); } else { @@ -8363,28 +8151,28 @@ namespace basist if (cur_selector_rle_count > total_blocks) { // The file is corrupted or we've got a bug. - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (3)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; } - selector_sym = (int)m_selectors.size(); + selector_sym = (int)selectors.size(); cur_selector_rle_count--; } } - if (selector_sym >= (int)m_selectors.size()) + if (selector_sym >= (int)selectors.size()) { assert(m_selector_history_buf_size > 0); - int history_buf_index = selector_sym - (int)m_selectors.size(); + int history_buf_index = selector_sym - (int)selectors.size(); if (history_buf_index >= (int)selector_history_buf.size()) { // The file is corrupted or we've got a bug. 
- BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (4)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8404,10 +8192,10 @@ namespace basist } } - if ((endpoint_index >= m_endpoints.size()) || (selector_index >= m_selectors.size())) + if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size())) { // The file is corrupted or we've got a bug. - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (5)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8428,8 +8216,8 @@ namespace basist } #endif - const endpoint* pEndpoints = &m_endpoints[endpoint_index]; - const selector* pSelector = &m_selectors[selector_index]; + const endpoint* pEndpoints = &endpoints[endpoint_index]; + const selector* pSelector = &selectors[selector_index]; switch (fmt) { @@ -8448,9 +8236,8 @@ namespace basist } case block_format::cBC1: { - void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - #if BASISD_SUPPORT_DXT1 + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; #if BASISD_ENABLE_DEBUG_FLAGS if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints)) convert_etc1s_to_dxt1_vis(static_cast(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks); @@ -8534,8 +8321,8 @@ namespace basist const uint16_t* pAlpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t)); - const endpoint* pAlpha_endpoints = &m_endpoints[pAlpha_block[0]]; - const selector* pAlpha_selector = &m_selectors[pAlpha_block[1]]; + const endpoint* 
pAlpha_endpoints = &endpoints[pAlpha_block[0]]; + const selector* pAlpha_selector = &selectors[pAlpha_block[1]]; const color32& alpha_base_color = pAlpha_endpoints->m_color5; const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5; @@ -8559,16 +8346,7 @@ namespace basist break; } - case block_format::cBC7_M6_OPAQUE_ONLY: - { -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - convert_etc1s_to_bc7_m6(static_cast(pDst_block), pEndpoints, pSelector); -#else - assert(0); -#endif - break; - } + case block_format::cBC7: // for more consistency with UASTC case block_format::cBC7_M5_COLOR: { #if BASISD_SUPPORT_BC7_MODE5 @@ -8603,7 +8381,7 @@ namespace basist { #if BASISD_SUPPORT_ASTC void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &m_endpoints[0], &m_selectors[0]); + convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &endpoints[0], &selectors[0]); #else assert(0); #endif @@ -8648,8 +8426,8 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - - convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &m_endpoints[0], &m_selectors[0]); + + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); #endif break; } @@ -8665,8 +8443,8 @@ namespace basist assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - 
const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8705,8 +8483,8 @@ namespace basist assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8734,8 +8512,8 @@ namespace basist assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8765,8 +8543,8 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) 
+ (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8775,12 +8553,20 @@ namespace basist if (fmt == block_format::cRGB565) { for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast(((colors[i].r >> 3) << 11) | ((colors[i].g >> 2) << 5) | (colors[i].b >> 3)); + { + packed_colors[i] = static_cast((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31)); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } } else { for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast(((colors[i].b >> 3) << 11) | ((colors[i].g >> 2) << 5) | (colors[i].r >> 3)); + { + packed_colors[i] = static_cast((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31)); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } } for (uint32_t y = 0; y < max_y; y++) @@ -8800,15 +8586,17 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, 
(int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); uint16_t packed_colors[4]; for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast(((colors[i].r >> 4) << 12) | ((colors[i].g >> 4) << 8) | ((colors[i].b >> 4) << 4)); + { + packed_colors[i] = static_cast((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4)); + } for (uint32_t y = 0; y < max_y; y++) { @@ -8817,7 +8605,14 @@ namespace basist for (uint32_t x = 0; x < max_x; x++) { uint16_t cur = reinterpret_cast(pDst_pixels)[x]; + if (BASISD_IS_BIG_ENDIAN) + cur = byteswap_uint16(cur); + cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; + + if (BASISD_IS_BIG_ENDIAN) + cur = byteswap_uint16(cur); + reinterpret_cast(pDst_pixels)[x] = cur; } @@ -8831,15 +8626,19 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); uint16_t packed_colors[4]; for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast(((colors[i].r >> 4) << 12) | ((colors[i].g >> 4) << 8) | ((colors[i].b >> 4) << 4) | 0xF); + { + packed_colors[i] = static_cast((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); 
+ } for (uint32_t y = 0; y < max_y; y++) { @@ -8858,22 +8657,28 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); uint16_t packed_colors[4]; for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = colors[i].g >> 4; + { + packed_colors[i] = mul_8(colors[i].g, 15); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; for (uint32_t x = 0; x < max_x; x++) + { reinterpret_cast(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3]; + } pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); } @@ -8903,7 +8708,7 @@ namespace basist if (endpoint_pred_repeat_count != 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. 
The file is corrupted or this is a bug\n"); return false; } @@ -8914,7 +8719,7 @@ namespace basist if (fmt == block_format::cPVRTC1_4_RGB) fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y); else if (fmt == block_format::cPVRTC1_4_RGBA) - fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &m_endpoints[0], &m_selectors[0]); + fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]); #endif // BASISD_SUPPORT_PVRTC1 if (pPVRTC_work_mem) @@ -8923,1610 +8728,8875 @@ namespace basist return true; } - basisu_transcoder::basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : - m_lowlevel_decoder(pGlobal_sel_codebook) - { - } - - bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const + bool basis_validate_output_buffer_size(transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels, + uint32_t total_slice_blocks) { - if (!validate_header(pData, data_size)) - return false; + if (basis_transcoder_format_is_uncompressed(target_format)) + { + // Assume the output buffer is orig_width by orig_height + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = orig_width; - const basis_file_header* pHeader = reinterpret_cast(pData); + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; -#if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS - if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16) - { - 
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n"); - return false; + // Now make sure the output buffer is large enough, or we'll overwrite memory. + if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); + return false; + } } - - if (full_validation) + else if (target_format == transcoder_texture_format::cTFFXT1_RGB) { - if (crc16(reinterpret_cast(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16) + const uint32_t num_blocks_fxt1_x = (orig_width + 7) / 8; + const uint32_t num_blocks_fxt1_y = (orig_height + 3) / 4; + const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y; + + if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); + return false; + } + } + else + { + if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n"); return false; } } -#endif - return true; } - bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const + bool basisu_lowlevel_etc1s_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t rgb_offset, uint32_t 
rgb_length, uint32_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags, + bool basis_file_has_alpha_slices, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels) { - if (data_size <= sizeof(basis_file_header)) - return false; - - const basis_file_header* pHeader = reinterpret_cast(pData); - - if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header))) + if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n"); return false; } - uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size; - if (data_size < expected_file_size) + if (alpha_length) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n"); - return false; + if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n"); + return false; + } } - - if ((!pHeader->m_total_slices) || (!pHeader->m_total_images)) + else { - BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n"); - return false; + assert(!basis_file_has_alpha_slices); } - if ((pHeader->m_slice_desc_file_ofs >= data_size) || - ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) - ) + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) { - BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is 
corrupted\n"); - return false; + if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } } - return true; - } - - bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const - { - if (data_size <= sizeof(basis_file_header)) + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices)) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n"); - return false; + // Switch to PVRTC1 RGB if the input doesn't have alpha. + target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; } + + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - const basis_file_header* pHeader = reinterpret_cast(pData); - - if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header))) + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks)) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n"); return false; } - uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size; - if (data_size < expected_file_size) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is 
too small, or header is corrupted\n"); - return false; - } + bool status = false; - if ((!pHeader->m_total_images) || (!pHeader->m_total_slices)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n"); - return false; - } + const uint8_t* pData = pCompressed_data + rgb_offset; + uint32_t data_len = rgb_length; + bool is_alpha_slice = false; - if (pHeader->m_total_images > pHeader->m_total_slices) + // If the caller wants us to transcode the mip level's alpha data, then use the next slice. + if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n"); - return false; + pData = pCompressed_data + alpha_offset; + data_len = alpha_length; + is_alpha_slice = true; } - if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) + switch (target_format) + { + case transcoder_texture_format::cTFETC1_RGB: { - if (pHeader->m_total_slices & 1) + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + + if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha basis file\n"); - return false; + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); } + break; } - - if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) + case transcoder_texture_format::cTFBC1_RGB: { - // We only support ETC1S in basis universal - 
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (ETC1S flag check)\n"); +#if !BASISD_SUPPORT_DXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n"); return false; +#else + // status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); + } + break; +#endif } - - if ((pHeader->m_slice_desc_file_ofs >= data_size) || - ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) - ) + case transcoder_texture_format::cTFBC4_R: { - BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n"); +#if !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n"); return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); + } + break; +#endif } - - return true; - } - - basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const - { - if (!validate_header_quick(pData, data_size)) + case transcoder_texture_format::cTFPVRTC1_4_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n"); - return cBASISTexType2DArray; +#if !BASISD_SUPPORT_PVRTC1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n"); + return false; +#else + // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n"); + } + break; +#endif } + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + { +#if !BASISD_SUPPORT_PVRTC1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n"); + return false; +#else + assert(basis_file_has_alpha_slices); + assert(alpha_length); - const basis_file_header* pHeader = static_cast(pData); + // Temp buffer to hold alpha block endpoint/selector indices + basisu::vector temp_block_indices(total_slice_blocks); - basis_texture_type btt = static_cast(static_cast(pHeader->m_tex_type)); + // 
First transcode alpha data to temp buffer + //status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState); + status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n"); + } + else + { + // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n"); + } + } - if (btt >= cBASISTexTypeTotal) - { - BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n"); - return cBASISTexType2DArray; + break; +#endif } - - return btt; - } - - bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const - { - if (!validate_header_quick(pData, data_size)) + case transcoder_texture_format::cTFBC7_RGBA: + case 
transcoder_texture_format::cTFBC7_ALT: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n"); +#if !BASISD_SUPPORT_BC7_MODE5 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n"); return false; - } - - const basis_file_header* pHeader = static_cast(pData); - - userdata0 = pHeader->m_userdata0; - userdata1 = pHeader->m_userdata1; - return true; - } +#else + assert(bytes_per_block_or_pixel == 16); + // We used to support transcoding just alpha to BC7 - but is that useful at all? - uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const - { - if (!validate_header_quick(pData, data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n"); - return 0; - } + // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); - const basis_file_header* pHeader = static_cast(pData); + if ((status) && (basis_file_has_alpha_slices)) + { + // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha. 
+ //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } - return pHeader->m_total_images; - } + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n"); + } - bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const - { - if (!validate_header_quick(pData, data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n"); - return false; + break; +#endif } - - int slice_index = find_first_slice_index(pData, data_size, image_index, 0); - if (slice_index < 0) + case transcoder_texture_format::cTFETC2_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n"); +#if !BASISD_SUPPORT_ETC2_EAC_A8 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n"); return false; - } +#else + assert(bytes_per_block_or_pixel == 16); - const basis_file_header* pHeader = static_cast(pData); + if (basis_file_has_alpha_slices) + { + // First decode the alpha data + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, 
is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + //write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } - if (image_index >= pHeader->m_total_images) + if (status) + { + // Now decode the color data + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFBC3_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n"); +#if !BASISD_SUPPORT_DXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n"); return false; - } - - const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); +#elif !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: 
DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + // First decode the alpha data + if (basis_file_has_alpha_slices) + { + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } - uint32_t total_levels = 1; - for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++) - if (pSlice_descs[i].m_image_index == image_index) - total_levels = basisu::maximum(total_levels, pSlice_descs[i].m_level_index + 1); + if (status) + { + // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3. 
+ //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n"); + } + } else - break; + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n"); + } - if (total_levels > 16) + break; +#endif + } + case transcoder_texture_format::cTFBC5_RG: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n"); +#if !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); return false; - } +#else + assert(bytes_per_block_or_pixel == 16); - const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + //bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + // uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + // basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); - image_info.m_image_index = image_index; - image_info.m_total_levels = total_levels; - image_info.m_alpha_flag = 
(pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; - image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; - image_info.m_width = slice_desc.m_num_blocks_x * 4; - image_info.m_height = slice_desc.m_num_blocks_y * 4; - image_info.m_orig_width = slice_desc.m_orig_width; - image_info.m_orig_height = slice_desc.m_orig_height; - image_info.m_num_blocks_x = slice_desc.m_num_blocks_x; - image_info.m_num_blocks_y = slice_desc.m_num_blocks_y; - image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; - image_info.m_first_slice_index = slice_index; - - return true; - } - - uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const - { - if (!validate_header_quick(pData, data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n"); - return false; + // Decode the R data (actually the green channel of the color data slice in the basis file) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (status) + { + if (basis_file_has_alpha_slices) + { + // Decode the G data (actually the green channel of the alpha data slice in the basis file) + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, 
pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n"); + } + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n"); + } + break; +#endif } - - int slice_index = find_first_slice_index(pData, data_size, image_index, 0); - if (slice_index < 0) + case transcoder_texture_format::cTFASTC_4x4_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n"); +#if !BASISD_SUPPORT_ASTC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n"); return false; - } +#else + assert(bytes_per_block_or_pixel == 16); - const basis_file_header* pHeader = static_cast(pData); + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to the output (we're using the output texture as a temp buffer here). + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (status) + { + // Now decode the color data and transcode to ASTC. 
The transcoder function will read the alpha selector data from the output texture as it converts and + // transcode both the alpha and color data at the same time to ASTC. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); + } + } + else + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); - if (image_index >= pHeader->m_total_images) + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFATC_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n"); +#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n"); return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + 
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n"); + } + break; +#endif } - - const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); - - uint32_t total_levels = 1; - for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++) - if (pSlice_descs[i].m_image_index == image_index) - total_levels = basisu::maximum(total_levels, pSlice_descs[i].m_level_index + 1); - else - break; - - const uint32_t cMaxSupportedLevels = 16; - if (total_levels > cMaxSupportedLevels) + case transcoder_texture_format::cTFATC_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n"); +#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n"); return false; - } +#elif !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); - return total_levels; - } + // First decode the alpha data + if (basis_file_has_alpha_slices) + { + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + 
else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } - bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const - { - if (!validate_header_quick(pData, data_size)) + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC2_4_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n"); +#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n"); return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, 
is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n"); + } + break; +#endif } - - int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); - if (slice_index < 0) + case transcoder_texture_format::cTFPVRTC2_4_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n"); +#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n"); return false; - } +#else + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to the output (we're using the output texture as a temp buffer here). + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n"); + } + else + { + // Now decode the color data and transcode to PVRTC2 RGBA. 
+ //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); + } + } + else + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); - const basis_file_header* pHeader = static_cast(pData); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n"); + } - if (image_index >= pHeader->m_total_images) + break; +#endif + } + case transcoder_texture_format::cTFRGBA32: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n"); - return false; + // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
+ + // First decode the alpha data + if (basis_file_has_alpha_slices) + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + else + status = true; + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n"); + } + + break; } + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + { + // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n"); + } - const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + break; + } + case transcoder_texture_format::cTFRGBA4444: + { + // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- orig_width = slice_desc.m_orig_width; - orig_height = slice_desc.m_orig_height; - total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + // First decode the alpha data + if (basis_file_has_alpha_slices) + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + else + status = true; - return true; - } + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? 
block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n"); + } - bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const - { - if (!validate_header_quick(pData, data_size)) + break; + } + case transcoder_texture_format::cTFFXT1_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n"); +#if !BASISD_SUPPORT_FXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n"); return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n"); + } + break; +#endif } - - int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); - if (slice_index < 0) + case transcoder_texture_format::cTFETC2_EAC_R11: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n"); +#if 
!BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n"); return false; - } - - const basis_file_header* pHeader = static_cast(pData); +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n"); + } - if (image_index >= pHeader->m_total_images) + break; +#endif + } + case transcoder_texture_format::cTFETC2_EAC_RG11: { - BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n"); +#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n"); return false; - } - - const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); +#else + assert(bytes_per_block_or_pixel == 16); - const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to G + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, 
is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } - image_info.m_image_index = image_index; - image_info.m_level_index = level_index; - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; - image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; - image_info.m_width = slice_desc.m_num_blocks_x * 4; - image_info.m_height = slice_desc.m_num_blocks_y * 4; - image_info.m_orig_width = slice_desc.m_orig_width; - image_info.m_orig_height = slice_desc.m_orig_height; - image_info.m_num_blocks_x = slice_desc.m_num_blocks_x; - image_info.m_num_blocks_y = slice_desc.m_num_blocks_y; - image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; - image_info.m_first_slice_index = slice_index; + if (status) + { + // Now decode the color data to R + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n"); + } - return true; + break; +#endif + } + default: + { + 
assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n"); + break; + } + } + + return status; + } + + basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() + { } - bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const + bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) { - if (!validate_file_checksums(pData, data_size, false)) + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder not globally initialized.\n"); return false; } - const basis_file_header* pHeader = static_cast(pData); - const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); +#if BASISD_SUPPORT_UASTC + const uint32_t total_blocks = num_blocks_x * num_blocks_y; - file_info.m_version = pHeader->m_ver; + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + { + if (fmt == block_format::cFXT1_RGB) + output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8; + else + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + } + } - file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * 
sizeof(basis_slice_desc); + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } - file_info.m_total_selectors = pHeader->m_total_selectors; - file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size; + uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks; + if (image_data_size < total_expected_block_bytes) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n"); + return false; + } - file_info.m_total_endpoints = pHeader->m_total_endpoints; - file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size; + const uastc_block* pSource_block = reinterpret_cast(pImage_data); - file_info.m_tables_size = pHeader->m_tables_file_size; + const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0; + const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; - file_info.m_etc1s = (pHeader->m_flags & cBASISHeaderFlagETC1S) != 0; - file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; - file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + bool status = false; + if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + { + if (fmt == block_format::cPVRTC1_4_RGBA) + transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality); + else + transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha); + } + else + { + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, 
pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cETC1: + { + if (from_alpha) + status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3); + else + status = transcode_uastc_to_etc1(*pSource_block, pDst_block); + break; + } + case block_format::cETC2_RGBA: + { + status = transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block); + break; + } + case block_format::cBC1: + { + status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality); + break; + } + case block_format::cBC3: + { + status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality); + break; + } + case block_format::cBC4: + { + if (channel0 < 0) + channel0 = 0; + status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); + break; + } + case block_format::cBC5: + { + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1); + break; + } + case block_format::cBC7: + case block_format::cBC7_M5_COLOR: // for consistently with ETC1S + { + status = transcode_uastc_to_bc7(*pSource_block, pDst_block); + break; + } + case block_format::cASTC_4x4: + { + status = transcode_uastc_to_astc(*pSource_block, pDst_block); + break; + } + case block_format::cETC2_EAC_R11: + { + if (channel0 < 0) + channel0 = 0; + status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0); + break; + } + case block_format::cETC2_EAC_RG11: + { + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1); + break; + } + case block_format::cRGBA32: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false); - const uint32_t total_slices = pHeader->m_total_slices; + assert(sizeof(uint32_t) == 
output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - file_info.m_slice_info.resize(total_slices); + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - file_info.m_slices_size = 0; + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; - file_info.m_tex_type = static_cast(static_cast(pHeader->m_tex_type)); + pDst_pixels[0 + 4 * x] = c.r; + pDst_pixels[1 + 4 * x] = c.g; + pDst_pixels[2 + 4 * x] = c.b; + pDst_pixels[3 + 4 * x] = c.a; + } - if (file_info.m_tex_type > cBASISTexTypeTotal) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n"); - return false; - } + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } - file_info.m_us_per_frame = pHeader->m_us_per_frame; - file_info.m_userdata0 = pHeader->m_userdata0; - file_info.m_userdata1 = pHeader->m_userdata1; + break; + } + case block_format::cRGB565: + case block_format::cBGR565: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32*)block_pixels, false); - file_info.m_image_mipmap_levels.resize(0); - file_info.m_image_mipmap_levels.resize(pHeader->m_total_images); + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - file_info.m_total_images = pHeader->m_total_images; + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - for (uint32_t i = 0; i < total_slices; i++) - { - 
file_info.m_slices_size += pSlice_descs[i].m_file_size; + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; - basisu_slice_info& slice_info = file_info.m_slice_info[i]; + const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) : + static_cast((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31)); - slice_info.m_orig_width = pSlice_descs[i].m_orig_width; - slice_info.m_orig_height = pSlice_descs[i].m_orig_height; - slice_info.m_width = pSlice_descs[i].m_num_blocks_x * 4; - slice_info.m_height = pSlice_descs[i].m_num_blocks_y * 4; - slice_info.m_num_blocks_x = pSlice_descs[i].m_num_blocks_x; - slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y; - slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y; - slice_info.m_compressed_size = pSlice_descs[i].m_file_size; - slice_info.m_slice_index = i; - slice_info.m_image_index = pSlice_descs[i].m_image_index; - slice_info.m_level_index = pSlice_descs[i].m_level_index; - slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16; - slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsIsAlphaData) != 0; - slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } - if (pSlice_descs[i].m_image_index >= pHeader->m_total_images) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n"); - return false; - } + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } - file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1); + break; + } + case block_format::cRGBA4444: + { + color32 
block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32*)block_pixels, false); - if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16) - { - BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice mipmap level is invalid\n"); - return false; - } - } + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - return true; - } + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) const - { - if (m_lowlevel_decoder.m_endpoints.size()) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: already called start_transcoding\n"); - return true; - } + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; - if (!validate_header_quick(pData, data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n"); - return false; - } + const uint16_t packed = static_cast((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15)); - const basis_file_header* pHeader = reinterpret_cast(pData); + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } - const uint8_t* pDataU8 = static_cast(pData); + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + break; + } + default: + assert(0); + break; - if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n"); - } + } - if ((pHeader->m_endpoint_cb_file_ofs > data_size) 
|| (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n"); - return false; - } + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n"); + return false; + } - if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n"); - return false; - } + } // block_x - if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); - return false; + } // block_y } - if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: UASTC is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_uastc_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t 
slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n"); return false; - } + } - if (!m_lowlevel_decoder.decode_palettes( - pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size, - pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size)) + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n"); - return false; + if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } } - if (!m_lowlevel_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size)) + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha)) { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n"); - return false; + // Switch to PVRTC1 RGB if the input doesn't have alpha. 
+ target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; } - return true; - } + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels) const - { - if (!m_lowlevel_decoder.m_endpoints.size()) + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks)) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n"); return false; } + + bool status = false; - if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) + // UASTC4x4 + switch (target_format) { - // TODO: Not yet supported - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); - return false; - } - - if (!validate_header_quick(pData, data_size)) + case transcoder_texture_format::cTFETC1_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n"); - return false; + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = 
transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); + } + break; } - - const basis_file_header* pHeader = reinterpret_cast(pData); - - const uint8_t* pDataU8 = static_cast(pData); - - if (slice_index >= pHeader->m_total_slices) + case transcoder_texture_format::cTFETC2_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n"); - return false; + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC2 failed\n"); + } + break; } - - const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; - - uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - - if (basis_block_format_is_uncompressed(fmt)) + case transcoder_texture_format::cTFBC1_RGB: { - // Assume the output buffer is orig_width by orig_height - if (!output_row_pitch_in_blocks_or_pixels) - output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width; - - if (!output_rows_in_pixels) - output_rows_in_pixels = slice_desc.m_orig_height; 
- - // Now make sure the output buffer is large enough, or we'll overwrite memory. - if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) + // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1, + bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); - return false; + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); } + break; } - else if (fmt == block_format::cFXT1_RGB) + case transcoder_texture_format::cTFBC3_RGBA: { - const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8; - const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4; - const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y; - - if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, 
pState, output_rows_in_pixels, channel0, channel1); + if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); - return false; + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC3 failed\n"); } + break; } - else + case transcoder_texture_format::cTFBC4_R: { - if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0); + if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n"); - return false; + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); } + break; } - - if (fmt != block_format::cETC1) + case transcoder_texture_format::cTFBC5_RG: { - if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // 0, 3); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3); + if (!status) { - if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4))) - { - // PVRTC1 only supports power of 2 dimensions - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n"); - return false; - } + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC5 failed\n"); } + break; } - - if (slice_desc.m_file_ofs > data_size) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n"); - return false; - } - - const uint32_t data_size_left = data_size - slice_desc.m_file_ofs; - if (data_size_left < slice_desc.m_file_size) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); - return false; - } - - return m_lowlevel_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, - 
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, - fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, - (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels); - } - - int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const - { - (void)data_size; - - const basis_file_header* pHeader = reinterpret_cast(pData); - const uint8_t* pDataU8 = static_cast(pData); - - // For very large basis files this search could be painful - // TODO: Binary search this - for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++) - { - const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter]; - if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index)) - return slice_iter; - } - - BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n"); - - return -1; - } - - int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const - { - if (!validate_header_quick(pData, data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n"); - return false; - } - - const basis_file_header* pHeader = reinterpret_cast(pData); - const uint8_t* pDataU8 = static_cast(pData); - const basis_slice_desc* pSlice_descs = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs); - - // For very large basis files this search could be painful - // TODO: Binary search this - for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++) + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: { - const basis_slice_desc& slice_desc = pSlice_descs[slice_iter]; - if 
((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index)) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) { - const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsIsAlphaData) != 0; - if (slice_alpha == alpha_data) - return slice_iter; + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC7 failed\n"); } + break; } - - BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n"); - - return -1; - } - - static void write_opaque_alpha_blocks( - uint32_t num_blocks_x, uint32_t num_blocks_y, - void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt, - uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels) - { - BASISU_NOTE_UNUSED(output_blocks_buf_size_in_blocks_or_pixels); - - if (!output_row_pitch_in_blocks_or_pixels) - output_row_pitch_in_blocks_or_pixels = num_blocks_x; - - if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) + case transcoder_texture_format::cTFPVRTC1_4_RGB: { -#if BASISD_SUPPORT_ETC2_EAC_A8 - eac_block blk; - blk.m_base = 255; - blk.m_multiplier = 1; - blk.m_table = 13; - - // Selectors are all 4's - static const uint8_t s_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; - memcpy(&blk.m_selectors, s_etc2_eac_a8_sel4, sizeof(s_etc2_eac_a8_sel4)); - - for (uint32_t y = 0; y < num_blocks_y; y++) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, 
output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) { - uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes; - for (uint32_t x = 0; x < num_blocks_x; x++) - { - memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk)); - dst_ofs += block_stride_in_bytes; - } + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n"); } -#endif + break; } - else if (fmt == block_format::cBC4) + case transcoder_texture_format::cTFPVRTC1_4_RGBA: { -#if BASISD_SUPPORT_DXT5A - dxt5a_block blk; - blk.m_endpoints[0] = 255; - blk.m_endpoints[1] = 255; - memset(blk.m_selectors, 0, sizeof(blk.m_selectors)); - - for (uint32_t y = 0; y < num_blocks_y; y++) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) { - uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes; - for (uint32_t x = 0; x < num_blocks_x; x++) - { - memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk)); - dst_ofs += block_stride_in_bytes; - } + 
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n"); } -#endif + break; } - } - - bool basisu_transcoder::transcode_image_level( - const void* pData, uint32_t data_size, - uint32_t image_index, uint32_t level_index, - void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, - transcoder_texture_format fmt, - uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const - { - const uint32_t bytes_per_block = basis_get_bytes_per_block(fmt); - - if (!m_lowlevel_decoder.m_endpoints.size()) + case transcoder_texture_format::cTFASTC_4x4_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n"); - return false; + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n"); + } + break; } - - const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; - - if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); - // TODO: Not yet supported + 
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->ATC currently unsupported\n"); return false; } - - if (!validate_header_quick(pData, data_size)) + case transcoder_texture_format::cTFFXT1_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n"); return false; } - - const basis_file_header* pHeader = reinterpret_cast(pData); - - const uint8_t* pDataU8 = static_cast(pData); - - const basis_slice_desc* pSlice_descs = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs); - - const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; - - int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); - if (slice_index < 0) + case transcoder_texture_format::cTFPVRTC2_4_RGB: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); - // Unable to find the requested image/level + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); return false; } - - if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices)) - { - // Switch to PVRTC1 RGB if the input doesn't have alpha. 
- fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; - } - - if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsIsAlphaData) + case transcoder_texture_format::cTFPVRTC2_4_RGBA: { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n"); - - // The first slice shouldn't have alpha data in a properly formed basis file + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); return false; } - - if (basis_file_has_alpha_slices) + case transcoder_texture_format::cTFETC2_EAC_R11: { - // The alpha data should immediately follow the color data, and have the same resolution. - if ((slice_index + 1U) >= pHeader->m_total_slices) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n"); - // basis file is missing the alpha slice - return false; - } - - // Basic sanity checks - if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsIsAlphaData) == 0) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n"); - // This slice should have alpha data - return false; - } - - if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y)) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n"); - // Alpha slice should have been the same res as the color slice - return false; + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n"); } + break; } - - bool status = false; - - const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; - - if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) + case transcoder_texture_format::cTFETC2_EAC_RG11: { - // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. - // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. - // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block. This is all the transcoder actually writes to memory. 
- memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // 0, 3); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n"); + } + break; } - - switch (fmt) - { - case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFRGBA32: { - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC1 failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n"); } break; } - case transcoder_texture_format::cTFBC1_RGB: + case transcoder_texture_format::cTFRGB565: { -#if !BASISD_SUPPORT_DXT1 - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC1 failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); } break; } - case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFBGR565: { -#if !BASISD_SUPPORT_DXT5A - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC4 failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); } break; } - case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFRGBA4444: { -#if !BASISD_SUPPORT_PVRTC1 - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - - // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) 
- status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); if (!status) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 RGB failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n"); } break; } - case transcoder_texture_format::cTFPVRTC1_4_RGBA: + default: { -#if !BASISD_SUPPORT_PVRTC1 - return false; -#endif - assert(basis_file_has_alpha_slices); + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: Invalid format\n"); + break; + } + } - // Temp buffer to hold alpha block endpoint/selector indices - std::vector temp_block_indices(total_slice_blocks); + return status; + } + + basisu_transcoder::basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : + m_lowlevel_etc1s_decoder(pGlobal_sel_codebook), + m_ready_to_transcode(false) + { + } - // First transcode alpha data to temp buffer - status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 RGBA failed 
(0)\n"); - } - else - { - // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 RGBA failed (1)\n"); - } - } + bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const + { + if (!validate_header(pData, data_size)) + return false; - break; - } - case transcoder_texture_format::cTFBC7_M6_RGB: + const basis_file_header* pHeader = reinterpret_cast(pData); + +#if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS + if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16) { -#if !BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n"); return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + } - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M6_OPAQUE_ONLY, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + if (full_validation) + { + if (crc16(reinterpret_cast(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC7 m6 opaque only failed\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n"); + return false; } - break; } - case transcoder_texture_format::cTFBC7_M5_RGBA: - { -#if !BASISD_SUPPORT_BC7_MODE5 - return false; -#else - assert(bytes_per_block == 16); - - // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks. - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); +#endif - if ((status) && (basis_file_has_alpha_slices)) - { - // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha. 
- status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - } + return true; + } - break; -#endif - } - case transcoder_texture_format::cTFETC2_RGBA: - { -#if !BASISD_SUPPORT_ETC2_EAC_A8 + bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const + { + if (data_size <= sizeof(basis_file_header)) return false; -#endif - assert(bytes_per_block == 16); - if (basis_file_has_alpha_slices) - { - // First decode the alpha data - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - } - else - { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); - status = true; - } + const basis_file_header* pHeader = reinterpret_cast(pData); - if (status) - { - // Now decode the color data - status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2 RGB failed\n"); - } - } - else - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2 A failed\n"); - } - break; - } - case transcoder_texture_format::cTFBC3_RGBA: + if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header))) { -#if !BASISD_SUPPORT_DXT1 - return false; -#endif -#if 
!BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n"); return false; -#endif - assert(bytes_per_block == 16); - - // First decode the alpha data - if (basis_file_has_alpha_slices) - { - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - } - else - { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); - status = true; - } - - if (status) - { - // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3. - status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC3 RGB failed\n"); - } - } - else - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC3 A failed\n"); - } - - break; } - case transcoder_texture_format::cTFBC5_RG: + + uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size; + if (data_size < expected_file_size) { -#if !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n"); return false; -#endif - assert(bytes_per_block == 16); + } - // Decode the R data (actually the green channel of the color data slice in the basis file) - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, 
output_row_pitch_in_blocks_or_pixels, pState); - if (status) - { - if (basis_file_has_alpha_slices) - { - // Decode the G data (actually the green channel of the alpha data slice in the basis file) - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC5 1 failed\n"); - } - } - else - { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); - status = true; - } - } - else - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC5 channel 0 failed\n"); - } - break; + if ((!pHeader->m_total_slices) || (!pHeader->m_total_images)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n"); + return false; } - case transcoder_texture_format::cTFASTC_4x4_RGBA: + + if ((pHeader->m_slice_desc_file_ofs >= data_size) || + ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) + ) { -#if !BASISD_SUPPORT_ASTC + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n"); return false; -#endif - assert(bytes_per_block == 16); + } - if (basis_file_has_alpha_slices) - { - // First decode the alpha data to the output (we're using the output texture as a temp buffer here). 
- status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to failed\n"); - } - else - { - // Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and - // transcode both the alpha and color data at the same time to ASTC. - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); - } - } - else - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + return true; + } - break; - } - case transcoder_texture_format::cTFATC_RGB: + bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const + { + if (data_size <= sizeof(basis_file_header)) { -#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n"); return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + } - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ATC_RGB failed\n"); - } - break; + const basis_file_header* pHeader = reinterpret_cast(pData); + + if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header))) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n"); + return false; } - case transcoder_texture_format::cTFATC_RGBA: + + uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size; + if (data_size < expected_file_size) { -#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small, or header is corrupted\n"); return false; -#endif -#if !BASISD_SUPPORT_DXT5A + } + + if ((!pHeader->m_total_images) || (!pHeader->m_total_slices)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n"); return false; -#endif - assert(bytes_per_block == 16); + } - // First decode the alpha data - if (basis_file_has_alpha_slices) - { - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - } - else - { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, 
output_row_pitch_in_blocks_or_pixels); - status = true; - } + if (pHeader->m_total_images > pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n"); + return false; + } - if (status) + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) { - status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + if (pHeader->m_total_slices & 1) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ATC RGB failed\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n"); + return false; } } - else + + // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too. + if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ATC A failed\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n"); + return false; } - break; } - case transcoder_texture_format::cTFPVRTC2_4_RGB: + else { -#if !BASISD_SUPPORT_PVRTC2 - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to cPVRTC2_4_RGB failed\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n"); + return false; } - break; } - case transcoder_texture_format::cTFPVRTC2_4_RGBA: + + if ((pHeader->m_slice_desc_file_ofs >= data_size) || + ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) + ) { -#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n"); return false; -#endif - if (basis_file_has_alpha_slices) - { - // First decode the alpha data to the output (we're using the output texture as a temp buffer here). - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to failed\n"); - } - else - { - // Now decode the color data and transcode to PVRTC2 RGBA. 
- status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); - } - } - else - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + } - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to cPVRTC2_4_RGBA failed\n"); - } + return true; + } - break; - } - case transcoder_texture_format::cTFRGBA32: + basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) { - // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n"); + return cBASISTexType2DArray; + } - // First decode the alpha data - if (basis_file_has_alpha_slices) - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - else - status = true; + const basis_file_header* pHeader = static_cast(pData); - if (status) - { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? 
block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA32 RGB failed\n"); - } - } - else - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA32 A failed\n"); - } + basis_texture_type btt = static_cast(static_cast(pHeader->m_tex_type)); - break; - } - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: + if (btt >= cBASISTexTypeTotal) { - // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n"); + return cBASISTexType2DArray; + } - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? 
block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGB565 RGB failed\n"); - } + return btt; + } - break; + bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n"); + return false; } - case transcoder_texture_format::cTFRGBA4444: + + const basis_file_header* pHeader = static_cast(pData); + + userdata0 = pHeader->m_userdata0; + userdata1 = pHeader->m_userdata1; + return true; + } + + uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) { - // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n"); + return 0; + } - // First decode the alpha data - if (basis_file_has_alpha_slices) - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - else - status = true; + const basis_file_header* pHeader = static_cast(pData); - if (status) - { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? 
block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA4444 RGB failed\n"); - } - } - else - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA4444 A failed\n"); - } + return pHeader->m_total_images; + } - break; + basis_tex_format basisu_transcoder::get_tex_format(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n"); + return basis_tex_format::cETC1S; } - case transcoder_texture_format::cTFFXT1_RGB: + + const basis_file_header* pHeader = static_cast(pData); + + return (basis_tex_format)(uint32_t)pHeader->m_tex_format; + } + + bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const + { + if (!validate_header_quick(pData, data_size)) { -#if !BASISD_SUPPORT_FXT1 + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n"); return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
- if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to FXT1_RGB failed\n"); - } - break; } - case transcoder_texture_format::cTFETC2_EAC_R11: + + int slice_index = find_first_slice_index(pData, data_size, image_index, 0); + if (slice_index < 0) { -#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n"); return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + } - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2_EAC_R11 failed\n"); - } + const basis_file_header* pHeader = static_cast(pData); - break; - } - case transcoder_texture_format::cTFETC2_EAC_RG11: + if (image_index >= pHeader->m_total_images) { -#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n"); return false; -#endif - assert(bytes_per_block == 16); + } - if (basis_file_has_alpha_slices) - { - // First decode the alpha data to G - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t *)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, 
decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - } - else - { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, (uint8_t *)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels); - status = true; - } + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); - if (status) - { - // Now decode the color data to R - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2_EAC_R11 R failed\n"); - } - } + uint32_t total_levels = 1; + for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++) + if (pSlice_descs[i].m_image_index == image_index) + total_levels = basisu::maximum(total_levels, pSlice_descs[i].m_level_index + 1); else - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2_EAC_R11 G failed\n"); - } + break; - break; - } - default: + if (total_levels > 16) { - assert(0); - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Invalid fmt\n"); - break; - } + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n"); + return false; } - return status; + const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + + image_info.m_image_index = image_index; + image_info.m_total_levels = total_levels; + + image_info.m_alpha_flag = false; + + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
+ if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + else + image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + + image_info.m_width = slice_desc.m_num_blocks_x * 4; + image_info.m_height = slice_desc.m_num_blocks_y * 4; + image_info.m_orig_width = slice_desc.m_orig_width; + image_info.m_orig_height = slice_desc.m_orig_height; + image_info.m_num_blocks_x = slice_desc.m_num_blocks_x; + image_info.m_num_blocks_y = slice_desc.m_num_blocks_y; + image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; + image_info.m_first_slice_index = slice_index; + + return true; } - uint32_t basis_get_bytes_per_block(transcoder_texture_format fmt) + uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const { - switch (fmt) + if (!validate_header_quick(pData, data_size)) { - case transcoder_texture_format::cTFETC1_RGB: - case transcoder_texture_format::cTFBC1_RGB: - case transcoder_texture_format::cTFBC4_R: - case transcoder_texture_format::cTFPVRTC1_4_RGB: - case transcoder_texture_format::cTFPVRTC1_4_RGBA: - case transcoder_texture_format::cTFATC_RGB: - case transcoder_texture_format::cTFPVRTC2_4_RGB: - case transcoder_texture_format::cTFPVRTC2_4_RGBA: - case transcoder_texture_format::cTFETC2_EAC_R11: - return 8; - case transcoder_texture_format::cTFBC7_M6_RGB: - case transcoder_texture_format::cTFBC7_M5_RGBA: - case transcoder_texture_format::cTFETC2_RGBA: - case transcoder_texture_format::cTFBC3_RGBA: - case transcoder_texture_format::cTFBC5_RG: - case transcoder_texture_format::cTFASTC_4x4_RGBA: - case transcoder_texture_format::cTFATC_RGBA: - case transcoder_texture_format::cTFFXT1_RGB: - case transcoder_texture_format::cTFETC2_EAC_RG11: - return 16; - case 
transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t) * 16; - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return sizeof(uint16_t) * 16; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); - break; + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n"); + return false; } - return 0; - } - const char* basis_get_format_name(transcoder_texture_format fmt) - { - switch (fmt) + int slice_index = find_first_slice_index(pData, data_size, image_index, 0); + if (slice_index < 0) { - case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB"; - case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB"; - case transcoder_texture_format::cTFBC4_R: return "BC4_R"; - case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB"; - case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; - case transcoder_texture_format::cTFBC7_M6_RGB: return "BC7_M6_RGB"; - case transcoder_texture_format::cTFBC7_M5_RGBA: return "BC7_M5_RGBA"; - case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA"; - case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; - case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; - case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; - case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA"; - case transcoder_texture_format::cTFRGBA32: return "RGBA32"; - case transcoder_texture_format::cTFRGB565: return "RGB565"; - case transcoder_texture_format::cTFBGR565: return "BGR565"; - case transcoder_texture_format::cTFRGBA4444: return "RGBA4444"; - case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB"; - case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB"; - case transcoder_texture_format::cTFPVRTC2_4_RGBA: return 
"PVRTC2_4_RGBA"; - case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; - case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); - break; + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n"); + return false; } - return ""; - } - const char* basis_get_texture_type_name(basis_texture_type tex_type) - { - switch (tex_type) + const basis_file_header* pHeader = static_cast(pData); + + if (image_index >= pHeader->m_total_images) { - case cBASISTexType2D: return "2D"; + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n"); + return false; + } + + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + uint32_t total_levels = 1; + for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++) + if (pSlice_descs[i].m_image_index == image_index) + total_levels = basisu::maximum(total_levels, pSlice_descs[i].m_level_index + 1); + else + break; + + const uint32_t cMaxSupportedLevels = 16; + if (total_levels > cMaxSupportedLevels) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n"); + return false; + } + + return total_levels; + } + + bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n"); + return false; + } + + int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n"); + return false; + } + + const basis_file_header* pHeader = 
static_cast(pData); + + if (image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n"); + return false; + } + + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + + orig_width = slice_desc.m_orig_width; + orig_height = slice_desc.m_orig_height; + total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + + return true; + } + + bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n"); + return false; + } + + int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + + if (image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n"); + return false; + } + + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + + image_info.m_image_index = image_index; + image_info.m_level_index = level_index; + + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
+ if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + else + image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + image_info.m_width = slice_desc.m_num_blocks_x * 4; + image_info.m_height = slice_desc.m_num_blocks_y * 4; + image_info.m_orig_width = slice_desc.m_orig_width; + image_info.m_orig_height = slice_desc.m_orig_height; + image_info.m_num_blocks_x = slice_desc.m_num_blocks_x; + image_info.m_num_blocks_y = slice_desc.m_num_blocks_y; + image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; + image_info.m_first_slice_index = slice_index; + + image_info.m_rgb_file_ofs = slice_desc.m_file_ofs; + image_info.m_rgb_file_len = slice_desc.m_file_size; + image_info.m_alpha_file_ofs = 0; + image_info.m_alpha_file_len = 0; + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) + { + assert((slice_index + 1) < (int)pHeader->m_total_slices); + image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs; + image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size; + } + } + + return true; + } + + bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const + { + if (!validate_file_checksums(pData, data_size, false)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + file_info.m_version = pHeader->m_ver; + + file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc); + + file_info.m_total_selectors = 
pHeader->m_total_selectors; + file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs; + file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size; + + file_info.m_total_endpoints = pHeader->m_total_endpoints; + file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs; + file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size; + + file_info.m_tables_ofs = pHeader->m_tables_file_ofs; + file_info.m_tables_size = pHeader->m_tables_file_size; + + file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); + + file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); + + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; + file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + + const uint32_t total_slices = pHeader->m_total_slices; + + file_info.m_slice_info.resize(total_slices); + + file_info.m_slices_size = 0; + + file_info.m_tex_type = static_cast(static_cast(pHeader->m_tex_type)); + + if (file_info.m_tex_type > cBASISTexTypeTotal) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n"); + return false; + } + + file_info.m_us_per_frame = pHeader->m_us_per_frame; + file_info.m_userdata0 = pHeader->m_userdata0; + file_info.m_userdata1 = pHeader->m_userdata1; + + file_info.m_image_mipmap_levels.resize(0); + file_info.m_image_mipmap_levels.resize(pHeader->m_total_images); + + file_info.m_total_images = pHeader->m_total_images; + + for (uint32_t i = 0; i < total_slices; i++) + { + file_info.m_slices_size += pSlice_descs[i].m_file_size; + + basisu_slice_info& slice_info = file_info.m_slice_info[i]; + + slice_info.m_orig_width = pSlice_descs[i].m_orig_width; + slice_info.m_orig_height = pSlice_descs[i].m_orig_height; + slice_info.m_width = pSlice_descs[i].m_num_blocks_x * 4; + slice_info.m_height = pSlice_descs[i].m_num_blocks_y * 4; + slice_info.m_num_blocks_x = 
pSlice_descs[i].m_num_blocks_x; + slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y; + slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y; + slice_info.m_compressed_size = pSlice_descs[i].m_file_size; + slice_info.m_slice_index = i; + slice_info.m_image_index = pSlice_descs[i].m_image_index; + slice_info.m_level_index = pSlice_descs[i].m_level_index; + slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16; + slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0; + slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + + if (pSlice_descs[i].m_image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n"); + return false; + } + + file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1); + + if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice mipmap level is invalid\n"); + return false; + } + } + + return true; + } + + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + const uint8_t* pDataU8 = static_cast(pData); + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) + { + m_lowlevel_etc1s_decoder.clear(); + } + + if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook) + { + if (!m_lowlevel_etc1s_decoder.get_global_codebooks()) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but 
set_global_codebooks() has not been called\n"); + return false; + } + if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size()) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n"); + return false; + } + if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) || + (m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n"); + return false; + } + if (!pHeader->m_tables_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n"); + return false; + } + if (pHeader->m_tables_file_ofs > data_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n"); + return false; + } + if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (5)\n"); + return false; + } + } + else + { + if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n"); + return false; + } + + if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n"); + return false; + } + + if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n"); + return false; + } + + if 
(pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + return false; + } + + if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + return false; + } + + if (!m_lowlevel_etc1s_decoder.decode_palettes( + pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size, + pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n"); + return false; + } + } + + if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n"); + return false; + } + } + else + { + // Nothing special to do for UASTC. 
+ if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) + { + m_lowlevel_etc1s_decoder.clear(); + } + } + + m_ready_to_transcode = true; + + return true; + } + + bool basisu_transcoder::stop_transcoding() + { + m_lowlevel_etc1s_decoder.clear(); + + m_ready_to_transcode = false; + + return true; + } + + bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const + { + if (!m_ready_to_transcode) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n"); + return false; + } + + if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) + { + // TODO: Not yet supported + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); + return false; + } + + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + + const uint8_t* pDataU8 = static_cast(pData); + + if (slice_index >= pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n"); + return false; + } + + const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; + + uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + + if (basis_block_format_is_uncompressed(fmt)) + { + // Assume the output buffer is orig_width by orig_height + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width; + + 
if (!output_rows_in_pixels) + output_rows_in_pixels = slice_desc.m_orig_height; + + // Now make sure the output buffer is large enough, or we'll overwrite memory. + if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); + return false; + } + } + else if (fmt == block_format::cFXT1_RGB) + { + const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8; + const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4; + const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y; + + if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); + return false; + } + } + else + { + if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n"); + return false; + } + } + + if (fmt != block_format::cETC1) + { + if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + } + + if (slice_desc.m_file_ofs > data_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n"); + return false; + } + + const uint32_t data_size_left = data_size - slice_desc.m_file_ofs; + if (data_size_left < slice_desc.m_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: 
invalid slice_desc.m_file_size, or passed in buffer too small\n"); + return false; + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + { + return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else + { + return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels); + } + } + + int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const + { + BASISU_NOTE_UNUSED(data_size); + + const basis_file_header* pHeader = reinterpret_cast(pData); + const uint8_t* pDataU8 = static_cast(pData); + + // For very large basis files this search could be painful + // TODO: Binary search this + for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++) + { + const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter]; + if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index)) + return slice_iter; + } + + BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n"); + + return -1; + } + + int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const + 
{ + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + const uint8_t* pDataU8 = static_cast(pData); + const basis_slice_desc* pSlice_descs = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs); + + // For very large basis files this search could be painful + // TODO: Binary search this + for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++) + { + const basis_slice_desc& slice_desc = pSlice_descs[slice_iter]; + if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index)) + { + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + if (slice_alpha == alpha_data) + return slice_iter; + } + else + { + return slice_iter; + } + } + } + + BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n"); + + return -1; + } + + void basisu_transcoder::write_opaque_alpha_blocks( + uint32_t num_blocks_x, uint32_t num_blocks_y, + void* pOutput_blocks, block_format fmt, + uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels) + { + // 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused + // when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pOutput_blocks); + BASISU_NOTE_UNUSED(block_stride_in_bytes); + + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + + if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) + { +#if BASISD_SUPPORT_ETC2_EAC_A8 + eac_block blk; + blk.m_base = 255; + blk.m_multiplier = 1; + blk.m_table = 13; + + // Selectors are all 4's + memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + for (uint32_t y = 0; y < 
num_blocks_y; y++) + { + uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes; + for (uint32_t x = 0; x < num_blocks_x; x++) + { + memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk)); + dst_ofs += block_stride_in_bytes; + } + } +#endif + } + else if (fmt == block_format::cBC4) + { +#if BASISD_SUPPORT_DXT5A + dxt5a_block blk; + blk.m_endpoints[0] = 255; + blk.m_endpoints[1] = 255; + memset(blk.m_selectors, 0, sizeof(blk.m_selectors)); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes; + for (uint32_t x = 0; x < num_blocks_x; x++) + { + memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk)); + dst_ofs += block_stride_in_bytes; + } + } +#endif + } + } + + bool basisu_transcoder::transcode_image_level( + const void* pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + transcoder_texture_format fmt, + uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const + { + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt); + + if (!m_ready_to_transcode) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n"); + return false; + } + + if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); + // TODO: Not yet supported + return false; + } + + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + + const uint8_t* pDataU8 = static_cast(pData); + + const basis_slice_desc* pSlice_descs = 
reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs); + + const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + + int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); + // Unable to find the requested image/level + return false; + } + + if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices)) + { + // Switch to PVRTC1 RGB if the input doesn't have alpha. + fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n"); + + // The first slice shouldn't have alpha data in a properly formed basis file + return false; + } + + if (basis_file_has_alpha_slices) + { + // The alpha data should immediately follow the color data, and have the same resolution. 
+ if ((slice_index + 1U) >= pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n"); + // basis file is missing the alpha slice + return false; + } + + // Basic sanity checks + if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n"); + // This slice should have alpha data + return false; + } + + if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n"); + // Alpha slice should have been the same res as the color slice + return false; + } + } + } + + bool status = false; + + const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; + + if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) + { + // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
+ memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. + status = m_lowlevel_uastc_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else + { + // ETC1S + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr; + + assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0); + + if (pAlpha_slice_desc) + { + // Basic sanity checks + assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0); + assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x); + assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y); + assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index); + } + + // Use the container independent image transcode method. + status = m_lowlevel_etc1s_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + (pAlpha_slice_desc != nullptr) ? 
(uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + + } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); + } + else + { + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + } + + return status; + } + + uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFBC1_RGB: + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFETC2_EAC_R11: + return 8; + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFBC5_RG: + case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFFXT1_RGB: + case transcoder_texture_format::cTFETC2_EAC_RG11: + return 16; + case transcoder_texture_format::cTFRGBA32: + return sizeof(uint32_t); + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return sizeof(uint16_t); + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return 0; + } + + const char* 
basis_get_format_name(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB"; + case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB"; + case transcoder_texture_format::cTFBC4_R: return "BC4_R"; + case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB"; + case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; + case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA"; + case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA"; + case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA"; + case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; + case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; + case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; + case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA"; + case transcoder_texture_format::cTFRGBA32: return "RGBA32"; + case transcoder_texture_format::cTFRGB565: return "RGB565"; + case transcoder_texture_format::cTFBGR565: return "BGR565"; + case transcoder_texture_format::cTFRGBA4444: return "RGBA4444"; + case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB"; + case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB"; + case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; + case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; + case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return ""; + } + + const char* basis_get_block_format_name(block_format fmt) + { + switch (fmt) + { + case block_format::cETC1: return "ETC1"; + case block_format::cBC1: return "BC1"; + case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB"; + case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; + case 
block_format::cBC7: return "BC7"; + case block_format::cETC2_RGBA: return "ETC2_RGBA"; + case block_format::cBC3: return "BC3"; + case block_format::cASTC_4x4: return "ASTC_4x4"; + case block_format::cATC_RGB: return "ATC_RGB"; + case block_format::cRGBA32: return "RGBA32"; + case block_format::cRGB565: return "RGB565"; + case block_format::cBGR565: return "BGR565"; + case block_format::cRGBA4444: return "RGBA4444"; + case block_format::cFXT1_RGB: return "FXT1_RGB"; + case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB"; + case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; + case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11"; + case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return ""; + } + + const char* basis_get_texture_type_name(basis_texture_type tex_type) + { + switch (tex_type) + { + case cBASISTexType2D: return "2D"; case cBASISTexType2DArray: return "2D array"; case cBASISTexTypeCubemapArray: return "cubemap array"; case cBASISTexTypeVideoFrames: return "video"; case cBASISTexTypeVolume: return "3D"; default: assert(0); - BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n"); - break; + BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n"); + break; + } + return ""; + } + + bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGBA4444: + return true; + default: + break; + } + return 
false; + } + + basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1; + case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1; + case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4; + case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB; + case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA; + case transcoder_texture_format::cTFBC7_RGBA: return basisu::texture_format::cBC7; + case transcoder_texture_format::cTFBC7_ALT: return basisu::texture_format::cBC7; + case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; + case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; + case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4; + case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB; + case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; + case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32; + case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565; + case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565; + case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444; + case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB; + case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA; + case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA; + case transcoder_texture_format::cTFETC2_EAC_R11: return 
basisu::texture_format::cETC2_R11_EAC; + case transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return basisu::texture_format::cInvalidTextureFormat; + } + + bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return true; + default: + break; + } + return false; + } + + bool basis_block_format_is_uncompressed(block_format blk_fmt) + { + switch (blk_fmt) + { + case block_format::cRGB32: + case block_format::cRGBA32: + case block_format::cA32: + case block_format::cRGB565: + case block_format::cBGR565: + case block_format::cRGBA4444: + case block_format::cRGBA4444_COLOR: + case block_format::cRGBA4444_ALPHA: + case block_format::cRGBA4444_COLOR_OPAQUE: + return true; + default: + break; + } + return false; + } + + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFRGBA32: + return sizeof(uint32_t); + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return sizeof(uint16_t); + default: + break; + } + return 0; + } + + uint32_t basis_get_block_width(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case transcoder_texture_format::cTFFXT1_RGB: + return 8; + default: + break; + } + return 4; + } + + uint32_t basis_get_block_height(transcoder_texture_format tex_type) + { + BASISU_NOTE_UNUSED(tex_type); + return 4; + } + + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) + { + if (fmt == basis_tex_format::cUASTC4x4) + { +#if BASISD_SUPPORT_UASTC + switch 
(tex_type) + { + // These niche formats aren't currently supported for UASTC - everything else is. + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFFXT1_RGB: + return false; + default: + return true; + } +#endif + } + else + { + switch (tex_type) + { + // ETC1 and uncompressed are always supported. + case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return true; +#if BASISD_SUPPORT_DXT1 + case transcoder_texture_format::cTFBC1_RGB: + return true; +#endif +#if BASISD_SUPPORT_DXT5A + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFBC5_RG: + return true; +#endif +#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A + case transcoder_texture_format::cTFBC3_RGBA: + return true; +#endif +#if BASISD_SUPPORT_PVRTC1 + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_BC7_MODE5 + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + return true; +#endif +#if BASISD_SUPPORT_ETC2_EAC_A8 + case transcoder_texture_format::cTFETC2_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ASTC + case transcoder_texture_format::cTFASTC_4x4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ATC + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + return true; +#endif +#if BASISD_SUPPORT_FXT1 + case transcoder_texture_format::cTFFXT1_RGB: + return true; +#endif +#if BASISD_SUPPORT_PVRTC2 + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + return true; +#endif +#if 
BASISD_SUPPORT_ETC2_EAC_RG11 + case transcoder_texture_format::cTFETC2_EAC_R11: + case transcoder_texture_format::cTFETC2_EAC_RG11: + return true; +#endif + default: + break; + } + } + + return false; + } + + // ------------------------------------------------------------------------------------------------------ + // UASTC + // ------------------------------------------------------------------------------------------------------ + +#if BASISD_SUPPORT_UASTC + const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = + { + { 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false }, + { 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true }, + { 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true }, + { 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true }, + { 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false }, + { 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false }, + { 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true }, + { 33, 694, true }, { 52, 524, true } + }; + + const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] = + { + { 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 }, + { 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 }, + { 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 }, + { 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 }, + { 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 } + }; + + const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] = + { + { 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 }, + { 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 }, + { 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 } + }; + + const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 1, 2, 0 }, { 2, 0, 1 }, { 2, 1, 0 }, { 0, 
2, 1 }, { 1, 0, 2 } }; + + const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 2, 0, 1 }, { 1, 2, 0 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } }; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k) + { + assert(k < 6); + switch (k >> 1) + { + case 0: + if (p <= 1) + p = 0; + else + p = 1; + break; + case 1: + if (p == 0) + p = 0; + else + p = 1; + break; + case 2: + if ((p == 0) || (p == 2)) + p = 0; + else + p = 1; + break; + } + if (k & 1) + p = 1 - p; + return p; + } + + static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] = + { + { 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 }, + { 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 }, + { 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 }, + { 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 }, + { 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 }, + { 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 }, + { 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 }, + { 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 } + }; + + const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] = + { + { 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 
1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 }, + { 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 }, + { 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 } + }; + + const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] = + { + { 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 }, + { 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, + { 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 }, + { 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 }, + { 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 } + }; + + const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] = + { + { 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 }, + { 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 }, + { 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 }, + { 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 } + }; + + const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] = + { + { 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 } + }; + + const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] = + { + { 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, 
{ 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 } + }; + + const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] = + { + { 0x1, 4 }, + { 0x35, 6 }, + { 0x1D, 5 }, + { 0x3, 5 }, + + { 0x13, 5 }, + { 0xB, 5 }, + { 0x1B, 5 }, + { 0x7, 5 }, + + { 0x17, 5 }, + { 0xF, 5 }, + { 0x2, 3 }, + { 0x0, 2 }, + + { 0x6, 3 }, + { 0x1F, 5 }, + { 0xD, 5 }, + { 0x5, 7 }, + + { 0x15, 6 }, + { 0x25, 6 }, + { 0x9, 4 }, + { 0x45, 7 } // future expansion + }; + + // If g_uastc_mode_huff_codes[] changes this table must be updated! + static const uint8_t g_uastc_huff_modes[128] = + { + 11,0,10,3,11,15,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11, + 19,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13 + }; + + const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 }; + const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 }; + const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 }; + const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 }; + const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 }; + const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 }; + const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; + const uint8_t 
g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 }; + const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; + const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 }; + const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 }; + + // bits, trits, quints + const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] = + { + { 1, 0, 0 }, // 0-1 0 + { 0, 1, 0 }, // 0-2 1 + { 2, 0, 0 }, // 0-3 2 + { 0, 0, 1 }, // 0-4 3 + + { 1, 1, 0 }, // 0-5 4 + { 3, 0, 0 }, // 0-7 5 + { 1, 0, 1 }, // 0-9 6 + { 2, 1, 0 }, // 0-11 7 + + { 4, 0, 0 }, // 0-15 8 + { 2, 0, 1 }, // 0-19 9 + { 3, 1, 0 }, // 0-23 10 + { 5, 0, 0 }, // 0-31 11 + + { 3, 0, 1 }, // 0-39 12 + { 4, 1, 0 }, // 0-47 13 + { 6, 0, 0 }, // 0-63 14 + { 4, 0, 1 }, // 0-79 15 + + { 5, 1, 0 }, // 0-95 16 + { 7, 0, 0 }, // 0-127 17 + { 5, 0, 1 }, // 0-159 18 + { 6, 1, 0 }, // 0-191 19 + + { 8, 0, 0 }, // 0-255 20 + }; + + int astc_get_levels(int range) + { + assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES); + return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0]; + } + + // g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[] + astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + // Taken right from the ASTC spec. 
+ static struct + { + const char* m_pB_str; + uint32_t m_c; + } g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] = + { + { "", 0 }, + { "", 0 }, + { "", 0 }, + { "", 0 }, + { "000000000", 204, }, // 0-5 + { "", 0 }, + { "000000000", 113, }, // 0-9 + { "b000b0bb0", 93 }, // 0-11 + { "", 0 }, + { "b0000bb00", 54 }, // 0-19 + { "cb000cbcb", 44 }, // 0-23 + { "", 0 }, + { "cb0000cbc", 26 }, // 0-39 + { "dcb000dcb", 22 }, // 0-47 + { "", 0 }, + { "dcb0000dc", 13 }, // 0-79 + { "edcb000ed", 11 }, // 0-95 + { "", 0 }, + { "edcb0000e", 6 }, // 0-159 + { "fedcb000f", 5 }, // 0-191 + { "", 0 }, + }; + + bool astc_is_valid_endpoint_range(uint32_t range) + { + if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0)) + return true; + + return g_astc_endpoint_unquant_params[range].m_c != 0; + } + + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range) + { + assert(range < BC7ENC_TOTAL_ASTC_RANGES); + + const uint32_t bits = g_astc_bise_range_table[range][0]; + const uint32_t trits = g_astc_bise_range_table[range][1]; + const uint32_t quints = g_astc_bise_range_table[range][2]; + + uint32_t val = 0; + if ((!trits) && (!quints)) + { + assert(!packed_trits && !packed_quints); + + int bits_left = 8; + while (bits_left > 0) + { + uint32_t v = packed_bits; + + int n = basisu::minimumi(bits_left, bits); + if (n < (int)bits) + v >>= (bits - n); + + assert(v < (1U << n)); + + val |= (v << (bits_left - n)); + bits_left -= n; + } + } + else + { + const uint32_t A = (packed_bits & 1) ? 511 : 0; + const uint32_t C = g_astc_endpoint_unquant_params[range].m_c; + const uint32_t D = trits ? 
packed_trits : packed_quints; + + assert(C); + + uint32_t B = 0; + for (uint32_t i = 0; i < 9; i++) + { + B <<= 1; + + char c = g_astc_endpoint_unquant_params[range].m_pB_str[i]; + if (c != '0') + { + c -= 'a'; + B |= ((packed_bits >> c) & 1); + } + } + + val = D * C + B; + val = val ^ A; + val = (A & 0x80) | (val >> 2); + } + + return val; + } + + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range) + { + assert(range < BC7ENC_TOTAL_ASTC_RANGES); + assert(packed_val < (uint32_t)astc_get_levels(range)); + + const uint32_t bits = g_astc_bise_range_table[range][0]; + const uint32_t trits = g_astc_bise_range_table[range][1]; + const uint32_t quints = g_astc_bise_range_table[range][2]; + + if ((!trits) && (!quints)) + return unquant_astc_endpoint(packed_val, 0, 0, range); + else if (trits) + return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range); + else + return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range); + } + + // BC7 - Various BC7 tables/helpers + const uint32_t g_bc7_weights1[2] = { 0, 64 }; + const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; + const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + const uint32_t g_astc_weights4[16] = { 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }; + const uint32_t g_astc_weights5[32] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 }; + const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 }; + + const uint8_t g_bc7_partition1[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + + const uint8_t g_bc7_partition2[64 * 16] = + { + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 
0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 + }; + + const uint8_t g_bc7_partition3[64 * 16] = + { + 
0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 
0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, + }; + + const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + + const uint8_t g_bc7_table_anchor_index_third_subset_1[64] = + { + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 + }; + + const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = + { + 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 + }; + + const uint8_t g_bc7_num_subsets[8] = { 3, 2, 3, 2, 1, 1, 1, 2 }; + const uint8_t g_bc7_partition_bits[8] = { 4, 6, 6, 6, 0, 0, 0, 6 }; + const uint8_t g_bc7_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 }; + + const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 }; + const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 }; + const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 }; + const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 }; + + const uint8_t g_bc7_alpha_index_bitcount[8] = { 0, 0, 0, 0, 3, 2, 4, 2 }; + + endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + + static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs) + { + assert((num_bits <= 32) && (val < (1ULL << num_bits))); + while (num_bits) + { + const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits); + pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7)); + 
val >>= n; + num_bits -= n; + *pCur_ofs += n; + } + assert(*pCur_ofs <= 128); + } + + // TODO: Optimize this. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) + { + const uint32_t best_mode = pResults->m_mode; + + const uint32_t total_subsets = g_bc7_num_subsets[best_mode]; + const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode]; + //const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode]; + //const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1; + + const uint8_t* pPartition; + if (total_subsets == 1) + pPartition = &g_bc7_partition1[0]; + else if (total_subsets == 2) + pPartition = &g_bc7_partition2[pResults->m_partition * 16]; + else + pPartition = &g_bc7_partition3[pResults->m_partition * 16]; + + uint8_t color_selectors[16]; + memcpy(color_selectors, pResults->m_selectors, 16); + + uint8_t alpha_selectors[16]; + memcpy(alpha_selectors, pResults->m_alpha_selectors, 16); + + color_quad_u8 low[3], high[3]; + memcpy(low, pResults->m_low, sizeof(low)); + memcpy(high, pResults->m_high, sizeof(high)); + + uint32_t pbits[3][2]; + memcpy(pbits, pResults->m_pbits, sizeof(pbits)); + + int anchor[3] = { -1, -1, -1 }; + + for (uint32_t k = 0; k < total_subsets; k++) + { + uint32_t anchor_index = 0; + if (k) + { + if ((total_subsets == 3) && (k == 1)) + anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition]; + else if ((total_subsets == 3) && (k == 2)) + anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition]; + else + anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition]; + } + + anchor[k] = anchor_index; + + const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector); + const uint32_t num_color_indices = 1 << color_index_bits; + + if (color_selectors[anchor_index] & (num_color_indices >> 1)) + { + for (uint32_t i = 0; i < 16; i++) + if (pPartition[i] == k) + color_selectors[i] = 
(uint8_t)((num_color_indices - 1) - color_selectors[i]); + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + for (uint32_t q = 0; q < 3; q++) + { + uint8_t t = low[k].m_c[q]; + low[k].m_c[q] = high[k].m_c[q]; + high[k].m_c[q] = t; + } + } + else + { + color_quad_u8 tmp = low[k]; + low[k] = high[k]; + high[k] = tmp; + } + + if (!g_bc7_mode_has_shared_p_bits[best_mode]) + { + uint32_t t = pbits[k][0]; + pbits[k][0] = pbits[k][1]; + pbits[k][1] = t; + } + } + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector); + const uint32_t num_alpha_indices = 1 << alpha_index_bits; + + if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1)) + { + for (uint32_t i = 0; i < 16; i++) + if (pPartition[i] == k) + alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]); + + uint8_t t = low[k].m_c[3]; + low[k].m_c[3] = high[k].m_c[3]; + high[k].m_c[3] = t; + } + } + } + + uint8_t* pBlock_bytes = (uint8_t*)(pBlock); + memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE); + + uint32_t cur_bit_ofs = 0; + bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs); + + if ((best_mode == 4) || (best_mode == 5)) + bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs); + + if (best_mode == 4) + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs); + + if (total_partitions > 1) + bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs); + + const uint32_t total_comps = (best_mode >= 4) ? 4 : 3; + for (uint32_t comp = 0; comp < total_comps; comp++) + { + for (uint32_t subset = 0; subset < total_subsets; subset++) + { + bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? 
g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + } + } + + if (g_bc7_mode_has_p_bits[best_mode]) + { + for (uint32_t subset = 0; subset < total_subsets; subset++) + { + bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs); + if (!g_bc7_mode_has_shared_p_bits[best_mode]) + bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs); + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int idx = x + y * 4; + + uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector); + + if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2])) + n--; + + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs); + } + } + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int idx = x + y * 4; + + uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector); + + if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2])) + n--; + + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? 
color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs); + } + } + } + + assert(cur_bit_ofs == 128); + } + + // ASTC + static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize) + { + uint8_t* pBuf = reinterpret_cast(pDst); + + assert(codesize <= 9); + if (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t val = code << byte_bit_offset; + + uint32_t index = bit_offset >> 3; + pBuf[index] |= (uint8_t)val; + + if (codesize > (8 - byte_bit_offset)) + pBuf[index + 1] |= (uint8_t)(val >> 8); + + bit_offset += codesize; + } + } + + void pack_astc_solid_block(void* pDst_block, const color32& color) + { + uint32_t r = color[0], g = color[1], b = color[2]; + uint32_t a = color[3]; + + uint32_t* pOutput = static_cast(pDst_block); + uint8_t* pBytes = reinterpret_cast(pDst_block); + + pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff; + + pOutput[1] = 0xffffffff; + pOutput[2] = 0; + pOutput[3] = 0; + + int bit_pos = 64; + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, r | (r << 8), 16); + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, g | (g << 8), 16); + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, b | (b << 8), 16); + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, a | (a << 8), 16); + } + + // See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation +#ifdef _DEBUG + static inline uint32_t astc_hash52(uint32_t v) + { + uint32_t p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; + } + + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block) + { + if (small_block) + { + x <<= 1; y <<= 1; z <<= 1; + } + seed += (partitioncount - 1) * 1024; + uint32_t rnum = astc_hash52(seed); + uint8_t seed1 = rnum & 0xF; + uint8_t seed2 = (rnum >> 4) & 0xF; + 
uint8_t seed3 = (rnum >> 8) & 0xF; + uint8_t seed4 = (rnum >> 12) & 0xF; + uint8_t seed5 = (rnum >> 16) & 0xF; + uint8_t seed6 = (rnum >> 20) & 0xF; + uint8_t seed7 = (rnum >> 24) & 0xF; + uint8_t seed8 = (rnum >> 28) & 0xF; + uint8_t seed9 = (rnum >> 18) & 0xF; + uint8_t seed10 = (rnum >> 22) & 0xF; + uint8_t seed11 = (rnum >> 26) & 0xF; + uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; + + seed1 *= seed1; seed2 *= seed2; + seed3 *= seed3; seed4 *= seed4; + seed5 *= seed5; seed6 *= seed6; + seed7 *= seed7; seed8 *= seed8; + seed9 *= seed9; seed10 *= seed10; + seed11 *= seed11; seed12 *= seed12; + + int sh1, sh2, sh3; + if (seed & 1) + { + sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5); + } + else + { + sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5); + } + sh3 = (seed & 0x10) ? sh1 : sh2; + + seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; + seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; + seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; + + int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + + a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; + + if (partitioncount < 4) d = 0; + if (partitioncount < 3) c = 0; + + if (a >= b && a >= c && a >= d) + return 0; + else if (b >= c && b >= d) + return 1; + else if (c >= d) + return 2; + else + return 3; + } +#endif + + static const uint8_t g_astc_quint_encode[125] = + { + 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, + 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104, + 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 
125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, + 126, 127, 94, 95, 62, 39, 47, 55, 63, 31 + }; + + // Encodes 3 values to output, usable for any range that uses quints and bits + static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) + { + // First extract the trits and the bits from the 5 input values + int quints = 0, bits[3]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 3; i++) + { + static const int s_muls[3] = { 1, 5, 25 }; + + const int t = pValues[i] >> n; + + quints += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(quints < 125); + const int T = g_astc_quint_encode[quints]; + + // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | + (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); + } + + // Packs values using ASTC's BISE to output buffer. + static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range) + { + uint32_t temp[5] = { 0, 0, 0, 0, 0 }; + + const int num_bits = g_astc_bise_range_table[range][0]; + + int group_size = 0; + if (g_astc_bise_range_table[range][1]) + group_size = 5; + else if (g_astc_bise_range_table[range][2]) + group_size = 3; + + if (group_size) + { + // Range has trits or quints - pack each group of 5 or 3 values + const int total_groups = (group_size == 5) ? 
((num_vals + 4) / 5) : ((num_vals + 2) / 3); + + for (int group_index = 0; group_index < total_groups; group_index++) + { + uint8_t vals[5] = { 0, 0, 0, 0, 0 }; + + const int limit = basisu::minimum(group_size, num_vals - group_index * group_size); + for (int i = 0; i < limit; i++) + vals[i] = pSrc_vals[group_index * group_size + i]; + + if (group_size == 5) + astc_encode_trits(temp, vals, bit_pos, num_bits); + else + astc_encode_quints(temp, vals, bit_pos, num_bits); + } + } + else + { + for (int i = 0; i < num_vals; i++) + astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); + } + + pDst[0] |= temp[0]; pDst[1] |= temp[1]; + pDst[2] |= temp[2]; pDst[3] |= temp[3]; + } + + const uint32_t ASTC_BLOCK_MODE_BITS = 11; + const uint32_t ASTC_PART_BITS = 2; + const uint32_t ASTC_CEM_BITS = 4; + const uint32_t ASTC_PARTITION_INDEX_BITS = 10; + const uint32_t ASTC_CCS_BITS = 2; + + const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] = { 0x242, 0x42, 0x53, 0x42, 0x42, 0x53, 0x442, 0x42, 0, 0x42, 0x242, 0x442, 0x53, 0x441, 0x42, 0x242, 0x42, 0x442, 0x253 }; + + bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode) + { + assert(uastc_mode < TOTAL_UASTC_MODES); + uint8_t* pDst_bytes = reinterpret_cast(pDst); + + const int total_weights = pBlock->m_dual_plane ? 
32 : 16; + + // Set mode bits - see Table 146-147 + uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode]; + pDst_bytes[0] = (uint8_t)mode; + pDst_bytes[1] = (uint8_t)(mode >> 8); + + memset(pDst_bytes + 2, 0, 16 - 2); + + int bit_pos = ASTC_BLOCK_MODE_BITS; + + // We only support 1-5 bit weight indices + assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]); + const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0]; + + // See table 143 - PART + astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS); + + if (pBlock->m_subsets == 1) + astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS); + else + { + // See table 145 + astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS); + + // Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM + astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2); + } + + if (pBlock->m_dual_plane) + { + const int total_weight_bits = total_weights * bits_per_weight; + + // See Illegal Encodings 23.24 + // https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings + assert((total_weight_bits >= 24) && (total_weight_bits <= 96)); + + int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS; + astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS); + } + + const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets; + assert(num_cem_pairs <= 9); + + astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]); + + // Write the weight bits in reverse bit order. 
+ switch (bits_per_weight) + { + case 1: + { + const uint32_t N = 1; + for (int i = 0; i < total_weights; i++) + { + const uint32_t ofs = 128 - N - i; + assert((ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7)); + } + break; + } + case 2: + { + const uint32_t N = 2; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 }; + const uint32_t ofs = 128 - N - (i * N); + assert((ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7)); + } + break; + } + case 3: + { + const uint32_t N = 3; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + + const uint32_t ofs = 128 - N - (i * N); + const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7); + + uint32_t index = ofs >> 3; + assert(index < 16); + pDst_bytes[index++] |= rev & 0xFF; + if (index < 16) + pDst_bytes[index++] |= (rev >> 8); + } + break; + } + case 4: + { + const uint32_t N = 4; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const int ofs = 128 - N - (i * N); + assert(ofs >= 0 && (ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7)); + } + break; + } + case 5: + { + const uint32_t N = 5; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 }; + + const uint32_t ofs = 128 - N - (i * N); + const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7); + + uint32_t index = ofs >> 3; + assert(index < 16); + pDst_bytes[index++] |= rev & 0xFF; + if (index < 16) + pDst_bytes[index++] |= (rev >> 8); + } + + break; + } + default: + assert(0); + break; + } + + return true; + } + + const uint8_t* 
get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern) + { + const uint8_t* pSubset_anchor_indices = g_zero_pattern; + pPartition_pattern = g_zero_pattern; + + if (subsets >= 2) + { + if (subsets == 3) + { + pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0]; + pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0]; + } + else if (mode == 7) + { + pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0]; + pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0]; + } + else + { + pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0]; + pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0]; + } + } + + return pSubset_anchor_indices; + } + + static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset) + { + uint32_t byte_bits = pBuf[bit_offset >> 3] >> (bit_offset & 7); + bit_offset += 1; + return byte_bits & 1; + } + + static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 9); + if (!codesize) + return 0; + + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112)) + { + const uint8_t* pBytes = &pBuf[bit_offset >> 3U]; + + uint32_t byte_bit_offset = bit_offset & 7U; + + uint32_t bits = pBytes[0] >> byte_bit_offset; + uint32_t bits_read = basisu::minimum(codesize, 8 - byte_bit_offset); + + uint32_t bits_remaining = codesize - bits_read; + if (bits_remaining) + bits |= ((uint32_t)pBytes[1]) << bits_read; + + bit_offset += codesize; + + return bits & ((1U << codesize) - 1U); + } + + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t *)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset) & ((1U << codesize) - 1U); + } + + inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64U); + uint64_t 
bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7U; + uint32_t bits_to_read = basisu::minimum(codesize - total_bits, 8U - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3U] >> byte_bit_offset; + byte_bits &= ((1U << bits_to_read) - 1U); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 9); + if (!codesize) + return 0; + assert(bit_offset < 112); + + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) + { + const uint8_t* pBytes = &pBuf[bit_offset >> 3U]; + + uint32_t byte_bit_offset = bit_offset & 7U; + + uint32_t bits = pBytes[0] >> byte_bit_offset; + uint32_t bits_read = basisu::minimum(codesize, 8 - byte_bit_offset); + + uint32_t bits_remaining = codesize - bits_read; + if (bits_remaining) + bits |= ((uint32_t)pBytes[1]) << bits_read; + + bit_offset += codesize; + + return bits & ((1U << codesize) - 1U); + } + + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset)& ((1U << codesize) - 1U); + } + + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) + { + //memset(&unpacked, 0, sizeof(unpacked)); + +#if 0 + uint8_t table[128]; + memset(table, 0xFF, sizeof(table)); + + { + for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++) + { + const uint32_t code = g_uastc_mode_huff_codes[mode][0]; + const uint32_t codesize = g_uastc_mode_huff_codes[mode][1]; + + table[code] = mode; + + uint32_t bits_left = 7 - codesize; + for (uint32_t i = 0; i < (1 << bits_left); i++) + table[code | (i << codesize)] = mode; + } + + for (uint32_t i = 0; i < 128; i++) + printf("%u,", table[i]); + 
exit(0); + } +#endif + + const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127]; + if (mode >= (int)TOTAL_UASTC_MODES) + return false; + + unpacked.m_mode = mode; + + uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1]; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + + if (read_hints) + { + unpacked.m_etc1_flip = false; + unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + unpacked.m_etc1_inten1 = 0; + unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2); + unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_bias = 0; + unpacked.m_etc2_hints = 0; + } + + return true; + } + + if (read_hints) + { + if (g_uastc_mode_has_bc1_hint0[mode]) + unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0; + else + unpacked.m_bc1_hint0 = false; + + if (g_uastc_mode_has_bc1_hint1[mode]) + unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0; + else + unpacked.m_bc1_hint1 = false; + + unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + + if (g_uastc_mode_has_etc1_bias[mode]) + unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + else + unpacked.m_etc1_bias = 0; 
+ + if (g_uastc_mode_has_alpha[mode]) + { + unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + //assert(unpacked.m_etc2_hints > 0); + } + else + unpacked.m_etc2_hints = 0; + } + else + bit_ofs += g_uastc_mode_total_hint_bits[mode]; + + uint32_t subsets = 1; + switch (mode) + { + case 2: + case 4: + case 7: + case 9: + case 16: + unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + subsets = 2; + break; + case 3: + unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4); + subsets = 3; + break; + default: + break; + } + + uint32_t part_seed = 0; + switch (mode) + { + case 2: + case 4: + case 9: + case 16: + if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2) + return false; + + part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc; + break; + case 3: + if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3) + return false; + + part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc; + break; + case 7: + if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS) + return false; + + part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2; + break; + default: + break; + } + + uint32_t total_planes = 1; + switch (mode) + { + case 6: + case 11: + case 13: + unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2); + total_planes = 2; + break; + case 17: + unpacked.m_astc.m_ccs = 3; + total_planes = 2; + break; + default: + break; + } + + unpacked.m_astc.m_dual_plane = (total_planes == 2); + + unpacked.m_astc.m_subsets = subsets; + unpacked.m_astc.m_partition_seed = part_seed; + + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + const uint32_t weight_bits = g_uastc_mode_weight_bits[mode]; + + unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode]; + + const uint32_t total_values = total_comps * 2 * subsets; + const uint32_t endpoint_range = 
g_uastc_mode_endpoint_ranges[mode]; + + const uint32_t cem = g_uastc_mode_cem[mode]; + unpacked.m_astc.m_cem = cem; + + const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0]; + const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1]; + const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2]; + + uint32_t total_tqs = 0; + uint32_t bundle_size = 0, mul = 0; + if (ep_trits) + { + total_tqs = (total_values + 4) / 5; + bundle_size = 5; + mul = 3; + } + else if (ep_quints) + { + total_tqs = (total_values + 2) / 3; + bundle_size = 3; + mul = 5; + } + + uint32_t tq_values[8]; + for (uint32_t i = 0; i < total_tqs; i++) + { + uint32_t num_bits = ep_trits ? 8 : 7; + if (i == (total_tqs - 1)) + { + uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size; + if (ep_trits) + { + switch (num_remaining) + { + case 1: num_bits = 2; break; + case 2: num_bits = 4; break; + case 3: num_bits = 5; break; + case 4: num_bits = 7; break; + default: break; + } + } + else if (ep_quints) + { + switch (num_remaining) + { + case 1: num_bits = 3; break; + case 2: num_bits = 5; break; + default: break; + } + } + } + + tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits); + } // i + + uint32_t accum = 0; + uint32_t accum_remaining = 0; + uint32_t next_tq_index = 0; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits); + + if (total_tqs) + { + if (!accum_remaining) + { + assert(next_tq_index < total_tqs); + accum = tq_values[next_tq_index++]; + accum_remaining = bundle_size; + } + + // TODO: Optimize with tables + uint32_t v = accum % mul; + accum /= mul; + accum_remaining--; + + value |= (v << ep_bits); + } + + unpacked.m_astc.m_endpoints[i] = (uint8_t)value; + } + + const uint8_t* pPartition_pattern; + const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, unpacked.m_common_pattern, pPartition_pattern); + +#ifdef _DEBUG + 
for (uint32_t i = 0; i < 16; i++) + assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true)); + + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + uint32_t anchor_index = 0; + + for (uint32_t i = 0; i < 16; i++) + { + if (pPartition_pattern[i] == subset_index) + { + anchor_index = i; + break; + } + } + + assert(pSubset_anchor_indices[subset_index] == anchor_index); + } +#endif + +#if 0 + const uint32_t total_planes_shift = total_planes - 1; + for (uint32_t i = 0; i < 16 * total_planes; i++) + { + uint32_t num_bits = weight_bits; + for (uint32_t s = 0; s < subsets; s++) + { + if (pSubset_anchor_indices[s] == (i >> total_planes_shift)) + { + num_bits--; + break; + } + } + + unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits); + } +#endif + + if (mode == 18) + { + // Mode 18 is the only mode with more than 64 weight bits. + for (uint32_t i = 0; i < 16; i++) + unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1)); + } + else + { + // All other modes have <= 64 weight bits. + uint64_t bits; + + // Read the weight bits + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) + bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum(64, 128 - (int)bit_ofs)); + else + { +#ifdef __EMSCRIPTEN__ + bits = blk.m_dwords[2]; + bits |= (((uint64_t)blk.m_dwords[3]) << 32U); +#else + bits = blk.m_qwords[1]; +#endif + + if (bit_ofs >= 64U) + bits >>= (bit_ofs - 64U); + else + { + assert(bit_ofs >= 56U); + + uint32_t bits_needed = 64U - bit_ofs; + bits <<= bits_needed; + bits |= (blk.m_bytes[7] >> (8U - bits_needed)); + } + } + + bit_ofs = 0; + + const uint32_t mask = (1U << weight_bits) - 1U; + const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; + + if (total_planes == 2) + { + // Dual plane modes always have a single subset, and the first 2 weights are anchors. 
+ + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + for (uint32_t i = 2; i < 32; i++) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + else + { + if (subsets == 1) + { + // Specialize the single subset case. + if (weight_bits == 4) + { + assert(bit_ofs == 0); + + // Specialize the most common case: 4-bit weights. + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); + unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15); + unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15); + + unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15); + unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15); + unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15); + unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15); + + unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15); + unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15); + unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15); + unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15); + + unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15); + unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15); + unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15); + unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15); + } + else + { + // First weight is always an anchor. 
+ unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + for (uint32_t i = 1; i < 16; i++) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + } + else + { + const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2]; + + for (uint32_t i = 0; i < 16; i++) + { + if ((i == a0) || (i == a1) || (i == a2)) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + } + else + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + } + } + } + + if ((blue_contract_check) && (total_comps >= 3)) + { + // We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care. + bool invert_subset[3] = { false, false, false }; + bool any_flag = false; + + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant; + + const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant; + + if (s1 < s0) + { + for (uint32_t c = 0; c < total_comps; c++) + std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]); + + 
invert_subset[subset_index] = true; + any_flag = true; + } + } + + if (any_flag) + { + const uint32_t weight_mask = (1 << weight_bits) - 1; + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t subset = pPartition_pattern[i]; + + if (invert_subset[subset]) + { + unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]); + + if (total_planes == 2) + unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]); + } + } + } + } + + return true; + } + + static const uint32_t* g_astc_weight_tables[6] = { nullptr, g_bc7_weights1, g_bc7_weights2, g_bc7_weights3, g_astc_weights4, g_astc_weights5 }; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb) + { + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = solid_color; + return true; + } + + color32 endpoints[3][2]; + + const uint32_t total_subsets = g_uastc_mode_subsets[mode]; + const uint32_t total_comps = basisu::minimum(4U, g_uastc_mode_comps[mode]); + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + const uint32_t total_planes = g_uastc_mode_planes[mode]; + const uint32_t weight_bits = g_uastc_mode_weight_bits[mode]; + const uint32_t weight_levels = 1 << weight_bits; + + for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++) + { + if (total_comps == 2) + { + const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant; + const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant; + + const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant; + const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * 
total_comps * 2 + 1 * 2 + 1]].m_unquant; + + endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al); + endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah); + } + else + { + for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++) + { + endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant; + endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant; + } + for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++) + { + endpoints[subset_index][0][comp_index] = 255; + endpoints[subset_index][1][comp_index] = 255; + } + } + } + + color32 block_colors[3][32]; + + const uint32_t* pWeights = g_astc_weight_tables[weight_bits]; + + for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++) + { + for (uint32_t l = 0; l < weight_levels; l++) + { + if (total_comps == 2) + { + const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb); + const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb); + + block_colors[subset_index][l].set(lc, lc, lc, ac); + } + else + { + uint32_t comp_index; + for (comp_index = 0; comp_index < total_comps; comp_index++) + block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb); + + for (; comp_index < 4; comp_index++) + block_colors[subset_index][l][comp_index] = 255; + } + } + } + + const uint8_t* pPartition_pattern = g_zero_pattern; + + if (total_subsets >= 2) + { + if (total_subsets == 3) + pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0]; + else if (mode == 7) + pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0]; + else + 
pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0]; + +#ifdef _DEBUG + for (uint32_t i = 0; i < 16; i++) + { + assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true)); + } +#endif + } + + if (total_planes == 1) + { + if (total_subsets == 1) + { + for (uint32_t i = 0; i < 16; i++) + { + assert(astc.m_weights[i] < weight_levels); + pPixels[i] = block_colors[0][astc.m_weights[i]]; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + assert(astc.m_weights[i] < weight_levels); + pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]]; + } + } + } + else + { + assert(total_subsets == 1); + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = 0; // pPartition_pattern[i]; + + const uint32_t weight_index0 = astc.m_weights[i * 2]; + const uint32_t weight_index1 = astc.m_weights[i * 2 + 1]; + + assert(weight_index0 < weight_levels && weight_index1 < weight_levels); + + color32& c = pPixels[i]; + for (uint32_t comp = 0; comp < 4; comp++) + { + if ((int)comp == astc.m_ccs) + c[comp] = block_colors[subset_index][weight_index1][comp]; + else + c[comp] = block_colors[subset_index][weight_index0][comp]; + } + } + } + + return true; + } + + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb) + { + return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb); + } + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb) + { + unpacked_uastc_block unpacked_blk; + + if (!unpack_uastc(blk, unpacked_blk, false, false)) + return false; + + return unpack_uastc(unpacked_blk, pPixels, srgb); + } + + // Determines the best shared pbits to use to encode xl/xh + static void determine_shared_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t 
best_pbits[2]) + { + const uint32_t total_bits = comp_bits + 1; + assert(total_bits >= 4 && total_bits <= 8); + + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow, scaledHigh; + + for (uint32_t i = 0; i < 4; i++) + { + scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits)); + scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits); + assert(scaledLow.m_c[i] <= 255); + + scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits)); + scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits); + assert(scaledHigh.m_c[i] <= 255); + } + + float err = 0; + for (uint32_t i = 0; i < total_comps; i++) + err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + } + } + + // Determines the best unique pbits to use to encode xl/xh + static void determine_unique_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2]) + { + const uint32_t total_bits = comp_bits + 1; + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err0 = 1e+9f; + float best_err1 = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * 
scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow, scaledHigh; + for (uint32_t i = 0; i < 4; i++) + { + scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits)); + scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits); + assert(scaledLow.m_c[i] <= 255); + + scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits)); + scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits); + assert(scaledHigh.m_c[i] <= 255); + } + + float err0 = 0, err1 = 0; + for (uint32_t i = 0; i < total_comps; i++) + { + err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f); + err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } + } + } + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, true, false)) + return false; + + bool success = false; + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color); + success = true; + } + else + { + success = pack_astc_block(static_cast(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode); + } + + return success; + } + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk) + { + memset(&dst_blk, 0, sizeof(dst_blk)); + + const 
uint32_t mode = unpacked_src_blk.m_mode; + + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + switch (mode) + { + case 0: + case 5: + case 10: + case 12: + case 14: + case 15: + case 18: + { + // MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB + // MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB + // MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6 + // MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6 + // MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 + // MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6 + // MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6 + dst_blk.m_mode = 6; + + float xl[4], xh[4]; + if (total_comps == 2) + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f; + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f; + + xl[1] = xl[0]; + xh[1] = xh[0]; + + xl[2] = xl[0]; + xh[2] = xh[0]; + + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f; + } + else + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f; + xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f; + xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f; + + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f; + xh[1] = 
g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f; + xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f; + + if (total_comps == 4) + { + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f; + } + else + { + xl[3] = 1.0f; + xh[3] = 1.0f; + } + } + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + dst_blk.m_low[0] = bestMinColor; + dst_blk.m_high[0] = bestMaxColor; + + if (total_comps == 3) + { + dst_blk.m_low[0].m_c[3] = 127; + dst_blk.m_high[0].m_c[3] = 127; + } + + dst_blk.m_pbits[0][0] = best_pbits[0]; + dst_blk.m_pbits[0][1] = best_pbits[1]; + + if (mode == 18) + { + const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else if (mode == 14) + { + const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else if ((mode == 5) || (mode == 12)) + { + const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else + { + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + } + + break; + } + case 1: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3 + // Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables. 
+ dst_blk.m_mode = 3; + + float xl[4], xh[4]; + xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f; + xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f; + xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f; + xl[3] = 1.0f; + + xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f; + xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f; + xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f; + xh[3] = 1.0f; + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i]; + dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i]; + } + dst_blk.m_pbits[0][0] = best_pbits[0]; + dst_blk.m_pbits[0][1] = best_pbits[1]; + dst_blk.m_pbits[1][0] = best_pbits[0]; + dst_blk.m_pbits[1][1] = best_pbits[1]; + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 2: + { + // 2. 
DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + dst_blk.m_mode = 1; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + float xl[4], xh[4]; + xl[3] = 1.0f; + xh[3] = 1.0f; + + for (uint32_t subset = 0; subset < 2; subset++) + { + for (uint32_t i = 0; i < 3; i++) + { + uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]; + v = (v << 4) | v; + xl[i] = v / 255.0f; + + v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]; + v = (v << 4) | v; + xh[i] = v / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? 
(1 - subset) : subset; + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i]; + } + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + } // subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 3: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2 + dst_blk.m_mode = 2; + dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7; + + const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm; + + for (uint32_t subset = 0; subset < 3; subset++) + { + for (uint32_t comp = 0; comp < 3; comp++) + { + uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant; + uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant; + + // TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion. + lo = (lo * 31 + 127) / 255; + hi = (hi * 31 + 127) / 255; + + const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset]; + + dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo; + dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi; + } + } + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 4: + { + // 4. 
DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3 + dst_blk.m_mode = 3; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + float xl[4], xh[4]; + xl[3] = 1.0f; + xh[3] = 1.0f; + + for (uint32_t subset = 0; subset < 2; subset++) + { + for (uint32_t i = 0; i < 3; i++) + { + xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f; + xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? 
(1 - subset) : subset; + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i]; + } + dst_blk.m_low[bc7_subset_index].m_c[3] = 127; + dst_blk.m_high[bc7_subset_index].m_c[3] = 127; + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1]; + + } // subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 6: + case 11: + case 13: + case 17: + { + // MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB + // MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5 + // MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5 + // MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5 + dst_blk.m_mode = 5; + dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3; + + if (total_comps == 2) + { + assert(unpacked_src_blk.m_astc.m_ccs == 3); + + dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255); + dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255); + + dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0]; + dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0]; + + dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0]; + dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0]; + + dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant); + dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant); + } + else + { + for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++) + { + uint32_t bc7_comp = astc_comp; 
+ // ASTC and BC7 handle dual plane component rotations differently: + // ASTC: 2nd plane separately interpolates the CCS channel. + // BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel. + if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs) + bc7_comp = 3; + else if (astc_comp == 3) + bc7_comp = unpacked_src_blk.m_astc.m_ccs; + + uint32_t l = 255, h = 255; + if (astc_comp < total_comps) + { + l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant; + h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant; + } + + if (bc7_comp < 3) + { + l = (l * 127 + 127) / 255; + h = (h * 127 + 127) / 255; + } + + dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l; + dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h; + } + } + + if (mode == 13) + { + for (uint32_t i = 0; i < 16; i++) + { + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0; + dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 
3 : 0; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2]; + dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1]; + } + } + + break; + } + case 7: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2 + dst_blk.m_mode = 2; + dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73; + + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k; + + for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k); + + for (uint32_t c = 0; c < 3; c++) + { + dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255; + dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255; + } + } + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case UASTC_MODE_INDEX_SOLID_COLOR: + { + // Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6) + const color32& solid_color = unpacked_src_blk.m_solid_color; + + uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error + + g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error; + + uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error + + g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error; + + if (best_err0 > 0 && best_err1 > 0) + { + dst_blk.m_mode = 5; + + for (uint32_t c = 0; c < 3; c++) 
+ { + dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo; + dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi; + } + + memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16); + + dst_blk.m_low[0].m_c[3] = solid_color.c[3]; + dst_blk.m_high[0].m_c[3] = solid_color.c[3]; + + //memset(dst_blk.m_alpha_selectors, 0, 16); + } + else + { + dst_blk.m_mode = 6; + + uint32_t best_p = 0; + if (best_err1 < best_err0) + best_p = 1; + + for (uint32_t c = 0; c < 4; c++) + { + dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo; + dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi; + } + + dst_blk.m_pbits[0][0] = best_p; + dst_blk.m_pbits[0][1] = best_p; + memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16); + } + + break; + } + case 9: + case 16: + { + // 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7 + // 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7 + + dst_blk.m_mode = 7; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++) + { + float xl[4], xh[4]; + + if (total_comps == 2) + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f; + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f; + + xl[1] = xl[0]; + xh[1] = xh[0]; + + xl[2] = xl[0]; + xh[2] = xh[0]; + + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f; 
+ } + else + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f; + xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f; + xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f; + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f; + + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f; + xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f; + xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? 
(1 - astc_subset) : astc_subset; + + dst_blk.m_low[bc7_subset_index] = bestMinColor; + dst_blk.m_high[bc7_subset_index] = bestMaxColor; + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1]; + } // astc_subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + default: + return false; + } + + return true; + } + + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false, false)) + return false; + + return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk); + } + + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst) + { + bc7_optimization_results temp; + if (!transcode_uastc_to_bc7(src_blk, temp)) + return false; + + encode_bc7_block(pDst, &temp); + return true; + } + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock) + { + color32 result; + + for (uint32_t c = 0; c < 3; c++) + { + static const int s_divs[3] = { 1, 3, 9 }; + + int delta = 0; + + switch (bias) + { + case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break; + case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break; + case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break; + + case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break; + case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break; + case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break; + + case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break; + case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break; + case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break; + + case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break; + case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break; + case 8: delta = subblock ? ((c == 2) ? 
1 : 0) : 0; break; + + case 10: delta = -2; break; + + case 27: delta = subblock ? 0 : -1; break; + case 28: delta = subblock ? -1 : 1; break; + case 29: delta = subblock ? 1 : 0; break; + case 30: delta = subblock ? -1 : 0; break; + case 31: delta = subblock ? 0 : 1; break; + + default: + delta = ((bias / s_divs[c]) % 3) - 1; + break; + } + + int v = block_color[c]; + if (v == 0) + { + if (delta == -2) + v += 3; + else + v += delta + 1; + } + else if (v == (int)limit) + { + v += (delta - 1); + } + else + { + v += delta; + if ((v < 0) || (v > (int)limit)) + v = (v - delta) - delta; + } + + assert(v >= 0); + assert(v <= (int)limit); + + result[c] = (uint8_t)v; + } + + return result; + } + + static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock) + { + static const uint8_t s_tran[4] = { 1, 0, 2, 3 }; + + uint16_t l_bitmask = 0; + uint16_t h_bitmask = 0; + + for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++) + { + color32 block_colors[4]; + dst_blk.get_block_colors(block_colors, subblock); + + uint32_t block_y[4]; + for (uint32_t i = 0; i < 4; i++) + block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19; + + const uint32_t block_y01 = block_y[0] + block_y[1]; + const uint32_t block_y12 = block_y[1] + block_y[2]; + const uint32_t block_y23 = block_y[2] + block_y[3]; + + // X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3 + // Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 + + if (dst_blk.get_flip_bit()) + { + uint32_t ofs = subblock * 2; + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const color32& c = pSource_pixels[x + (subblock * 2 + y) * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + 
+ ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + const color32& c = pSource_pixels[subblock * 2 + x + y * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + } + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + } + + static const uint8_t s_etc1_solid_selectors[4][4] = { { 255, 255, 255, 255 }, { 255, 255, 0, 0 }, { 0, 0, 0, 0 }, {0, 0, 255, 255 } }; + + struct etc_coord2 + { + uint8_t m_x, m_y; + }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst) + { + decoder_etc_block& dst_blk = *static_cast(pDst); + + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2)); + + if (unpacked_src_blk.m_etc1_diff) + { + dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3); + dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3); + dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3); + } + else + { + 
dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4)); + dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4)); + dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4)); + } + + memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4); + + return; + } + + const bool flip = unpacked_src_blk.m_etc1_flip != 0; + const bool diff = unpacked_src_blk.m_etc1_diff != 0; + + dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2)); + + const uint32_t limit = diff ? 31 : 15; + + color32 block_colors[2]; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint32_t avg_color[3]; + memset(avg_color, 0, sizeof(avg_color)); + + for (uint32_t j = 0; j < 8; j++) + { + const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j]; + + avg_color[0] += block_pixels[c.m_y][c.m_x].r; + avg_color[1] += block_pixels[c.m_y][c.m_x].g; + avg_color[2] += block_pixels[c.m_y][c.m_x].b; + } // j + + block_colors[subset][0] = (uint8_t)((avg_color[0] * limit + 1020) / (8 * 255)); + block_colors[subset][1] = (uint8_t)((avg_color[1] * limit + 1020) / (8 * 255)); + block_colors[subset][2] = (uint8_t)((avg_color[2] * limit + 1020) / (8 * 255)); + block_colors[subset][3] = 0; + + if (g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode]) + { + block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset); + } + + } // subset + + if (diff) + { + int dr = block_colors[1].r - block_colors[0].r; + int dg = block_colors[1].g - block_colors[0].g; + int db = block_colors[1].b - block_colors[0].b; + + dr = basisu::clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = basisu::clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = basisu::clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + if (dr < 0) dr += 8; + if (dg < 0) dg += 8; 
+ if (db < 0) db += 8; + + dst_blk.m_bytes[0] = (uint8_t)((block_colors[0].r << 3) | dr); + dst_blk.m_bytes[1] = (uint8_t)((block_colors[0].g << 3) | dg); + dst_blk.m_bytes[2] = (uint8_t)((block_colors[0].b << 3) | db); + } + else + { + dst_blk.m_bytes[0] = (uint8_t)(block_colors[1].r | (block_colors[0].r << 4)); + dst_blk.m_bytes[1] = (uint8_t)(block_colors[1].g | (block_colors[0].g << 4)); + dst_blk.m_bytes[2] = (uint8_t)(block_colors[1].b | (block_colors[0].b << 4)); + } + + etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2); + } + + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + color32 block_pixels[4][4]; + if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR) + { + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + } + + transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst); + + return true; + } + + static inline int gray_distance2(const uint8_t c, int y) + { + int gray_dist = (int)c - y; + return gray_dist * gray_dist; + } + + static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels, + int& upper_avg, int& lower_avg, int& left_avg, int& right_avg) + { + int sums[2][2]; + +#define GET_XY(x, y) pSrc_pixels[(x) + ((y) * 4)] + + sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1); + sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1); + sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3); + sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3); + + upper_avg = (sums[0][0] + sums[1][0] + 4) / 8; + lower_avg = (sums[0][1] + sums[1][1] + 4) / 8; + left_avg = (sums[0][0] + sums[0][1] + 4) / 8; + right_avg = (sums[1][0] + sums[1][1] + 4) / 8; + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a) + + int 
upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + int upper_lower_sum = upper_gray_dist + lower_gray_dist; + int left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + // Base Sel Table + // XXXXX XX XXX + static const uint16_t g_etc1_y_solid_block_configs[256] = + { + 0,781,64,161,260,192,33,131,96,320,65,162,261,193,34,291,97,224,66,163,262,194,35,549,98,4,67,653,164,195,523,36,99,5,578,68,165,353,196,37,135,100,324,69,166,354,197,38,295,101,228,70,167, + 355,198,39,553,102,8,71,608,168,199,527,40,103,9,582,72,169,357,200,41,139,104,328,73,170,358,201,42,299,105,232,74,171,359,202,43,557,106,12,75,612,172,203,531,44,107,13,586,76,173,361, + 204,45,143,108,332,77,174,362,205,46,303,109,236,78,175,363,206,47,561,110,16,79,616,176,207,535,48,111,17,590,80,177,365,208,49,147,112,336,81,178,366,209,50,307,113,240,82,179,367,210, + 51,565,114,20,83,620,180,211,539,52,115,21,594,84,181,369,212,53,151,116,340,85,182,370,213,54,311,117,244,86,183,371,214,55,569,118,24,87,624,184,215,543,56,119,25,598,88,185,373,216,57, + 155,120,344,89,186,374,217,58,315,121,248,90,187,375,218,59,573,122,28,91,628,188,219,754,60,123,29,602,92,189,377,220,61,159,124,348,93,190,378,221,62,319,125,252,94,191,379,222,63,882,126 + }; + + // individual + // table base sel0 sel1 sel2 sel3 + static const uint16_t g_etc1_y_solid_block_4i_configs[256] = + { + 0xA000,0xA800,0x540B,0xAA01,0xAA01,0xFE00,0xFF00,0xFF00,0x8,0x5515,0x5509,0x5509,0xAA03,0x5508,0x5508,0x9508,0xA508,0xA908,0xAA08,0x5513,0xAA09,0xAA09,0xAA05,0xFF08,0xFF08,0x10,0x551D,0x5511,0x5511, + 
0xAA0B,0x5510,0x5510,0x9510,0xA510,0xA910,0xAA10,0x551B,0xAA11,0xAA11,0xAA0D,0xFF10,0xFF10,0x18,0x5525,0x5519,0x5519,0xAA13,0x5518,0x5518,0x9518,0xA518,0xA918,0xAA18,0x5523,0xAA19,0xAA19,0xAA15, + 0xFF18,0xFF18,0x20,0x552D,0x5521,0x5521,0xAA1B,0x5520,0x5520,0x9520,0xA520,0xA920,0xAA20,0x552B,0xAA21,0xAA21,0xAA1D,0xFF20,0xFF20,0x28,0x5535,0x5529,0x5529,0xAA23,0x5528,0x5528,0x9528,0xA528,0xA928, + 0xAA28,0x5533,0xAA29,0xAA29,0xAA25,0xFF28,0xFF28,0x30,0x553D,0x5531,0x5531,0xAA2B,0x5530,0x5530,0x9530,0xA530,0xA930,0xAA30,0x553B,0xAA31,0xAA31,0xAA2D,0xFF30,0xFF30,0x38,0x5545,0x5539,0x5539,0xAA33, + 0x5538,0x5538,0x9538,0xA538,0xA938,0xAA38,0x5543,0xAA39,0xAA39,0xAA35,0xFF38,0xFF38,0x40,0x554D,0x5541,0x5541,0xAA3B,0x5540,0x5540,0x9540,0xA540,0xA940,0xAA40,0x554B,0xAA41,0xAA41,0xAA3D,0xFF40,0xFF40, + 0x48,0x5555,0x5549,0x5549,0xAA43,0x5548,0x5548,0x9548,0xA548,0xA948,0xAA48,0x5553,0xAA49,0xAA49,0xAA45,0xFF48,0xFF48,0x50,0x555D,0x5551,0x5551,0xAA4B,0x5550,0x5550,0x9550,0xA550,0xA950,0xAA50,0x555B, + 0xAA51,0xAA51,0xAA4D,0xFF50,0xFF50,0x58,0x5565,0x5559,0x5559,0xAA53,0x5558,0x5558,0x9558,0xA558,0xA958,0xAA58,0x5563,0xAA59,0xAA59,0xAA55,0xFF58,0xFF58,0x60,0x556D,0x5561,0x5561,0xAA5B,0x5560,0x5560, + 0x9560,0xA560,0xA960,0xAA60,0x556B,0xAA61,0xAA61,0xAA5D,0xFF60,0xFF60,0x68,0x5575,0x5569,0x5569,0xAA63,0x5568,0x5568,0x9568,0xA568,0xA968,0xAA68,0x5573,0xAA69,0xAA69,0xAA65,0xFF68,0xFF68,0x70,0x557D, + 0x5571,0x5571,0xAA6B,0x5570,0x5570,0x9570,0xA570,0xA970,0xAA70,0x557B,0xAA71,0xAA71,0xAA6D,0xFF70,0xFF70,0x78,0x78,0x5579,0x5579,0xAA73,0x5578,0x9578,0x2578,0xE6E,0x278 + }; + + static const uint16_t g_etc1_y_solid_block_2i_configs[256] = + { + 0x416,0x800,0xA00,0x50B,0xA01,0xA01,0xF00,0xF00,0xF00,0x8,0x515,0x509,0x509,0xA03,0x508,0x508,0xF01,0xF01,0xA08,0xA08,0x513,0xA09,0xA09,0xA05,0xF08,0xF08,0x10,0x51D,0x511,0x511,0xA0B,0x510,0x510,0xF09, + 
0xF09,0xA10,0xA10,0x51B,0xA11,0xA11,0xA0D,0xF10,0xF10,0x18,0x525,0x519,0x519,0xA13,0x518,0x518,0xF11,0xF11,0xA18,0xA18,0x523,0xA19,0xA19,0xA15,0xF18,0xF18,0x20,0x52D,0x521,0x521,0xA1B,0x520,0x520,0xF19, + 0xF19,0xA20,0xA20,0x52B,0xA21,0xA21,0xA1D,0xF20,0xF20,0x28,0x535,0x529,0x529,0xA23,0x528,0x528,0xF21,0xF21,0xA28,0xA28,0x533,0xA29,0xA29,0xA25,0xF28,0xF28,0x30,0x53D,0x531,0x531,0xA2B,0x530,0x530,0xF29, + 0xF29,0xA30,0xA30,0x53B,0xA31,0xA31,0xA2D,0xF30,0xF30,0x38,0x545,0x539,0x539,0xA33,0x538,0x538,0xF31,0xF31,0xA38,0xA38,0x543,0xA39,0xA39,0xA35,0xF38,0xF38,0x40,0x54D,0x541,0x541,0xA3B,0x540,0x540,0xF39, + 0xF39,0xA40,0xA40,0x54B,0xA41,0xA41,0xA3D,0xF40,0xF40,0x48,0x555,0x549,0x549,0xA43,0x548,0x548,0xF41,0xF41,0xA48,0xA48,0x553,0xA49,0xA49,0xA45,0xF48,0xF48,0x50,0x55D,0x551,0x551,0xA4B,0x550,0x550,0xF49, + 0xF49,0xA50,0xA50,0x55B,0xA51,0xA51,0xA4D,0xF50,0xF50,0x58,0x565,0x559,0x559,0xA53,0x558,0x558,0xF51,0xF51,0xA58,0xA58,0x563,0xA59,0xA59,0xA55,0xF58,0xF58,0x60,0x56D,0x561,0x561,0xA5B,0x560,0x560,0xF59, + 0xF59,0xA60,0xA60,0x56B,0xA61,0xA61,0xA5D,0xF60,0xF60,0x68,0x575,0x569,0x569,0xA63,0x568,0x568,0xF61,0xF61,0xA68,0xA68,0x573,0xA69,0xA69,0xA65,0xF68,0xF68,0x70,0x57D,0x571,0x571,0xA6B,0x570,0x570,0xF69, + 0xF69,0xA70,0xA70,0x57B,0xA71,0xA71,0xA6D,0xF70,0xF70,0x78,0x78,0x579,0x579,0xA73,0x578,0x578,0xE6E,0x278 + }; + + static const uint16_t g_etc1_y_solid_block_1i_configs[256] = + { + 0x0,0x116,0x200,0x200,0x10B,0x201,0x201,0x300,0x300,0x8,0x115,0x109,0x109,0x203,0x108,0x108,0x114,0x301,0x204,0x208,0x208,0x113,0x209,0x209,0x205,0x308,0x10,0x11D,0x111,0x111,0x20B,0x110,0x110,0x11C,0x309, + 0x20C,0x210,0x210,0x11B,0x211,0x211,0x20D,0x310,0x18,0x125,0x119,0x119,0x213,0x118,0x118,0x124,0x311,0x214,0x218,0x218,0x123,0x219,0x219,0x215,0x318,0x20,0x12D,0x121,0x121,0x21B,0x120,0x120,0x12C,0x319,0x21C, + 
0x220,0x220,0x12B,0x221,0x221,0x21D,0x320,0x28,0x135,0x129,0x129,0x223,0x128,0x128,0x134,0x321,0x224,0x228,0x228,0x133,0x229,0x229,0x225,0x328,0x30,0x13D,0x131,0x131,0x22B,0x130,0x130,0x13C,0x329,0x22C,0x230, + 0x230,0x13B,0x231,0x231,0x22D,0x330,0x38,0x145,0x139,0x139,0x233,0x138,0x138,0x144,0x331,0x234,0x238,0x238,0x143,0x239,0x239,0x235,0x338,0x40,0x14D,0x141,0x141,0x23B,0x140,0x140,0x14C,0x339,0x23C,0x240,0x240, + 0x14B,0x241,0x241,0x23D,0x340,0x48,0x155,0x149,0x149,0x243,0x148,0x148,0x154,0x341,0x244,0x248,0x248,0x153,0x249,0x249,0x245,0x348,0x50,0x15D,0x151,0x151,0x24B,0x150,0x150,0x15C,0x349,0x24C,0x250,0x250,0x15B, + 0x251,0x251,0x24D,0x350,0x58,0x165,0x159,0x159,0x253,0x158,0x158,0x164,0x351,0x254,0x258,0x258,0x163,0x259,0x259,0x255,0x358,0x60,0x16D,0x161,0x161,0x25B,0x160,0x160,0x16C,0x359,0x25C,0x260,0x260,0x16B,0x261, + 0x261,0x25D,0x360,0x68,0x175,0x169,0x169,0x263,0x168,0x168,0x174,0x361,0x264,0x268,0x268,0x173,0x269,0x269,0x265,0x368,0x70,0x17D,0x171,0x171,0x26B,0x170,0x170,0x17C,0x369,0x26C,0x270,0x270,0x17B,0x271,0x271, + 0x26D,0x370,0x78,0x78,0x179,0x179,0x273,0x178,0x178,0x26E,0x278 + }; + + // We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch. + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + +#if 0 + for (uint32_t individ = 0; individ < 2; individ++) + { + uint32_t overall_error = 0; + + for (uint32_t c = 0; c < 256; c++) + { + uint32_t best_err = UINT32_MAX; + uint32_t best_individ = 0; + uint32_t best_base = 0; + uint32_t best_sels[4] = { 0,0,0,0 }; + uint32_t best_table = 0; + + const uint32_t limit = individ ? 
16 : 32; + + for (uint32_t table = 0; table < 8; table++) + { + for (uint32_t base = 0; base < limit; base++) + { + uint32_t total_e = 0; + uint32_t sels[4] = { 0,0,0,0 }; + + const uint32_t N = 4; + for (uint32_t i = 0; i < basisu::minimum(N, (256 - c)); i++) + { + uint32_t best_sel_e = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t sel = 0; sel < 4; sel++) + { + int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2)); + val = clamp255(val + g_etc1_inten_tables[table][sel]); + + int e = iabs(val - clamp255(c + i)); + if (e < best_sel_e) + { + best_sel_e = e; + best_sel = sel; + } + + } // sel + + sels[i] = best_sel; + total_e += best_sel_e * best_sel_e; + + } // i + + if (total_e < best_err) + { + best_err = total_e; + best_individ = individ; + best_base = base; + memcpy(best_sels, sels, sizeof(best_sels)); + best_table = table; + } + + } // base + } // table + + //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]); + + uint32_t encoded = best_table | (best_base << 3) | + (best_sels[0] << 8) | + (best_sels[1] << 10) | + (best_sels[2] << 12) | + (best_sels[3] << 14); + + printf("0x%X,", encoded); + + overall_error += best_err; + } // c + + printf("\n"); + printf("Overall error: %u\n", overall_error); + + } // individ + + exit(0); +#endif + +#if 0 + for (uint32_t individ = 0; individ < 2; individ++) + { + uint32_t overall_error = 0; + + for (uint32_t c = 0; c < 256; c++) + { + uint32_t best_err = UINT32_MAX; + uint32_t best_individ = 0; + uint32_t best_base = 0; + uint32_t best_sels[4] = { 0,0,0,0 }; + uint32_t best_table = 0; + + const uint32_t limit = individ ? 
16 : 32; + + for (uint32_t table = 0; table < 8; table++) + { + for (uint32_t base = 0; base < limit; base++) + { + uint32_t total_e = 0; + uint32_t sels[4] = { 0,0,0,0 }; + + const uint32_t N = 1; + for (uint32_t i = 0; i < basisu::minimum(N, (256 - c)); i++) + { + uint32_t best_sel_e = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t sel = 0; sel < 4; sel++) + { + int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2)); + val = clamp255(val + g_etc1_inten_tables[table][sel]); + + int e = iabs(val - clamp255(c + i)); + if (e < best_sel_e) + { + best_sel_e = e; + best_sel = sel; + } + + } // sel + + sels[i] = best_sel; + total_e += best_sel_e * best_sel_e; + + } // i + + if (total_e < best_err) + { + best_err = total_e; + best_individ = individ; + best_base = base; + memcpy(best_sels, sels, sizeof(best_sels)); + best_table = table; + } + + } // base + } // table + + //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]); + + uint32_t encoded = best_table | (best_base << 3) | + (best_sels[0] << 8) | + (best_sels[1] << 10) | + (best_sels[2] << 12) | + (best_sels[3] << 14); + + printf("0x%X,", encoded); + + overall_error += best_err; + } // c + + printf("\n"); + printf("Overall error: %u\n", overall_error); + + } // individ + + exit(0); +#endif + + decoder_etc_block& dst_blk = *static_cast(pDst); + + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + const uint32_t y = unpacked_src_blk.m_solid_color[channel]; + const uint32_t encoded_config = g_etc1_y_solid_block_configs[y]; + + const uint32_t base = encoded_config & 31; + const uint32_t sel = (encoded_config >> 5) & 3; + const uint32_t table = encoded_config >> 7; + + dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2)); + + dst_blk.m_bytes[0] = (uint8_t)(base << 3); + dst_blk.m_bytes[1] = (uint8_t)(base << 3); + dst_blk.m_bytes[2] = (uint8_t)(base << 3); + + 
memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + uint8_t block_y[4][4]; + for (uint32_t i = 0; i < 16; i++) + ((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel]; + + int upper_avg, lower_avg, left_avg, right_avg; + bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); + + // non-flipped: | | + // vs. + // flipped: -- + // -- + + uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 }; + + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[0] = basisu::minimum(low[0], v); + high[0] = basisu::maximum(high[0], v); + } + } + for (uint32_t y = 2; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[1] = basisu::minimum(low[1], v); + high[1] = basisu::maximum(high[1], v); + } + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const uint32_t v = block_y[y][x]; + low[0] = basisu::minimum(low[0], v); + high[0] = basisu::maximum(high[0], v); + } + } + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 2; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[1] = basisu::minimum(low[1], v); + high[1] = basisu::maximum(high[1], v); + } + } + } + + const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] }; + + dst_blk.m_bytes[3] = (uint8_t)((int)flip); + + if ((range[0] <= 3) && (range[1] <= 3)) + { + // This is primarily for better gradients. + dst_blk.m_bytes[0] = 0; + dst_blk.m_bytes[1] = 0; + dst_blk.m_bytes[2] = 0; + + uint16_t l_bitmask = 0, h_bitmask = 0; + + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? 
g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]); + + const uint32_t table = encoded & 7; + const uint32_t base = (encoded >> 3) & 31; + assert(base <= 15); + const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 }; + + dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5)); + + const uint32_t sv = base << (subblock ? 0 : 4); + dst_blk.m_bytes[0] |= (uint8_t)(sv); + dst_blk.m_bytes[1] |= (uint8_t)(sv); + dst_blk.m_bytes[2] |= (uint8_t)(sv); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = block_y[y + subblock * 2][x]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = block_y[y][x + subblock * 2]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + } // subblock + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + + return true; + } + + uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255; + uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255; + + bool diff = true; + + int dy = y1 - y0; + + if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax)) + { + diff = false; + + y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255; + y1 = ((flip ? 
lower_avg : right_avg) * 15 + 127) / 255; + + dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4)); + dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4)); + dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4)); + } + else + { + dy = basisu::clamp(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + y1 = y0 + dy; + + if (dy < 0) dy += 8; + + dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy); + dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy); + dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy); + + dst_blk.m_bytes[3] |= 2; + } + + const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) }; + + uint32_t enc_range[2]; + for (uint32_t subset = 0; subset < 2; subset++) + { + const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]); + const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]); + + enc_range[subset] = basisu::maximum(pos, neg); + } + + uint16_t l_bitmask = 0, h_bitmask = 0; + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + if ((!diff) && (range[subblock] <= 3)) + { + const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]); + + const uint32_t table = encoded & 7; + const uint32_t base = (encoded >> 3) & 31; + assert(base <= 15); + const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 }; + + dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5)); + + const uint32_t mask = ~(0xF << (subblock ? 0 : 4)); + + dst_blk.m_bytes[0] &= mask; + dst_blk.m_bytes[1] &= mask; + dst_blk.m_bytes[2] &= mask; + + const uint32_t sv = base << (subblock ? 
0 : 4); + dst_blk.m_bytes[0] |= (uint8_t)(sv); + dst_blk.m_bytes[1] |= (uint8_t)(sv); + dst_blk.m_bytes[2] |= (uint8_t)(sv); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = block_y[y + subblock * 2][x]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = block_y[y][x + subblock * 2]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + continue; + } // if + + uint32_t best_err = UINT32_MAX; + uint8_t best_sels[8]; + uint32_t best_inten = 0; + + const int base = base_y[subblock]; + + const int low_limit = -base; + const int high_limit = 255 - base; + + assert(low_limit <= 0 && high_limit >= 0); + + uint32_t inten_table_mask = 0xFF; + const uint32_t er = enc_range[subblock]; + // Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful. 
+ if (er <= 51) + { + inten_table_mask = 0xF; + + if (er > 22) + inten_table_mask &= ~(1 << 0); + + if ((er < 4) || (er > 39)) + inten_table_mask &= ~(1 << 1); + + if (er < 9) + inten_table_mask &= ~(1 << 2); + + if (er < 12) + inten_table_mask &= ~(1 << 3); + } + else + { + inten_table_mask &= ~((1 << 0) | (1 << 1)); + + if (er > 60) + inten_table_mask &= ~(1 << 2); + + if (er > 89) + inten_table_mask &= ~(1 << 3); + + if (er > 120) + inten_table_mask &= ~(1 << 4); + + if (er > 136) + inten_table_mask &= ~(1 << 5); + + if (er > 174) + inten_table_mask &= ~(1 << 6); + } + + for (uint32_t inten = 0; inten < 8; inten++) + { + if ((inten_table_mask & (1 << inten)) == 0) + continue; + + const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]); + const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]); + const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]); + const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]); + assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3)); + + const int tv[4] = { t2, t3, t1, t0 }; + + const int thresh01 = t0 + t1; + const int thresh12 = t1 + t2; + const int thresh23 = t2 + t3; + + assert(thresh01 <= thresh12 && thresh12 <= thresh23); + + static const uint8_t s_table[4] = { 1, 0, 2, 3 }; + + uint32_t total_err = 0; + uint8_t sels[8]; + + if (flip) + { + if (((int)high[subblock] - base) * 2 < thresh01) + { + memset(sels, 3, 8); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + + const uint32_t c = 3; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else if (((int)low[subblock] - base) * 2 >= thresh23) + { + memset(sels, 1, 8); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + + const uint32_t c = 1; + + uint32_t e = 
basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + const int delta2 = delta * 2; + + uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)]; + sels[y * 4 + x] = (uint8_t)c; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + } + else + { + if (((int)high[subblock] - base) * 2 < thresh01) + { + memset(sels, 3, 8); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + + const uint32_t c = 3; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else if (((int)low[subblock] - base) * 2 >= thresh23) + { + memset(sels, 1, 8); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + + const uint32_t c = 1; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + const int delta2 = delta * 2; + + uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)]; + sels[y * 2 + x] = (uint8_t)c; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + } + + if (total_err < best_err) + { + best_err = total_err; + best_inten = inten; + memcpy(best_sels, sels, 8); + } + + } // inten + + //g_inten_hist[best_inten][enc_range[subblock]]++; + + dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 
2 : 5)); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = best_sels[y * 4 + x]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = best_sels[y * 2 + x]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + } // subblock + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + + return true; + } + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst) + { + eac_block& dst = *static_cast(pDst); + const color32* pSrc_pixels = &block_pixels[0][0]; + + if ((!g_uastc_mode_has_alpha[unpacked_src_blk.m_mode]) || (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)) + { + const uint32_t a = (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) ? 
unpacked_src_blk.m_solid_color[3] : 255; + + dst.m_base = a; + dst.m_table = 13; + dst.m_multiplier = 1; + + memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; + } + + uint32_t min_a = 255, max_a = 0; + for (uint32_t i = 0; i < 16; i++) + { + min_a = basisu::minimum(min_a, pSrc_pixels[i].a); + max_a = basisu::maximum(max_a, pSrc_pixels[i].a); + } + + if (min_a == max_a) + { + dst.m_base = min_a; + dst.m_table = 13; + dst.m_multiplier = 1; + + memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + return; + } + + const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF; + const int multiplier = unpacked_src_blk.m_etc2_hints >> 4; + + assert(multiplier >= 1); + + dst.m_multiplier = multiplier; + dst.m_table = table; + + const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + dst.m_base = center; + + const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0]; + + uint32_t vals[8]; + for (uint32_t j = 0; j < 8; j++) + vals[j] = clamp255(center + (pTable[j] * multiplier)); + + uint64_t sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = block_pixels[i & 3][i >> 2].a; + + const uint32_t err0 = (basisu::iabs(vals[0] - a) << 3) | 0; + const uint32_t err1 = (basisu::iabs(vals[1] - a) << 3) | 1; + const uint32_t err2 = (basisu::iabs(vals[2] - a) << 3) | 2; + const uint32_t err3 = (basisu::iabs(vals[3] - a) << 3) | 3; + const uint32_t err4 = (basisu::iabs(vals[4] - a) << 3) | 4; + const uint32_t err5 = (basisu::iabs(vals[5] - a) << 3) | 5; + const uint32_t err6 = (basisu::iabs(vals[6] - a) << 3) | 6; + const uint32_t err7 = (basisu::iabs(vals[7] - a) << 3) | 7; + + const uint32_t min_err = 
basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(err0, err1, err2), err3), err4), err5), err6), err7); + + const uint64_t best_index = min_err & 7; + sels |= (best_index << (45 - i * 3)); + } + + dst.set_selector_bits(sels); + } + + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst) + { + eac_block& dst_etc2_eac_a8_blk = *static_cast(pDst); + decoder_etc_block& dst_etc1_blk = static_cast(pDst)[1]; + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + color32 block_pixels[4][4]; + if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR) + { + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + } + + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk); + + transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk); + + return true; + } + + static const uint8_t s_uastc5_to_bc1[32] = { 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1 }; + static const uint8_t s_uastc4_to_bc1[16] = { 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1, 1 }; + static const uint8_t s_uastc3_to_bc1[8] = { 0, 0, 2, 2, 3, 3, 1, 1 }; + static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; + static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; + const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; + + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; + + { + min0_v = max0_v = pPixels[0 * stride]; + min1_v = max1_v = pPixels[1 * stride]; + min2_v = max2_v = pPixels[2 * stride]; + min3_v = max3_v = pPixels[3 * stride]; + } + + { + uint32_t v0 = pPixels[4 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = 
basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[5 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[6 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[7 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + { + uint32_t v0 = pPixels[8 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[9 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[10 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[11 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + { + uint32_t v0 = pPixels[12 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[13 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[14 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[15 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + const uint32_t min_v = basisu::minimum(min0_v, min1_v, min2_v, min3_v); + const uint32_t max_v = basisu::maximum(max0_v, max1_v, max2_v, max3_v); + + uint8_t* pDst_bytes = static_cast(pDst); + pDst_bytes[0] = (uint8_t)max_v; + pDst_bytes[1] = (uint8_t)min_v; + + if (max_v == min_v) + { + memset(pDst_bytes + 2, 0, 6); + return; + } + + const uint32_t delta = max_v - min_v; + + // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. 
+ const int t0 = delta * 13; + const int t1 = delta * 11; + const int t2 = delta * 9; + const int t3 = delta * 7; + const int t4 = delta * 5; + const int t5 = delta * 3; + const int t6 = delta * 1; + + // BC4 floors in its divisions, which we compensate for with the 4 bias. + // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). + const int bias = 4 - min_v * 14; + + static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; + static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; + static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; + static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; + + uint64_t a0, a1, a2, a3; + { + const int v0 = pPixels[0 * stride] * 14 + bias; + const int v1 = pPixels[1 * stride] * 14 + bias; + const int v2 = pPixels[2 * stride] * 14 + bias; + const int v3 = pPixels[3 * stride] * 14 + bias; + a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; + a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; + a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; + a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; + } + + { + const int v0 = pPixels[4 * stride] * 14 + bias; + const int v1 = pPixels[5 * stride] * 14 + bias; + const int v2 = pPixels[6 * stride] * 14 + bias; + const int v3 = pPixels[7 * stride] * 14 + bias; + a0 |= (s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); + a1 |= (s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); + 
a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); + a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); + } + + { + const int v0 = pPixels[8 * stride] * 14 + bias; + const int v1 = pPixels[9 * stride] * 14 + bias; + const int v2 = pPixels[10 * stride] * 14 + bias; + const int v3 = pPixels[11 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); + } + + { + const int v0 = pPixels[12 * stride] * 14 + bias; + const int v1 = pPixels[13 * stride] * 14 + bias; + const int v2 = pPixels[14 * stride] * 14 + bias; + const int v3 = pPixels[15 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); + } + + const uint64_t f = a0 | a1 | a2 | a3; + + pDst_bytes[2] = (uint8_t)f; + pDst_bytes[3] = (uint8_t)(f >> 8U); + pDst_bytes[4] = (uint8_t)(f >> 16U); + pDst_bytes[5] = (uint8_t)(f >> 24U); + pDst_bytes[6] = (uint8_t)(f >> 32U); + pDst_bytes[7] = (uint8_t)(f >> 40U); + } + + static void bc1_find_sels(const color32 
*pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) + { + uint32_t block_r[4], block_g[4], block_b[4]; + + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + for (uint32_t i = 0; i < 16; i++) + { + const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + // Rounding matters here! + // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. 
+ sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; + } + } + + static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) + { + uint32_t block_r[4], block_g[4], block_b[4]; + + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; + const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; + const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; + const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; + + sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + } + } + + struct vec3F { float c[3]; }; + + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, 
vec3F* pXh) + { + // Derived from bc7enc16's LS function. + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. + uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. + static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; + + uint32_t weight_accum = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + const uint32_t sel = pSelectors[i]; + ut_r += r; + ut_g += g; + ut_b += b; + weight_accum += s_weight_vals[sel]; + uq00_r += sel * r; + uq00_g += sel * g; + uq00_b += sel * b; + } + + float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r; + float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g; + float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b; + + q10_r = t_r * 3.0f - q00_r; + q10_g = t_g * 3.0f - q00_g; + q10_b = t_b * 3.0f - q00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = 3.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r; + pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g; + pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b; + + // Check and fix channel singularities - might not be needed, but is in UASTC's encoder. 
+ for (uint32_t c = 0; c < 3; c++) + { + if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < 16; i++) + { + lo_v = basisu::minimumu(lo_v, pColors[i].c[c]); + hi_v = basisu::maximumu(hi_v, pColors[i].c[c]); + } + + if (lo_v == hi_v) + { + pXl->c[c] = (float)lo_v; + pXh->c[c] = (float)hi_v; + } + } + } + + return true; + } + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + { + dxt1_block* pDst_block = static_cast(pDst); + + uint32_t mask = 0xAA; + uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; + uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; + + if (min16 == max16) + { + // Always forbid 3 color blocks + // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. + mask = 0; + + // Make l > h + if (min16 > 0) + min16--; + else + { + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + mask = 0x55; + } + + assert(max16 > min16); + } + + if (max16 < min16) + { + std::swap(max16, min16); + mask ^= 0x55; + } + + pDst_block->set_low_color(static_cast(max16)); + pDst_block->set_high_color(static_cast(min16)); + pDst_block->m_selectors[0] = static_cast(mask); + pDst_block->m_selectors[1] = static_cast(mask); + pDst_block->m_selectors[2] = static_cast(mask); + pDst_block->m_selectors[3] = static_cast(mask); + } + + static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + + // Good references: squish library, stb_dxt. 
+ void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags) + { + const color32* pSrc_pixels = (const color32*)pPixels; + dxt1_block* pDst_block = static_cast(pDst); + + int avg_r = -1, avg_g = 0, avg_b = 0; + int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + uint8_t sels[16]; + + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; + if (use_sels) + { + // Caller is jamming in their own selectors for us to try. + const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); + + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; + + for (uint32_t i = 0; i < 16; i++) + sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; + } + else + { + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 1; j < 16; j++) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 16) + { + encode_bc1_solid_block(pDst, fr, fg, fb); + return; + } + + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) 
+ int total_r = fr, total_g = fg, total_b = fb; + int max_r = fr, max_g = fg, max_b = fb; + int min_r = fr, min_g = fg, min_b = fb; + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = static_cast(icov[i])* (1.0f / 255.0f); + +#if 0 + // Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference). 
+ // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta + int saxis_r = max_r - min_r; + int saxis_g = max_g - min_g; + int saxis_b = max_b - min_b; +#else + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + //float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's + //float xg = (float)(max_g - avg_g); + //float xb = (float)(max_b - avg_b); + for (uint32_t power_iter = 0; power_iter < 4; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + if (k >= 2) + { + float m = 1024.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } +#endif + + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; + for (uint32_t i = 0; i < 16; i++) + { + int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b; + if (dot < low_dot) + { + low_dot = dot; + low_c = i; + } + if (dot > high_dot) + { + high_dot = dot; + high_c = i; + } + } + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } // if (use_sels) + + const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1); + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors. 
+ vec3F xl, xh; + if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh)) + { + if (avg_r < 0) + { + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + total_r += pSrc_pixels[i].r; + total_g += pSrc_pixels[i].g; + total_b += pSrc_pixels[i].b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + } + + // All selectors equal - treat it as a solid block which should always be equal or better. + lr = g_bc1_match5_equals_1[avg_r].m_hi; + lg = g_bc1_match6_equals_1[avg_g].m_hi; + lb = g_bc1_match5_equals_1[avg_b].m_hi; + + hr = g_bc1_match5_equals_1[avg_r].m_lo; + hg = g_bc1_match6_equals_1[avg_g].m_lo; + hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. + } + else + { + lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + + hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + } + + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } + + uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast(lc16)); + pDst_block->set_high_color(static_cast(hc16)); + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + 
pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; + } + } + + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) + { + const color32* pSrc_pixels = (const color32*)pPixels; + dxt1_block* pDst_block = static_cast(pDst); + + int avg_r = -1, avg_g = 0, avg_b = 0; + int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + uint8_t sels[16]; + + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; + if (use_sels) + { + // Caller is jamming in their own selectors for us to try. + const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); + + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; + + for (uint32_t i = 0; i < 16; i++) + sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; + } + else + { + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 1; j < 16; j++) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 16) + { + encode_bc1_solid_block(pDst, fr, fg, fb); + return; + } + + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) 
+ int total_r = fr, total_g = fg, total_b = fb; + int max_r = fr, max_g = fg, max_b = fb; + int min_r = fr, min_g = fg, min_b = fb; + uint32_t grayscale_flag = (fr == fg) && (fr == fb); + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + grayscale_flag &= ((r == g) && (r == b)); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + if (grayscale_flag) + { + // Grayscale blocks are a common enough case to specialize. + if ((max_r - min_r) < 2) + { + lr = lb = hr = hb = to_5(fr); + lg = hg = to_6(fr); + } + else + { + lr = lb = to_5(min_r); + lg = to_6(min_r); + + hr = hb = to_5(max_r); + hg = to_6(max_r); + } + } + else + { + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + + // Find the shortest vector from a AABB corner to the block's average color. + // This is to help avoid outliers. 
+ + uint32_t dist[3][2]; + dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3; + dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3; + dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3; + + uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]); + uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4; + min_d0 = basisu::minimum(min_d0, d4); + + uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1; + uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5; + min_d1 = basisu::minimum(min_d1, d5); + + uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2; + min_d0 = basisu::minimum(min_d0, d2); + + uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3; + min_d1 = basisu::minimum(min_d1, d3); + + uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6; + min_d0 = basisu::minimum(min_d0, d6); + + uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7; + min_d1 = basisu::minimum(min_d1, d7); + + uint32_t min_d = basisu::minimum(min_d0, min_d1); + uint32_t best_i = min_d & 7; + + int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r); + int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g); + int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b); + + // Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in. + uint32_t low_c = 0, high_c = 0; + if ((delta_r | delta_g | delta_b) != 0) + { + // Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB. + // Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection. + // One of these vectors will be a decent approximation of the block's PCA. 
+ const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b; + + int low_dot0 = INT_MAX, high_dot0 = INT_MIN; + int low_dot1 = INT_MAX, high_dot1 = INT_MIN; + int low_dot2 = INT_MAX, high_dot2 = INT_MIN; + int low_dot3 = INT_MAX, high_dot3 = INT_MIN; + + //int low_c0, low_c1, low_c2, low_c3; + //int high_c0, high_c1, high_c2, high_c3; + + for (uint32_t i = 0; i < 16; i++) + { + const int dotx = pSrc_pixels[i].r * saxis0_r; + const int doty = pSrc_pixels[i].g * saxis0_g; + const int dotz = pSrc_pixels[i].b * saxis0_b; + + const int dot0 = ((dotz + dotx + doty) << 4) + i; + const int dot1 = ((dotz - dotx - doty) << 4) + i; + const int dot2 = ((dotz - dotx + doty) << 4) + i; + const int dot3 = ((dotz + dotx - doty) << 4) + i; + + if (dot0 < low_dot0) + { + low_dot0 = dot0; + //low_c0 = i; + } + if ((dot0 ^ 15) > high_dot0) + { + high_dot0 = dot0 ^ 15; + //high_c0 = i; + } + + if (dot1 < low_dot1) + { + low_dot1 = dot1; + //low_c1 = i; + } + if ((dot1 ^ 15) > high_dot1) + { + high_dot1 = dot1 ^ 15; + //high_c1 = i; + } + + if (dot2 < low_dot2) + { + low_dot2 = dot2; + //low_c2 = i; + } + if ((dot2 ^ 15) > high_dot2) + { + high_dot2 = dot2 ^ 15; + //high_c2 = i; + } + + if (dot3 < low_dot3) + { + low_dot3 = dot3; + //low_c3 = i; + } + if ((dot3 ^ 15) > high_dot3) + { + high_dot3 = dot3 ^ 15; + //high_c3 = i; + } + } + + low_c = low_dot0 & 15; + high_c = ~high_dot0 & 15; + uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15); + + uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15); + if (tr > r) { + low_c = low_dot1 & 15; + high_c = ~high_dot1 & 15; + r = tr; + } + + tr = (high_dot2 & ~15) - (low_dot2 & ~15); + if (tr > r) { + low_c = low_dot2 & 15; + high_c = ~high_dot2 & 15; + r = tr; + } + + tr = (high_dot3 & ~15) - (low_dot3 & ~15); + if (tr > r) { + low_c = low_dot3 & 15; + high_c = ~high_dot3 & 15; + } + } + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = 
to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + } + + bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } // if (use_sels) + + const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1); + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb; + + // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors. + vec3F xl, xh; + if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh)) + { + if (avg_r < 0) + { + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + total_r += pSrc_pixels[i].r; + total_g += pSrc_pixels[i].g; + total_b += pSrc_pixels[i].b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + } + + // All selectors equal - treat it as a solid block which should always be equal or better. + lr = g_bc1_match5_equals_1[avg_r].m_hi; + lg = g_bc1_match6_equals_1[avg_g].m_hi; + lb = g_bc1_match5_equals_1[avg_b].m_hi; + + hr = g_bc1_match5_equals_1[avg_r].m_lo; + hg = g_bc1_match6_equals_1[avg_g].m_lo; + hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. 
+ } + else + { + lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + + hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + } + + if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb)) + break; + + bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } + + uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast(lc16)); + pDst_block->set_high_color(static_cast(hc16)); + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ 
invert_mask; + } + } + + // Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest. + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst) + { + const uint32_t mode = unpacked_src_blk.m_mode; + const astc_block_desc& astc_blk = unpacked_src_blk.m_astc; + + dxt1_block& b = *static_cast(pDst); + + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + if (total_comps == 2) + { + const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant; + const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant; + + b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127)); + b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127)); + } + else + { + b.set_low_color(dxt1_block::pack_color( + color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant, + 255), true, 127) + ); + + b.set_high_color(dxt1_block::pack_color( + color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant, + 255), true, 127) + ); + } + + if (b.get_low_color() == b.get_high_color()) + { + // Always forbid 3 color blocks + uint16_t lc16 = (uint16_t)b.get_low_color(); + uint16_t hc16 = (uint16_t)b.get_high_color(); + + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + b.set_low_color(static_cast(lc16)); + b.set_high_color(static_cast(hc16)); + + b.m_selectors[0] = mask; + b.m_selectors[1] = mask; + 
b.m_selectors[2] = mask; + b.m_selectors[3] = mask; + } + else + { + bool invert = false; + if (b.get_low_color() < b.get_high_color()) + { + std::swap(b.m_low_color[0], b.m_high_color[0]); + std::swap(b.m_low_color[1], b.m_high_color[1]); + invert = true; + } + + const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]]; + + const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1; + + uint32_t sels = 0; + for (int i = 15; i >= 0; --i) + { + uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]]; + + if (invert) + s ^= 1; + + sels = (sels << 2) | s; + } + b.m_selectors[0] = sels & 0xFF; + b.m_selectors[1] = (sels >> 8) & 0xFF; + b.m_selectors[2] = (sels >> 16) & 0xFF; + b.m_selectors[3] = (sels >> 24) & 0xFF; + } + } + + // Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed. + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality) + { + const uint32_t mode = unpacked_src_blk.m_mode; + + const astc_block_desc& astc_blk = unpacked_src_blk.m_astc; + + dxt1_block& b = *static_cast(pDst); + + b.set_low_color(1); + b.set_high_color(0); + + const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]]; + + const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1; + + uint32_t sels = 0; + for (int i = 15; i >= 0; --i) + { + sels <<= 2; + sels |= pTran[astc_blk.m_weights[i << plane_shift]]; + } + + b.m_selectors[0] = sels & 0xFF; + b.m_selectors[1] = (sels >> 8) & 0xFF; + b.m_selectors[2] = (sels >> 16) & 0xFF; + b.m_selectors[3] = (sels >> 24) & 0xFF; + + encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? 
cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors); + } + + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b); + return true; + } + + if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0)) + transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst); + else + { + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (unpacked_src_blk.m_bc1_hint1) + transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality); + else + encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0); + } + + return true; + } + + static void write_bc4_solid_block(uint8_t* pDst, uint32_t a) + { + pDst[0] = (uint8_t)a; + pDst[1] = (uint8_t)a; + memset(pDst + 2, 0, 6); + } + + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block = pDst; + dxt1_block* pBC1_block = &static_cast(pDst)[1]; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + write_bc4_solid_block(static_cast(pBC4_block), unpacked_src_blk.m_solid_color.a); + encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + 
basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32)); + + if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0)) + transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block); + else + { + if (unpacked_src_blk.m_bc1_hint1) + transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality); + else + encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0); + } + + return true; + } + + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0) + { + BASISU_NOTE_UNUSED(high_quality); + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block = pDst; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + write_bc4_solid_block(static_cast(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32)); + + return true; + } + + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1) + { + BASISU_NOTE_UNUSED(high_quality); + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block0 = pDst; + void* pBC4_block1 = (uint8_t*)pDst + 8; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + write_bc4_solid_block(static_cast(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]); + write_bc4_solid_block(static_cast(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], 
unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32)); + basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32)); + + return true; + } + + static const uint8_t s_etc2_eac_bit_ofs[16] = { 45, 33, 21, 9, 42, 30, 18, 6, 39, 27, 15, 3, 36, 24, 12, 0 }; + + static void pack_eac_solid_block(eac_block& blk, uint32_t a) + { + blk.m_base = static_cast(a); + blk.m_table = 13; + blk.m_multiplier = 0; + + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; + } + + // Only checks 4 tables. + static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = pPixels[i * stride]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + pack_eac_solid_block(blk, min_alpha); + return; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + const uint32_t SINGLE_TABLE_THRESH = 5; + if (alpha_range <= SINGLE_TABLE_THRESH) + { + // If alpha_range <= 5 table 13 is lossless + int base = clamp255((int)max_alpha - 2); + + blk.m_base = base; + blk.m_multiplier = 1; + blk.m_table = 13; + + base -= 3; + + uint64_t packed_sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 }; + + int sel = a - base; + assert(sel >= 0 && sel <= 5); + + packed_sels |= (static_cast(s_sels[sel]) << s_etc2_eac_bit_ofs[i]); + } + + blk.set_selector_bits(packed_sels); + + return; + } + + const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13; + static const uint8_t s_tables[4] = { T0, T1, T2, T3 }; + + int base[4], mul[4]; + uint32_t mul_or = 0; + for (uint32_t i = 0; i < 4; i++) + { + const uint32_t table = s_tables[i]; + + const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - 
g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + + base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range))); + mul[i] = clampi((int)roundf(alpha_range / range), 1, 15); + mul_or |= mul[i]; + } + + uint32_t total_err[4] = { 0, 0, 0, 0 }; + uint8_t sels[4][16]; + + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX; + + if ((a < 7) || (a > (255 - 7))) + { + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]); + const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]); + const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]); + const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]); + + l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s); + } + } + else if (mul_or == 1) + { + const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a; + + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = g_eac_modifier_table[T0][s] + a0; + const int v1 = g_eac_modifier_table[T1][s] + a1; + const int v2 = g_eac_modifier_table[T2][s] + a2; + const int v3 = g_eac_modifier_table[T3][s] + a3; + + l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s); + } + } + else + { + const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a; + + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0; + const int v1 = mul[1] * 
g_eac_modifier_table[T1][s] + a1; + const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2; + const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3; + + l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s); + } + } + + sels[0][i] = l0 & 7; + sels[1][i] = l1 & 7; + sels[2][i] = l2 & 7; + sels[3][i] = l3 & 7; + + total_err[0] += basisu::square(l0 >> 3); + total_err[1] += basisu::square(l1 >> 3); + total_err[2] += basisu::square(l2 >> 3); + total_err[3] += basisu::square(l3 >> 3); + } + + uint32_t min_err = total_err[0], min_index = 0; + for (uint32_t i = 1; i < 4; i++) + { + if (total_err[i] < min_err) + { + min_err = total_err[i]; + min_index = i; + } + } + + blk.m_base = base[min_index]; + blk.m_multiplier = mul[min_index]; + blk.m_table = s_tables[min_index]; + + uint64_t packed_sels = 0; + const uint8_t* pSels = &sels[min_index][0]; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= (static_cast(pSels[i]) << s_etc2_eac_bit_ofs[i]); + + blk.set_selector_bits(packed_sels); + } + + // Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal. 
+ static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = pPixels[i * stride]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + pack_eac_solid_block(blk, min_alpha); + return; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + const uint32_t SINGLE_TABLE_THRESH = 5; + if (alpha_range <= SINGLE_TABLE_THRESH) + { + // If alpha_range <= 5 table 13 is lossless + int base = clamp255((int)max_alpha - 2); + + blk.m_base = base; + blk.m_multiplier = 1; + blk.m_table = 13; + + base -= 3; + + uint64_t packed_sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 }; + + int sel = a - base; + assert(sel >= 0 && sel <= 5); + + packed_sels |= (static_cast(s_sels[sel]) << s_etc2_eac_bit_ofs[i]); + } + + blk.set_selector_bits(packed_sels); + + return; + } + + int base[16], mul[16]; + for (uint32_t table = 0; table < 16; table++) + { + const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + + base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range))); + mul[table] = clampi((int)roundf(alpha_range / range), 1, 15); + } + + uint32_t total_err[16]; + memset(total_err, 0, sizeof(total_err)); + + uint8_t sels[16][16]; + + for (uint32_t table = 0; table < 16; table++) + { + const int8_t* pTable = &g_eac_modifier_table[table][0]; + const int m = mul[table], b = base[table]; + + uint32_t prev_l = 0, prev_a = UINT32_MAX; + + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + if ((uint32_t)a == prev_a) + { + sels[table][i] = prev_l & 7; + total_err[table] += 
basisu::square(prev_l >> 3); + } + else + { + uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3; + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7); + + sels[table][i] = l & 7; + total_err[table] += basisu::square(l >> 3); + + prev_l = l; + prev_a = a; + } + } + } + + uint32_t min_err = total_err[0], min_index = 0; + for (uint32_t i = 1; i < 16; i++) + { + if (total_err[i] < min_err) + { + min_err = total_err[i]; + min_index = i; + } + } + + blk.m_base = base[min_index]; + blk.m_multiplier = mul[min_index]; + blk.m_table = min_index; + + uint64_t packed_sels = 0; + const uint8_t* pSels = &sels[min_index][0]; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= (static_cast(pSels[i]) << s_etc2_eac_bit_ofs[i]); + + blk.set_selector_bits(packed_sels); + } + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_eac_solid_block(*static_cast(pDst), unpacked_src_blk.m_solid_color.c[chan0]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (chan0 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst); + 
else + (high_quality ? pack_eac_high_quality : pack_eac)(*static_cast(pDst), &block_pixels[0][0].c[chan0], sizeof(color32)); + + return true; + } + + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_eac_solid_block(static_cast(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]); + pack_eac_solid_block(static_cast(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (chan0 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast(pDst)[0]); + else + (high_quality ? pack_eac_high_quality : pack_eac)(static_cast(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32)); + + if (chan1 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast(pDst)[1]); + else + (high_quality ? 
pack_eac_high_quality : pack_eac)(static_cast(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32)); + return true; + } + + // PVRTC1 + static void fixup_pvrtc1_4_modulation_rgb( + const uastc_block* pSrc_blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, + uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + const uastc_block& src_block = pSrc_blocks[block_index]; + + color32 block_pixels[4][4]; + unpack_uastc(src_block, &block_pixels[0][0], false); + if (from_alpha) + { + // Just set RGB to alpha to avoid adding complexity below. 
+ for (uint32_t i = 0; i < 16; i++) + { + const uint8_t a = ((color32*)block_pixels)[i].a; + ((color32*)block_pixels)[i].set(a, a, a, 255); + } + } + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \ + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = 
e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + static void fixup_pvrtc1_4_modulation_rgba( + const uastc_block* pSrc_blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < 
static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = get_endpoint_l8(e, 0); + e1[ex][ey] = get_endpoint_l8(e, 1); + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + const uastc_block& src_block = pSrc_blocks[block_index]; + + color32 block_pixels[4][4]; + unpack_uastc(src_block, &block_pixels[0][0], false); + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = get_endpoint_l8(e, 0); \ + e1[ex][ey] = get_endpoint_l8(e, 1); \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << 
((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* 
pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha) + { + BASISU_NOTE_UNUSED(high_quality); + + if ((!num_blocks_x) || (!num_blocks_y)) + return false; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + if (!basisu::is_pow2(width) || !basisu::is_pow2(height)) + return false; + + basisu::vector temp_endpoints(num_blocks_x * num_blocks_y); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + color32 block_pixels[16]; + if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) + return false; + + // Get block's RGB bounding box + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + if (from_alpha) + { + uint32_t low_a = 255, high_a = 0; + for (uint32_t i = 0; i < 16; i++) + { + low_a = basisu::minimum(low_a, block_pixels[i].a); + high_a = basisu::maximum(high_a, block_pixels[i].a); + } + low_color.set(low_a, low_a, low_a, 255); + high_color.set(high_a, high_a, high_a, 255); + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + } + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. 
+ pvrtc4_block temp; + temp.set_opaque_endpoint_floor(0, low_color); + temp.set_opaque_endpoint_ceil(1, high_color); + + temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints; + } + } + + fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha); + + return true; + } + + bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality) + { + BASISU_NOTE_UNUSED(high_quality); + + if ((!num_blocks_x) || (!num_blocks_y)) + return false; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + if (!basisu::is_pow2(width) || !basisu::is_pow2(height)) + return false; + + basisu::vector temp_endpoints(num_blocks_x * num_blocks_y); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + color32 block_pixels[16]; + if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) + return false; + + // Get block's RGBA bounding box + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. 
+ pvrtc4_block temp; + temp.set_endpoint_floor(0, low_color); + temp.set_endpoint_ceil(1, high_color); + + temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints; + } + } + + fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y); + + return true; + } + + void uastc_init() + { + for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++) + { + if (!astc_is_valid_endpoint_range(range)) + continue; + + const uint32_t levels = astc_get_levels(range); + + uint32_t vals[256]; + for (uint32_t i = 0; i < levels; i++) + vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; + + std::sort(vals, vals + levels); + + for (uint32_t i = 0; i < levels; i++) + { + const uint32_t order = vals[i] & 0xFF; + const uint32_t unq = vals[i] >> 8; + + g_astc_unquant[range][order].m_unquant = (uint8_t)unq; + g_astc_unquant[range][order].m_index = (uint8_t)i; + + } // i + } + + // TODO: Precompute? + // BC7 777.1 + for (int c = 0; c < 256; c++) + { + for (uint32_t lp = 0; lp < 2; lp++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + + for (uint32_t l = 0; l < 128; l++) + { + const uint32_t low = (l << 1) | lp; + + for (uint32_t h = 0; h < 128; h++) + { + const uint32_t high = (h << 1) | lp; + + const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6; + + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_bc7_mode_6_optimal_endpoints[c][lp] = best; + } // lp + + } // c + + // BC7 777 + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + + for (uint32_t l = 0; l < 128; l++) + { + const uint32_t low = (l << 1) | (l >> 6); + + for (uint32_t h = 0; h < 128; h++) + { + const uint32_t high = (h << 1) | (h >> 6); + + const int k = (low * (64 - 
g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6; + + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_bc7_mode_5_optimal_endpoints[c] = best; + + } // c + } + +#endif // #if BASISD_SUPPORT_UASTC + +// ------------------------------------------------------------------------------------------------------ +// KTX2 +// ------------------------------------------------------------------------------------------------------ + +#if BASISD_SUPPORT_KTX2 + const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + ktx2_transcoder::ktx2_transcoder(basist::etc1_global_selector_codebook* pGlobal_sel_codebook) : + m_etc1s_transcoder(pGlobal_sel_codebook) + { + clear(); + } + + void ktx2_transcoder::clear() + { + m_pData = nullptr; + m_data_size = 0; + + memset(&m_header, 0, sizeof(m_header)); + m_levels.clear(); + m_dfd.clear(); + m_key_values.clear(); + memset(&m_etc1s_header, 0, sizeof(m_etc1s_header)); + m_etc1s_image_descs.clear(); + + m_format = basist::basis_tex_format::cETC1S; + + m_dfd_color_model = 0; + m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED; + m_dfd_transfer_func = 0; + m_dfd_flags = 0; + m_dfd_samples = 0; + m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB; + m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; + + m_etc1s_transcoder.clear(); + + m_def_transcoder_state.clear(); + + m_has_alpha = false; + m_is_video = false; + } + + bool ktx2_transcoder::init(const void* pData, uint32_t data_size) + { + clear(); + + if (!pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n"); + assert(0); + return false; + } + + if (data_size <= sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n"); + return false; + } + + if (memcmp(pData, 
g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n"); + return false; + } + + m_pData = static_cast(pData); + m_data_size = data_size; + + memcpy(&m_header, pData, sizeof(m_header)); + + // We only support UASTC and ETC1S + if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n"); + return false; + } + + // 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1." + if (m_header.m_type_size != 1) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n"); + return false; + } + + // We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case. + // The BasisU library does not support 1D or 3D textures at all. + if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n"); + return false; + } + + // Face count must be 1 or 6 + if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + // 3.4: Make sure cubemaps are square. + if (m_header.m_pixel_width != m_header.m_pixel_height) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n"); + return false; + } + } + + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" + if (m_header.m_level_count < 1) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n"); + return false; + } + + // Sanity check the level count. 
+ if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n"); + return false; + } + + if (m_header.m_sgd_byte_offset < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n"); + return false; + } + + if (m_header.m_sgd_byte_offset + m_header.m_sgd_byte_length > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n"); + return false; + } + } + + if (!m_levels.try_resize(m_header.m_level_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n"); + return false; + } + + const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index); + + if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n"); + return false; + } + + memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); + + // Sanity check the level offsets and byte sizes + for (uint32_t i = 0; i < m_levels.size(); i++) + { + if (m_levels[i].m_byte_offset < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n"); + return false; + } + + if (!m_levels[i].m_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n"); + } + + if 
((m_levels[i].m_byte_offset + m_levels[i].m_byte_length) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); + return false; + } + + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; + + if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + if (m_levels[i].m_uncompressed_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n"); + return false; + } + } + else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD) + { + if (!m_levels[i].m_uncompressed_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n"); + return false; + } + } + } + + const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60; + if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n"); + return false; + } + + if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header))) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); + return false; + } + + const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset; + + if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n"); + return false; + } + + memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); + + // This is all hard coded for only ETC1S and UASTC. 
+ uint32_t dfd_total_size = basisu::read_le_dword(pDFD); + + // 3.10.3: Sanity check + if (dfd_total_size != m_header.m_dfd_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); + return false; + } + + // 3.10.3: More sanity checking + if (m_header.m_kvd_byte_length) + { + if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n"); + return false; + } + } + + const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); + const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); + + m_dfd_color_model = dfd_bits & 255; + m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); + m_dfd_transfer_func = (dfd_bits >> 16) & 255; + m_dfd_flags = (dfd_bits >> 24) & 255; + + // See 3.10.1.Restrictions + if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n"); + return false; + } + + if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) + { + m_format = basist::basis_tex_format::cETC1S; + + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD’s sample count." + // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. + m_has_alpha = (m_header.m_dfd_byte_length == 60); + + m_dfd_samples = m_has_alpha ? 
2 : 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + if (m_has_alpha) + { + const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t)); + m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15); + } + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC) + { + m_format = basist::basis_tex_format::cUASTC4x4; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + // We're assuming "DATA" means RGBA so it has alpha. + m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } + else + { + // Unsupported DFD color model. + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); + return false; + } + + if (!read_key_values()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); + return false; + } + + // Check for a KTXanimData key + for (uint32_t i = 0; i < m_key_values.size(); i++) + { + if (strcmp(reinterpret_cast(m_key_values[i].m_key.data()), "KTXanimData") == 0) + { + m_is_video = true; + break; + } + } + + return true; + } + + uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const + { + const uint32_t etc1s_image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + if (etc1s_image_index >= get_etc1s_image_descs().size()) + { + assert(0); + return 0; + } + + return get_etc1s_image_descs()[etc1s_image_index].m_image_flags; + } + + const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const + { + for (uint32_t i = 0; i < m_key_values.size(); i++) + if (strcmp((const char *)m_key_values[i].m_key.data(), key_name.c_str()) == 0) + return &m_key_values[i].m_value; + + return nullptr; + } + + bool ktx2_transcoder::start_transcoding() + { + if (!m_pData) + { + 
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. + if (!m_etc1s_transcoder.get_endpoints().empty()) + return true; + + if (!decompress_etc1s_global_data()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); + return false; + } + + if (!m_is_video) + { + // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key. + // Video cannot be a cubemap, and it must be a texture array. + if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1)) + { + for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++) + { + if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME) + { + m_is_video = true; + break; + } + } + } + } + } + else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { +#if !BASISD_SUPPORT_KTX2_ZSTD + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n"); + return false; +#endif + } + + return true; + } + + bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const + { + if (level_index >= m_levels.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + if (face_index >= 6) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n"); + return false; + } + } + else if (face_index != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n"); + return false; + } + + if (layer_index >= basisu::maximum(m_header.m_layer_count, 1)) + { + 
BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum(m_header.m_layer_count, 1)\n"); + return false; + } + + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); + const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); + const uint32_t num_blocks_x = (level_width + 3) >> 2; + const uint32_t num_blocks_y = (level_height + 3) >> 2; + + level_info.m_face_index = face_index; + level_info.m_layer_index = layer_index; + level_info.m_level_index = level_index; + level_info.m_orig_width = level_width; + level_info.m_orig_height = level_height; + level_info.m_width = num_blocks_x * 4; + level_info.m_height = num_blocks_y * 4; + level_info.m_num_blocks_x = num_blocks_x; + level_info.m_num_blocks_y = num_blocks_y; + level_info.m_total_blocks = num_blocks_x * num_blocks_y; + level_info.m_alpha_flag = m_has_alpha; + level_info.m_iframe_flag = false; + if (m_etc1s_image_descs.size()) + { + const uint32_t etc1s_image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + level_info.m_iframe_flag = (m_etc1s_image_descs[etc1s_image_index].m_image_flags & KTX2_IMAGE_IS_P_FRAME) == 0; + } + + return true; + } + + bool ktx2_transcoder::transcode_image_level( + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + basist::transcoder_texture_format fmt, + uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, + ktx2_transcoder_state* pState) + { + if (!m_pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n"); + return false; + } + + if (!pState) + pState = &m_def_transcoder_state; + + if (level_index >= m_levels.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: 
level_index >= m_levels.size()\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + if (face_index >= 6) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n"); + return false; + } + } + else if (face_index != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n"); + return false; + } + + if (layer_index >= basisu::maximum(m_header.m_layer_count, 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum(m_header.m_layer_count, 1)\n"); + return false; + } + + const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset; + uint64_t comp_level_data_size = m_levels[level_index].m_byte_length; + + const uint8_t* pUncomp_level_data = pComp_level_data; + uint64_t uncomp_level_data_size = comp_level_data_size; + + if (uncomp_level_data_size > UINT32_MAX) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { + // Check if we've already decompressed this level's supercompressed data. + if ((int)level_index != pState->m_uncomp_data_level_index) + { + // Uncompress the entire level's supercompressed data. 
+ if (!decompress_level_data(level_index, pState->m_level_uncomp_data)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n"); + return false; + } + pState->m_uncomp_data_level_index = level_index; + } + + pUncomp_level_data = pState->m_level_uncomp_data.data(); + uncomp_level_data_size = pState->m_level_uncomp_data.size(); + } + + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); + const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); + const uint32_t num_blocks_x = (level_width + 3) >> 2; + const uint32_t num_blocks_y = (level_height + 3) >> 2; + + if (m_format == basist::basis_tex_format::cETC1S) + { + // Ensure start_transcoding() was called. + if (m_etc1s_transcoder.get_endpoints().empty()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n"); + return false; + } + + const uint32_t etc1s_image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + // Sanity check + if (etc1s_image_index >= m_etc1s_image_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n"); + assert(0); + return false; + } + + if (static_cast(m_data_size) != m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: File is too large\n"); + return false; + } + + const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index]; + + if (!m_etc1s_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, static_cast(m_data_size), + num_blocks_x, num_blocks_y, level_width, level_height, + level_index, + m_levels[level_index].m_byte_offset + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length, + image_desc.m_alpha_slice_byte_length ? 
(m_levels[level_index].m_byte_offset + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length, + decode_flags, m_has_alpha, + m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else if (m_format == basist::basis_tex_format::cUASTC4x4) + { + // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. + assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length); + const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE; + + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + + // Sanity checks + if (uncomp_ofs >= uncomp_level_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n"); + return false; + } + + if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + return false; + } + + if (!m_uastc_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else + { + // Shouldn't get here. 
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n"); + assert(0); + return false; + } + + return true; + } + + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) + { + const uint64_t comp_size = m_levels[level_index].m_byte_length; + + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length; + + if (((size_t)comp_size) != comp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n"); + return false; + } + if (((size_t)uncomp_size) != uncomp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n"); + return false; + } + + if (!uncomp_data.try_resize(uncomp_size)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { +#if BASISD_SUPPORT_KTX2_ZSTD + const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; + size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size); + if (ZSTD_isError(actualUncompSize)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n"); + return false; + } + if (actualUncompSize != uncomp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n"); + return false; + } +#else + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n"); + return false; +#endif } - return ""; - } - bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt) + return true; + } + + bool ktx2_transcoder::decompress_etc1s_global_data() { - switch (fmt) + // Note: we don't actually support 3D textures in here yet + 
//uint32_t layer_pixel_depth = basisu::maximum(m_header.m_pixel_depth, 1); + //for (uint32_t i = 1; i < m_header.m_level_count; i++) + // layer_pixel_depth += basisu::maximum(m_header.m_pixel_depth >> i, 1); + + const uint32_t image_count = basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count; + assert(image_count); + + const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset; + + memcpy(&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header)); + pSrc += sizeof(ktx2_etc1s_global_data_header); + + if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length)) { - case transcoder_texture_format::cTFETC2_RGBA: - case transcoder_texture_format::cTFBC3_RGBA: - case transcoder_texture_format::cTFASTC_4x4_RGBA: - case transcoder_texture_format::cTFBC7_M5_RGBA: - case transcoder_texture_format::cTFPVRTC1_4_RGBA: - case transcoder_texture_format::cTFPVRTC2_4_RGBA: - case transcoder_texture_format::cTFATC_RGBA: - case transcoder_texture_format::cTFRGBA32: - case transcoder_texture_format::cTFRGBA4444: - return true; - default: - break; + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global data\n"); + return false; } - return false; - } - basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt) - { - switch (fmt) + if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count)) { - case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1; - case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1; - case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4; - case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB; - case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA; - case transcoder_texture_format::cTFBC7_M6_RGB: return 
basisu::texture_format::cBC7; - case transcoder_texture_format::cTFBC7_M5_RGBA: return basisu::texture_format::cBC7; - case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; - case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; - case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4; - case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB; - case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; - case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32; - case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565; - case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565; - case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444; - case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB; - case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA; - case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA; - case transcoder_texture_format::cTFETC2_EAC_R11: return basisu::texture_format::cETC2_R11_EAC; - case transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); - break; + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n"); + return false; } - return basisu::texture_format::cInvalidTextureFormat; - } - bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type) - { - switch (tex_type) + // Sanity check the ETC1S header. 
+ if ((sizeof(ktx2_etc1s_global_data_header) + + sizeof(ktx2_etc1s_image_desc) * image_count + + m_etc1s_header.m_endpoints_byte_length + + m_etc1s_header.m_selectors_byte_length + + m_etc1s_header.m_tables_byte_length + + m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length) { - case transcoder_texture_format::cTFRGBA32: - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return true; - default: - break; + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); + return false; } - return false; + + if (!m_etc1s_image_descs.try_resize(image_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); + return false; + } + + memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); + pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; + + // Sanity check the ETC1S image descs + for (uint32_t i = 0; i < image_count; i++) + { + // m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding. 
+ + if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n"); + return false; + } + + if (m_has_alpha) + { + if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n"); + return false; + } + } + } + + const uint8_t* pEndpoint_data = pSrc; + const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length; + const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length; + + if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); + return false; + } + + if (!m_etc1s_transcoder.decode_palettes( + m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, + m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); + return false; + } + + return true; } - bool basis_block_format_is_uncompressed(block_format tex_type) + bool ktx2_transcoder::read_key_values() { - switch (tex_type) + if (!m_header.m_kvd_byte_length) { - case block_format::cRGB32: - case block_format::cRGBA32: - case block_format::cA32: - case block_format::cRGB565: - case block_format::cBGR565: - case block_format::cRGBA4444_COLOR: - case block_format::cRGBA4444_ALPHA: - case block_format::cRGBA4444_COLOR_OPAQUE: + if (m_header.m_kvd_byte_offset) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n"); + return false; + } + return true; - default: - break; } - return false; - } - - 
uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) - { - switch (fmt) + + if (m_header.m_kvd_byte_offset < sizeof(ktx2_header)) { - case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t); - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return sizeof(uint16_t); - default: - break; + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n"); + return false; } - return 0; - } - - uint32_t basis_get_block_width(transcoder_texture_format tex_type) - { - switch (tex_type) + + if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size) { - case transcoder_texture_format::cTFFXT1_RGB: - return 8; - default: - break; + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n"); + return false; } - return 4; - } - uint32_t basis_get_block_height(transcoder_texture_format tex_type) - { - (void)tex_type; - return 4; + const uint8_t* pSrc = m_pData + m_header.m_kvd_byte_offset; + uint32_t src_left = m_header.m_kvd_byte_length; + + if (!m_key_values.try_reserve(8)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + while (src_left > sizeof(uint32_t)) + { + uint32_t l = basisu::read_le_dword(pSrc); + + pSrc += sizeof(uint32_t); + src_left -= sizeof(uint32_t); + + if (l < 2) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n"); + return false; + } + + if (src_left < l) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n"); + return false; + } + + if (!m_key_values.try_resize(m_key_values.size() + 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + basisu::uint8_vec& key_data = m_key_values.back().m_key; + basisu::uint8_vec& value_data = m_key_values.back().m_value; + + do + { + 
if (!l) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n"); + return false; + } + + if (!key_data.try_push_back(*pSrc++)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + src_left--; + l--; + + } while (key_data.back()); + + if (!value_data.try_resize(l)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + if (l) + { + memcpy(value_data.data(), pSrc, l); + pSrc += l; + src_left -= l; + } + + uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3; + uint32_t alignment_bytes = (4 - ofs) & 3; + + if (src_left < alignment_bytes) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n"); + return false; + } + + pSrc += alignment_bytes; + src_left -= alignment_bytes; + } + + return true; } - - bool basis_is_format_supported(transcoder_texture_format tex_type) + +#endif // BASISD_SUPPORT_KTX2 + + bool basisu_transcoder_supports_ktx2() { - switch (tex_type) - { - // ETC1 and uncompressed are always supported. 
- case transcoder_texture_format::cTFETC1_RGB: - case transcoder_texture_format::cTFRGBA32: - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return true; -#if BASISD_SUPPORT_DXT1 - case transcoder_texture_format::cTFBC1_RGB: - return true; -#endif -#if BASISD_SUPPORT_DXT5A - case transcoder_texture_format::cTFBC4_R: - case transcoder_texture_format::cTFBC5_RG: - return true; -#endif -#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A - case transcoder_texture_format::cTFBC3_RGBA: - return true; -#endif -#if BASISD_SUPPORT_PVRTC1 - case transcoder_texture_format::cTFPVRTC1_4_RGB: - case transcoder_texture_format::cTFPVRTC1_4_RGBA: - return true; -#endif -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - case transcoder_texture_format::cTFBC7_M6_RGB: - return true; -#endif -#if BASISD_SUPPORT_BC7_MODE5 - case transcoder_texture_format::cTFBC7_M5_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ETC2_EAC_A8 - case transcoder_texture_format::cTFETC2_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ASTC - case transcoder_texture_format::cTFASTC_4x4_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ATC - case transcoder_texture_format::cTFATC_RGB: - case transcoder_texture_format::cTFATC_RGBA: - return true; -#endif -#if BASISD_SUPPORT_FXT1 - case transcoder_texture_format::cTFFXT1_RGB: - return true; -#endif -#if BASISD_SUPPORT_PVRTC2 - case transcoder_texture_format::cTFPVRTC2_4_RGB: - case transcoder_texture_format::cTFPVRTC2_4_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ETC2_EAC_RG11 - case transcoder_texture_format::cTFETC2_EAC_R11: - case transcoder_texture_format::cTFETC2_EAC_RG11: - return true; +#if BASISD_SUPPORT_KTX2 + return true; +#else + return false; #endif - default: - break; - } + } + bool basisu_transcoder_supports_ktx2_zstd() + { +#if BASISD_SUPPORT_KTX2_ZSTD + return true; +#else return false; +#endif } } // namespace basist - diff --git 
a/src/deps/basis-universal/transcoder/basisu_transcoder.h b/src/deps/basis-universal/transcoder/basisu_transcoder.h index 770c64122d..bf3aed3dc3 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder.h +++ b/src/deps/basis-universal/transcoder/basisu_transcoder.h @@ -1,5 +1,5 @@ // basisu_transcoder.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,10 +15,25 @@ // limitations under the License. #pragma once -// Set BASISU_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. -//#define BASISU_DEVEL_MESSAGES 1 +// By default KTX2 support is enabled to simplify compilation. This implies the need for the Zstandard library (which we distribute as a single source file in the "zstd" directory) by default. +// Set BASISD_SUPPORT_KTX2 to 0 to completely disable KTX2 support as well as Zstd/miniz usage which is only required for UASTC supercompression in KTX2 files. +// Also see BASISD_SUPPORT_KTX2_ZSTD in basisu_transcoder.cpp, which individually disables Zstd usage. +#ifndef BASISD_SUPPORT_KTX2 + #define BASISD_SUPPORT_KTX2 1 +#endif + +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #define BASISD_SUPPORT_KTX2_ZSTD 1 +#endif + +// Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. 
+#ifndef BASISU_FORCE_DEVEL_MESSAGES + #define BASISU_FORCE_DEVEL_MESSAGES 0 +#endif #include "basisu_transcoder_internal.h" +#include "basisu_transcoder_uastc.h" #include "basisu_global_selector_palette.h" #include "basisu_file_headers.h" @@ -45,12 +60,11 @@ namespace basist cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's) - cTFBC7_M6_RGB = 6, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. - cTFBC7_M5_RGBA = 7, // Opaque+alpha, alpha channel will be opaque for opaque .basis files + cTFBC7_RGBA = 6, // RGB or RGBA, mode 5 for ETC1S, modes (1,2,3,5,6,7) for UASTC // PVRTC1 4bpp (mobile, PowerVR devices) cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. - cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doens't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. + cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. // ASTC (mobile, Intel devices, hopefully all desktop GPU's one day) cTFASTC_4x4_RGBA = 10, // Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. 
@@ -69,10 +83,10 @@ namespace basist cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned) cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps - + // Uncompressed (raw pixel) formats cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte. - cTFRGB565 = 14, // 166pp RGB image stored in raster (not block) order in memory, R at bit position 11 + cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11 cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0 cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 @@ -85,27 +99,62 @@ namespace basist cTFBC3 = cTFBC3_RGBA, cTFBC4 = cTFBC4_R, cTFBC5 = cTFBC5_RG, - cTFBC7_M6_OPAQUE_ONLY = cTFBC7_M6_RGB, - cTFBC7_M5 = cTFBC7_M5_RGBA, + + // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC, which supports numerous modes. + cTFBC7_M6_RGB = cTFBC7_RGBA, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. + cTFBC7_M5_RGBA = cTFBC7_RGBA, // Opaque+alpha, alpha channel will be opaque for opaque .basis files + cTFBC7_M6_OPAQUE_ONLY = cTFBC7_RGBA, + cTFBC7_M5 = cTFBC7_RGBA, + cTFBC7_ALT = 7, + cTFASTC_4x4 = cTFASTC_4x4_RGBA, + cTFATC_RGBA_INTERPOLATED_ALPHA = cTFATC_RGBA, }; - uint32_t basis_get_bytes_per_block(transcoder_texture_format fmt); + // For compressed texture formats, this returns the # of bytes per block. For uncompressed, it returns the # of bytes per pixel. + // NOTE: Previously, this function was called basis_get_bytes_per_block(), and it always returned 16*bytes_per_pixel for uncompressed formats which was confusing. 
+ uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt); + + // Returns format's name in ASCII const char* basis_get_format_name(transcoder_texture_format fmt); + + // Returns block format name in ASCII + const char* basis_get_block_format_name(block_format fmt); + + // Returns true if the format supports an alpha channel. bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt); + + // Returns the basisu::texture_format corresponding to the specified transcoder_texture_format. basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt); + + // Returns the texture type's name in ASCII. const char* basis_get_texture_type_name(basis_texture_type tex_type); - + + // Returns true if the transcoder texture type is an uncompressed (raw pixel) format. bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type); + + // Returns the # of bytes per pixel for uncompressed formats, or 0 for block texture formats. uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt); - + + // Returns the block width for the specified texture format, which is currently either 4 or 8 for FXT1. uint32_t basis_get_block_width(transcoder_texture_format tex_type); + + // Returns the block height for the specified texture format, which is currently always 4. uint32_t basis_get_block_height(transcoder_texture_format tex_type); // Returns true if the specified format was enabled at compile time. - bool basis_is_format_supported(transcoder_texture_format tex_type); - + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); + + // Validates that the output buffer is large enough to hold the entire transcoded texture. + // For uncompressed texture formats, most input parameters are in pixels, not blocks. Blocks are 4x4 pixels. 
+ bool basis_validate_output_buffer_size(transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels, + uint32_t total_slice_blocks); + class basisu_transcoder; // This struct holds all state used during transcoding. For video, it needs to persist between image transcodes (it holds the previous frame). @@ -118,46 +167,161 @@ namespace basist uint8_t m_pred_bits; }; - std::vector m_block_endpoint_preds[2]; - + basisu::vector m_block_endpoint_preds[2]; + enum { cMaxPrevFrameLevels = 16 }; - std::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + + void clear() + { + for (uint32_t i = 0; i < 2; i++) + { + m_block_endpoint_preds[i].clear(); + + for (uint32_t j = 0; j < cMaxPrevFrameLevels; j++) + m_prev_frame_indices[i][j].clear(); + } + } }; - + // Low-level helper class that does the actual transcoding. 
- class basisu_lowlevel_transcoder + class basisu_lowlevel_etc1s_transcoder { friend class basisu_transcoder; - + public: - basisu_lowlevel_transcoder(const basist::etc1_global_selector_codebook *pGlobal_sel_codebook); + basisu_lowlevel_etc1s_transcoder(const basist::etc1_global_selector_codebook* pGlobal_sel_codebook); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_pGlobal_codebook = pGlobal_codebook; } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_pGlobal_codebook; } bool decode_palettes( - uint32_t num_endpoints, const uint8_t *pEndpoints_data, uint32_t endpoints_data_size, - uint32_t num_selectors, const uint8_t *pSelectors_data, uint32_t selectors_data_size); + uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, + uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size); + + bool decode_tables(const uint8_t* pTable_data, uint32_t table_data_size); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, 
bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, + header.m_tex_type == cBASISTexTypeVideoFrames, (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0, slice_desc.m_level_index, + slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, pState, + astc_transcode_alpha, + pAlpha_blocks, + output_rows_in_pixels); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags = 0, + bool basis_file_has_alpha_slices = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0); + + void clear() + { + m_local_endpoints.clear(); + m_local_selectors.clear(); + m_endpoint_pred_model.clear(); + m_delta_endpoint_model.clear(); + m_selector_model.clear(); + m_selector_history_buf_rle_model.clear(); + m_selector_history_buf_size = 0; + } - bool decode_tables(const uint8_t *pTable_data, uint32_t table_data_size); + // Low-level methods + typedef basisu::vector endpoint_vec; + const endpoint_vec& get_endpoints() const { return m_local_endpoints; } - bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const 
basis_file_header &header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, - basisu_transcoder_state *pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); + typedef basisu::vector selector_vec; + const selector_vec& get_selectors() const { return m_local_selectors; } + + const etc1_global_selector_codebook* get_global_sel_codebook() const { return m_pGlobal_sel_codebook; } private: - typedef std::vector endpoint_vec; - endpoint_vec m_endpoints; + const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; - typedef std::vector selector_vec; - selector_vec m_selectors; + endpoint_vec m_local_endpoints; + selector_vec m_local_selectors; - const etc1_global_selector_codebook *m_pGlobal_sel_codebook; + const etc1_global_selector_codebook* m_pGlobal_sel_codebook; huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; uint32_t m_selector_history_buf_size; - + basisu_transcoder_state m_def_state; }; + enum basisu_decode_flags + { + // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. + cDecodeFlagsPVRTCDecodeToNextPow2 = 2, + + // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. + // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). + cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, + + // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). + // This flag is used internally when decoding to BC3. + cDecodeFlagsBC1ForbidThreeColorBlocks = 8, + + // The output buffer contains alpha endpoint/selector indices. 
+ // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. + cDecodeFlagsOutputHasAlphaIndices = 16, + + cDecodeFlagsHighQuality = 32 + }; + + class basisu_lowlevel_uastc_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, 
uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + struct basisu_slice_info { uint32_t m_orig_width; @@ -175,19 +339,19 @@ namespace basist uint32_t m_slice_index; // the slice index in the .basis file uint32_t m_image_index; // the source image index originally provided to the encoder uint32_t m_level_index; // the mipmap level within this image - + uint32_t m_unpacked_slice_crc16; - + bool m_alpha_flag; // true if the slice has alpha data bool m_iframe_flag; // true if the slice is an I-Frame }; - typedef std::vector basisu_slice_info_vec; + typedef basisu::vector basisu_slice_info_vec; struct basisu_image_info { uint32_t m_image_index; - uint32_t m_total_levels; + uint32_t m_total_levels; uint32_t m_orig_width; uint32_t m_orig_height; @@ -199,8 +363,8 @@ namespace basist uint32_t m_num_blocks_y; uint32_t m_total_blocks; - uint32_t m_first_slice_index; - + uint32_t m_first_slice_index; + bool m_alpha_flag; // true if the image has alpha data bool m_iframe_flag; // true if the image is an I-Frame }; @@ -220,8 +384,13 @@ namespace basist uint32_t m_num_blocks_y; uint32_t m_total_blocks; - uint32_t m_first_slice_index; - + uint32_t m_first_slice_index; + + uint32_t m_rgb_file_ofs; + uint32_t m_rgb_file_len; + uint32_t m_alpha_file_ofs; + uint32_t m_alpha_file_len; + bool m_alpha_flag; // true if the image has alpha data bool m_iframe_flag; // true if the image is an I-Frame }; @@ -232,13 +401,19 @@ namespace basist uint32_t m_total_header_size; uint32_t m_total_selectors; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t 
m_selector_codebook_ofs; uint32_t m_selector_codebook_size; uint32_t m_total_endpoints; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_endpoint_codebook_ofs; uint32_t m_endpoint_codebook_size; + uint32_t m_tables_ofs; uint32_t m_tables_size; - uint32_t m_slices_size; + + uint32_t m_slices_size; basis_texture_type m_tex_type; uint32_t m_us_per_frame; @@ -247,14 +422,16 @@ namespace basist basisu_slice_info_vec m_slice_info; uint32_t m_total_images; // total # of images - std::vector m_image_mipmap_levels; // the # of mipmap levels for each image + basisu::vector m_image_mipmap_levels; // the # of mipmap levels for each image uint32_t m_userdata0; uint32_t m_userdata1; - - bool m_etc1s; // always true for basis universal + + basis_tex_format m_tex_format; // ETC1S, UASTC, etc. + bool m_y_flipped; // true if the image was Y flipped - bool m_has_alpha_slices; // true if the texture has alpha slices (even slices RGB, odd slices alpha) + bool m_etc1s; // true if the file is ETC1S + bool m_has_alpha_slices; // true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha) }; // High-level transcoder class which accepts .basis file data and allows the caller to query information about the file and transcode image levels to various texture formats. @@ -265,81 +442,67 @@ namespace basist basisu_transcoder& operator= (const basisu_transcoder&); public: - basisu_transcoder(const etc1_global_selector_codebook *pGlobal_sel_codebook); + basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook); // Validates the .basis file. This computes a crc16 over the entire file, so it's slow. - bool validate_file_checksums(const void *pData, uint32_t data_size, bool full_validation) const; + bool validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const; // Quick header validation - no crc16 checks. 
- bool validate_header(const void *pData, uint32_t data_size) const; + bool validate_header(const void* pData, uint32_t data_size) const; + + basis_texture_type get_texture_type(const void* pData, uint32_t data_size) const; + bool get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const; - basis_texture_type get_texture_type(const void *pData, uint32_t data_size) const; - bool get_userdata(const void *pData, uint32_t data_size, uint32_t &userdata0, uint32_t &userdata1) const; - // Returns the total number of images in the basis file (always 1 or more). // Note that the number of mipmap levels for each image may differ, and that images may have different resolutions. - uint32_t get_total_images(const void *pData, uint32_t data_size) const; + uint32_t get_total_images(const void* pData, uint32_t data_size) const; + + basis_tex_format get_tex_format(const void* pData, uint32_t data_size) const; // Returns the number of mipmap levels in an image. - uint32_t get_total_image_levels(const void *pData, uint32_t data_size, uint32_t image_index) const; - + uint32_t get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const; + // Returns basic information about an image. Note that orig_width/orig_height may not be a multiple of 4. - bool get_image_level_desc(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t &orig_width, uint32_t &orig_height, uint32_t &total_blocks) const; + bool get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const; // Returns information about the specified image. 
- bool get_image_info(const void *pData, uint32_t data_size, basisu_image_info &image_info, uint32_t image_index) const; + bool get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const; // Returns information about the specified image's mipmap level. - bool get_image_level_info(const void *pData, uint32_t data_size, basisu_image_level_info &level_info, uint32_t image_index, uint32_t level_index) const; - + bool get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& level_info, uint32_t image_index, uint32_t level_index) const; + // Get a description of the basis file and low-level information about each slice. - bool get_file_info(const void *pData, uint32_t data_size, basisu_file_info &file_info) const; - + bool get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const; + // start_transcoding() must be called before calling transcode_slice() or transcode_image_level(). - // This decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). - bool start_transcoding(const void *pData, uint32_t data_size) const; - + // For ETC1S files, this call decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). + bool start_transcoding(const void* pData, uint32_t data_size); + + bool stop_transcoding(); + // Returns true if start_transcoding() has been called. - bool get_ready_to_transcode() const { return m_lowlevel_decoder.m_endpoints.size() > 0; } + bool get_ready_to_transcode() const { return m_ready_to_transcode; } - enum - { - // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. 
- cDecodeFlagsPVRTCDecodeToNextPow2 = 2, - - // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. - // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). - cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, - - // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). - // This flag is used internally when decoding to BC3. - cDecodeFlagsBC1ForbidThreeColorBlocks = 8, - - // The output buffer contains alpha endpoint/selector indices. - // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. - cDecodeFlagsOutputHasAlphaIndices = 16 - }; - // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). - // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). 
+ // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. bool transcode_image_level( - const void *pData, uint32_t data_size, - uint32_t image_index, uint32_t level_index, - void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const void* pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, transcoder_texture_format fmt, - uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state *pState = nullptr, uint32_t output_rows_in_pixels = 0) const; + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0) const; // Finds the basis slice corresponding to the specified image/level/alpha params, or -1 if the slice can't be found. - int find_slice(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const; + int find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const; // transcode_slice() decodes a single slice from the .basis file. It's a low-level API - most likely you want to use transcode_image_level(). // This is a low-level API, and will be needed to be called multiple times to decode some texture formats (like BC3, BC5, or ETC2). 
@@ -350,21 +513,39 @@ namespace basist // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. - bool transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, - void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, - block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state * pState = nullptr, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0) const; + bool transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, void* pAlpha_blocks = nullptr, + uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1) const; + + static void write_opaque_alpha_blocks( + uint32_t num_blocks_x, uint32_t num_blocks_y, + void* pOutput_blocks, block_format fmt, + uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_lowlevel_etc1s_decoder.set_global_codebooks(pGlobal_codebook); } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_lowlevel_etc1s_decoder.get_global_codebooks(); } + + const basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() const { return m_lowlevel_etc1s_decoder; } + basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() { return m_lowlevel_etc1s_decoder; } + + const 
basisu_lowlevel_uastc_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_decoder; } + basisu_lowlevel_uastc_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_decoder; } private: - mutable basisu_lowlevel_transcoder m_lowlevel_decoder; + mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder; + mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder; + + bool m_ready_to_transcode; int find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const; - + bool validate_header_quick(const void* pData, uint32_t data_size) const; }; - // basisu_transcoder_init() must be called before a .basis file can be transcoded. + // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. void basisu_transcoder_init(); - + enum debug_flags_t { cDebugFlagVisCRs = 1, @@ -374,4 +555,387 @@ namespace basist uint32_t get_debug_flags(); void set_debug_flags(uint32_t f); + // ------------------------------------------------------------------------------------------------------ + // Optional .KTX2 file format support + // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
+ // ------------------------------------------------------------------------------------------------------ +#if BASISD_SUPPORT_KTX2 +#pragma pack(push) +#pragma pack(1) + struct ktx2_header + { + uint8_t m_identifier[12]; + basisu::packed_uint<4> m_vk_format; + basisu::packed_uint<4> m_type_size; + basisu::packed_uint<4> m_pixel_width; + basisu::packed_uint<4> m_pixel_height; + basisu::packed_uint<4> m_pixel_depth; + basisu::packed_uint<4> m_layer_count; + basisu::packed_uint<4> m_face_count; + basisu::packed_uint<4> m_level_count; + basisu::packed_uint<4> m_supercompression_scheme; + basisu::packed_uint<4> m_dfd_byte_offset; + basisu::packed_uint<4> m_dfd_byte_length; + basisu::packed_uint<4> m_kvd_byte_offset; + basisu::packed_uint<4> m_kvd_byte_length; + basisu::packed_uint<8> m_sgd_byte_offset; + basisu::packed_uint<8> m_sgd_byte_length; + }; + + struct ktx2_level_index + { + basisu::packed_uint<8> m_byte_offset; + basisu::packed_uint<8> m_byte_length; + basisu::packed_uint<8> m_uncompressed_byte_length; + }; + + struct ktx2_etc1s_global_data_header + { + basisu::packed_uint<2> m_endpoint_count; + basisu::packed_uint<2> m_selector_count; + basisu::packed_uint<4> m_endpoints_byte_length; + basisu::packed_uint<4> m_selectors_byte_length; + basisu::packed_uint<4> m_tables_byte_length; + basisu::packed_uint<4> m_extended_byte_length; + }; + + struct ktx2_etc1s_image_desc + { + basisu::packed_uint<4> m_image_flags; + basisu::packed_uint<4> m_rgb_slice_byte_offset; + basisu::packed_uint<4> m_rgb_slice_byte_length; + basisu::packed_uint<4> m_alpha_slice_byte_offset; + basisu::packed_uint<4> m_alpha_slice_byte_length; + }; + + struct ktx2_animdata + { + basisu::packed_uint<4> m_duration; + basisu::packed_uint<4> m_timescale; + basisu::packed_uint<4> m_loopcount; + }; +#pragma pack(pop) + + const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0; + const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166; + const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; + const uint32_t KTX2_IMAGE_IS_P_FRAME = 
2; + const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; + const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased + + // The KTX2 transfer functions supported by KTX2 + const uint32_t KTX2_KHR_DF_TRANSFER_LINEAR = 1; + const uint32_t KTX2_KHR_DF_TRANSFER_SRGB = 2; + + enum ktx2_supercompression + { + KTX2_SS_NONE = 0, + KTX2_SS_BASISLZ = 1, + KTX2_SS_ZSTANDARD = 2 + }; + + extern const uint8_t g_ktx2_file_identifier[12]; + + enum ktx2_df_channel_id + { + KTX2_DF_CHANNEL_ETC1S_RGB = 0U, + KTX2_DF_CHANNEL_ETC1S_RRR = 3U, + KTX2_DF_CHANNEL_ETC1S_GGG = 4U, + KTX2_DF_CHANNEL_ETC1S_AAA = 15U, + + KTX2_DF_CHANNEL_UASTC_DATA = 0U, + KTX2_DF_CHANNEL_UASTC_RGB = 0U, + KTX2_DF_CHANNEL_UASTC_RGBA = 3U, + KTX2_DF_CHANNEL_UASTC_RRR = 4U, + KTX2_DF_CHANNEL_UASTC_RRRG = 5U, + KTX2_DF_CHANNEL_UASTC_RG = 6U, + }; + + inline const char* ktx2_get_etc1s_df_channel_id_str(ktx2_df_channel_id id) + { + switch (id) + { + case KTX2_DF_CHANNEL_ETC1S_RGB: return "RGB"; + case KTX2_DF_CHANNEL_ETC1S_RRR: return "RRR"; + case KTX2_DF_CHANNEL_ETC1S_GGG: return "GGG"; + case KTX2_DF_CHANNEL_ETC1S_AAA: return "AAA"; + default: break; + } + return "?"; + } + + inline const char* ktx2_get_uastc_df_channel_id_str(ktx2_df_channel_id id) + { + switch (id) + { + case KTX2_DF_CHANNEL_UASTC_RGB: return "RGB"; + case KTX2_DF_CHANNEL_UASTC_RGBA: return "RGBA"; + case KTX2_DF_CHANNEL_UASTC_RRR: return "RRR"; + case KTX2_DF_CHANNEL_UASTC_RRRG: return "RRRG"; + case KTX2_DF_CHANNEL_UASTC_RG: return "RG"; + default: break; + } + return "?"; + } + + enum ktx2_df_color_primaries + { + KTX2_DF_PRIMARIES_UNSPECIFIED = 0, + KTX2_DF_PRIMARIES_BT709 = 1, + KTX2_DF_PRIMARIES_SRGB = 1, + KTX2_DF_PRIMARIES_BT601_EBU = 2, + KTX2_DF_PRIMARIES_BT601_SMPTE = 3, + KTX2_DF_PRIMARIES_BT2020 = 4, + KTX2_DF_PRIMARIES_CIEXYZ = 5, + KTX2_DF_PRIMARIES_ACES = 6, + KTX2_DF_PRIMARIES_ACESCC = 7, + KTX2_DF_PRIMARIES_NTSC1953 = 8, + KTX2_DF_PRIMARIES_PAL525 = 9, + 
KTX2_DF_PRIMARIES_DISPLAYP3 = 10, + KTX2_DF_PRIMARIES_ADOBERGB = 11 + }; + + inline const char* ktx2_get_df_color_primaries_str(ktx2_df_color_primaries p) + { + switch (p) + { + case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; + case KTX2_DF_PRIMARIES_BT709: return "BT709"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; + case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; + case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; + case KTX2_DF_PRIMARIES_ACES: return "ACES"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; + case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; + case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; + case KTX2_DF_PRIMARIES_ADOBERGB: return "ADOBERGB"; + default: break; + } + return "?"; + } + + // Information about a single 2D texture "image" in a KTX2 file. + struct ktx2_image_level_info + { + // The mipmap level index (0=largest), texture array layer index, and cubemap face index of the image. + uint32_t m_level_index; + uint32_t m_layer_index; + uint32_t m_face_index; + + // The image's actual (or the original source image's) width/height in pixels, which may not be divisible by 4 pixels. + uint32_t m_orig_width; + uint32_t m_orig_height; + + // The image's physical width/height, which will always be divisible by 4 pixels. + uint32_t m_width; + uint32_t m_height; + + // The texture's dimensions in 4x4 texel blocks. + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + + // The total number of blocks + uint32_t m_total_blocks; + + // true if the image has alpha data + bool m_alpha_flag; + + // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. + bool m_iframe_flag; + }; + + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) 
+ struct ktx2_transcoder_state + { + basist::basisu_transcoder_state m_transcoder_state; + basisu::uint8_vec m_level_uncomp_data; + int m_uncomp_data_level_index; + + void clear() + { + m_transcoder_state.clear(); + m_level_uncomp_data.clear(); + m_uncomp_data_level_index = -1; + } + }; + + // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. + // It does not support 1D or 3D textures. + // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. + // It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. + // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). + class ktx2_transcoder + { + public: + ktx2_transcoder(basist::etc1_global_selector_codebook* pGlobal_sel_codebook); + + // Frees all allocations, resets object. + void clear(); + + // init() parses the KTX2 header, level index array, DFD, and key values, but nothing else. + // Importantly, it does not parse or decompress the ETC1S global supercompressed data, so some things (like which frames are I/P-Frames) won't be available until start_transcoding() is called. + // This method holds a pointer to the file data until clear() is called. + bool init(const void* pData, uint32_t data_size); + + // Returns the data/size passed to init(). + const uint8_t* get_data() const { return m_pData; } + uint32_t get_data_size() const { return m_data_size; } + + // Returns the KTX2 header. Valid after init(). + const ktx2_header& get_header() const { return m_header; } + + // Returns the KTX2 level index array. There will be one entry for each mipmap level. Valid after init(). + const basisu::vector& get_level_index() const { return m_levels; } + + // Returns the texture's width in texels. 
Always non-zero, might not be divisible by 4. Valid after init(). + uint32_t get_width() const { return m_header.m_pixel_width; } + + // Returns the texture's height in texels. Always non-zero, might not be divisible by 4. Valid after init(). + uint32_t get_height() const { return m_header.m_pixel_height; } + + // Returns the texture's number of mipmap levels. Always returns 1 or higher. Valid after init(). + uint32_t get_levels() const { return m_header.m_level_count; } + + // Returns the number of faces. Returns 1 for 2D textures and or 6 for cubemaps. Valid after init(). + uint32_t get_faces() const { return m_header.m_face_count; } + + // Returns 0 or the number of layers in the texture array or texture video. Valid after init(). + uint32_t get_layers() const { return m_header.m_layer_count; } + + // Returns cETC1S or cUASTC4x4. Valid after init(). + basist::basis_tex_format get_format() const { return m_format; } + + bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; } + + bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; } + + // Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init(). + uint32_t get_has_alpha() const { return m_has_alpha; } + + // Returns the entire Data Format Descriptor (DFD) from the KTX2 file. Valid after init(). + // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#_the_khronos_data_format_descriptor_overview + const basisu::uint8_vec& get_dfd() const { return m_dfd; } + + // Some basic DFD accessors. Valid after init(). + uint32_t get_dfd_color_model() const { return m_dfd_color_model; } + + // Returns the DFD color primary. + // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. 
+ ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } + + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. + uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } + + uint32_t get_dfd_flags() const { return m_dfd_flags; } + + // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). + uint32_t get_dfd_total_samples() const { return m_dfd_samples; } + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + // It's up to the caller to decide what to do if the value isn't in the enum. + ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } + ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } + + // Key value field data. + struct key_value + { + // The key field is UTF8 and always zero terminated. + basisu::uint8_vec m_key; + + // The value may be empty. It consists of raw bytes which may or may not be zero terminated. + basisu::uint8_vec m_value; + + bool operator< (const key_value& rhs) const { return strcmp((const char*)m_key.data(), (const char *)rhs.m_key.data()) < 0; } + }; + typedef basisu::vector key_value_vec; + + // Returns the array of key-value entries. This may be empty. Valid after init(). + // The order of key values fields in this array exactly matches the order they were stored in the file. The keys are supposed to be sorted by their Unicode code points. + const key_value_vec& get_key_values() const { return m_key_values; } + + const basisu::uint8_vec *find_key(const std::string& key_name) const; + + // Low-level ETC1S specific accessors + + // Returns the ETC1S global supercompression data header, which is only valid after start_transcoding() is called. 
+ const ktx2_etc1s_global_data_header& get_etc1s_header() const { return m_etc1s_header; } + + // Returns the array of ETC1S image descriptors, which is only valid after get_etc1s_image_descs() is called. + const basisu::vector& get_etc1s_image_descs() const { return m_etc1s_image_descs; } + + // Must have called startTranscoding() first + uint32_t get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; + + // is_video() is only valid after start_transcoding() is called. + // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. + bool is_video() const { return m_is_video; } + + // start_transcoding() MUST be called before calling transcode_image(). + // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. + bool start_transcoding(); + + // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. + // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. + bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; + + // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. + // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. 
+ // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. + // By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. + bool transcode_image_level( + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + basist::transcoder_texture_format fmt, + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, + ktx2_transcoder_state *pState = nullptr); + + private: + const uint8_t* m_pData; + uint32_t m_data_size; + + ktx2_header m_header; + basisu::vector m_levels; + basisu::uint8_vec m_dfd; + key_value_vec m_key_values; + + ktx2_etc1s_global_data_header m_etc1s_header; + basisu::vector m_etc1s_image_descs; + + basist::basis_tex_format m_format; + + uint32_t m_dfd_color_model; + ktx2_df_color_primaries m_dfd_color_prims; + uint32_t m_dfd_transfer_func; + uint32_t m_dfd_flags; + uint32_t m_dfd_samples; + ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; + + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; + basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder; + + ktx2_transcoder_state m_def_transcoder_state; + + bool m_has_alpha; + bool m_is_video; + + bool decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data); + bool decompress_etc1s_global_data(); + bool read_key_values(); + }; + +#endif // BASISD_SUPPORT_KTX2 + + // Returns true if the transcoder was compiled with KTX2 support. + bool basisu_transcoder_supports_ktx2(); + + // Returns true if the transcoder was compiled with Zstandard support. 
+ bool basisu_transcoder_supports_ktx2_zstd(); + } // namespace basisu + diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_internal.h b/src/deps/basis-universal/transcoder/basisu_transcoder_internal.h index a9c6823d92..2422d788a9 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder_internal.h +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_internal.h @@ -1,5 +1,5 @@ // basisu_transcoder_internal.h - Universal texture format transcoder library. -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // @@ -20,8 +20,8 @@ #pragma warning (disable: 4127) // conditional expression is constant #endif -#define BASISD_LIB_VERSION 107 -#define BASISD_VERSION_STRING "01.11" +#define BASISD_LIB_VERSION 115 +#define BASISD_VERSION_STRING "01.15" #ifdef _DEBUG #define BASISD_BUILD_DEBUG @@ -45,38 +45,44 @@ namespace basist enum class block_format { cETC1, // ETC1S RGB + cETC2_RGBA, // full ETC2 EAC RGBA8 block cBC1, // DXT1 RGB + cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) + cBC5, // two BC4 blocks cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA - cBC7_M6_OPAQUE_ONLY, // RGB BC7 mode 6 + cBC7, // Full BC7 block, any mode cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. 
If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size + + cPVRTC2_4_RGB, + cPVRTC2_4_RGBA, + + cETC2_EAC_R11, + cETC2_EAC_RG11, cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, - - cPVRTC2_4_RGB, - cPVRTC2_4_RGBA, - - cETC2_EAC_R11, - + cRGBA4444, + cTotalBlockFormats }; @@ -116,7 +122,7 @@ namespace basist basisu::clear_vector(m_tree); } - bool init(uint32_t total_syms, const uint8_t *pCode_sizes) + bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits) { if (!total_syms) { @@ -127,8 +133,10 @@ namespace basist m_code_sizes.resize(total_syms); memcpy(&m_code_sizes[0], pCode_sizes, total_syms); + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; + m_lookup.resize(0); - m_lookup.resize(basisu::cHuffmanFastLookupSize); + m_lookup.resize(huffman_fast_lookup_size); m_tree.resize(0); m_tree.resize(total_syms * 2); @@ -166,10 +174,10 @@ namespace basist for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); - if (code_size <= basisu::cHuffmanFastLookupBits) + if (code_size <= fast_lookup_bits) { uint32_t k = (code_size << 16) | sym_index; - while (rev_code < basisu::cHuffmanFastLookupSize) + while (rev_code < huffman_fast_lookup_size) { if (m_lookup[rev_code] != 0) { @@ -184,9 +192,9 @@ namespace basist } int tree_cur; - if (0 == (tree_cur = m_lookup[rev_code & (basisu::cHuffmanFastLookupSize - 1)])) + if (0 == (tree_cur 
= m_lookup[rev_code & (huffman_fast_lookup_size - 1)])) { - const uint32_t idx = rev_code & (basisu::cHuffmanFastLookupSize - 1); + const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1); if (m_lookup[idx] != 0) { // Supplied codesizes can't create a valid prefix code. @@ -204,9 +212,9 @@ namespace basist return false; } - rev_code >>= (basisu::cHuffmanFastLookupBits - 1); + rev_code >>= (fast_lookup_bits - 1); - for (int j = code_size; j > (basisu::cHuffmanFastLookupBits + 1); j--) + for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--) { tree_cur -= ((rev_code >>= 1) & 1); @@ -254,6 +262,8 @@ namespace basist } const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } + const basisu::int_vec get_lookup() const { return m_lookup; } + const basisu::int16_vec get_tree() const { return m_tree; } bool is_valid() const { return m_code_sizes.size() > 0; } @@ -430,9 +440,11 @@ namespace basist return v; } - inline uint32_t decode_huffman(const huffman_decoding_table &ct) + inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits) { assert(ct.m_code_sizes.size()); + + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; while (m_bit_buf_size < 16) { @@ -448,14 +460,14 @@ namespace basist int code_len; int sym; - if ((sym = ct.m_lookup[m_bit_buf & (basisu::cHuffmanFastLookupSize - 1)]) >= 0) + if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0) { code_len = sym >> 16; sym &= 0xFFFF; } else { - code_len = basisu::cHuffmanFastLookupBits; + code_len = fast_lookup_bits; do { sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 @@ -635,6 +647,11 @@ namespace basist return (uint8_t)((i & 0xFFFFFF00U) ? 
(~(i >> 31)) : i); } + enum eNoClamp + { + cNoClamp = 0 + }; + struct color32 { union @@ -655,21 +672,33 @@ namespace basist color32() { } color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } + void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } + void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } bool operator== (const color32&rhs) const { return m == rhs.m; } + + static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } }; struct endpoint { color32 m_color5; uint8_t m_inten5; + bool operator== (const endpoint& rhs) const + { + return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5); + } + bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } }; struct selector @@ -682,6 +711,17 @@ namespace basist uint8_t m_lo_selector, m_hi_selector; uint8_t m_num_unique_selectors; + bool operator== (const 
selector& rhs) const + { + return (m_selectors[0] == rhs.m_selectors[0]) && + (m_selectors[1] == rhs.m_selectors[1]) && + (m_selectors[2] == rhs.m_selectors[2]) && + (m_selectors[3] == rhs.m_selectors[3]); + } + bool operator!= (const selector& rhs) const + { + return !(*this == rhs); + } void init_flags() { diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc.inc index 7f38f4a863..cd634c0df5 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc.inc +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc.inc @@ -478,4 +478,4 @@ {31,1,10801},{47,1,12162},{14,1,6117},{14,1,6117},{8,1,50},{20,1,7322},{0,1,1241},{21,1,914},{21,1,914},{21,1,914},{7,1,274},{35,5,1513},{9,1,585},{9,1,585},{26,1,0},{27,1,1513},{26,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{47,0,9250},{47,0,9250},{47,0,9250},{47,0,9250},{12,1,3690}, {12,1,3690},{12,1,3690},{8,1,50},{0,1,1241},{0,1,1241},{45,1,65535},{14,1,33274},{42,1,19608},{42,1,13375},{47,1,62627},{42,1,22211},{10,1,6045},{24,1,138},{36,1,39015},{0,1,1732},{35,1,1048},{5,1,766},{5,1,666},{37,1,212},{3,3,1473},{7,1,675},{23,1,410},{14,1,1},{3,3,1473},{14,1,1},{13,1,14121},{13,1,14121},{13,1,14121},{45,1,10571},{45,1,11434},{30,1,6081},{30,1,6081}, {40,1,137},{36,1,6926},{2,1,1445},{5,1,666},{5,1,666},{5,1,666},{37,1,212},{35,3,1105},{23,1,410},{23,1,410},{14,1,1},{25,1,1105},{14,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{15,0,9256},{15,0,9256},{15,0,9256},{15,0,9256},{14,1,3985},{14,1,3985},{14,1,3985},{40,1,137},{2,1,1445}, -{2,1,1445}, \ No newline at end of file +{2,1,1445}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc_0_255.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc_0_255.inc index 5e7a75396d..da4e7fee98 100644 --- 
a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc_0_255.inc +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_astc_0_255.inc @@ -478,4 +478,4 @@ {137,255,10742},{135,255,12066},{107,255,6089},{107,255,6089},{67,255,45},{37,255,7233},{1,255,1184},{218,255,900},{218,255,900},{218,255,900},{204,255,272},{255,167,1513},{189,255,562},{189,255,562},{86,255,0},{253,213,1513},{86,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{132,0,9248},{132,0,9248},{132,0,9248},{132,0,9248},{98,255,3656}, {98,255,3656},{98,255,3656},{67,255,45},{1,255,1184},{1,255,1184},{138,255,65535},{107,255,33448},{95,255,19729},{89,255,13446},{135,255,62717},{95,255,22307},{79,255,6021},{73,255,105},{40,255,38959},{0,254,1627},{230,255,996},{224,255,756},{221,255,653},{213,255,194},{255,204,1473},{207,255,675},{198,255,405},{110,255,0},{255,230,1473},{110,255,0},{162,255,14060},{162,255,14060},{162,255,14060},{146,255,10545},{141,255,11378},{116,255,6077},{116,255,6077}, {76,255,137},{40,255,6873},{7,255,1412},{221,255,653},{221,255,653},{221,255,653},{213,255,194},{255,180,1105},{198,255,405},{198,255,405},{110,255,0},{255,218,1105},{110,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{140,0,9248},{140,0,9248},{140,0,9248},{140,0,9248},{107,255,3929},{107,255,3929},{107,255,3929},{76,255,137},{7,255,1412}, -{7,255,1412}, \ No newline at end of file +{7,255,1412}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_55.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_55.inc new file mode 100644 index 0000000000..7acedd6a6f --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_55.inc @@ -0,0 +1,481 @@ 
+{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{0,1,52},{0,0,68},{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{1,0,35},{0,1,61},{0,1,1},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{1,0,18},{0,1,10},{0,1,1},{0,1,9},{1,0,18},{0,1,18},{0,1,9},{0,1,36},{0,1,18},{0,1,36},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,4,56},{0,3,38},{0,2,52}, +{0,2,36},{0,4,56},{0,3,35},{0,2,0},{0,2,52},{0,2,88},{0,1,78},{1,3,24},{1,2,14},{1,2,5},{1,2,13},{1,2,51},{0,3,35},{0,2,0},{0,2,52},{2,1,51},{0,2,52},{0,3,37},{0,3,37},{0,3,37},{0,2,36},{0,3,10},{0,2,0},{0,2,0},{0,1,5},{0,1,35},{0,1,14},{1,2,5},{1,2,5},{1,2,5},{1,1,8},{1,1,8}, +{0,2,0},{0,2,0},{0,1,5},{1,1,8},{0,1,5},{2,1,18},{0,3,2},{1,2,1},{0,2,0},{2,1,18},{1,2,18},{0,2,0},{0,2,36},{1,2,18},{0,2,36},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{1,5,56},{1,4,38},{1,3,52},{1,3,36},{1,5,56},{1,4,35},{1,3,0}, +{1,3,52},{0,4,72},{0,3,38},{2,4,24},{2,3,14},{2,3,5},{2,3,13},{2,3,51},{0,4,24},{1,3,0},{0,3,37},{3,2,51},{0,3,37},{1,4,37},{1,4,37},{1,4,37},{1,3,36},{1,4,10},{1,3,0},{1,3,0},{1,2,5},{0,3,11},{1,2,14},{2,3,5},{2,3,5},{2,3,5},{2,2,8},{2,2,8},{1,3,0},{1,3,0},{1,2,5},{2,2,8}, +{1,2,5},{3,2,18},{1,4,2},{2,3,1},{1,3,0},{3,2,18},{7,0,18},{1,3,0},{0,3,36},{7,0,18},{0,3,36},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,3,0},{1,3,0},{1,3,0},{1,2,1},{0,3,2},{0,3,2},{2,6,56},{2,5,38},{2,4,53},{2,4,37},{2,6,56},{2,5,35},{2,4,1},{2,4,66},{0,5,60},{1,4,52},{3,5,24}, +{3,4,14},{3,4,6},{3,4,14},{3,4,51},{1,5,24},{2,4,1},{1,4,51},{9,0,51},{1,4,51},{2,5,37},{2,5,37},{2,5,37},{2,4,36},{2,5,10},{2,4,0},{2,4,0},{2,3,5},{1,4,11},{2,3,14},{3,4,5},{3,4,5},{3,4,5},{3,3,8},{3,3,8},{2,4,0},{2,4,0},{2,3,5},{8,0,8},{2,3,5},{4,3,18},{2,5,2},{3,4,2}, 
+{2,4,1},{4,3,18},{8,1,18},{2,4,1},{0,4,50},{8,1,18},{0,4,50},{2,0,36},{2,0,36},{2,0,36},{2,0,36},{2,4,0},{2,4,0},{2,4,0},{2,3,1},{1,4,2},{1,4,2},{3,8,70},{3,6,58},{4,5,69},{3,5,51},{3,7,52},{3,6,25},{3,5,3},{3,5,46},{2,6,60},{2,5,36},{4,6,24},{4,5,14},{4,5,5},{4,5,13},{6,2,51}, +{3,6,24},{3,5,2},{2,5,36},{7,3,51},{2,5,36},{3,7,51},{3,7,51},{3,7,51},{3,5,51},{3,6,9},{3,5,3},{3,5,3},{3,4,9},{1,6,12},{3,4,12},{4,5,5},{4,5,5},{4,5,5},{4,4,8},{6,1,8},{3,5,2},{3,5,2},{3,4,8},{11,0,8},{3,4,8},{7,1,18},{3,6,8},{4,5,1},{3,5,1},{7,1,18},{11,1,18},{3,5,1}, +{0,5,36},{11,1,18},{0,5,36},{3,0,50},{3,0,50},{3,0,50},{3,0,50},{3,5,2},{3,5,2},{3,5,2},{3,4,5},{2,5,0},{2,5,0},{4,8,56},{4,7,38},{4,6,52},{4,6,36},{4,8,56},{4,7,35},{4,6,0},{4,6,52},{3,7,60},{3,6,36},{5,7,24},{5,6,14},{5,6,5},{5,6,13},{8,1,51},{3,7,35},{4,6,0},{3,6,36},{6,5,51}, +{3,6,36},{4,7,37},{4,7,37},{4,7,37},{4,6,36},{4,7,10},{4,6,0},{4,6,0},{4,5,5},{2,7,12},{4,5,14},{5,6,5},{5,6,5},{5,6,5},{5,5,8},{8,0,8},{4,6,0},{4,6,0},{4,5,5},{5,5,8},{4,5,5},{9,0,18},{4,7,2},{5,6,1},{4,6,0},{9,0,18},{15,0,18},{4,6,0},{0,6,36},{15,0,18},{0,6,36},{4,0,36}, +{4,0,36},{4,0,36},{4,0,36},{4,6,0},{4,6,0},{4,6,0},{4,5,1},{3,6,0},{3,6,0},{5,9,56},{5,8,38},{5,7,52},{5,7,36},{5,9,56},{5,8,35},{5,7,0},{5,7,52},{3,8,63},{4,7,38},{6,8,24},{6,7,14},{6,7,5},{6,7,13},{9,2,51},{4,8,24},{5,7,0},{4,7,37},{17,0,51},{4,7,37},{5,8,37},{5,8,37},{5,8,37}, +{5,7,36},{5,8,10},{5,7,0},{5,7,0},{5,6,5},{4,7,11},{5,6,14},{6,7,5},{6,7,5},{6,7,5},{6,6,8},{9,1,8},{5,7,0},{5,7,0},{5,6,5},{16,0,8},{5,6,5},{10,1,18},{5,8,2},{6,7,1},{5,7,0},{10,1,18},{16,1,18},{5,7,0},{0,7,36},{16,1,18},{0,7,36},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,7,0}, +{5,7,0},{5,7,0},{5,6,1},{4,7,2},{4,7,2},{6,10,56},{6,9,38},{6,8,53},{6,8,37},{6,10,56},{6,9,35},{6,8,1},{6,8,66},{4,9,60},{5,8,52},{7,9,24},{7,8,14},{7,8,6},{7,8,14},{10,3,51},{5,9,24},{6,8,1},{5,8,51},{18,1,51},{5,8,51},{6,9,37},{6,9,37},{6,9,37},{6,8,36},{6,9,10},{6,8,0},{6,8,0}, 
+{6,7,5},{5,8,11},{6,7,14},{7,8,5},{7,8,5},{7,8,5},{7,7,8},{10,2,8},{6,8,0},{6,8,0},{6,7,5},{17,1,8},{6,7,5},{11,2,18},{6,9,2},{7,8,2},{6,8,1},{11,2,18},{17,2,18},{6,8,1},{0,8,50},{17,2,18},{0,8,50},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,8,0},{6,8,0},{6,8,0},{6,7,1},{5,8,2}, +{5,8,2},{7,12,70},{7,10,58},{8,9,69},{7,9,51},{7,11,52},{7,10,25},{7,9,3},{7,9,46},{6,10,60},{6,9,36},{8,10,24},{8,9,14},{8,9,5},{8,9,13},{13,1,51},{7,10,24},{7,9,2},{6,9,36},{21,1,51},{6,9,36},{7,11,51},{7,11,51},{7,11,51},{7,9,51},{7,10,9},{7,9,3},{7,9,3},{7,8,9},{5,10,12},{7,8,12},{8,9,5}, +{8,9,5},{8,9,5},{8,8,8},{13,0,8},{7,9,2},{7,9,2},{7,8,8},{20,1,8},{7,8,8},{14,0,18},{7,10,8},{8,9,1},{7,9,1},{14,0,18},{20,2,18},{7,9,1},{0,9,36},{20,2,18},{0,9,36},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,9,2},{7,9,2},{7,9,2},{7,8,5},{6,9,0},{6,9,0},{8,12,56},{8,11,38},{8,10,52}, +{8,10,36},{8,12,56},{8,11,35},{8,10,0},{8,10,52},{7,11,60},{7,10,36},{9,11,24},{9,10,14},{9,10,5},{9,10,13},{14,2,51},{7,11,35},{8,10,0},{7,10,36},{25,0,51},{7,10,36},{8,11,37},{8,11,37},{8,11,37},{8,10,36},{8,11,10},{8,10,0},{8,10,0},{8,9,5},{6,11,12},{8,9,14},{9,10,5},{9,10,5},{9,10,5},{9,9,8},{14,1,8}, +{8,10,0},{8,10,0},{8,9,5},{24,0,8},{8,9,5},{15,1,18},{8,11,2},{9,10,1},{8,10,0},{15,1,18},{24,1,18},{8,10,0},{0,10,36},{24,1,18},{0,10,36},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,10,0},{8,10,0},{8,10,0},{8,9,1},{7,10,0},{7,10,0},{9,13,56},{9,12,38},{9,11,52},{9,11,36},{9,13,56},{9,12,35},{9,11,0}, +{9,11,52},{7,12,63},{8,11,38},{10,12,24},{10,11,14},{10,11,5},{10,11,13},{16,1,51},{8,12,24},{9,11,0},{8,11,37},{26,1,51},{8,11,37},{9,12,37},{9,12,37},{9,12,37},{9,11,36},{9,12,10},{9,11,0},{9,11,0},{9,10,5},{8,11,11},{9,10,14},{10,11,5},{10,11,5},{10,11,5},{10,10,8},{16,0,8},{9,11,0},{9,11,0},{9,10,5},{25,1,8}, 
+{9,10,5},{17,0,18},{9,12,2},{10,11,1},{9,11,0},{17,0,18},{25,2,18},{9,11,0},{0,11,36},{25,2,18},{0,11,36},{9,0,36},{9,0,36},{9,0,36},{9,0,36},{9,11,0},{9,11,0},{9,11,0},{9,10,1},{8,11,2},{8,11,2},{10,14,56},{10,13,38},{10,12,53},{10,12,37},{10,14,56},{10,13,35},{10,12,1},{10,12,66},{8,13,60},{9,12,52},{11,13,24}, +{11,12,14},{11,12,6},{11,12,14},{17,2,51},{9,13,24},{10,12,1},{9,12,51},{27,2,51},{9,12,51},{10,13,37},{10,13,37},{10,13,37},{10,12,36},{10,13,10},{10,12,0},{10,12,0},{10,11,5},{9,12,11},{10,11,14},{11,12,5},{11,12,5},{11,12,5},{11,11,8},{17,1,8},{10,12,0},{10,12,0},{10,11,5},{26,2,8},{10,11,5},{18,1,18},{10,13,2},{11,12,2}, +{10,12,1},{18,1,18},{31,0,18},{10,12,1},{0,12,50},{31,0,18},{0,12,50},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,12,0},{10,12,0},{10,12,0},{10,11,1},{9,12,2},{9,12,2},{11,16,70},{11,14,58},{12,13,69},{11,13,51},{11,15,52},{11,14,25},{11,13,3},{11,13,46},{10,14,60},{10,13,36},{12,14,24},{12,13,14},{12,13,5},{12,13,13},{17,5,51}, +{11,14,24},{11,13,2},{10,13,36},{30,2,51},{10,13,36},{11,15,51},{11,15,51},{11,15,51},{11,13,51},{11,14,9},{11,13,3},{11,13,3},{11,12,9},{9,14,12},{11,12,12},{12,13,5},{12,13,5},{12,13,5},{12,12,8},{17,4,8},{11,13,2},{11,13,2},{11,12,8},{29,2,8},{11,12,8},{18,4,18},{11,14,8},{12,13,1},{11,13,1},{18,4,18},{29,3,18},{11,13,1}, +{0,13,36},{29,3,18},{0,13,36},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,13,2},{11,13,2},{11,13,2},{11,12,5},{10,13,0},{10,13,0},{12,16,56},{12,15,38},{12,14,52},{12,14,36},{12,16,56},{12,15,35},{12,14,0},{12,14,52},{11,15,60},{11,14,36},{13,15,24},{13,14,14},{13,14,5},{13,14,13},{18,6,51},{11,15,35},{12,14,0},{11,14,36},{31,3,51}, +{11,14,36},{12,15,37},{12,15,37},{12,15,37},{12,14,36},{12,15,10},{12,14,0},{12,14,0},{12,13,5},{10,15,12},{12,13,14},{13,14,5},{13,14,5},{13,14,5},{13,13,8},{18,5,8},{12,14,0},{12,14,0},{12,13,5},{30,3,8},{12,13,5},{20,3,18},{12,15,2},{13,14,1},{12,14,0},{20,3,18},{28,5,18},{12,14,0},{0,14,36},{28,5,18},{0,14,36},{12,0,36}, 
+{12,0,36},{12,0,36},{12,0,36},{12,14,0},{12,14,0},{12,14,0},{12,13,1},{11,14,0},{11,14,0},{13,17,56},{13,16,38},{13,15,52},{13,15,36},{13,17,56},{13,16,35},{13,15,0},{13,15,52},{11,16,63},{12,15,38},{14,16,24},{14,15,14},{14,15,5},{14,15,13},{23,0,51},{12,16,24},{13,15,0},{12,15,37},{30,5,51},{12,15,37},{13,16,37},{13,16,37},{13,16,37}, +{13,15,36},{13,16,10},{13,15,0},{13,15,0},{13,14,5},{12,15,11},{13,14,14},{14,15,5},{14,15,5},{14,15,5},{14,14,8},{20,4,8},{13,15,0},{13,15,0},{13,14,5},{29,5,8},{13,14,5},{21,4,18},{13,16,2},{14,15,1},{13,15,0},{21,4,18},{29,6,18},{13,15,0},{0,15,36},{29,6,18},{0,15,36},{13,0,36},{13,0,36},{13,0,36},{13,0,36},{13,15,0}, +{13,15,0},{13,15,0},{13,14,1},{12,15,2},{12,15,2},{14,18,56},{14,17,38},{14,16,53},{14,16,37},{14,18,56},{14,17,35},{14,16,1},{14,16,66},{12,17,60},{13,16,52},{15,17,24},{15,16,14},{15,16,6},{15,16,14},{24,1,51},{13,17,24},{14,16,1},{13,16,51},{31,6,51},{13,16,51},{14,17,37},{14,17,37},{14,17,37},{14,16,36},{14,17,10},{14,16,0},{14,16,0}, +{14,15,5},{13,16,11},{14,15,14},{15,16,5},{15,16,5},{15,16,5},{15,15,8},{24,0,8},{14,16,0},{14,16,0},{14,15,5},{30,6,8},{14,15,5},{25,0,18},{14,17,2},{15,16,2},{14,16,1},{25,0,18},{30,7,18},{14,16,1},{0,16,50},{30,7,18},{0,16,50},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,16,0},{14,16,0},{14,16,0},{14,15,1},{13,16,2}, +{13,16,2},{15,20,70},{15,18,58},{16,17,69},{15,17,51},{15,19,52},{15,18,25},{15,17,3},{15,17,46},{14,18,60},{14,17,36},{16,18,24},{16,17,14},{16,17,5},{16,17,13},{21,9,51},{15,18,24},{15,17,2},{14,17,36},{29,9,51},{14,17,36},{15,19,51},{15,19,51},{15,19,51},{15,17,51},{15,18,9},{15,17,3},{15,17,3},{15,16,9},{13,18,12},{15,16,12},{16,17,5}, +{16,17,5},{16,17,5},{16,16,8},{24,3,8},{15,17,2},{15,17,2},{15,16,8},{28,9,8},{15,16,8},{25,3,18},{15,18,8},{16,17,1},{15,17,1},{25,3,18},{28,10,18},{15,17,1},{0,17,36},{28,10,18},{0,17,36},{15,0,50},{15,0,50},{15,0,50},{15,0,50},{15,17,2},{15,17,2},{15,17,2},{15,16,5},{14,17,0},{14,17,0},{16,20,56},{16,19,38},{16,18,52}, 
+{16,18,36},{16,20,56},{16,19,35},{16,18,0},{16,18,52},{15,19,60},{15,18,36},{17,19,24},{17,18,14},{17,18,5},{17,18,13},{22,10,51},{15,19,35},{16,18,0},{15,18,36},{30,10,51},{15,18,36},{16,19,37},{16,19,37},{16,19,37},{16,18,36},{16,19,10},{16,18,0},{16,18,0},{16,17,5},{14,19,12},{16,17,14},{17,18,5},{17,18,5},{17,18,5},{17,17,8},{22,9,8}, +{16,18,0},{16,18,0},{16,17,5},{29,10,8},{16,17,5},{24,7,18},{16,19,2},{17,18,1},{16,18,0},{24,7,18},{29,11,18},{16,18,0},{0,18,36},{29,11,18},{0,18,36},{16,0,36},{16,0,36},{16,0,36},{16,0,36},{16,18,0},{16,18,0},{16,18,0},{16,17,1},{15,18,0},{15,18,0},{17,21,56},{17,20,38},{17,19,52},{17,19,36},{17,21,56},{17,20,35},{17,19,0}, +{17,19,52},{15,20,63},{16,19,38},{18,20,24},{18,19,14},{18,19,5},{18,19,13},{27,4,51},{16,20,24},{17,19,0},{16,19,37},{31,11,51},{16,19,37},{17,20,37},{17,20,37},{17,20,37},{17,19,36},{17,20,10},{17,19,0},{17,19,0},{17,18,5},{16,19,11},{17,18,14},{18,19,5},{18,19,5},{18,19,5},{18,18,8},{24,8,8},{17,19,0},{17,19,0},{17,18,5},{30,11,8}, +{17,18,5},{28,3,18},{17,20,2},{18,19,1},{17,19,0},{28,3,18},{28,13,18},{17,19,0},{0,19,36},{28,13,18},{0,19,36},{17,0,36},{17,0,36},{17,0,36},{17,0,36},{17,19,0},{17,19,0},{17,19,0},{17,18,1},{16,19,2},{16,19,2},{18,22,56},{18,21,38},{18,20,53},{18,20,37},{18,22,56},{18,21,35},{18,20,1},{18,20,66},{16,21,60},{17,20,52},{19,21,24}, +{19,20,14},{19,20,6},{19,20,14},{31,0,51},{17,21,24},{18,20,1},{17,20,51},{30,13,51},{17,20,51},{18,21,37},{18,21,37},{18,21,37},{18,20,36},{18,21,10},{18,20,0},{18,20,0},{18,19,5},{17,20,11},{18,19,14},{19,20,5},{19,20,5},{19,20,5},{19,19,8},{28,4,8},{18,20,0},{18,20,0},{18,19,5},{29,13,8},{18,19,5},{29,4,18},{18,21,2},{19,20,2}, 
+{18,20,1},{29,4,18},{29,14,18},{18,20,1},{0,20,50},{29,14,18},{0,20,50},{18,0,36},{18,0,36},{18,0,36},{18,0,36},{18,20,0},{18,20,0},{18,20,0},{18,19,1},{17,20,2},{17,20,2},{19,24,70},{19,22,58},{20,21,69},{19,21,51},{19,23,52},{19,22,25},{19,21,3},{19,21,46},{18,22,60},{18,21,36},{20,22,24},{20,21,14},{20,21,5},{20,21,13},{31,3,51}, +{19,22,24},{19,21,2},{18,21,36},{23,19,51},{18,21,36},{19,23,51},{19,23,51},{19,23,51},{19,21,51},{19,22,9},{19,21,3},{19,21,3},{19,20,9},{17,22,12},{19,20,12},{20,21,5},{20,21,5},{20,21,5},{20,20,8},{31,2,8},{19,21,2},{19,21,2},{19,20,8},{27,16,8},{19,20,8},{29,7,18},{19,22,8},{20,21,1},{19,21,1},{29,7,18},{27,17,18},{19,21,1}, +{0,21,36},{27,17,18},{0,21,36},{19,0,50},{19,0,50},{19,0,50},{19,0,50},{19,21,2},{19,21,2},{19,21,2},{19,20,5},{18,21,0},{18,21,0},{20,24,56},{20,23,38},{20,22,52},{20,22,36},{20,24,56},{20,23,35},{20,22,0},{20,22,52},{19,23,60},{19,22,36},{21,23,24},{21,22,14},{21,22,5},{21,22,13},{26,14,51},{19,23,35},{20,22,0},{19,22,36},{22,21,51}, +{19,22,36},{20,23,37},{20,23,37},{20,23,37},{20,22,36},{20,23,10},{20,22,0},{20,22,0},{20,21,5},{18,23,12},{20,21,14},{21,22,5},{21,22,5},{21,22,5},{21,21,8},{26,13,8},{20,22,0},{20,22,0},{20,21,5},{21,21,8},{20,21,5},{28,11,18},{20,23,2},{21,22,1},{20,22,0},{28,11,18},{31,16,18},{20,22,0},{0,22,36},{31,16,18},{0,22,36},{20,0,36}, +{20,0,36},{20,0,36},{20,0,36},{20,22,0},{20,22,0},{20,22,0},{20,21,1},{19,22,0},{19,22,0},{21,25,56},{21,24,38},{21,23,52},{21,23,36},{21,25,56},{21,24,35},{21,23,0},{21,23,52},{19,24,63},{20,23,38},{22,24,24},{22,23,14},{22,23,5},{22,23,13},{31,8,51},{20,24,24},{21,23,0},{20,23,37},{28,19,51},{20,23,37},{21,24,37},{21,24,37},{21,24,37}, 
+{21,23,36},{21,24,10},{21,23,0},{21,23,0},{21,22,5},{20,23,11},{21,22,14},{22,23,5},{22,23,5},{22,23,5},{22,22,8},{28,12,8},{21,23,0},{21,23,0},{21,22,5},{22,22,8},{21,22,5},{29,12,18},{21,24,2},{22,23,1},{21,23,0},{29,12,18},{27,20,18},{21,23,0},{0,23,36},{27,20,18},{0,23,36},{21,0,36},{21,0,36},{21,0,36},{21,0,36},{21,23,0}, +{21,23,0},{21,23,0},{21,22,1},{20,23,2},{20,23,2},{22,26,56},{22,25,38},{22,24,53},{22,24,37},{22,26,56},{22,25,35},{22,24,1},{22,24,66},{20,25,60},{21,24,52},{23,25,24},{23,24,14},{23,24,6},{23,24,14},{29,14,51},{21,25,24},{22,24,1},{21,24,51},{29,20,51},{21,24,51},{22,25,37},{22,25,37},{22,25,37},{22,24,36},{22,25,10},{22,24,0},{22,24,0}, +{22,23,5},{21,24,11},{22,23,14},{23,24,5},{23,24,5},{23,24,5},{23,23,8},{29,13,8},{22,24,0},{22,24,0},{22,23,5},{28,20,8},{22,23,5},{30,13,18},{22,25,2},{23,24,2},{22,24,1},{30,13,18},{28,21,18},{22,24,1},{0,24,50},{28,21,18},{0,24,50},{22,0,36},{22,0,36},{22,0,36},{22,0,36},{22,24,0},{22,24,0},{22,24,0},{22,23,1},{21,24,2}, +{21,24,2},{23,28,70},{23,26,58},{24,25,69},{23,25,51},{23,27,52},{23,26,25},{23,25,3},{23,25,46},{22,26,60},{22,25,36},{24,26,24},{24,25,14},{24,25,5},{24,25,13},{29,17,51},{23,26,24},{23,25,2},{22,25,36},{27,23,51},{22,25,36},{23,27,51},{23,27,51},{23,27,51},{23,25,51},{23,26,9},{23,25,3},{23,25,3},{23,24,9},{21,26,12},{23,24,12},{24,25,5}, +{24,25,5},{24,25,5},{24,24,8},{29,16,8},{23,25,2},{23,25,2},{23,24,8},{31,20,8},{23,24,8},{30,16,18},{23,26,8},{24,25,1},{23,25,1},{30,16,18},{31,21,18},{23,25,1},{0,25,36},{31,21,18},{0,25,36},{23,0,50},{23,0,50},{23,0,50},{23,0,50},{23,25,2},{23,25,2},{23,25,2},{23,24,5},{22,25,0},{22,25,0},{24,28,56},{24,27,38},{24,26,52}, 
+{24,26,36},{24,28,56},{24,27,35},{24,26,0},{24,26,52},{23,27,60},{23,26,36},{25,27,24},{25,26,14},{25,26,5},{25,26,13},{30,18,51},{23,27,35},{24,26,0},{23,26,36},{26,25,51},{23,26,36},{24,27,37},{24,27,37},{24,27,37},{24,26,36},{24,27,10},{24,26,0},{24,26,0},{24,25,5},{22,27,12},{24,25,14},{25,26,5},{25,26,5},{25,26,5},{25,25,8},{30,17,8}, +{24,26,0},{24,26,0},{24,25,5},{25,25,8},{24,25,5},{31,17,18},{24,27,2},{25,26,1},{24,26,0},{31,17,18},{25,26,18},{24,26,0},{0,26,36},{25,26,18},{0,26,36},{24,0,36},{24,0,36},{24,0,36},{24,0,36},{24,26,0},{24,26,0},{24,26,0},{24,25,1},{23,26,0},{23,26,0},{25,29,56},{25,28,38},{25,27,52},{25,27,36},{25,29,56},{25,28,35},{25,27,0}, +{25,27,52},{23,28,63},{24,27,38},{26,28,24},{26,27,14},{26,27,5},{26,27,13},{31,19,51},{24,28,24},{25,27,0},{24,27,37},{27,26,51},{24,27,37},{25,28,37},{25,28,37},{25,28,37},{25,27,36},{25,28,10},{25,27,0},{25,27,0},{25,26,5},{24,27,11},{25,26,14},{26,27,5},{26,27,5},{26,27,5},{26,26,8},{31,18,8},{25,27,0},{25,27,0},{25,26,5},{26,26,8}, +{25,26,5},{30,21,18},{25,28,2},{26,27,1},{25,27,0},{30,21,18},{31,24,18},{25,27,0},{0,27,36},{31,24,18},{0,27,36},{25,0,36},{25,0,36},{25,0,36},{25,0,36},{25,27,0},{25,27,0},{25,27,0},{25,26,1},{24,27,2},{24,27,2},{26,30,56},{26,29,38},{26,28,53},{26,28,37},{26,30,56},{26,29,35},{26,28,1},{26,28,66},{24,29,60},{25,28,52},{27,29,24}, +{27,28,14},{27,28,6},{27,28,14},{30,23,51},{25,29,24},{26,28,1},{25,28,51},{28,27,51},{25,28,51},{26,29,37},{26,29,37},{26,29,37},{26,28,36},{26,29,10},{26,28,0},{26,28,0},{26,27,5},{25,28,11},{26,27,14},{27,28,5},{27,28,5},{27,28,5},{27,27,8},{30,22,8},{26,28,0},{26,28,0},{26,27,5},{27,27,8},{26,27,5},{31,22,18},{26,29,2},{27,28,2}, 
+{26,28,1},{31,22,18},{27,28,18},{26,28,1},{0,28,50},{27,28,18},{0,28,50},{26,0,36},{26,0,36},{26,0,36},{26,0,36},{26,28,0},{26,28,0},{26,28,0},{26,27,1},{25,28,2},{25,28,2},{27,31,76},{27,30,58},{28,29,69},{27,29,51},{27,31,52},{27,30,25},{27,29,3},{27,29,46},{26,30,60},{26,29,36},{28,30,24},{28,29,14},{28,29,5},{28,29,13},{30,26,51}, +{27,30,24},{27,29,2},{26,29,36},{31,27,51},{26,29,36},{27,31,51},{27,31,51},{27,31,51},{27,29,51},{27,30,9},{27,29,3},{27,29,3},{27,28,9},{25,30,12},{27,28,12},{28,29,5},{28,29,5},{28,29,5},{28,28,8},{30,25,8},{27,29,2},{27,29,2},{27,28,8},{30,27,8},{27,28,8},{31,25,18},{27,30,8},{28,29,1},{27,29,1},{31,25,18},{28,29,18},{27,29,1}, +{0,29,36},{28,29,18},{0,29,36},{27,0,50},{27,0,50},{27,0,50},{27,0,50},{27,29,2},{27,29,2},{27,29,2},{27,28,5},{26,29,0},{26,29,0},{28,31,86},{28,31,38},{28,30,52},{28,30,36},{28,31,59},{28,31,35},{28,30,0},{28,30,52},{27,31,60},{27,30,36},{29,31,24},{29,30,14},{29,30,5},{29,30,13},{31,27,51},{27,31,35},{28,30,0},{27,30,36},{30,29,51}, +{27,30,36},{28,31,37},{28,31,37},{28,31,37},{28,30,36},{28,31,10},{28,30,0},{28,30,0},{28,29,5},{26,31,12},{28,29,14},{29,30,5},{29,30,5},{29,30,5},{29,29,8},{31,26,8},{28,30,0},{28,30,0},{28,29,5},{29,29,8},{28,29,5},{30,29,18},{28,31,2},{29,30,1},{28,30,0},{30,29,18},{29,30,18},{28,30,0},{0,30,36},{29,30,18},{0,30,36},{28,0,36}, +{28,0,36},{28,0,36},{28,0,36},{28,30,0},{28,30,0},{28,30,0},{28,29,1},{27,30,0},{27,30,0},{30,31,94},{30,31,78},{29,31,52},{29,31,36},{30,31,115},{29,31,36},{29,31,0},{29,31,52},{29,31,88},{28,31,38},{30,31,30},{30,31,14},{30,31,5},{30,31,13},{30,31,51},{29,31,36},{29,31,0},{28,31,37},{31,30,51},{28,31,37},{29,31,52},{29,31,52},{29,31,52}, 
+{29,31,36},{29,31,16},{29,31,0},{29,31,0},{29,30,5},{28,31,11},{29,30,14},{30,31,5},{30,31,5},{30,31,5},{30,30,8},{30,30,8},{29,31,0},{29,31,0},{29,30,5},{30,30,8},{29,30,5},{31,30,18},{30,31,10},{30,31,1},{29,31,0},{31,30,18},{30,31,18},{29,31,0},{0,31,36},{30,31,18},{0,31,36},{29,0,36},{29,0,36},{29,0,36},{29,0,36},{29,31,0}, +{29,31,0},{29,31,0},{29,30,1},{28,31,2},{28,31,2},{31,31,68},{31,31,68},{30,31,61},{30,31,45},{30,31,59},{30,31,27},{30,31,18},{30,31,1},{30,31,28},{30,31,10},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{30,31,1},{30,31,61},{30,31,61},{30,31,61},{30,31,45},{30,31,34},{30,31,18},{30,31,18}, +{30,31,1},{30,31,19},{30,31,10},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{30,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{30,0,36},{30,0,36},{30,0,36},{30,0,36},{30,31,9},{30,31,9},{30,31,9},{30,31,1},{30,31,10}, +{30,31,10},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{0,2,158},{0,2,110},{0,2,62},{0,1,115},{0,1,178},{0,1,124},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{1,1,154},{0,2,110},{0,2,62},{0,1,115},{1,1,154},{0,1,115},{0,2,1},{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{0,0,25},{0,0,25},{0,2,1}, +{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{1,0,13},{0,0,25},{1,2,72},{0,3,20},{0,2,2},{0,2,26},{1,2,72},{2,1,72},{0,2,26},{0,1,90},{2,1,72},{0,1,90},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,6,83},{0,5,13},{0,3,26}, +{0,3,14},{0,5,248},{0,3,140},{0,3,41},{0,2,139},{0,2,319},{0,2,175},{0,6,83},{0,5,13},{0,3,26},{0,3,14},{1,3,243},{0,3,140},{0,3,41},{0,2,139},{3,1,243},{0,2,139},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{0,3,52},{0,2,18},{0,2,18},{0,1,29},{0,1,77},{0,1,38},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{1,1,50}, 
+{0,2,18},{0,2,18},{0,1,29},{1,1,50},{0,1,29},{2,3,72},{0,5,4},{1,3,2},{0,3,5},{2,3,72},{3,2,72},{0,3,5},{0,2,90},{3,2,72},{0,2,90},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{1,7,147},{1,6,77},{1,4,89},{1,4,77},{0,7,244},{0,5,96},{0,4,2}, +{0,3,106},{0,4,395},{0,3,187},{1,7,83},{1,6,13},{1,4,25},{1,4,13},{2,4,243},{0,5,96},{0,4,2},{0,3,106},{4,2,243},{0,3,106},{1,5,74},{1,5,74},{1,5,74},{1,4,77},{0,5,52},{0,4,2},{0,4,2},{0,3,25},{0,3,133},{0,2,62},{1,5,10},{1,5,10},{1,5,10},{1,4,13},{2,2,50},{0,4,2},{0,4,2},{0,3,25},{2,2,50}, +{0,3,25},{3,4,72},{1,6,4},{2,4,1},{0,4,1},{3,4,72},{9,0,72},{0,4,1},{0,3,90},{9,0,72},{0,3,90},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{2,8,164},{2,7,94},{2,5,106},{2,5,94},{1,8,245},{1,6,97},{1,5,3},{1,4,97},{0,6,364},{0,4,106},{2,8,83}, +{2,7,13},{2,5,25},{2,5,13},{6,0,243},{0,7,76},{1,5,2},{0,4,81},{10,0,243},{0,4,81},{2,6,91},{2,6,91},{2,6,91},{2,5,94},{1,6,53},{1,5,3},{1,5,3},{1,3,27},{0,4,107},{0,4,42},{2,6,10},{2,6,10},{2,6,10},{2,5,13},{3,3,50},{1,5,2},{1,5,2},{0,4,17},{8,0,50},{0,4,17},{7,0,72},{2,7,4},{3,5,1}, +{1,5,1},{7,0,72},{10,1,72},{1,5,1},{0,4,80},{10,1,72},{0,4,80},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{1,5,2},{1,5,2},{1,5,2},{1,3,2},{0,3,20},{0,3,20},{3,10,154},{3,8,81},{3,6,106},{3,6,82},{2,10,248},{2,7,99},{2,6,5},{2,5,99},{0,7,308},{0,5,100},{3,10,90},{3,8,17},{4,6,27},{3,6,18},{6,3,243}, +{1,8,73},{2,6,5},{1,5,90},{8,3,243},{1,5,90},{3,8,81},{3,8,81},{3,8,81},{3,6,81},{2,8,52},{2,6,4},{2,6,4},{2,5,18},{0,6,72},{0,5,19},{3,8,17},{3,8,17},{3,8,17},{3,6,17},{6,1,50},{2,6,4},{2,6,4},{1,5,9},{11,0,50},{1,5,9},{8,1,72},{3,8,1},{4,6,2},{3,6,2},{8,1,72},{6,5,72},{3,6,2}, 
+{0,5,90},{6,5,72},{0,5,90},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{2,6,0},{2,6,0},{2,6,0},{2,4,4},{0,6,8},{0,6,8},{4,10,164},{4,9,94},{4,7,107},{4,7,95},{3,11,248},{3,8,89},{3,7,5},{3,6,99},{0,9,253},{1,6,100},{4,10,83},{4,9,13},{4,7,26},{4,7,14},{8,2,243},{2,9,73},{3,7,5},{2,6,90},{12,2,243}, +{2,6,90},{4,8,91},{4,8,91},{4,8,91},{4,7,94},{3,9,52},{3,7,4},{3,7,4},{3,6,18},{0,8,50},{1,6,19},{4,8,10},{4,8,10},{4,8,10},{4,7,13},{8,0,50},{3,7,4},{3,7,4},{2,6,9},{5,5,50},{2,6,9},{9,2,72},{4,9,4},{5,7,2},{3,7,5},{9,2,72},{17,0,72},{3,7,5},{0,6,90},{17,0,72},{0,6,90},{4,0,90}, +{4,0,90},{4,0,90},{4,0,90},{3,7,0},{3,7,0},{3,7,0},{3,5,4},{1,7,8},{1,7,8},{5,11,164},{5,10,94},{5,8,106},{5,8,94},{4,11,245},{4,9,97},{4,8,3},{4,7,107},{0,11,249},{2,7,100},{5,11,83},{5,10,13},{5,8,25},{5,8,13},{9,3,243},{3,10,73},{4,8,2},{3,7,90},{18,0,243},{3,7,90},{5,9,91},{5,9,91},{5,9,91}, +{5,8,94},{4,9,53},{4,8,3},{4,8,3},{4,7,26},{1,9,50},{2,7,19},{5,9,10},{5,9,10},{5,9,10},{5,8,13},{9,1,50},{4,8,2},{4,8,2},{3,7,9},{16,0,50},{3,7,9},{10,3,72},{5,10,4},{6,8,1},{4,8,1},{10,3,72},{18,1,72},{4,8,1},{0,7,90},{18,1,72},{0,7,90},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,8,2}, +{4,8,2},{4,8,2},{4,6,2},{1,8,9},{1,8,9},{6,12,164},{6,11,94},{6,9,106},{6,9,94},{5,12,245},{5,10,97},{5,9,3},{5,8,97},{1,12,252},{3,8,85},{6,12,83},{6,11,13},{6,9,25},{6,9,13},{10,4,243},{4,11,76},{5,9,2},{4,8,81},{19,1,243},{4,8,81},{6,10,91},{6,10,91},{6,10,91},{6,9,94},{5,10,53},{5,9,3},{5,9,3}, +{5,7,27},{2,10,50},{3,8,21},{6,10,10},{6,10,10},{6,10,10},{6,9,13},{10,2,50},{5,9,2},{5,9,2},{4,8,17},{17,1,50},{4,8,17},{11,4,72},{6,11,4},{7,9,1},{5,9,1},{11,4,72},{19,2,72},{5,9,1},{0,8,80},{19,2,72},{0,8,80},{6,0,90},{6,0,90},{6,0,90},{6,0,90},{5,9,2},{5,9,2},{5,9,2},{5,7,2},{3,8,5}, 
+{3,8,5},{7,14,154},{7,12,81},{7,10,106},{7,10,82},{6,14,248},{6,11,99},{6,10,5},{6,9,99},{2,13,244},{4,9,100},{7,14,90},{7,12,17},{8,10,27},{7,10,18},{13,2,243},{5,12,73},{6,10,5},{5,9,90},{22,1,243},{5,9,90},{7,12,81},{7,12,81},{7,12,81},{7,10,81},{6,12,52},{6,10,4},{6,10,4},{6,9,18},{3,11,53},{4,9,19},{7,12,17}, +{7,12,17},{7,12,17},{7,10,17},{13,0,50},{6,10,4},{6,10,4},{5,9,9},{20,1,50},{5,9,9},{14,2,72},{7,12,1},{8,10,2},{7,10,2},{14,2,72},{25,0,72},{7,10,2},{0,9,90},{25,0,72},{0,9,90},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{6,10,0},{6,10,0},{6,10,0},{6,8,4},{4,10,8},{4,10,8},{8,14,164},{8,13,94},{8,11,107}, +{8,11,95},{7,15,248},{7,12,89},{7,11,5},{7,10,99},{3,14,244},{5,10,100},{8,14,83},{8,13,13},{8,11,26},{8,11,14},{14,3,243},{6,13,73},{7,11,5},{6,10,90},{26,0,243},{6,10,90},{8,12,91},{8,12,91},{8,12,91},{8,11,94},{7,13,52},{7,11,4},{7,11,4},{7,10,18},{4,12,50},{5,10,19},{8,12,10},{8,12,10},{8,12,10},{8,11,13},{14,1,50}, +{7,11,4},{7,11,4},{6,10,9},{24,0,50},{6,10,9},{16,1,72},{8,13,4},{9,11,2},{7,11,5},{16,1,72},{26,1,72},{7,11,5},{0,10,90},{26,1,72},{0,10,90},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{7,11,0},{7,11,0},{7,11,0},{7,9,4},{5,11,8},{5,11,8},{9,15,164},{9,14,94},{9,12,106},{9,12,94},{8,15,245},{8,13,97},{8,12,3}, +{8,11,107},{4,15,249},{6,11,100},{9,15,83},{9,14,13},{9,12,25},{9,12,13},{16,2,243},{7,14,73},{8,12,2},{7,11,90},{27,1,243},{7,11,90},{9,13,91},{9,13,91},{9,13,91},{9,12,94},{8,13,53},{8,12,3},{8,12,3},{8,11,26},{5,13,50},{6,11,19},{9,13,10},{9,13,10},{9,13,10},{9,12,13},{16,0,50},{8,12,2},{8,12,2},{7,11,9},{25,1,50}, +{7,11,9},{17,2,72},{9,14,4},{10,12,1},{8,12,1},{17,2,72},{27,2,72},{8,12,1},{0,11,90},{27,2,72},{0,11,90},{9,0,90},{9,0,90},{9,0,90},{9,0,90},{8,12,2},{8,12,2},{8,12,2},{8,10,2},{5,12,9},{5,12,9},{10,16,164},{10,15,94},{10,13,106},{10,13,94},{9,16,245},{9,14,97},{9,13,3},{9,12,97},{5,16,252},{7,12,85},{10,16,83}, 
+{10,15,13},{10,13,25},{10,13,13},{17,3,243},{8,15,76},{9,13,2},{8,12,81},{28,2,243},{8,12,81},{10,14,91},{10,14,91},{10,14,91},{10,13,94},{9,14,53},{9,13,3},{9,13,3},{9,11,27},{6,14,50},{7,12,21},{10,14,10},{10,14,10},{10,14,10},{10,13,13},{17,1,50},{9,13,2},{9,13,2},{8,12,17},{26,2,50},{8,12,17},{18,3,72},{10,15,4},{11,13,1}, +{9,13,1},{18,3,72},{28,3,72},{9,13,1},{0,12,80},{28,3,72},{0,12,80},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,13,2},{9,13,2},{9,13,2},{9,11,2},{7,12,5},{7,12,5},{11,18,154},{11,16,81},{11,14,106},{11,14,82},{10,18,248},{10,15,99},{10,14,5},{10,13,99},{6,17,244},{8,13,100},{11,18,90},{11,16,17},{12,14,27},{11,14,18},{17,6,243}, +{9,16,73},{10,14,5},{9,13,90},{31,2,243},{9,13,90},{11,16,81},{11,16,81},{11,16,81},{11,14,81},{10,16,52},{10,14,4},{10,14,4},{10,13,18},{7,15,53},{8,13,19},{11,16,17},{11,16,17},{11,16,17},{11,14,17},{17,4,50},{10,14,4},{10,14,4},{9,13,9},{29,2,50},{9,13,9},{18,6,72},{11,16,1},{12,14,2},{11,14,2},{18,6,72},{31,3,72},{11,14,2}, +{0,13,90},{31,3,72},{0,13,90},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{10,14,0},{10,14,0},{10,14,0},{10,12,4},{8,14,8},{8,14,8},{12,18,164},{12,17,94},{12,15,107},{12,15,95},{11,19,248},{11,16,89},{11,15,5},{11,14,99},{7,18,244},{9,14,100},{12,18,83},{12,17,13},{12,15,26},{12,15,14},{22,0,243},{10,17,73},{11,15,5},{10,14,90},{30,4,243}, +{10,14,90},{12,16,91},{12,16,91},{12,16,91},{12,15,94},{11,17,52},{11,15,4},{11,15,4},{11,14,18},{8,16,50},{9,14,19},{12,16,10},{12,16,10},{12,16,10},{12,15,13},{18,5,50},{11,15,4},{11,15,4},{10,14,9},{30,3,50},{10,14,9},{23,0,72},{12,17,4},{13,15,2},{11,15,5},{23,0,72},{30,5,72},{11,15,5},{0,14,90},{30,5,72},{0,14,90},{12,0,90}, 
+{12,0,90},{12,0,90},{12,0,90},{11,15,0},{11,15,0},{11,15,0},{11,13,4},{9,15,8},{9,15,8},{13,19,164},{13,18,94},{13,16,106},{13,16,94},{12,19,245},{12,17,97},{12,16,3},{12,15,107},{8,19,249},{10,15,100},{13,19,83},{13,18,13},{13,16,25},{13,16,13},{23,1,243},{11,18,73},{12,16,2},{11,15,90},{31,5,243},{11,15,90},{13,17,91},{13,17,91},{13,17,91}, +{13,16,94},{12,17,53},{12,16,3},{12,16,3},{12,15,26},{9,17,50},{10,15,19},{13,17,10},{13,17,10},{13,17,10},{13,16,13},{20,4,50},{12,16,2},{12,16,2},{11,15,9},{29,5,50},{11,15,9},{24,1,72},{13,18,4},{14,16,1},{12,16,1},{24,1,72},{31,6,72},{12,16,1},{0,15,90},{31,6,72},{0,15,90},{13,0,90},{13,0,90},{13,0,90},{13,0,90},{12,16,2}, +{12,16,2},{12,16,2},{12,14,2},{9,16,9},{9,16,9},{14,20,164},{14,19,94},{14,17,106},{14,17,94},{13,20,245},{13,18,97},{13,17,3},{13,16,97},{9,20,252},{11,16,85},{14,20,83},{14,19,13},{14,17,25},{14,17,13},{24,2,243},{12,19,76},{13,17,2},{12,16,81},{27,9,243},{12,16,81},{14,18,91},{14,18,91},{14,18,91},{14,17,94},{13,18,53},{13,17,3},{13,17,3}, +{13,15,27},{10,18,50},{11,16,21},{14,18,10},{14,18,10},{14,18,10},{14,17,13},{24,0,50},{13,17,2},{13,17,2},{12,16,17},{30,6,50},{12,16,17},{25,2,72},{14,19,4},{15,17,1},{13,17,1},{25,2,72},{27,10,72},{13,17,1},{0,16,80},{27,10,72},{0,16,80},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{13,17,2},{13,17,2},{13,17,2},{13,15,2},{11,16,5}, +{11,16,5},{15,22,154},{15,20,81},{15,18,106},{15,18,82},{14,22,248},{14,19,99},{14,18,5},{14,17,99},{10,21,244},{12,17,100},{15,22,90},{15,20,17},{16,18,27},{15,18,18},{27,0,243},{13,20,73},{14,18,5},{13,17,90},{30,9,243},{13,17,90},{15,20,81},{15,20,81},{15,20,81},{15,18,81},{14,20,52},{14,18,4},{14,18,4},{14,17,18},{11,19,53},{12,17,19},{15,20,17}, 
+{15,20,17},{15,20,17},{15,18,17},{24,3,50},{14,18,4},{14,18,4},{13,17,9},{28,9,50},{13,17,9},{22,10,72},{15,20,1},{16,18,2},{15,18,2},{22,10,72},{30,10,72},{15,18,2},{0,17,90},{30,10,72},{0,17,90},{15,0,80},{15,0,80},{15,0,80},{15,0,80},{14,18,0},{14,18,0},{14,18,0},{14,16,4},{12,18,8},{12,18,8},{16,22,164},{16,21,94},{16,19,107}, +{16,19,95},{15,23,248},{15,20,89},{15,19,5},{15,18,99},{11,22,244},{13,18,100},{16,22,83},{16,21,13},{16,19,26},{16,19,14},{26,4,243},{14,21,73},{15,19,5},{14,18,90},{31,10,243},{14,18,90},{16,20,91},{16,20,91},{16,20,91},{16,19,94},{15,21,52},{15,19,4},{15,19,4},{15,18,18},{12,20,50},{13,18,19},{16,20,10},{16,20,10},{16,20,10},{16,19,13},{22,9,50}, +{15,19,4},{15,19,4},{14,18,9},{29,10,50},{14,18,9},{27,4,72},{16,21,4},{17,19,2},{15,19,5},{27,4,72},{31,11,72},{15,19,5},{0,18,90},{31,11,72},{0,18,90},{16,0,90},{16,0,90},{16,0,90},{16,0,90},{15,19,0},{15,19,0},{15,19,0},{15,17,4},{13,19,8},{13,19,8},{17,23,164},{17,22,94},{17,20,106},{17,20,94},{16,23,245},{16,21,97},{16,20,3}, +{16,19,107},{12,23,249},{14,19,100},{17,23,83},{17,22,13},{17,20,25},{17,20,13},{30,0,243},{15,22,73},{16,20,2},{15,19,90},{30,12,243},{15,19,90},{17,21,91},{17,21,91},{17,21,91},{17,20,94},{16,21,53},{16,20,3},{16,20,3},{16,19,26},{13,21,50},{14,19,19},{17,21,10},{17,21,10},{17,21,10},{17,20,13},{24,8,50},{16,20,2},{16,20,2},{15,19,9},{30,11,50}, +{15,19,9},{31,0,72},{17,22,4},{18,20,1},{16,20,1},{31,0,72},{30,13,72},{16,20,1},{0,19,90},{30,13,72},{0,19,90},{17,0,90},{17,0,90},{17,0,90},{17,0,90},{16,20,2},{16,20,2},{16,20,2},{16,18,2},{13,20,9},{13,20,9},{18,24,164},{18,23,94},{18,21,106},{18,21,94},{17,24,245},{17,22,97},{17,21,3},{17,20,97},{13,24,252},{15,20,85},{18,24,83}, 
+{18,23,13},{18,21,25},{18,21,13},{31,1,243},{16,23,76},{17,21,2},{16,20,81},{31,13,243},{16,20,81},{18,22,91},{18,22,91},{18,22,91},{18,21,94},{17,22,53},{17,21,3},{17,21,3},{17,19,27},{14,22,50},{15,20,21},{18,22,10},{18,22,10},{18,22,10},{18,21,13},{28,4,50},{17,21,2},{17,21,2},{16,20,17},{29,13,50},{16,20,17},{29,6,72},{18,23,4},{19,21,1}, +{17,21,1},{29,6,72},{31,14,72},{17,21,1},{0,20,80},{31,14,72},{0,20,80},{18,0,90},{18,0,90},{18,0,90},{18,0,90},{17,21,2},{17,21,2},{17,21,2},{17,19,2},{15,20,5},{15,20,5},{19,26,154},{19,24,81},{19,22,106},{19,22,82},{18,26,248},{18,23,99},{18,22,5},{18,21,99},{14,25,244},{16,21,100},{19,26,90},{19,24,17},{20,22,27},{19,22,18},{31,4,243}, +{17,24,73},{18,22,5},{17,21,90},{24,19,243},{17,21,90},{19,24,81},{19,24,81},{19,24,81},{19,22,81},{18,24,52},{18,22,4},{18,22,4},{18,21,18},{15,23,53},{16,21,19},{19,24,17},{19,24,17},{19,24,17},{19,22,17},{31,2,50},{18,22,4},{18,22,4},{17,21,9},{27,16,50},{17,21,9},{26,14,72},{19,24,1},{20,22,2},{19,22,2},{26,14,72},{22,21,72},{19,22,2}, +{0,21,90},{22,21,72},{0,21,90},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{18,22,0},{18,22,0},{18,22,0},{18,20,4},{16,22,8},{16,22,8},{20,26,164},{20,25,94},{20,23,107},{20,23,95},{19,27,248},{19,24,89},{19,23,5},{19,22,99},{15,26,244},{17,22,100},{20,26,83},{20,25,13},{20,23,26},{20,23,14},{30,8,243},{18,25,73},{19,23,5},{18,22,90},{28,18,243}, +{18,22,90},{20,24,91},{20,24,91},{20,24,91},{20,23,94},{19,25,52},{19,23,4},{19,23,4},{19,22,18},{16,24,50},{17,22,19},{20,24,10},{20,24,10},{20,24,10},{20,23,13},{26,13,50},{19,23,4},{19,23,4},{18,22,9},{21,21,50},{18,22,9},{31,8,72},{20,25,4},{21,23,2},{19,23,5},{31,8,72},{28,19,72},{19,23,5},{0,22,90},{28,19,72},{0,22,90},{20,0,90}, 
+{20,0,90},{20,0,90},{20,0,90},{19,23,0},{19,23,0},{19,23,0},{19,21,4},{17,23,8},{17,23,8},{21,27,164},{21,26,94},{21,24,106},{21,24,94},{20,27,245},{20,25,97},{20,24,3},{20,23,107},{16,27,249},{18,23,100},{21,27,83},{21,26,13},{21,24,25},{21,24,13},{31,9,243},{19,26,73},{20,24,2},{19,23,90},{29,19,243},{19,23,90},{21,25,91},{21,25,91},{21,25,91}, +{21,24,94},{20,25,53},{20,24,3},{20,24,3},{20,23,26},{17,25,50},{18,23,19},{21,25,10},{21,25,10},{21,25,10},{21,24,13},{28,12,50},{20,24,2},{20,24,2},{19,23,9},{22,22,50},{19,23,9},{29,14,72},{21,26,4},{22,24,1},{20,24,1},{29,14,72},{29,20,72},{20,24,1},{0,23,90},{29,20,72},{0,23,90},{21,0,90},{21,0,90},{21,0,90},{21,0,90},{20,24,2}, +{20,24,2},{20,24,2},{20,22,2},{17,24,9},{17,24,9},{22,28,164},{22,27,94},{22,25,106},{22,25,94},{21,28,245},{21,26,97},{21,25,3},{21,24,97},{17,28,252},{19,24,85},{22,28,83},{22,27,13},{22,25,25},{22,25,13},{29,15,243},{20,27,76},{21,25,2},{20,24,81},{30,20,243},{20,24,81},{22,26,91},{22,26,91},{22,26,91},{22,25,94},{21,26,53},{21,25,3},{21,25,3}, +{21,23,27},{18,26,50},{19,24,21},{22,26,10},{22,26,10},{22,26,10},{22,25,13},{29,13,50},{21,25,2},{21,25,2},{20,24,17},{28,20,50},{20,24,17},{30,15,72},{22,27,4},{23,25,1},{21,25,1},{30,15,72},{30,21,72},{21,25,1},{0,24,80},{30,21,72},{0,24,80},{22,0,90},{22,0,90},{22,0,90},{22,0,90},{21,25,2},{21,25,2},{21,25,2},{21,23,2},{19,24,5}, +{19,24,5},{23,30,154},{23,28,81},{23,26,106},{23,26,82},{22,30,248},{22,27,99},{22,26,5},{22,25,99},{18,29,244},{20,25,100},{23,30,90},{23,28,17},{24,26,27},{23,26,18},{29,18,243},{21,28,73},{22,26,5},{21,25,90},{28,23,243},{21,25,90},{23,28,81},{23,28,81},{23,28,81},{23,26,81},{22,28,52},{22,26,4},{22,26,4},{22,25,18},{19,27,53},{20,25,19},{23,28,17}, 
+{23,28,17},{23,28,17},{23,26,17},{29,16,50},{22,26,4},{22,26,4},{21,25,9},{31,20,50},{21,25,9},{30,18,72},{23,28,1},{24,26,2},{23,26,2},{30,18,72},{26,25,72},{23,26,2},{0,25,90},{26,25,72},{0,25,90},{23,0,80},{23,0,80},{23,0,80},{23,0,80},{22,26,0},{22,26,0},{22,26,0},{22,24,4},{20,26,8},{20,26,8},{24,30,164},{24,29,94},{24,27,107}, +{24,27,95},{23,31,248},{23,28,89},{23,27,5},{23,26,99},{19,30,244},{21,26,100},{24,30,83},{24,29,13},{24,27,26},{24,27,14},{30,19,243},{22,29,73},{23,27,5},{22,26,90},{27,25,243},{22,26,90},{24,28,91},{24,28,91},{24,28,91},{24,27,94},{23,29,52},{23,27,4},{23,27,4},{23,26,18},{20,28,50},{21,26,19},{24,28,10},{24,28,10},{24,28,10},{24,27,13},{30,17,50}, +{23,27,4},{23,27,4},{22,26,9},{25,25,50},{22,26,9},{31,19,72},{24,29,4},{25,27,2},{23,27,5},{31,19,72},{27,26,72},{23,27,5},{0,26,90},{27,26,72},{0,26,90},{24,0,90},{24,0,90},{24,0,90},{24,0,90},{23,27,0},{23,27,0},{23,27,0},{23,25,4},{21,27,8},{21,27,8},{25,31,164},{25,30,94},{25,28,106},{25,28,94},{24,31,245},{24,29,97},{24,28,3}, +{24,27,107},{20,31,249},{22,27,100},{25,31,83},{25,30,13},{25,28,25},{25,28,13},{29,23,243},{23,30,73},{24,28,2},{23,27,90},{28,26,243},{23,27,90},{25,29,91},{25,29,91},{25,29,91},{25,28,94},{24,29,53},{24,28,3},{24,28,3},{24,27,26},{21,29,50},{22,27,19},{25,29,10},{25,29,10},{25,29,10},{25,28,13},{31,18,50},{24,28,2},{24,28,2},{23,27,9},{26,26,50}, +{23,27,9},{30,23,72},{25,30,4},{26,28,1},{24,28,1},{30,23,72},{28,27,72},{24,28,1},{0,27,90},{28,27,72},{0,27,90},{25,0,90},{25,0,90},{25,0,90},{25,0,90},{24,28,2},{24,28,2},{24,28,2},{24,26,2},{21,28,9},{21,28,9},{26,31,194},{26,31,94},{26,29,106},{26,29,94},{25,31,284},{25,30,97},{25,29,3},{25,28,97},{22,31,253},{23,28,85},{27,31,99}, 
+{26,31,13},{26,29,25},{26,29,13},{30,24,243},{24,31,76},{25,29,2},{24,28,81},{29,27,243},{24,28,81},{26,30,91},{26,30,91},{26,30,91},{26,29,94},{25,30,53},{25,29,3},{25,29,3},{25,27,27},{22,30,50},{23,28,21},{26,30,10},{26,30,10},{26,30,10},{26,29,13},{30,22,50},{25,29,2},{25,29,2},{24,28,17},{27,27,50},{24,28,17},{31,24,72},{26,31,4},{27,29,1}, +{25,29,1},{31,24,72},{24,31,72},{25,29,1},{0,28,80},{24,31,72},{0,28,80},{26,0,90},{26,0,90},{26,0,90},{26,0,90},{25,29,2},{25,29,2},{25,29,2},{25,27,2},{23,28,5},{23,28,5},{27,31,280},{27,31,120},{27,30,106},{27,30,82},{27,31,328},{26,31,99},{26,30,5},{26,29,99},{24,31,308},{24,29,100},{28,31,105},{28,31,45},{28,30,27},{27,30,18},{30,27,243}, +{26,31,99},{26,30,5},{25,29,90},{30,28,243},{25,29,90},{27,31,84},{27,31,84},{27,31,84},{27,30,81},{26,31,58},{26,30,4},{26,30,4},{26,29,18},{23,31,53},{24,29,19},{27,31,20},{27,31,20},{27,31,20},{27,30,17},{30,25,50},{26,30,4},{26,30,4},{25,29,9},{30,27,50},{25,29,9},{31,27,72},{28,31,20},{28,30,2},{27,30,2},{31,27,72},{30,29,72},{27,30,2}, +{0,29,90},{30,29,72},{0,29,90},{27,0,80},{27,0,80},{27,0,80},{27,0,80},{26,30,0},{26,30,0},{26,30,0},{26,28,4},{24,30,8},{24,30,8},{28,31,331},{28,31,187},{28,31,106},{28,31,94},{28,31,358},{27,31,173},{27,31,4},{27,30,82},{26,31,355},{25,30,83},{29,31,126},{29,31,62},{28,31,25},{28,31,13},{30,29,221},{28,31,121},{27,31,4},{26,30,73},{29,30,221}, +{26,30,73},{28,31,106},{28,31,106},{28,31,106},{28,31,94},{27,31,100},{27,31,4},{27,31,4},{27,30,18},{25,31,72},{25,30,19},{28,31,25},{28,31,25},{28,31,25},{28,31,13},{31,26,50},{27,31,4},{27,31,4},{26,30,9},{29,29,50},{26,30,9},{31,29,61},{29,31,37},{29,31,1},{27,31,4},{31,29,61},{31,30,61},{27,31,4},{0,30,73},{31,30,61},{0,30,73},{28,0,90}, 
+{28,0,90},{28,0,90},{28,0,90},{27,31,0},{27,31,0},{27,31,0},{27,29,4},{25,31,8},{25,31,8},{29,31,239},{29,31,175},{29,31,139},{29,31,99},{29,31,239},{28,31,122},{28,31,41},{28,31,26},{28,31,233},{26,31,19},{30,31,54},{30,31,38},{30,31,29},{29,31,18},{30,31,93},{29,31,54},{29,31,18},{27,31,9},{31,30,93},{27,31,9},{29,31,139},{29,31,139},{29,31,139}, +{29,31,99},{29,31,139},{28,31,41},{28,31,41},{28,31,26},{27,31,116},{26,31,19},{30,31,29},{30,31,29},{30,31,29},{29,31,18},{30,30,50},{29,31,18},{29,31,18},{27,31,9},{30,30,50},{27,31,9},{31,30,9},{31,31,9},{30,31,4},{30,31,0},{31,30,9},{30,31,9},{30,31,0},{0,31,9},{30,31,9},{0,31,9},{29,0,90},{29,0,90},{29,0,90},{29,0,90},{28,31,5}, +{28,31,5},{28,31,5},{28,30,2},{26,31,10},{26,31,10},{30,31,140},{30,31,124},{30,31,115},{30,31,99},{30,31,131},{29,31,98},{29,31,62},{29,31,2},{29,31,122},{28,31,20},{31,31,25},{31,31,25},{31,31,25},{30,31,18},{31,30,22},{30,31,18},{30,31,9},{29,31,1},{30,31,22},{29,31,1},{30,31,115},{30,31,115},{30,31,115},{30,31,99},{30,31,106},{29,31,62},{29,31,62}, +{29,31,2},{29,31,86},{28,31,20},{31,31,25},{31,31,25},{31,31,25},{30,31,18},{31,30,13},{30,31,9},{30,31,9},{29,31,1},{30,31,13},{29,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{30,0,90},{30,0,90},{30,0,90},{30,0,90},{29,31,26},{29,31,26},{29,31,26},{29,31,2},{28,31,20}, +{28,31,20},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{0,4,442},{0,3,313},{0,3,142},{0,2,318},{0,2,498},{0,2,354},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{2,1,441},{0,3,313},{0,3,142},{0,2,318},{1,2,441},{0,2,318},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,45},{0,1,25},{0,1,25},{0,1,26},{0,1,50},{0,1,35},{0,3,0}, +{0,3,0},{0,3,0},{0,2,1},{1,0,41},{0,1,25},{0,1,25},{0,1,26},{0,1,41},{0,1,26},{2,3,200},{0,5,52},{0,3,25},{0,3,61},{2,3,200},{3,2,200},{0,3,61},{0,2,218},{3,2,200},{0,2,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,9,200},{0,7,20},{0,5,20}, 
+{0,4,25},{0,6,686},{0,5,433},{0,4,169},{0,3,443},{0,3,794},{0,3,524},{0,9,200},{0,7,20},{0,5,20},{0,4,25},{3,1,686},{0,5,433},{0,4,169},{0,3,443},{6,0,686},{0,3,443},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,3,145},{0,2,85},{0,2,85},{0,2,101},{0,1,178},{0,1,115},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{1,1,145}, +{0,2,85},{0,2,85},{0,2,101},{3,0,145},{0,2,101},{3,4,200},{0,7,20},{1,4,16},{0,4,25},{3,4,200},{9,0,200},{0,4,25},{0,3,218},{9,0,200},{0,3,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,11,257},{0,9,54},{1,6,85},{0,5,65},{0,9,728},{0,6,371},{0,5,80}, +{0,4,377},{0,5,949},{0,4,521},{1,10,201},{1,8,18},{1,6,21},{1,5,26},{4,2,723},{0,6,371},{0,5,80},{0,4,377},{7,1,723},{0,4,377},{0,8,50},{0,8,50},{0,8,50},{0,5,49},{0,5,164},{0,4,50},{0,4,50},{0,3,65},{0,3,245},{0,2,126},{1,7,2},{1,7,2},{1,7,2},{1,4,5},{2,2,162},{0,4,50},{0,4,50},{0,3,65},{2,2,162}, +{0,3,65},{7,0,200},{0,9,5},{2,5,16},{0,5,16},{7,0,200},{10,1,200},{0,5,16},{0,4,208},{10,1,200},{0,4,208},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,2,1},{0,2,1},{0,2,1},{0,1,4},{0,1,13},{0,1,13},{1,12,315},{1,10,118},{1,7,178},{1,6,129},{0,11,724},{0,8,289},{0,6,34},{0,5,308},{0,6,1087},{0,5,533},{2,11,201}, +{2,9,18},{2,7,21},{2,6,26},{5,3,723},{0,8,289},{0,6,34},{0,5,308},{8,2,723},{0,5,308},{1,9,114},{1,9,114},{1,9,114},{1,6,113},{0,8,162},{0,6,18},{0,6,18},{0,4,25},{0,4,338},{0,3,162},{2,8,2},{2,8,2},{2,8,2},{2,5,5},{3,3,162},{0,6,18},{0,6,18},{0,4,25},{8,0,162},{0,4,25},{6,4,200},{1,10,5},{3,6,16}, +{1,6,16},{6,4,200},{14,0,200},{1,6,16},{0,5,208},{14,0,200},{0,5,208},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{2,14,410},{2,11,209},{2,8,288},{2,7,234},{0,14,739},{0,10,254},{0,8,33},{0,6,270},{0,8,1131},{0,6,450},{3,12,200},{3,10,13},{3,8,25},{3,7,29},{6,4,723}, 
+{0,10,238},{0,8,17},{0,6,254},{14,0,723},{0,6,254},{2,11,209},{2,11,209},{2,11,209},{2,7,209},{0,11,178},{0,8,17},{0,8,17},{0,5,18},{0,6,376},{0,5,123},{3,9,0},{3,9,0},{3,9,0},{3,7,4},{6,1,162},{0,8,1},{0,8,1},{0,5,2},{11,0,162},{0,5,2},{9,2,200},{2,11,1},{4,7,25},{0,8,17},{9,2,200},{17,0,200},{0,8,17}, +{0,6,218},{17,0,200},{0,6,218},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{0,8,16},{0,8,16},{0,8,16},{1,4,17},{0,4,80},{0,4,80},{3,15,410},{3,12,212},{3,9,288},{3,8,224},{1,15,739},{1,11,254},{1,9,33},{1,7,270},{0,9,1013},{0,7,308},{4,13,201},{4,11,21},{4,9,21},{4,8,26},{10,0,723},{0,12,227},{1,9,17},{0,7,227},{15,1,723}, +{0,7,227},{3,11,212},{3,11,212},{3,11,212},{3,8,208},{2,10,180},{1,9,17},{1,9,17},{1,6,18},{0,8,306},{0,6,41},{4,10,2},{4,10,2},{4,10,2},{4,7,5},{8,0,162},{1,9,1},{1,9,1},{1,6,2},{5,5,162},{1,6,2},{10,3,200},{3,12,4},{5,8,16},{3,8,16},{10,3,200},{18,1,200},{3,8,16},{0,7,218},{18,1,200},{0,7,218},{3,0,208}, +{3,0,208},{3,0,208},{3,0,208},{1,9,16},{1,9,16},{1,9,16},{2,5,17},{0,6,40},{0,6,40},{4,15,426},{4,13,223},{4,10,283},{4,9,234},{2,16,739},{2,12,267},{2,10,33},{2,8,273},{0,11,913},{0,8,225},{5,14,201},{5,12,18},{5,10,21},{5,9,26},{11,1,723},{0,13,208},{2,10,17},{0,8,209},{16,2,723},{0,8,209},{4,12,219},{4,12,219},{4,12,219}, +{4,9,218},{3,11,180},{2,10,17},{2,10,17},{2,7,18},{0,9,229},{0,7,27},{5,11,2},{5,11,2},{5,11,2},{5,8,5},{9,1,162},{2,10,1},{2,10,1},{2,7,2},{16,0,162},{2,7,2},{11,4,200},{3,14,4},{6,9,16},{4,9,16},{11,4,200},{19,2,200},{4,9,16},{0,8,208},{19,2,200},{0,8,208},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{2,10,16}, +{2,10,16},{2,10,16},{3,6,17},{0,8,17},{0,8,17},{5,16,420},{5,14,223},{5,11,283},{5,10,234},{3,17,739},{3,13,267},{3,11,33},{3,9,273},{0,13,868},{0,9,213},{6,15,201},{6,13,18},{6,11,21},{6,10,26},{12,2,723},{0,15,204},{3,11,17},{1,9,209},{22,0,723},{1,9,209},{5,13,219},{5,13,219},{5,13,219},{5,10,218},{3,14,180},{3,11,17},{3,11,17}, 
+{3,8,20},{0,11,189},{1,8,17},{6,12,2},{6,12,2},{6,12,2},{6,9,5},{10,2,162},{3,11,1},{3,11,1},{2,8,1},{17,1,162},{2,8,1},{15,0,200},{5,14,5},{7,10,16},{5,10,16},{15,0,200},{20,3,200},{5,10,16},{0,9,208},{20,3,200},{0,9,208},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{3,11,16},{3,11,16},{3,11,16},{3,8,20},{0,9,5}, +{0,9,5},{6,18,410},{6,15,209},{6,12,288},{6,11,234},{4,18,739},{4,14,254},{4,12,33},{4,10,270},{0,15,804},{2,10,227},{7,16,200},{7,14,13},{7,12,25},{7,11,29},{15,0,723},{1,16,209},{4,12,17},{2,10,218},{20,3,723},{2,10,218},{6,15,209},{6,15,209},{6,15,209},{6,11,209},{4,15,178},{4,12,17},{4,12,17},{4,9,18},{0,13,171},{2,9,26},{7,13,0}, +{7,13,0},{7,13,0},{7,11,4},{13,0,162},{4,12,1},{4,12,1},{4,9,2},{20,1,162},{4,9,2},{16,1,200},{6,15,1},{8,11,25},{4,12,17},{16,1,200},{26,1,200},{4,12,17},{0,10,218},{26,1,200},{0,10,218},{6,0,208},{6,0,208},{6,0,208},{6,0,208},{4,12,16},{4,12,16},{4,12,16},{5,8,17},{1,11,5},{1,11,5},{7,19,410},{7,16,212},{7,13,288}, +{7,12,224},{5,19,739},{5,15,254},{5,13,33},{5,11,270},{0,16,747},{3,11,227},{8,17,201},{8,15,21},{8,13,21},{8,12,26},{14,4,723},{3,16,209},{5,13,17},{3,11,218},{24,2,723},{3,11,218},{7,15,212},{7,15,212},{7,15,212},{7,12,208},{6,14,180},{5,13,17},{5,13,17},{5,10,18},{1,14,171},{3,10,26},{8,14,2},{8,14,2},{8,14,2},{8,11,5},{14,1,162}, +{5,13,1},{5,13,1},{5,10,2},{24,0,162},{5,10,2},{17,2,200},{7,16,4},{9,12,16},{7,12,16},{17,2,200},{27,2,200},{7,12,16},{0,11,218},{27,2,200},{0,11,218},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{5,13,16},{5,13,16},{5,13,16},{6,9,17},{2,12,8},{2,12,8},{8,19,426},{8,17,223},{8,14,283},{8,13,234},{6,20,739},{6,16,267},{6,14,33}, +{6,12,273},{0,18,727},{3,12,218},{9,18,201},{9,16,18},{9,14,21},{9,13,26},{18,0,723},{4,17,208},{6,14,17},{4,12,209},{30,0,723},{4,12,209},{8,16,219},{8,16,219},{8,16,219},{8,13,218},{7,15,180},{6,14,17},{6,14,17},{6,11,18},{2,15,171},{4,11,27},{9,15,2},{9,15,2},{9,15,2},{9,12,5},{16,0,162},{6,14,1},{6,14,1},{6,11,2},{25,1,162}, 
+{6,11,2},{18,3,200},{7,18,4},{10,13,16},{8,13,16},{18,3,200},{28,3,200},{8,13,16},{0,12,208},{28,3,200},{0,12,208},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{6,14,16},{6,14,16},{6,14,16},{7,10,17},{3,13,8},{3,13,8},{9,20,420},{9,18,223},{9,15,283},{9,14,234},{7,21,739},{7,17,267},{7,15,33},{7,13,273},{1,19,727},{4,13,213},{10,19,201}, +{10,17,18},{10,15,21},{10,14,26},{19,1,723},{4,19,204},{7,15,17},{5,13,209},{31,1,723},{5,13,209},{9,17,219},{9,17,219},{9,17,219},{9,14,218},{7,18,180},{7,15,17},{7,15,17},{7,12,20},{3,16,173},{5,12,17},{10,16,2},{10,16,2},{10,16,2},{10,13,5},{17,1,162},{7,15,1},{7,15,1},{6,12,1},{26,2,162},{6,12,1},{20,2,200},{9,18,5},{11,14,16}, +{9,14,16},{20,2,200},{24,7,200},{9,14,16},{0,13,208},{24,7,200},{0,13,208},{9,0,218},{9,0,218},{9,0,218},{9,0,218},{7,15,16},{7,15,16},{7,15,16},{7,12,20},{4,13,5},{4,13,5},{10,22,410},{10,19,209},{10,16,288},{10,15,234},{8,22,739},{8,18,254},{8,16,33},{8,14,270},{2,20,724},{6,14,227},{11,20,200},{11,18,13},{11,16,25},{11,15,29},{20,2,723}, +{5,20,209},{8,16,17},{6,14,218},{24,7,723},{6,14,218},{10,19,209},{10,19,209},{10,19,209},{10,15,209},{8,19,178},{8,16,17},{8,16,17},{8,13,18},{3,18,170},{6,13,26},{11,17,0},{11,17,0},{11,17,0},{11,15,4},{17,4,162},{8,16,1},{8,16,1},{8,13,2},{29,2,162},{8,13,2},{23,0,200},{10,19,1},{12,15,25},{8,16,17},{23,0,200},{30,5,200},{8,16,17}, +{0,14,218},{30,5,200},{0,14,218},{10,0,208},{10,0,208},{10,0,208},{10,0,208},{8,16,16},{8,16,16},{8,16,16},{9,12,17},{5,15,5},{5,15,5},{11,23,410},{11,20,212},{11,17,288},{11,16,224},{9,23,739},{9,19,254},{9,17,33},{9,15,270},{3,21,724},{7,15,227},{12,21,201},{12,19,21},{12,17,21},{12,16,26},{21,3,723},{7,20,209},{9,17,17},{7,15,218},{28,6,723}, 
+{7,15,218},{11,19,212},{11,19,212},{11,19,212},{11,16,208},{10,18,180},{9,17,17},{9,17,17},{9,14,18},{5,18,171},{7,14,26},{12,18,2},{12,18,2},{12,18,2},{12,15,5},{18,5,162},{9,17,1},{9,17,1},{9,14,2},{30,3,162},{9,14,2},{24,1,200},{11,20,4},{13,16,16},{11,16,16},{24,1,200},{31,6,200},{11,16,16},{0,15,218},{31,6,200},{0,15,218},{11,0,208}, +{11,0,208},{11,0,208},{11,0,208},{9,17,16},{9,17,16},{9,17,16},{10,13,17},{6,16,8},{6,16,8},{12,23,426},{12,21,223},{12,18,283},{12,17,234},{10,24,739},{10,20,267},{10,18,33},{10,16,273},{4,22,727},{7,16,218},{13,22,201},{13,20,18},{13,18,21},{13,17,26},{22,4,723},{8,21,208},{10,18,17},{8,16,209},{29,7,723},{8,16,209},{12,20,219},{12,20,219},{12,20,219}, +{12,17,218},{11,19,180},{10,18,17},{10,18,17},{10,15,18},{6,19,171},{8,15,27},{13,19,2},{13,19,2},{13,19,2},{13,16,5},{20,4,162},{10,18,1},{10,18,1},{10,15,2},{29,5,162},{10,15,2},{25,2,200},{11,22,4},{14,17,16},{12,17,16},{25,2,200},{27,10,200},{12,17,16},{0,16,208},{27,10,200},{0,16,208},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{10,18,16}, +{10,18,16},{10,18,16},{11,14,17},{7,17,8},{7,17,8},{13,24,420},{13,22,223},{13,19,283},{13,18,234},{11,25,739},{11,21,267},{11,19,33},{11,17,273},{5,23,727},{8,17,213},{14,23,201},{14,21,18},{14,19,21},{14,18,26},{26,0,723},{8,23,204},{11,19,17},{9,17,209},{30,8,723},{9,17,209},{13,21,219},{13,21,219},{13,21,219},{13,18,218},{11,22,180},{11,19,17},{11,19,17}, +{11,16,20},{7,20,173},{9,16,17},{14,20,2},{14,20,2},{14,20,2},{14,17,5},{24,0,162},{11,19,1},{11,19,1},{10,16,1},{30,6,162},{10,16,1},{26,3,200},{13,22,5},{15,18,16},{13,18,16},{26,3,200},{28,11,200},{13,18,16},{0,17,208},{28,11,200},{0,17,208},{13,0,218},{13,0,218},{13,0,218},{13,0,218},{11,19,16},{11,19,16},{11,19,16},{11,16,20},{8,17,5}, 
+{8,17,5},{14,26,410},{14,23,209},{14,20,288},{14,19,234},{12,26,739},{12,22,254},{12,20,33},{12,18,270},{6,24,724},{10,18,227},{15,24,200},{15,22,13},{15,20,25},{15,19,29},{26,3,723},{9,24,209},{12,20,17},{10,18,218},{28,11,723},{10,18,218},{14,23,209},{14,23,209},{14,23,209},{14,19,209},{12,23,178},{12,20,17},{12,20,17},{12,17,18},{7,22,170},{10,17,26},{15,21,0}, +{15,21,0},{15,21,0},{15,19,4},{24,3,162},{12,20,1},{12,20,1},{12,17,2},{28,9,162},{12,17,2},{27,4,200},{14,23,1},{16,19,25},{12,20,17},{27,4,200},{31,11,200},{12,20,17},{0,18,218},{31,11,200},{0,18,218},{14,0,208},{14,0,208},{14,0,208},{14,0,208},{12,20,16},{12,20,16},{12,20,16},{13,16,17},{9,19,5},{9,19,5},{15,27,410},{15,24,212},{15,21,288}, +{15,20,224},{13,27,739},{13,23,254},{13,21,33},{13,19,270},{7,25,724},{11,19,227},{16,25,201},{16,23,21},{16,21,21},{16,20,26},{28,2,723},{11,24,209},{13,21,17},{11,19,218},{27,13,723},{11,19,218},{15,23,212},{15,23,212},{15,23,212},{15,20,208},{14,22,180},{13,21,17},{13,21,17},{13,18,18},{9,22,171},{11,18,26},{16,22,2},{16,22,2},{16,22,2},{16,19,5},{22,9,162}, +{13,21,1},{13,21,1},{13,18,2},{29,10,162},{13,18,2},{31,0,200},{15,24,4},{17,20,16},{15,20,16},{31,0,200},{30,13,200},{15,20,16},{0,19,218},{30,13,200},{0,19,218},{15,0,208},{15,0,208},{15,0,208},{15,0,208},{13,21,16},{13,21,16},{13,21,16},{14,17,17},{10,20,8},{10,20,8},{16,27,426},{16,25,223},{16,22,283},{16,21,234},{14,28,739},{14,24,267},{14,22,33}, +{14,20,273},{8,26,727},{11,20,218},{17,26,201},{17,24,18},{17,22,21},{17,21,26},{29,3,723},{12,25,208},{14,22,17},{12,20,209},{28,14,723},{12,20,209},{16,24,219},{16,24,219},{16,24,219},{16,21,218},{15,23,180},{14,22,17},{14,22,17},{14,19,18},{10,23,171},{12,19,27},{17,23,2},{17,23,2},{17,23,2},{17,20,5},{24,8,162},{14,22,1},{14,22,1},{14,19,2},{30,11,162}, 
+{14,19,2},{29,6,200},{15,26,4},{18,21,16},{16,21,16},{29,6,200},{31,14,200},{16,21,16},{0,20,208},{31,14,200},{0,20,208},{16,0,218},{16,0,218},{16,0,218},{16,0,218},{14,22,16},{14,22,16},{14,22,16},{15,18,17},{11,21,8},{11,21,8},{17,28,420},{17,26,223},{17,23,283},{17,22,234},{15,29,739},{15,25,267},{15,23,33},{15,21,273},{9,27,727},{12,21,213},{18,27,201}, +{18,25,18},{18,23,21},{18,22,26},{30,4,723},{12,27,204},{15,23,17},{13,21,209},{29,15,723},{13,21,209},{17,25,219},{17,25,219},{17,25,219},{17,22,218},{15,26,180},{15,23,17},{15,23,17},{15,20,20},{11,24,173},{13,20,17},{18,24,2},{18,24,2},{18,24,2},{18,21,5},{28,4,162},{15,23,1},{15,23,1},{14,20,1},{29,13,162},{14,20,1},{30,7,200},{17,26,5},{19,22,16}, +{17,22,16},{30,7,200},{30,16,200},{17,22,16},{0,21,208},{30,16,200},{0,21,208},{17,0,218},{17,0,218},{17,0,218},{17,0,218},{15,23,16},{15,23,16},{15,23,16},{15,20,20},{12,21,5},{12,21,5},{18,30,410},{18,27,209},{18,24,288},{18,23,234},{16,30,739},{16,26,254},{16,24,33},{16,22,270},{10,28,724},{14,22,227},{19,28,200},{19,26,13},{19,24,25},{19,23,29},{30,7,723}, +{13,28,209},{16,24,17},{14,22,218},{30,16,723},{14,22,218},{18,27,209},{18,27,209},{18,27,209},{18,23,209},{16,27,178},{16,24,17},{16,24,17},{16,21,18},{11,26,170},{14,21,26},{19,25,0},{19,25,0},{19,25,0},{19,23,4},{31,2,162},{16,24,1},{16,24,1},{16,21,2},{27,16,162},{16,21,2},{31,8,200},{18,27,1},{20,23,25},{16,24,17},{31,8,200},{28,19,200},{16,24,17}, +{0,22,218},{28,19,200},{0,22,218},{18,0,208},{18,0,208},{18,0,208},{18,0,208},{16,24,16},{16,24,16},{16,24,16},{17,20,17},{13,23,5},{13,23,5},{19,31,410},{19,28,212},{19,25,288},{19,24,224},{17,31,739},{17,27,254},{17,25,33},{17,23,270},{11,29,724},{15,23,227},{20,29,201},{20,27,21},{20,25,21},{20,24,26},{29,11,723},{15,28,209},{17,25,17},{15,23,218},{31,17,723}, 
+{15,23,218},{19,27,212},{19,27,212},{19,27,212},{19,24,208},{18,26,180},{17,25,17},{17,25,17},{17,22,18},{13,26,171},{15,22,26},{20,26,2},{20,26,2},{20,26,2},{20,23,5},{26,13,162},{17,25,1},{17,25,1},{17,22,2},{21,21,162},{17,22,2},{29,14,200},{19,28,4},{21,24,16},{19,24,16},{29,14,200},{29,20,200},{19,24,16},{0,23,218},{29,20,200},{0,23,218},{19,0,208}, +{19,0,208},{19,0,208},{19,0,208},{17,25,16},{17,25,16},{17,25,16},{18,21,17},{14,24,8},{14,24,8},{20,31,426},{20,29,223},{20,26,283},{20,25,234},{19,30,740},{18,28,267},{18,26,33},{18,24,273},{12,30,727},{15,24,218},{21,30,201},{21,28,18},{21,26,21},{21,25,26},{30,12,723},{16,29,208},{18,26,17},{16,24,209},{27,21,723},{16,24,209},{20,28,219},{20,28,219},{20,28,219}, +{20,25,218},{19,27,180},{18,26,17},{18,26,17},{18,23,18},{14,27,171},{16,23,27},{21,27,2},{21,27,2},{21,27,2},{21,24,5},{28,12,162},{18,26,1},{18,26,1},{18,23,2},{22,22,162},{18,23,2},{30,15,200},{19,30,4},{22,25,16},{20,25,16},{30,15,200},{30,21,200},{20,25,16},{0,24,208},{30,21,200},{0,24,208},{20,0,218},{20,0,218},{20,0,218},{20,0,218},{18,26,16}, +{18,26,16},{18,26,16},{19,22,17},{15,25,8},{15,25,8},{21,31,468},{21,30,223},{21,27,283},{21,26,234},{20,31,749},{19,29,267},{19,27,33},{19,25,273},{13,31,727},{16,25,213},{22,31,201},{22,29,18},{22,27,21},{22,26,26},{31,13,723},{16,31,204},{19,27,17},{17,25,209},{28,22,723},{17,25,209},{21,29,219},{21,29,219},{21,29,219},{21,26,218},{19,30,180},{19,27,17},{19,27,17}, +{19,24,20},{15,28,173},{17,24,17},{22,28,2},{22,28,2},{22,28,2},{22,25,5},{29,13,162},{19,27,1},{19,27,1},{18,24,1},{28,20,162},{18,24,1},{31,16,200},{21,30,5},{23,26,16},{21,26,16},{31,16,200},{31,22,200},{21,26,16},{0,25,208},{31,22,200},{0,25,208},{21,0,218},{21,0,218},{21,0,218},{21,0,218},{19,27,16},{19,27,16},{19,27,16},{19,24,20},{16,25,5}, 
+{16,25,5},{22,31,570},{22,31,209},{22,28,288},{22,27,234},{21,31,804},{20,30,254},{20,28,33},{20,26,270},{15,31,753},{18,26,227},{23,31,232},{23,30,13},{23,28,25},{23,27,29},{31,16,723},{19,31,216},{20,28,17},{18,26,218},{31,22,723},{18,26,218},{22,31,209},{22,31,209},{22,31,209},{22,27,209},{20,31,178},{20,28,17},{20,28,17},{20,25,18},{15,30,170},{18,25,26},{23,29,0}, +{23,29,0},{23,29,0},{23,27,4},{29,16,162},{20,28,1},{20,28,1},{20,25,2},{31,20,162},{20,25,2},{31,19,200},{22,31,1},{24,27,25},{20,28,17},{31,19,200},{27,26,200},{20,28,17},{0,26,218},{27,26,200},{0,26,218},{22,0,208},{22,0,208},{22,0,208},{22,0,208},{20,28,16},{20,28,16},{20,28,16},{21,24,17},{17,27,5},{17,27,5},{23,31,696},{23,31,237},{23,29,288}, +{23,28,224},{23,31,888},{21,31,254},{21,29,33},{21,27,270},{17,31,824},{19,27,227},{24,31,273},{24,31,21},{24,29,21},{24,28,26},{30,20,723},{20,31,233},{21,29,17},{19,27,218},{30,24,723},{19,27,218},{23,31,212},{23,31,212},{23,31,212},{23,28,208},{22,30,180},{21,29,17},{21,29,17},{21,26,18},{17,30,171},{19,26,26},{24,30,2},{24,30,2},{24,30,2},{24,27,5},{30,17,162}, +{21,29,1},{21,29,1},{21,26,2},{25,25,162},{21,26,2},{30,23,200},{24,31,20},{25,28,16},{23,28,16},{30,23,200},{28,27,200},{23,28,16},{0,27,218},{28,27,200},{0,27,218},{23,0,208},{23,0,208},{23,0,208},{23,0,208},{21,29,16},{21,29,16},{21,29,16},{22,25,17},{18,28,8},{18,28,8},{25,31,804},{24,31,334},{24,30,283},{24,29,234},{24,31,957},{22,31,297},{22,30,33}, +{22,28,273},{20,31,913},{19,28,218},{26,31,313},{25,31,51},{25,30,21},{25,29,26},{31,21,723},{22,31,281},{22,30,17},{20,28,209},{31,25,723},{20,28,209},{24,31,234},{24,31,234},{24,31,234},{24,29,218},{23,31,180},{22,30,17},{22,30,17},{22,27,18},{18,31,171},{20,27,27},{25,31,2},{25,31,2},{25,31,2},{25,28,5},{31,18,162},{22,30,1},{22,30,1},{22,27,2},{26,26,162}, 
+{22,27,2},{31,24,200},{25,31,50},{26,29,16},{24,29,16},{31,24,200},{24,31,200},{24,29,16},{0,28,208},{24,31,200},{0,28,208},{24,0,218},{24,0,218},{24,0,218},{24,0,218},{22,30,16},{22,30,16},{22,30,16},{23,26,17},{19,29,8},{19,29,8},{26,31,930},{25,31,492},{25,31,283},{25,30,234},{25,31,1068},{24,31,389},{23,31,33},{23,29,273},{21,31,999},{20,29,213},{27,31,379}, +{26,31,149},{26,31,21},{26,30,26},{29,27,723},{24,31,364},{23,31,17},{21,29,209},{27,29,723},{21,29,209},{25,31,267},{25,31,267},{25,31,267},{25,30,218},{24,31,205},{23,31,17},{23,31,17},{23,28,20},{20,31,189},{21,28,17},{26,31,5},{26,31,5},{26,31,5},{26,29,5},{30,22,162},{23,31,1},{23,31,1},{22,28,1},{27,27,162},{22,28,1},{30,28,200},{27,31,90},{27,30,16}, +{25,30,16},{30,28,200},{28,30,200},{25,30,16},{0,29,208},{28,30,200},{0,29,208},{25,0,218},{25,0,218},{25,0,218},{25,0,218},{23,31,16},{23,31,16},{23,31,16},{23,28,20},{20,29,5},{20,29,5},{27,31,877},{26,31,585},{26,31,329},{26,31,209},{26,31,990},{25,31,397},{25,31,36},{24,30,165},{23,31,910},{22,30,122},{28,31,306},{28,31,162},{27,31,36},{27,31,4},{31,26,546}, +{26,31,306},{25,31,20},{22,30,113},{29,29,546},{22,30,113},{26,31,329},{26,31,329},{26,31,329},{26,31,209},{25,31,276},{25,31,36},{25,31,36},{24,29,18},{22,31,230},{22,29,26},{27,31,36},{27,31,36},{27,31,36},{27,31,4},{30,25,162},{25,31,20},{25,31,20},{24,29,2},{30,27,162},{24,29,2},{30,30,113},{29,31,61},{28,31,0},{26,31,1},{30,30,113},{30,30,113},{26,31,1}, +{0,30,113},{30,30,113},{0,30,113},{26,0,208},{26,0,208},{26,0,208},{26,0,208},{25,30,17},{25,30,17},{25,30,17},{25,28,17},{21,31,5},{21,31,5},{28,31,731},{27,31,573},{27,31,404},{27,31,244},{27,31,797},{26,31,354},{26,31,98},{25,30,82},{25,31,737},{23,31,58},{29,31,190},{29,31,126},{28,31,65},{28,31,5},{30,29,333},{28,31,185},{27,31,52},{23,31,49},{29,30,333}, 
+{23,31,49},{27,31,404},{27,31,404},{27,31,404},{27,31,244},{27,31,356},{26,31,98},{26,31,98},{25,30,18},{24,31,315},{23,30,26},{28,31,65},{28,31,65},{28,31,65},{28,31,5},{31,26,162},{27,31,52},{27,31,52},{25,30,2},{29,29,162},{25,30,2},{31,30,25},{30,31,13},{30,31,4},{29,31,1},{31,30,25},{30,31,25},{29,31,1},{0,31,49},{30,31,25},{0,31,49},{27,0,208}, +{27,0,208},{27,0,208},{27,0,208},{26,31,17},{26,31,17},{26,31,17},{26,29,17},{23,31,9},{23,31,9},{29,31,642},{28,31,524},{28,31,443},{28,31,299},{28,31,623},{28,31,335},{27,31,201},{26,31,17},{27,31,610},{24,31,26},{30,31,131},{30,31,115},{29,31,101},{29,31,37},{31,29,193},{29,31,121},{28,31,85},{26,31,1},{29,31,193},{26,31,1},{28,31,443},{28,31,443},{28,31,443}, +{28,31,299},{28,31,398},{27,31,201},{27,31,201},{26,31,17},{26,31,378},{24,31,26},{29,31,101},{29,31,101},{29,31,101},{29,31,37},{31,28,145},{28,31,85},{28,31,85},{26,31,1},{30,30,145},{26,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{28,0,218},{28,0,218},{28,0,218},{28,0,218},{27,31,32}, +{27,31,32},{27,31,32},{27,30,17},{24,31,26},{24,31,26},{29,31,418},{29,31,354},{29,31,318},{29,31,254},{29,31,370},{28,31,223},{28,31,142},{28,31,25},{28,31,358},{26,31,58},{30,31,51},{30,31,35},{30,31,26},{30,31,10},{31,30,54},{30,31,34},{30,31,25},{28,31,0},{30,31,54},{28,31,0},{29,31,318},{29,31,318},{29,31,318},{29,31,254},{29,31,270},{28,31,142},{28,31,142}, +{28,31,25},{27,31,249},{26,31,58},{30,31,26},{30,31,26},{30,31,26},{30,31,10},{30,31,41},{30,31,25},{30,31,25},{28,31,0},{31,30,41},{28,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{29,0,218},{29,0,218},{29,0,218},{29,0,218},{28,31,61},{28,31,61},{28,31,61},{28,31,25},{26,31,58}, 
+{26,31,58},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{0,6,925},{0,5,658},{0,4,274},{0,3,670},{0,3,1039},{0,3,751},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{3,1,925},{0,5,658},{0,4,274},{0,3,670},{6,0,925},{0,3,670},{0,4,1},{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{0,1,98},{0,1,59},{0,4,1}, +{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{2,0,85},{0,1,50},{5,1,421},{0,7,113},{0,5,5},{0,4,130},{5,1,421},{9,0,421},{0,4,130},{0,3,445},{9,0,421},{0,3,445},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,12,425},{0,9,52},{0,6,10}, +{0,6,82},{0,8,1261},{0,6,805},{0,5,322},{0,4,833},{0,4,1445},{0,4,977},{0,12,425},{0,9,52},{0,6,10},{0,6,82},{3,3,1261},{0,6,805},{0,5,322},{0,4,833},{8,0,1261},{0,4,833},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{0,3,225},{0,3,117},{0,3,117},{0,2,125},{0,2,257},{0,2,161},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{2,0,221}, +{0,3,117},{0,3,117},{0,2,125},{2,1,221},{0,2,125},{7,0,421},{0,9,52},{1,6,5},{0,6,82},{7,0,421},{10,1,421},{0,6,82},{0,4,433},{10,1,421},{0,4,433},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,14,430},{0,11,29},{0,7,74},{0,7,46},{0,10,1514},{0,8,874},{0,6,307}, +{0,5,917},{0,5,1814},{0,4,1074},{0,14,430},{0,11,29},{1,7,35},{0,7,46},{6,0,1514},{0,8,874},{0,6,307},{0,5,917},{10,0,1514},{0,5,917},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{0,5,340},{0,5,160},{0,5,160},{0,3,169},{0,3,421},{0,3,250},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{2,2,338},{0,5,160},{0,5,160},{0,3,169},{2,2,338}, +{0,3,169},{8,1,421},{0,11,20},{2,7,5},{0,7,37},{8,1,421},{14,0,421},{0,7,37},{0,5,433},{14,0,421},{0,5,433},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{1,15,494},{1,12,102},{1,8,137},{1,8,122},{0,12,1517},{0,9,737},{0,7,185},{0,6,794},{0,7,1982},{0,5,1062},{1,15,430}, 
+{1,12,38},{2,8,34},{1,8,58},{7,1,1514},{0,9,737},{0,7,185},{0,6,794},{11,1,1514},{0,6,794},{1,11,74},{1,11,74},{1,11,74},{1,7,74},{0,8,338},{0,6,98},{0,6,98},{0,4,97},{0,4,514},{0,4,241},{1,11,10},{1,11,10},{1,11,10},{1,7,10},{3,3,338},{0,6,98},{0,6,98},{0,4,97},{8,0,338},{0,4,97},{10,0,421},{0,13,9},{3,8,4}, +{0,8,16},{10,0,421},{17,0,421},{0,8,16},{0,6,433},{17,0,421},{0,6,433},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{1,18,629},{1,14,213},{2,9,354},{1,9,218},{0,15,1517},{0,11,630},{0,9,50},{0,7,670},{0,8,2198},{0,6,1109},{3,15,437},{2,13,41},{3,9,33},{2,9,53},{8,2,1514}, +{0,11,630},{0,9,50},{0,7,670},{12,2,1514},{0,7,670},{1,13,209},{1,13,209},{1,13,209},{1,8,212},{0,11,338},{0,8,41},{0,8,41},{0,5,50},{0,6,680},{0,5,275},{3,10,17},{3,10,17},{3,10,17},{2,8,17},{6,1,338},{0,8,41},{0,8,41},{0,5,50},{11,0,338},{0,5,50},{12,0,421},{0,15,1},{4,9,5},{0,9,1},{12,0,421},{20,0,421},{0,9,1}, +{0,7,445},{20,0,421},{0,7,445},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,7,1},{0,7,1},{0,7,1},{0,4,0},{0,3,106},{0,3,106},{2,19,821},{2,15,405},{2,11,570},{2,10,410},{0,18,1514},{0,13,577},{0,10,14},{0,8,602},{0,10,2462},{0,7,1175},{3,17,441},{3,14,41},{4,10,35},{3,10,53},{9,3,1514},{0,13,577},{0,10,14},{0,8,602},{18,0,1514}, +{0,8,602},{2,14,401},{2,14,401},{2,14,401},{2,9,404},{0,13,340},{0,10,13},{0,10,13},{0,6,29},{0,7,851},{0,6,353},{3,13,17},{3,13,17},{3,13,17},{3,9,17},{8,0,338},{0,10,13},{0,10,13},{0,6,29},{5,5,338},{0,6,29},{13,1,421},{1,16,4},{5,10,5},{1,10,1},{13,1,421},{21,1,421},{1,10,1},{0,8,433},{21,1,421},{0,8,433},{2,0,400}, +{2,0,400},{2,0,400},{2,0,400},{0,9,1},{0,9,1},{0,9,1},{0,6,4},{0,4,208},{0,4,208},{3,20,854},{3,16,437},{3,12,597},{3,11,443},{1,19,1515},{0,15,570},{1,11,15},{0,9,582},{0,11,2337},{0,9,933},{4,18,430},{4,15,29},{5,11,35},{4,11,46},{10,4,1514},{0,15,521},{1,11,14},{0,9,533},{19,1,1514},{0,9,533},{3,15,434},{3,15,434},{3,15,434}, 
+{3,10,437},{1,14,341},{1,11,14},{1,11,14},{1,7,30},{0,8,755},{0,7,222},{4,14,10},{4,14,10},{4,14,10},{4,10,10},{9,1,338},{0,12,2},{0,12,2},{0,7,26},{16,0,338},{0,7,26},{15,0,421},{3,16,4},{6,11,5},{2,11,1},{15,0,421},{25,0,421},{2,11,1},{0,9,433},{25,0,421},{0,9,433},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{1,10,2}, +{1,10,2},{1,10,2},{1,7,5},{0,6,157},{0,6,157},{4,21,866},{4,17,454},{4,13,609},{4,12,461},{2,20,1515},{1,16,570},{2,12,19},{1,10,582},{0,13,2214},{0,10,707},{5,19,430},{5,16,38},{6,12,34},{5,12,58},{14,0,1514},{0,16,458},{2,12,18},{0,10,482},{20,2,1514},{0,10,482},{4,16,445},{4,16,445},{4,16,445},{4,11,449},{2,15,341},{2,12,19},{2,12,19}, +{2,8,26},{0,10,635},{0,8,106},{5,15,10},{5,15,10},{5,15,10},{5,11,10},{10,2,338},{1,13,2},{1,13,2},{1,8,16},{17,1,338},{1,8,16},{16,1,421},{3,18,4},{7,12,4},{3,12,0},{16,1,421},{26,1,421},{3,12,0},{0,10,433},{26,1,421},{0,10,433},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{2,11,2},{2,11,2},{2,11,2},{2,7,10},{0,8,90}, +{0,8,90},{5,22,854},{5,18,438},{5,14,603},{5,13,443},{3,21,1517},{3,16,554},{3,13,21},{3,11,589},{0,15,2046},{0,11,535},{7,19,437},{6,17,41},{7,13,33},{6,13,53},{14,3,1514},{0,18,429},{3,13,21},{0,11,454},{26,0,1514},{0,11,454},{5,17,434},{5,17,434},{5,17,434},{5,12,437},{3,17,340},{3,13,20},{3,13,20},{3,9,29},{0,12,557},{0,10,49},{7,14,17}, +{7,14,17},{7,14,17},{6,12,17},{13,0,338},{2,14,4},{2,14,4},{1,10,10},{20,1,338},{1,10,10},{17,2,421},{4,19,1},{8,13,5},{4,13,1},{17,2,421},{29,1,421},{4,13,1},{0,11,445},{29,1,421},{0,11,445},{5,0,433},{5,0,433},{5,0,433},{5,0,433},{3,12,1},{3,12,1},{3,12,1},{3,9,4},{0,10,40},{0,10,40},{6,23,854},{6,19,438},{6,15,603}, +{6,14,443},{4,22,1515},{3,18,566},{4,14,15},{3,12,578},{0,16,1911},{0,12,458},{7,21,441},{7,18,41},{8,14,35},{7,14,53},{16,2,1514},{0,20,425},{4,14,14},{0,12,433},{27,1,1514},{0,12,433},{6,18,434},{6,18,434},{6,18,434},{6,13,437},{4,17,341},{4,14,14},{4,14,14},{4,10,30},{0,14,477},{0,11,35},{7,17,17},{7,17,17},{7,17,17},{7,13,17},{14,1,338}, 
+{3,15,4},{3,15,4},{2,11,10},{24,0,338},{2,11,10},{18,3,421},{5,20,4},{9,14,5},{5,14,1},{18,3,421},{30,2,421},{5,14,1},{0,12,433},{30,2,421},{0,12,433},{6,0,433},{6,0,433},{6,0,433},{6,0,433},{4,13,2},{4,13,2},{4,13,2},{4,10,5},{0,12,25},{0,12,25},{7,24,854},{7,20,437},{7,16,597},{7,15,443},{5,23,1515},{4,19,570},{5,15,15}, +{4,13,582},{0,18,1787},{0,13,442},{8,22,430},{8,19,29},{9,15,35},{8,15,46},{17,3,1514},{2,20,425},{5,15,14},{1,13,433},{28,2,1514},{1,13,433},{7,19,434},{7,19,434},{7,19,434},{7,14,437},{5,18,341},{5,15,14},{5,15,14},{5,11,30},{0,16,419},{1,12,45},{8,18,10},{8,18,10},{8,18,10},{8,14,10},{16,0,338},{4,16,2},{4,16,2},{3,12,17},{25,1,338}, +{3,12,17},{20,2,421},{7,20,4},{10,15,5},{6,15,1},{20,2,421},{31,3,421},{6,15,1},{0,13,433},{31,3,421},{0,13,433},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{5,14,2},{5,14,2},{5,14,2},{5,11,5},{0,13,9},{0,13,9},{8,25,866},{8,21,454},{8,17,609},{8,16,461},{6,24,1515},{5,20,570},{6,16,19},{5,14,582},{0,20,1686},{1,14,442},{9,23,430}, +{9,20,38},{10,16,34},{9,16,58},{18,4,1514},{3,21,425},{6,16,18},{2,14,433},{29,3,1514},{2,14,433},{8,20,445},{8,20,445},{8,20,445},{8,15,449},{6,19,341},{6,16,19},{6,16,19},{6,12,26},{0,17,372},{2,13,45},{9,19,10},{9,19,10},{9,19,10},{9,15,10},{17,1,338},{5,17,2},{5,17,2},{5,12,16},{26,2,338},{5,12,16},{23,0,421},{7,22,4},{11,16,4}, +{7,16,0},{23,0,421},{30,5,421},{7,16,0},{0,14,433},{30,5,421},{0,14,433},{8,0,445},{8,0,445},{8,0,445},{8,0,445},{6,15,2},{6,15,2},{6,15,2},{6,11,10},{0,15,5},{0,15,5},{9,26,854},{9,22,438},{9,18,603},{9,17,443},{7,25,1517},{7,20,554},{7,17,21},{7,15,589},{0,22,1614},{2,15,462},{11,23,437},{10,21,41},{11,17,33},{10,17,53},{22,0,1514}, 
+{3,23,422},{7,17,21},{3,15,446},{30,4,1514},{3,15,446},{9,21,434},{9,21,434},{9,21,434},{9,16,437},{7,21,340},{7,17,20},{7,17,20},{7,13,29},{0,19,347},{3,14,46},{11,18,17},{11,18,17},{11,18,17},{10,16,17},{17,4,338},{6,18,4},{6,18,4},{5,14,10},{29,2,338},{5,14,10},{24,1,421},{8,23,1},{12,17,5},{8,17,1},{24,1,421},{31,6,421},{8,17,1}, +{0,15,445},{31,6,421},{0,15,445},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{7,16,1},{7,16,1},{7,16,1},{7,13,4},{2,16,8},{2,16,8},{10,27,854},{10,23,438},{10,19,603},{10,18,443},{8,26,1515},{7,22,566},{8,18,15},{7,16,578},{0,23,1566},{3,16,443},{11,25,441},{11,22,41},{12,18,35},{11,18,53},{23,1,1514},{4,24,425},{8,18,14},{4,16,433},{31,5,1514}, +{4,16,433},{10,22,434},{10,22,434},{10,22,434},{10,17,437},{8,21,341},{8,18,14},{8,18,14},{8,14,30},{1,20,341},{4,15,35},{11,21,17},{11,21,17},{11,21,17},{11,17,17},{18,5,338},{7,19,4},{7,19,4},{6,15,10},{30,3,338},{6,15,10},{25,2,421},{9,24,4},{13,18,5},{9,18,1},{25,2,421},{29,9,421},{9,18,1},{0,16,433},{29,9,421},{0,16,433},{10,0,433}, +{10,0,433},{10,0,433},{10,0,433},{8,17,2},{8,17,2},{8,17,2},{8,14,5},{3,17,8},{3,17,8},{11,28,854},{11,24,437},{11,20,597},{11,19,443},{9,27,1515},{8,23,570},{9,19,15},{8,17,582},{0,25,1533},{4,17,442},{12,26,430},{12,23,29},{13,19,35},{12,19,46},{24,2,1514},{6,24,425},{9,19,14},{5,17,433},{27,9,1514},{5,17,433},{11,23,434},{11,23,434},{11,23,434}, +{11,18,437},{9,22,341},{9,19,14},{9,19,14},{9,15,30},{2,21,341},{5,16,45},{12,22,10},{12,22,10},{12,22,10},{12,18,10},{20,4,338},{8,20,2},{8,20,2},{7,16,17},{29,5,338},{7,16,17},{26,3,421},{11,24,4},{14,19,5},{10,19,1},{26,3,421},{30,10,421},{10,19,1},{0,17,433},{30,10,421},{0,17,433},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{9,18,2}, 
+{9,18,2},{9,18,2},{9,15,5},{4,17,9},{4,17,9},{12,29,866},{12,25,454},{12,21,609},{12,20,461},{10,28,1515},{9,24,570},{10,20,19},{9,18,582},{0,27,1521},{5,18,442},{13,27,430},{13,24,38},{14,20,34},{13,20,58},{25,3,1514},{7,25,425},{10,20,18},{6,18,433},{28,10,1514},{6,18,433},{12,24,445},{12,24,445},{12,24,445},{12,19,449},{10,23,341},{10,20,19},{10,20,19}, +{10,16,26},{3,22,341},{6,17,45},{13,23,10},{13,23,10},{13,23,10},{13,19,10},{24,0,338},{9,21,2},{9,21,2},{9,16,16},{30,6,338},{9,16,16},{28,2,421},{11,26,4},{15,20,4},{11,20,0},{28,2,421},{31,11,421},{11,20,0},{0,18,433},{31,11,421},{0,18,433},{12,0,445},{12,0,445},{12,0,445},{12,0,445},{10,19,2},{10,19,2},{10,19,2},{10,15,10},{4,19,5}, +{4,19,5},{13,30,854},{13,26,438},{13,22,603},{13,21,443},{11,29,1517},{11,24,554},{11,21,21},{11,19,589},{1,28,1518},{6,19,462},{15,27,437},{14,25,41},{15,21,33},{14,21,53},{26,4,1514},{7,27,422},{11,21,21},{7,19,446},{31,10,1514},{7,19,446},{13,25,434},{13,25,434},{13,25,434},{13,20,437},{11,25,340},{11,21,20},{11,21,20},{11,17,29},{3,24,339},{7,18,46},{15,22,17}, +{15,22,17},{15,22,17},{14,20,17},{24,3,338},{10,22,4},{10,22,4},{9,18,10},{28,9,338},{9,18,10},{31,0,421},{12,27,1},{16,21,5},{12,21,1},{31,0,421},{30,13,421},{12,21,1},{0,19,445},{30,13,421},{0,19,445},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{11,20,1},{11,20,1},{11,20,1},{11,17,4},{6,20,8},{6,20,8},{14,31,854},{14,27,438},{14,23,603}, +{14,22,443},{12,30,1515},{11,26,566},{12,22,15},{11,20,578},{2,29,1518},{7,20,443},{15,29,441},{15,26,41},{16,22,35},{15,22,53},{30,0,1514},{8,28,425},{12,22,14},{8,20,433},{30,12,1514},{8,20,433},{14,26,434},{14,26,434},{14,26,434},{14,21,437},{12,25,341},{12,22,14},{12,22,14},{12,18,30},{5,24,341},{8,19,35},{15,25,17},{15,25,17},{15,25,17},{15,21,17},{22,9,338}, 
+{11,23,4},{11,23,4},{10,19,10},{29,10,338},{10,19,10},{31,3,421},{13,28,4},{17,22,5},{13,22,1},{31,3,421},{31,14,421},{13,22,1},{0,20,433},{31,14,421},{0,20,433},{14,0,433},{14,0,433},{14,0,433},{14,0,433},{12,21,2},{12,21,2},{12,21,2},{12,18,5},{7,21,8},{7,21,8},{15,31,878},{15,28,437},{15,24,597},{15,23,443},{13,31,1515},{12,27,570},{13,23,15}, +{12,21,582},{3,30,1518},{8,21,442},{16,30,430},{16,27,29},{17,23,35},{16,23,46},{31,1,1514},{10,28,425},{13,23,14},{9,21,433},{31,13,1514},{9,21,433},{15,27,434},{15,27,434},{15,27,434},{15,22,437},{13,26,341},{13,23,14},{13,23,14},{13,19,30},{6,25,341},{9,20,45},{16,26,10},{16,26,10},{16,26,10},{16,22,10},{24,8,338},{12,24,2},{12,24,2},{11,20,17},{30,11,338}, +{11,20,17},{30,7,421},{15,28,4},{18,23,5},{14,23,1},{30,7,421},{30,16,421},{14,23,1},{0,21,433},{30,16,421},{0,21,433},{15,0,433},{15,0,433},{15,0,433},{15,0,433},{13,22,2},{13,22,2},{13,22,2},{13,19,5},{8,21,9},{8,21,9},{16,31,926},{16,29,454},{16,25,609},{16,24,461},{14,31,1542},{13,28,570},{14,24,19},{13,22,582},{4,31,1521},{9,22,442},{17,31,430}, +{17,28,38},{18,24,34},{17,24,58},{29,7,1514},{11,29,425},{14,24,18},{10,22,433},{27,17,1514},{10,22,433},{16,28,445},{16,28,445},{16,28,445},{16,23,449},{14,27,341},{14,24,19},{14,24,19},{14,20,26},{7,26,341},{10,21,45},{17,27,10},{17,27,10},{17,27,10},{17,23,10},{28,4,338},{13,25,2},{13,25,2},{13,20,16},{29,13,338},{13,20,16},{31,8,421},{15,30,4},{19,24,4}, +{15,24,0},{31,8,421},{31,17,421},{15,24,0},{0,22,433},{31,17,421},{0,22,433},{16,0,445},{16,0,445},{16,0,445},{16,0,445},{14,23,2},{14,23,2},{14,23,2},{14,19,10},{8,23,5},{8,23,5},{17,31,1034},{17,30,438},{17,26,603},{17,25,443},{16,31,1598},{15,28,554},{15,25,21},{15,23,589},{6,31,1535},{10,23,462},{19,31,437},{18,29,41},{19,25,33},{18,25,53},{30,8,1514}, 
+{11,31,422},{15,25,21},{11,23,446},{28,18,1514},{11,23,446},{17,29,434},{17,29,434},{17,29,434},{17,24,437},{15,29,340},{15,25,20},{15,25,20},{15,21,29},{7,28,339},{11,22,46},{19,26,17},{19,26,17},{19,26,17},{18,24,17},{31,2,338},{14,26,4},{14,26,4},{13,22,10},{27,16,338},{13,22,10},{31,11,421},{16,31,1},{20,25,5},{16,25,1},{31,11,421},{31,19,421},{16,25,1}, +{0,23,445},{31,19,421},{0,23,445},{17,0,433},{17,0,433},{17,0,433},{17,0,433},{15,24,1},{15,24,1},{15,24,1},{15,21,4},{10,24,8},{10,24,8},{18,31,1166},{18,31,438},{18,27,603},{18,26,443},{17,31,1643},{15,30,566},{16,26,15},{15,24,578},{8,31,1566},{11,24,443},{20,31,458},{19,30,41},{20,26,35},{19,26,53},{31,9,1514},{13,31,429},{16,26,14},{12,24,433},{29,19,1514}, +{12,24,433},{18,30,434},{18,30,434},{18,30,434},{18,25,437},{16,29,341},{16,26,14},{16,26,14},{16,22,30},{9,28,341},{12,23,35},{19,29,17},{19,29,17},{19,29,17},{19,25,17},{26,13,338},{15,27,4},{15,27,4},{14,23,10},{21,21,338},{14,23,10},{30,15,421},{18,31,5},{21,26,5},{17,26,1},{30,15,421},{30,21,421},{17,26,1},{0,24,433},{30,21,421},{0,24,433},{18,0,433}, +{18,0,433},{18,0,433},{18,0,433},{16,25,2},{16,25,2},{16,25,2},{16,22,5},{11,25,8},{11,25,8},{20,31,1326},{19,31,470},{19,28,597},{19,27,443},{18,31,1742},{16,31,570},{17,27,15},{16,25,582},{10,31,1638},{12,25,442},{21,31,506},{20,31,29},{21,27,35},{20,27,46},{29,15,1514},{15,31,461},{17,27,14},{13,25,433},{30,20,1514},{13,25,433},{19,31,434},{19,31,434},{19,31,434}, +{19,26,437},{17,30,341},{17,27,14},{17,27,14},{17,23,30},{10,29,341},{13,24,45},{20,30,10},{20,30,10},{20,30,10},{20,26,10},{28,12,338},{16,28,2},{16,28,2},{15,24,17},{22,22,338},{15,24,17},{31,16,421},{20,31,20},{22,27,5},{18,27,1},{31,16,421},{31,22,421},{18,27,1},{0,25,433},{31,22,421},{0,25,433},{19,0,433},{19,0,433},{19,0,433},{19,0,433},{17,26,2}, 
+{17,26,2},{17,26,2},{17,23,5},{12,25,9},{12,25,9},{21,31,1470},{20,31,561},{20,29,609},{20,28,461},{19,31,1895},{18,31,578},{18,28,19},{17,26,582},{12,31,1761},{13,26,442},{22,31,590},{21,31,59},{22,28,34},{21,28,58},{30,16,1514},{17,31,530},{18,28,18},{14,26,433},{31,21,1514},{14,26,433},{20,31,461},{20,31,461},{20,31,461},{20,27,449},{18,31,341},{18,28,19},{18,28,19}, +{18,24,26},{11,30,341},{14,25,45},{21,31,10},{21,31,10},{21,31,10},{21,27,10},{29,13,338},{17,29,2},{17,29,2},{17,24,16},{28,20,338},{17,24,16},{31,19,421},{21,31,50},{23,28,4},{19,28,0},{31,19,421},{30,24,421},{19,28,0},{0,26,433},{30,24,421},{0,26,433},{20,0,445},{20,0,445},{20,0,445},{20,0,445},{18,27,2},{18,27,2},{18,27,2},{18,23,10},{12,27,5}, +{12,27,5},{22,31,1674},{21,31,753},{21,30,603},{21,29,443},{21,31,2046},{19,31,629},{19,29,21},{19,27,589},{15,31,1917},{14,27,462},{24,31,674},{23,31,120},{23,29,33},{22,29,53},{30,19,1514},{19,31,629},{19,29,21},{15,27,446},{27,25,1514},{15,27,446},{21,31,497},{21,31,497},{21,31,497},{21,28,437},{19,31,388},{19,29,20},{19,29,20},{19,25,29},{12,31,347},{15,26,46},{23,30,17}, +{23,30,17},{23,30,17},{22,28,17},{29,16,338},{18,30,4},{18,30,4},{17,26,10},{31,20,338},{17,26,10},{30,23,421},{23,31,104},{24,29,5},{20,29,1},{30,23,421},{30,26,421},{20,29,1},{0,27,445},{30,26,421},{0,27,445},{21,0,433},{21,0,433},{21,0,433},{21,0,433},{19,28,1},{19,28,1},{19,28,1},{19,25,4},{14,28,8},{14,28,8},{23,31,1902},{22,31,995},{22,31,603}, +{22,30,443},{22,31,2235},{20,31,759},{20,30,15},{19,28,578},{17,31,2118},{15,28,443},{25,31,770},{24,31,250},{24,30,35},{23,30,53},{29,23,1514},{21,31,701},{20,30,14},{16,28,433},{28,26,1514},{16,28,433},{22,31,554},{22,31,554},{22,31,554},{22,29,437},{21,31,437},{20,30,14},{20,30,14},{20,26,30},{14,31,379},{16,27,35},{24,31,25},{24,31,25},{24,31,25},{23,29,17},{30,17,338}, 
+{19,31,4},{19,31,4},{18,27,10},{25,25,338},{18,27,10},{31,24,421},{25,31,169},{25,30,5},{21,30,1},{31,24,421},{31,27,421},{21,30,1},{0,28,433},{31,27,421},{0,28,433},{22,0,433},{22,0,433},{22,0,433},{22,0,433},{20,29,2},{20,29,2},{20,29,2},{20,26,5},{15,29,8},{15,29,8},{24,31,2045},{24,31,1233},{23,31,629},{23,31,442},{24,31,2360},{22,31,914},{21,31,14}, +{20,29,549},{19,31,2241},{16,29,409},{26,31,849},{25,31,395},{25,31,34},{24,31,45},{30,24,1459},{23,31,778},{21,31,13},{17,29,400},{29,27,1459},{17,29,400},{23,31,629},{23,31,629},{23,31,629},{23,30,437},{22,31,491},{21,31,14},{21,31,14},{21,27,30},{16,31,446},{17,28,45},{25,31,34},{25,31,34},{25,31,34},{24,30,10},{31,18,338},{21,31,13},{21,31,13},{19,28,17},{26,26,338}, +{19,28,17},{30,28,392},{27,31,218},{26,31,4},{22,31,0},{30,28,392},{28,30,392},{22,31,0},{0,29,400},{28,30,392},{0,29,400},{23,0,433},{23,0,433},{23,0,433},{23,0,433},{21,30,2},{21,30,2},{21,30,2},{21,27,5},{16,29,9},{16,29,9},{25,31,1767},{25,31,1167},{24,31,701},{24,31,449},{24,31,1976},{23,31,747},{22,31,66},{22,29,337},{20,31,1820},{17,30,217},{27,31,611}, +{26,31,317},{26,31,61},{25,31,10},{31,24,1064},{25,31,587},{23,31,41},{18,30,208},{24,31,1064},{18,30,208},{24,31,701},{24,31,701},{24,31,701},{24,31,449},{23,31,581},{22,31,66},{22,31,66},{22,28,26},{18,31,530},{18,29,45},{26,31,61},{26,31,61},{26,31,61},{25,31,10},{30,22,338},{23,31,41},{23,31,41},{21,28,16},{27,27,338},{21,28,16},{29,31,200},{28,31,106},{27,31,1}, +{25,31,1},{29,31,200},{31,29,200},{25,31,1},{0,30,208},{31,29,200},{0,30,208},{24,0,445},{24,0,445},{24,0,445},{24,0,445},{22,31,2},{22,31,2},{22,31,2},{22,27,10},{16,31,5},{16,31,5},{26,31,1542},{26,31,1122},{25,31,833},{25,31,497},{26,31,1647},{24,31,687},{24,31,203},{23,30,122},{22,31,1515},{19,30,110},{28,31,410},{28,31,266},{27,31,116},{27,31,20},{31,26,722}, 
+{26,31,402},{25,31,100},{21,30,74},{29,29,722},{21,30,74},{25,31,833},{25,31,833},{25,31,833},{25,31,497},{24,31,707},{24,31,203},{24,31,203},{23,29,29},{20,31,619},{19,30,46},{27,31,116},{27,31,116},{27,31,116},{27,31,20},{30,25,338},{25,31,100},{25,31,100},{21,30,10},{30,27,338},{21,30,10},{31,29,61},{29,31,37},{29,31,1},{27,31,4},{31,29,61},{31,30,61},{27,31,4}, +{0,30,73},{31,30,61},{0,30,73},{25,0,433},{25,0,433},{25,0,433},{25,0,433},{23,31,25},{23,31,25},{23,31,25},{23,29,4},{18,31,17},{18,31,17},{27,31,1406},{27,31,1134},{26,31,962},{26,31,602},{27,31,1454},{25,31,702},{25,31,341},{24,31,59},{24,31,1378},{20,31,35},{29,31,318},{28,31,250},{28,31,169},{28,31,61},{30,29,509},{28,31,313},{27,31,164},{22,31,10},{29,30,509}, +{22,31,10},{26,31,962},{26,31,962},{26,31,962},{26,31,602},{26,31,827},{25,31,341},{25,31,341},{24,30,30},{22,31,747},{20,31,35},{28,31,169},{28,31,169},{28,31,169},{28,31,61},{31,26,338},{27,31,164},{27,31,164},{22,31,10},{29,29,338},{22,31,10},{31,30,9},{31,31,9},{30,31,4},{30,31,0},{31,30,9},{30,31,9},{30,31,0},{0,31,9},{30,31,9},{0,31,9},{26,0,433}, +{26,0,433},{26,0,433},{26,0,433},{24,31,50},{24,31,50},{24,31,50},{24,30,5},{20,31,26},{20,31,26},{28,31,1135},{28,31,991},{27,31,874},{27,31,602},{28,31,1162},{26,31,618},{26,31,362},{25,31,5},{25,31,1087},{22,31,58},{30,31,219},{29,31,161},{29,31,125},{29,31,61},{31,29,297},{29,31,193},{28,31,117},{24,31,1},{30,30,297},{24,31,1},{27,31,874},{27,31,874},{27,31,874}, +{27,31,602},{27,31,730},{26,31,362},{26,31,362},{25,31,5},{24,31,681},{22,31,58},{29,31,125},{29,31,125},{29,31,125},{29,31,61},{30,29,221},{28,31,117},{28,31,117},{24,31,1},{31,29,221},{24,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{27,0,433},{27,0,433},{27,0,433},{27,0,433},{25,31,101}, 
+{25,31,101},{25,31,101},{25,31,5},{22,31,58},{22,31,58},{29,31,885},{28,31,751},{28,31,670},{28,31,526},{28,31,778},{27,31,483},{27,31,314},{26,31,10},{26,31,777},{24,31,117},{30,31,75},{30,31,59},{30,31,50},{30,31,34},{30,31,114},{29,31,81},{29,31,45},{27,31,0},{31,30,114},{27,31,0},{28,31,670},{28,31,670},{28,31,670},{28,31,526},{28,31,553},{27,31,314},{27,31,314}, +{26,31,10},{25,31,518},{24,31,117},{30,31,50},{30,31,50},{30,31,50},{30,31,34},{31,29,85},{29,31,45},{29,31,45},{27,31,0},{29,31,85},{27,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{28,0,445},{28,0,445},{28,0,445},{28,0,445},{27,31,145},{27,31,145},{27,31,145},{26,31,10},{24,31,117}, +{24,31,117},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{0,9,1899},{0,7,1355},{0,6,589},{0,4,1354},{0,5,2124},{0,4,1498},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{4,2,1896},{0,7,1355},{0,6,589},{0,4,1354},{7,1,1896},{0,4,1354},{0,6,0},{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{0,2,200},{0,1,134},{0,6,0}, +{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{3,0,162},{0,2,104},{6,3,882},{0,10,225},{0,7,18},{0,6,265},{6,3,882},{8,3,882},{0,6,265},{0,5,890},{8,3,882},{0,5,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,15,884},{0,12,170},{0,8,8}, +{0,7,202},{0,10,2360},{0,8,1530},{0,7,643},{0,5,1579},{0,6,2684},{0,5,1804},{0,15,884},{0,12,170},{0,8,8},{0,7,202},{5,2,2355},{0,8,1530},{0,7,643},{0,5,1579},{7,2,2355},{0,5,1579},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{0,4,340},{0,4,180},{0,4,180},{0,2,200},{0,2,392},{0,2,236},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{2,1,338}, +{0,4,180},{0,4,180},{0,2,200},{1,2,338},{0,2,200},{8,2,882},{0,12,170},{0,8,8},{0,7,202},{8,2,882},{12,2,882},{0,7,202},{0,6,890},{12,2,882},{0,6,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{0,12,2899},{0,9,1773},{0,8,725}, 
+{0,6,1854},{0,7,3348},{0,5,2124},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{2,9,2899},{0,9,1773},{0,8,725},{0,6,1854},{12,0,2899},{0,6,1854},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{0,6,580},{0,5,306},{0,5,306},{0,3,325},{0,3,667},{0,3,406},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{1,4,578},{0,5,306},{0,5,306},{0,3,325},{4,1,578}, +{0,3,325},{9,3,882},{0,14,106},{1,9,8},{0,9,148},{9,3,882},{18,0,882},{0,9,148},{0,7,890},{18,0,882},{0,7,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,21,920},{0,16,89},{1,11,120},{0,10,121},{0,14,3051},{0,11,1709},{0,9,557},{0,7,1795},{0,8,3651},{0,6,2174},{1,19,886}, +{0,16,89},{1,11,56},{0,10,121},{8,1,3048},{0,11,1709},{0,9,557},{0,7,1795},{6,5,3048},{0,7,1795},{0,14,37},{0,14,37},{0,14,37},{0,8,37},{0,8,648},{0,7,274},{0,7,274},{0,4,277},{0,4,824},{0,4,421},{1,12,4},{1,12,4},{1,12,4},{1,8,8},{3,3,648},{0,7,274},{0,7,274},{0,4,277},{8,0,648},{0,4,277},{10,4,882},{0,16,53},{2,10,8}, +{0,10,85},{10,4,882},{19,1,882},{0,10,85},{0,8,900},{19,1,882},{0,8,900},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{1,22,995},{1,17,158},{1,12,230},{1,11,186},{0,17,3051},{0,13,1579},{0,10,346},{0,8,1630},{0,9,3924},{0,7,2173},{2,20,885},{2,16,90},{2,12,53},{1,11,122},{8,4,3048}, +{0,13,1579},{0,10,346},{0,8,1630},{14,2,3048},{0,8,1630},{1,15,113},{1,15,113},{1,15,113},{1,9,117},{0,11,648},{0,9,169},{0,9,169},{0,5,200},{0,6,990},{0,5,425},{2,13,2},{2,13,2},{2,13,2},{2,9,2},{6,1,648},{0,9,169},{0,9,169},{0,5,200},{11,0,648},{0,5,200},{13,2,882},{0,18,17},{3,11,18},{0,11,34},{13,2,882},{22,1,882},{0,11,34}, +{0,9,890},{22,1,882},{0,9,890},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{1,24,1173},{1,19,306},{2,13,422},{1,12,318},{0,20,3048},{0,15,1443},{0,12,204},{0,9,1483},{0,11,4212},{0,8,2174},{3,21,885},{3,17,90},{3,13,53},{2,12,117},{12,0,3048},{0,15,1443},{0,12,204},{0,9,1483},{20,0,3048}, 
+{0,9,1483},{1,18,290},{1,18,290},{1,18,290},{1,11,289},{0,13,650},{0,11,109},{0,11,109},{0,7,148},{0,7,1161},{0,6,473},{3,14,2},{3,14,2},{3,14,2},{3,10,2},{8,0,648},{0,11,109},{0,11,109},{0,7,148},{5,5,648},{0,7,148},{14,3,882},{0,20,8},{4,12,8},{0,12,8},{14,3,882},{26,0,882},{0,12,8},{0,10,890},{26,0,882},{0,10,890},{1,0,289}, +{1,0,289},{1,0,289},{1,0,289},{0,8,1},{0,8,1},{0,8,1},{0,5,4},{0,3,145},{0,3,145},{2,25,1365},{2,20,497},{2,14,713},{2,13,510},{0,23,3051},{0,16,1278},{0,13,86},{0,10,1354},{0,12,4609},{0,9,2228},{4,22,886},{3,19,94},{4,14,56},{3,13,117},{13,1,3048},{0,16,1278},{0,13,86},{0,10,1354},{21,1,3048},{0,10,1354},{2,19,482},{2,19,482},{2,19,482}, +{2,12,481},{0,16,648},{0,12,72},{0,12,72},{0,8,101},{0,8,1352},{0,7,557},{4,15,5},{4,15,5},{4,15,5},{4,11,5},{9,1,648},{0,12,72},{0,12,72},{0,8,101},{16,0,648},{0,8,101},{16,2,882},{1,21,8},{5,13,8},{0,13,5},{16,2,882},{27,1,882},{0,13,5},{0,11,890},{27,1,882},{0,11,890},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{0,10,1}, +{0,10,1},{0,10,1},{0,6,1},{0,5,261},{0,5,261},{2,28,1667},{2,22,793},{3,15,1033},{2,14,793},{0,25,3048},{0,18,1170},{0,14,36},{0,11,1243},{0,14,5005},{0,10,2318},{5,23,886},{4,20,89},{5,15,56},{4,14,121},{14,2,3048},{0,18,1170},{0,14,36},{0,11,1243},{25,0,3048},{0,11,1243},{2,21,786},{2,21,786},{2,21,786},{2,14,789},{0,19,650},{0,14,32},{0,14,32}, +{0,9,50},{0,9,1619},{0,8,661},{5,16,4},{5,16,4},{5,16,4},{5,12,8},{10,2,648},{0,14,32},{0,14,32},{0,9,50},{17,1,648},{0,9,50},{17,3,882},{2,22,8},{6,14,8},{1,14,5},{17,3,882},{28,2,882},{1,14,5},{0,12,900},{28,2,882},{0,12,900},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{0,13,1},{0,13,1},{0,13,1},{0,8,4},{0,6,405}, 
+{0,6,405},{3,29,1784},{3,23,902},{4,16,1186},{3,15,910},{1,26,3055},{0,20,1095},{1,15,47},{0,12,1159},{0,16,4945},{0,12,2084},{6,24,885},{6,20,90},{6,16,53},{5,15,122},{12,8,3048},{0,20,1059},{0,16,41},{0,12,1123},{28,0,3048},{0,12,1123},{3,23,901},{3,23,901},{3,23,901},{3,15,901},{1,20,652},{1,15,38},{1,15,38},{1,10,44},{0,11,1577},{0,9,545},{6,17,2}, +{6,17,2},{6,17,2},{6,13,2},{13,0,648},{0,16,5},{0,16,5},{0,10,13},{20,1,648},{0,10,13},{17,6,882},{3,23,2},{7,15,18},{3,15,10},{17,6,882},{31,2,882},{3,15,10},{0,13,890},{31,2,882},{0,13,890},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{1,14,4},{1,14,4},{1,14,4},{1,9,5},{0,8,373},{0,8,373},{4,30,1772},{4,24,898},{5,17,1186}, +{4,16,898},{2,27,3055},{1,21,1095},{2,16,33},{1,13,1159},{0,17,4639},{0,13,1730},{7,25,885},{7,21,90},{7,17,53},{6,16,117},{16,4,3048},{0,21,996},{2,16,29},{0,13,1054},{29,1,3048},{0,13,1054},{4,23,891},{4,23,891},{4,23,891},{4,16,894},{2,21,652},{2,16,29},{2,16,29},{2,11,44},{0,13,1452},{0,11,365},{7,18,2},{7,18,2},{7,18,2},{7,14,2},{14,1,648}, +{0,18,1},{0,18,1},{0,11,4},{24,0,648},{0,11,4},{22,0,882},{4,24,8},{8,16,8},{3,16,5},{22,0,882},{30,4,882},{3,16,5},{0,14,890},{30,4,882},{0,14,890},{4,0,890},{4,0,890},{4,0,890},{4,0,890},{2,15,4},{2,15,4},{2,15,4},{2,10,5},{0,9,269},{0,9,269},{5,31,1772},{5,25,898},{6,18,1186},{5,17,898},{3,28,3055},{2,22,1095},{3,17,33}, +{2,14,1159},{0,19,4419},{0,14,1444},{8,26,886},{7,23,94},{8,18,56},{7,17,117},{17,5,3048},{0,23,936},{3,17,29},{0,14,1003},{30,2,3048},{0,14,1003},{5,24,891},{5,24,891},{5,24,891},{5,17,894},{3,22,652},{3,17,29},{3,17,29},{3,12,41},{0,15,1296},{0,12,235},{8,19,5},{8,19,5},{8,19,5},{8,15,5},{16,0,648},{1,19,1},{1,19,1},{1,12,1},{25,1,648}, 
+{1,12,1},{23,1,882},{5,25,8},{9,17,8},{4,17,5},{23,1,882},{31,5,882},{4,17,5},{0,15,890},{31,5,882},{0,15,890},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{3,16,4},{3,16,4},{3,16,4},{3,11,5},{0,11,185},{0,11,185},{6,31,1790},{6,26,898},{7,19,1186},{6,18,898},{4,29,3057},{3,23,1095},{4,18,45},{3,15,1159},{0,20,4156},{0,15,1226},{9,27,886}, +{8,24,89},{9,19,56},{8,18,121},{18,6,3048},{0,25,909},{4,18,36},{0,15,970},{31,3,3048},{0,15,970},{6,25,891},{6,25,891},{6,25,891},{6,18,894},{4,23,659},{4,18,41},{4,18,41},{3,13,46},{0,16,1137},{0,13,137},{9,20,4},{9,20,4},{9,20,4},{9,16,8},{17,1,648},{2,20,2},{2,20,2},{2,13,1},{26,2,648},{2,13,1},{24,2,882},{6,26,8},{10,18,8}, +{5,18,5},{24,2,882},{27,9,882},{5,18,5},{0,16,900},{27,9,882},{0,16,900},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{4,17,10},{4,17,10},{4,17,10},{4,12,13},{0,13,136},{0,13,136},{8,31,1844},{7,27,902},{8,20,1186},{7,19,910},{5,30,3055},{4,24,1095},{5,19,47},{4,16,1159},{0,22,3940},{0,16,1055},{10,28,885},{10,24,90},{10,20,53},{9,19,122},{22,2,3048}, +{0,27,886},{4,20,41},{0,17,926},{22,10,3048},{0,17,926},{7,27,901},{7,27,901},{7,27,901},{7,19,901},{5,24,652},{5,19,38},{5,19,38},{5,14,44},{0,18,1002},{0,15,110},{10,21,2},{10,21,2},{10,21,2},{10,17,2},{17,4,648},{3,21,4},{3,21,4},{3,14,5},{29,2,648},{3,14,5},{27,0,882},{7,27,2},{11,19,18},{7,19,10},{27,0,882},{30,9,882},{7,19,10}, +{0,17,890},{30,9,882},{0,17,890},{7,0,900},{7,0,900},{7,0,900},{7,0,900},{5,18,4},{5,18,4},{5,18,4},{5,13,5},{0,15,74},{0,15,74},{9,31,1886},{8,28,898},{9,21,1186},{8,20,898},{6,31,3055},{5,25,1095},{6,20,33},{5,17,1159},{0,23,3820},{0,18,963},{11,29,885},{11,25,90},{11,21,53},{10,20,117},{23,3,3048},{1,28,888},{6,20,29},{0,18,899},{28,8,3048}, 
+{0,18,899},{8,27,891},{8,27,891},{8,27,891},{8,20,894},{6,25,652},{6,20,29},{6,20,29},{6,15,44},{0,20,876},{0,16,102},{11,22,2},{11,22,2},{11,22,2},{11,18,2},{18,5,648},{4,22,1},{4,22,1},{4,15,4},{30,3,648},{4,15,4},{26,4,882},{8,28,8},{12,20,8},{7,20,5},{26,4,882},{31,10,882},{7,20,5},{0,18,890},{31,10,882},{0,18,890},{8,0,890}, +{8,0,890},{8,0,890},{8,0,890},{6,19,4},{6,19,4},{6,19,4},{6,14,5},{0,17,29},{0,17,29},{10,31,1964},{9,29,898},{10,22,1186},{9,21,898},{7,31,3100},{6,26,1095},{7,21,33},{6,18,1159},{0,25,3679},{0,19,899},{12,30,886},{11,27,94},{12,22,56},{11,21,117},{21,9,3048},{2,29,888},{7,21,29},{0,19,890},{29,9,3048},{0,19,890},{9,28,891},{9,28,891},{9,28,891}, +{9,21,894},{7,26,652},{7,21,29},{7,21,29},{7,16,41},{0,22,800},{2,16,98},{12,23,5},{12,23,5},{12,23,5},{12,19,5},{20,4,648},{5,23,1},{5,23,1},{5,16,1},{29,5,648},{5,16,1},{30,0,882},{9,29,8},{13,21,8},{8,21,5},{30,0,882},{30,12,882},{8,21,5},{0,19,890},{30,12,882},{0,19,890},{9,0,890},{9,0,890},{9,0,890},{9,0,890},{7,20,4}, +{7,20,4},{7,20,4},{7,15,5},{0,19,9},{0,19,9},{11,31,2078},{10,30,898},{11,23,1186},{10,22,898},{9,31,3181},{7,27,1095},{8,22,45},{7,19,1159},{0,27,3523},{0,20,908},{13,31,886},{12,28,89},{13,23,56},{12,22,121},{22,10,3048},{3,30,888},{8,22,36},{1,20,901},{30,10,3048},{1,20,901},{10,29,891},{10,29,891},{10,29,891},{10,22,894},{8,27,659},{8,22,41},{8,22,41}, +{7,17,46},{0,23,747},{3,17,98},{13,24,4},{13,24,4},{13,24,4},{13,20,8},{24,0,648},{6,24,2},{6,24,2},{6,17,1},{30,6,648},{6,17,1},{31,1,882},{10,30,8},{14,22,8},{9,22,5},{31,1,882},{31,13,882},{9,22,5},{0,20,900},{31,13,882},{0,20,900},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{8,21,10},{8,21,10},{8,21,10},{8,16,13},{0,20,8}, 
+{0,20,8},{12,31,2228},{11,31,902},{12,24,1186},{11,23,910},{10,31,3256},{8,28,1095},{9,23,47},{8,20,1159},{0,29,3364},{2,21,894},{14,31,915},{14,28,90},{14,24,53},{13,23,122},{29,1,3048},{4,31,886},{8,24,41},{2,21,890},{26,14,3048},{2,21,890},{11,31,901},{11,31,901},{11,31,901},{11,23,901},{9,28,652},{9,23,38},{9,23,38},{9,18,44},{0,25,705},{3,19,101},{14,25,2}, +{14,25,2},{14,25,2},{14,21,2},{24,3,648},{7,25,4},{7,25,4},{7,18,5},{28,9,648},{7,18,5},{31,4,882},{11,31,2},{15,23,18},{11,23,10},{31,4,882},{24,19,882},{11,23,10},{0,21,890},{24,19,882},{0,21,890},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{9,22,4},{9,22,4},{9,22,4},{9,17,5},{2,21,4},{2,21,4},{13,31,2414},{12,31,907},{13,25,1186}, +{12,24,898},{11,31,3391},{9,29,1095},{10,24,33},{9,21,1159},{0,31,3276},{3,22,894},{15,31,981},{15,29,90},{15,25,53},{14,24,117},{30,2,3048},{6,31,906},{10,24,29},{3,22,890},{27,15,3048},{3,22,890},{12,31,891},{12,31,891},{12,31,891},{12,24,894},{10,29,652},{10,24,29},{10,24,29},{10,19,44},{0,27,665},{4,20,102},{15,26,2},{15,26,2},{15,26,2},{15,22,2},{22,9,648}, +{8,26,1},{8,26,1},{8,19,4},{29,10,648},{8,19,4},{30,8,882},{12,31,17},{16,24,8},{11,24,5},{30,8,882},{28,18,882},{11,24,5},{0,22,890},{28,18,882},{0,22,890},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{10,23,4},{10,23,4},{10,23,4},{10,18,5},{3,22,4},{3,22,4},{15,31,2606},{13,31,987},{14,26,1186},{13,25,898},{13,31,3517},{10,30,1095},{11,25,33}, +{10,22,1159},{1,31,3300},{4,23,899},{17,31,1014},{15,31,94},{16,26,56},{15,25,117},{31,3,3048},{8,31,936},{11,25,29},{4,23,890},{23,19,3048},{4,23,890},{13,31,906},{13,31,906},{13,31,906},{13,25,894},{11,30,652},{11,25,29},{11,25,29},{11,20,41},{0,29,651},{6,20,98},{16,27,5},{16,27,5},{16,27,5},{16,23,5},{24,8,648},{9,27,1},{9,27,1},{9,20,1},{30,11,648}, 
+{9,20,1},{31,9,882},{14,31,37},{17,25,8},{12,25,5},{31,9,882},{29,19,882},{12,25,5},{0,23,890},{29,19,882},{0,23,890},{13,0,890},{13,0,890},{13,0,890},{13,0,890},{11,24,4},{11,24,4},{11,24,4},{11,19,5},{3,24,5},{3,24,5},{16,31,2792},{15,31,1079},{15,27,1186},{14,26,898},{14,31,3652},{11,31,1095},{12,26,45},{11,23,1159},{3,31,3436},{4,24,908},{18,31,1080}, +{17,31,110},{17,27,56},{16,26,121},{26,14,3048},{10,31,996},{12,26,36},{5,24,901},{22,21,3048},{5,24,901},{14,31,939},{14,31,939},{14,31,939},{14,26,894},{12,31,659},{12,26,41},{12,26,41},{11,21,46},{1,30,651},{7,21,98},{17,28,4},{17,28,4},{17,28,4},{17,24,8},{28,4,648},{10,28,2},{10,28,2},{10,21,1},{29,13,648},{10,21,1},{29,15,882},{16,31,80},{18,26,8}, +{13,26,5},{29,15,882},{30,20,882},{13,26,5},{0,24,900},{30,20,882},{0,24,900},{14,0,890},{14,0,890},{14,0,890},{14,0,890},{12,25,10},{12,25,10},{12,25,10},{12,20,13},{4,24,8},{4,24,8},{17,31,3038},{16,31,1268},{16,28,1186},{15,27,910},{15,31,3879},{12,31,1146},{13,27,47},{12,24,1159},{5,31,3667},{6,25,894},{19,31,1205},{18,31,147},{18,28,53},{17,27,122},{30,10,3048}, +{12,31,1110},{12,28,41},{6,25,890},{30,18,3048},{6,25,890},{16,31,979},{16,31,979},{16,31,979},{15,27,901},{13,31,670},{13,27,38},{13,27,38},{13,22,44},{2,31,648},{7,23,101},{18,29,2},{18,29,2},{18,29,2},{18,25,2},{31,2,648},{11,29,4},{11,29,4},{11,22,5},{27,16,648},{11,22,5},{29,18,882},{18,31,146},{19,27,18},{15,27,10},{29,18,882},{28,23,882},{15,27,10}, +{0,25,890},{28,23,882},{0,25,890},{15,0,900},{15,0,900},{15,0,900},{15,0,900},{13,26,4},{13,26,4},{13,26,4},{13,21,5},{6,25,4},{6,25,4},{18,31,3308},{17,31,1502},{17,29,1186},{16,28,898},{17,31,4077},{14,31,1230},{14,28,33},{13,25,1159},{8,31,3820},{7,26,894},{21,31,1368},{19,31,261},{19,29,53},{18,28,117},{31,11,3048},{14,31,1226},{14,28,29},{7,26,890},{31,19,3048}, 
+{7,26,890},{17,31,1018},{17,31,1018},{17,31,1018},{16,28,894},{14,31,724},{14,28,29},{14,28,29},{14,23,44},{4,31,665},{8,24,102},{19,30,2},{19,30,2},{19,30,2},{19,26,2},{26,13,648},{12,30,1},{12,30,1},{12,23,4},{21,21,648},{12,23,4},{30,19,882},{20,31,193},{20,28,8},{15,28,5},{30,19,882},{27,25,882},{15,28,5},{0,26,890},{27,25,882},{0,26,890},{16,0,890}, +{16,0,890},{16,0,890},{16,0,890},{14,27,4},{14,27,4},{14,27,4},{14,22,5},{7,26,4},{7,26,4},{19,31,3614},{18,31,1804},{18,30,1186},{17,29,898},{18,31,4284},{15,31,1417},{15,29,33},{14,26,1159},{9,31,4036},{8,27,899},{22,31,1494},{20,31,405},{20,30,56},{19,29,117},{29,17,3048},{16,31,1395},{15,29,29},{8,27,890},{27,23,3048},{8,27,890},{18,31,1075},{18,31,1075},{18,31,1075}, +{17,29,894},{16,31,787},{15,29,29},{15,29,29},{15,24,41},{6,31,705},{10,24,98},{20,31,5},{20,31,5},{20,31,5},{20,27,5},{28,12,648},{13,31,1},{13,31,1},{13,24,1},{22,22,648},{13,24,1},{29,23,882},{22,31,277},{21,29,8},{16,29,5},{29,23,882},{28,26,882},{16,29,5},{0,27,890},{28,26,882},{0,27,890},{17,0,890},{17,0,890},{17,0,890},{17,0,890},{15,28,4}, +{15,28,4},{15,28,4},{15,23,5},{7,28,5},{7,28,5},{20,31,4014},{19,31,2174},{19,31,1186},{18,30,898},{19,31,4545},{17,31,1725},{16,30,45},{15,27,1159},{11,31,4300},{8,28,908},{23,31,1656},{22,31,585},{21,31,56},{20,30,121},{30,18,3048},{18,31,1563},{16,30,36},{9,28,901},{26,25,3048},{9,28,901},{19,31,1150},{19,31,1150},{19,31,1150},{18,30,894},{17,31,841},{16,30,41},{16,30,41}, +{15,25,46},{8,31,747},{11,25,98},{21,31,20},{21,31,20},{21,31,20},{21,28,8},{29,13,648},{15,31,5},{15,31,5},{14,25,1},{28,20,648},{14,25,1},{30,24,882},{23,31,397},{22,30,8},{17,30,5},{30,24,882},{29,27,882},{17,30,5},{0,28,900},{29,27,882},{0,28,900},{18,0,890},{18,0,890},{18,0,890},{18,0,890},{16,29,10},{16,29,10},{16,29,10},{16,24,13},{8,28,8}, 
+{8,28,8},{22,31,4123},{21,31,2404},{20,31,1278},{19,31,901},{20,31,4626},{18,31,1849},{17,31,38},{16,28,1006},{14,31,4330},{10,29,789},{24,31,1629},{23,31,715},{22,31,65},{22,30,101},{31,19,2814},{20,31,1505},{17,31,34},{10,29,785},{27,26,2814},{10,29,785},{20,31,1278},{20,31,1278},{20,31,1278},{19,31,901},{18,31,948},{17,31,38},{17,31,38},{17,26,44},{10,31,840},{11,27,101},{22,31,65}, +{22,31,65},{22,31,65},{22,29,2},{29,16,648},{17,31,34},{17,31,34},{15,26,5},{31,20,648},{15,26,5},{31,25,761},{25,31,425},{23,31,9},{19,31,1},{31,25,761},{28,29,761},{19,31,1},{0,29,785},{28,29,761},{0,29,785},{19,0,900},{19,0,900},{19,0,900},{19,0,900},{17,30,4},{17,30,4},{17,30,4},{17,25,5},{10,29,4},{10,29,4},{23,31,3735},{22,31,2314},{21,31,1395}, +{20,31,899},{22,31,4090},{19,31,1618},{18,31,104},{17,29,686},{15,31,3826},{11,29,507},{25,31,1285},{24,31,609},{23,31,122},{23,31,37},{30,22,2249},{21,31,1186},{19,31,74},{13,29,482},{27,27,2249},{13,29,482},{21,31,1395},{21,31,1395},{21,31,1395},{20,31,899},{19,31,1086},{18,31,104},{18,31,104},{18,27,44},{12,31,969},{12,28,102},{23,31,122},{23,31,122},{23,31,122},{23,30,2},{30,17,648}, +{19,31,74},{19,31,74},{16,27,4},{25,25,648},{16,27,4},{31,26,481},{27,31,269},{25,31,0},{21,31,0},{31,26,481},{31,28,481},{21,31,0},{0,29,481},{31,28,481},{0,29,481},{20,0,890},{20,0,890},{20,0,890},{20,0,890},{18,31,4},{18,31,4},{18,31,4},{18,26,5},{11,30,4},{11,30,4},{23,31,3399},{23,31,2260},{22,31,1530},{21,31,954},{23,31,3639},{20,31,1402},{19,31,238}, +{18,29,405},{17,31,3443},{12,30,314},{26,31,1009},{25,31,525},{25,31,164},{24,31,5},{30,24,1769},{23,31,918},{21,31,113},{14,30,290},{29,27,1769},{14,30,290},{22,31,1530},{22,31,1530},{22,31,1530},{21,31,954},{21,31,1251},{19,31,238},{19,31,238},{19,28,41},{14,31,1105},{14,28,98},{25,31,164},{25,31,164},{25,31,164},{24,31,5},{31,18,648},{21,31,113},{21,31,113},{17,28,1},{26,26,648}, 
+{17,28,1},{30,29,265},{28,31,145},{27,31,4},{24,31,1},{30,29,265},{29,30,265},{24,31,1},{0,30,289},{29,30,265},{0,30,289},{21,0,890},{21,0,890},{21,0,890},{21,0,890},{19,31,13},{19,31,13},{19,31,13},{19,27,5},{12,31,9},{12,31,9},{24,31,3069},{24,31,2257},{23,31,1683},{23,31,1054},{24,31,3258},{22,31,1330},{21,31,378},{19,30,213},{20,31,3102},{14,30,166},{27,31,801}, +{26,31,477},{26,31,221},{25,31,20},{31,24,1374},{24,31,758},{23,31,181},{16,30,114},{24,31,1374},{16,30,114},{23,31,1683},{23,31,1683},{23,31,1683},{23,31,1054},{22,31,1401},{21,31,378},{21,31,378},{19,29,46},{16,31,1296},{15,29,98},{26,31,221},{26,31,221},{26,31,221},{25,31,20},{30,22,648},{23,31,181},{23,31,181},{18,29,1},{27,27,648},{18,29,1},{30,30,113},{29,31,61},{28,31,0}, +{26,31,1},{30,30,113},{30,30,113},{26,31,1},{0,30,113},{30,30,113},{0,30,113},{22,0,890},{22,0,890},{22,0,890},{22,0,890},{20,31,45},{20,31,45},{20,31,45},{20,28,13},{13,31,25},{13,31,25},{25,31,2860},{25,31,2260},{24,31,1854},{24,31,1210},{25,31,2932},{23,31,1310},{22,31,609},{21,30,108},{21,31,2731},{15,31,101},{28,31,630},{27,31,475},{27,31,306},{26,31,101},{31,26,1032}, +{26,31,612},{25,31,290},{18,31,37},{29,29,1032},{18,31,37},{24,31,1854},{24,31,1854},{24,31,1854},{24,31,1210},{23,31,1620},{22,31,609},{22,31,609},{21,30,44},{18,31,1515},{15,31,101},{27,31,306},{27,31,306},{27,31,306},{26,31,101},{30,25,648},{25,31,290},{25,31,290},{19,30,5},{30,27,648},{19,30,5},{31,30,18},{30,31,10},{30,31,1},{29,31,0},{31,30,18},{30,31,18},{29,31,0}, +{0,31,36},{30,31,18},{0,31,36},{23,0,900},{23,0,900},{23,0,900},{23,0,900},{21,31,104},{21,31,104},{21,31,104},{21,29,5},{15,31,65},{15,31,65},{26,31,2626},{26,31,2206},{25,31,1915},{25,31,1315},{26,31,2641},{24,31,1333},{23,31,789},{22,31,40},{22,31,2445},{17,31,116},{29,31,524},{28,31,406},{28,31,325},{27,31,170},{30,29,771},{27,31,507},{26,31,320},{20,31,0},{29,30,771}, 
+{20,31,0},{25,31,1915},{25,31,1915},{25,31,1915},{25,31,1315},{24,31,1661},{23,31,789},{23,31,789},{22,31,40},{20,31,1517},{17,31,116},{28,31,325},{28,31,325},{28,31,325},{27,31,170},{31,26,580},{26,31,320},{26,31,320},{20,31,0},{30,28,580},{20,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{24,0,890}, +{24,0,890},{24,0,890},{24,0,890},{23,31,164},{23,31,164},{23,31,164},{22,30,5},{17,31,116},{17,31,116},{27,31,2156},{27,31,1884},{26,31,1630},{26,31,1210},{26,31,2081},{25,31,1108},{24,31,705},{23,31,5},{23,31,1927},{19,31,180},{29,31,300},{29,31,236},{29,31,200},{28,31,85},{31,28,451},{28,31,283},{27,31,194},{23,31,1},{28,31,451},{23,31,1},{26,31,1630},{26,31,1630},{26,31,1630}, +{26,31,1210},{25,31,1347},{24,31,705},{24,31,705},{23,31,5},{22,31,1229},{19,31,180},{29,31,200},{29,31,200},{29,31,200},{28,31,85},{31,27,338},{27,31,194},{27,31,194},{23,31,1},{30,29,338},{23,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{25,0,890},{25,0,890},{25,0,890},{25,0,890},{24,31,221}, +{24,31,221},{24,31,221},{23,31,5},{19,31,180},{19,31,180},{28,31,1782},{27,31,1564},{27,31,1395},{27,31,1123},{27,31,1620},{26,31,937},{25,31,651},{24,31,25},{24,31,1560},{21,31,233},{30,31,150},{30,31,134},{29,31,104},{29,31,40},{31,29,216},{29,31,136},{28,31,90},{25,31,1},{29,31,216},{25,31,1},{27,31,1395},{27,31,1395},{27,31,1395},{27,31,1123},{26,31,1101},{25,31,651},{25,31,651}, +{24,31,25},{23,31,998},{21,31,233},{29,31,104},{29,31,104},{29,31,104},{29,31,40},{31,28,162},{28,31,90},{28,31,90},{25,31,1},{28,31,162},{25,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{26,0,890},{26,0,890},{26,0,890},{26,0,890},{25,31,290},{25,31,290},{25,31,290},{24,31,25},{21,31,233}, 
+{21,31,233},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{0,11,3379},{0,9,2369},{0,8,1061},{0,5,2435},{0,6,3760},{0,5,2660},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{7,0,3371},{0,9,2369},{0,8,1061},{0,5,2435},{10,1,3371},{0,5,2435},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{0,2,332},{0,2,200},{0,8,0}, +{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{4,0,288},{0,2,164},{9,2,1568},{0,14,442},{0,10,40},{0,8,485},{9,2,1568},{17,0,1568},{0,8,485},{0,6,1586},{17,0,1568},{0,6,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,20,1570},{0,16,325},{0,11,5}, +{0,9,392},{0,13,3968},{0,10,2630},{0,9,1121},{0,6,2710},{0,7,4484},{0,6,3034},{0,20,1570},{0,16,325},{0,11,5},{0,9,392},{2,10,3968},{0,10,2630},{0,9,1121},{0,6,2710},{13,0,3968},{0,6,2710},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{0,5,514},{0,5,274},{0,5,274},{0,3,289},{0,3,595},{0,3,370},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{2,2,512}, +{0,5,274},{0,5,274},{0,3,289},{2,2,512},{0,3,289},{10,3,1568},{0,16,325},{0,11,5},{0,9,392},{10,3,1568},{18,1,1568},{0,9,392},{0,7,1586},{18,1,1568},{0,7,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{0,15,4652},{0,11,2945},{0,10,1217}, +{0,7,3035},{0,8,5283},{0,7,3476},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{4,8,4651},{0,11,2945},{0,10,1217},{0,7,3035},{13,1,4651},{0,7,3035},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{0,7,802},{0,6,424},{0,6,424},{0,4,449},{0,3,931},{0,3,562},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{4,0,800},{0,6,424},{0,6,424},{0,4,449},{5,1,800}, +{0,4,449},{11,4,1568},{0,17,225},{1,12,13},{0,11,292},{11,4,1568},{19,2,1568},{0,11,292},{0,8,1576},{19,2,1568},{0,8,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,1570},{0,19,149},{0,13,73},{0,12,194},{0,17,5424},{0,13,3368},{0,11,1349},{0,8,3449},{0,9,6213},{0,7,3956},{0,25,1570}, 
+{0,19,149},{0,13,73},{0,12,194},{10,0,5419},{0,13,3368},{0,11,1349},{0,8,3449},{15,1,5419},{0,8,3449},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{0,8,1152},{0,7,610},{0,7,610},{0,4,625},{0,4,1328},{0,4,769},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{3,3,1152},{0,7,610},{0,7,610},{0,4,625},{8,0,1152},{0,4,625},{15,0,1568},{0,19,149},{2,13,13}, +{0,12,194},{15,0,1568},{20,3,1568},{0,12,194},{0,9,1576},{20,3,1568},{0,9,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,26,1633},{0,21,155},{1,14,150},{0,13,198},{0,20,5424},{0,15,3099},{0,12,996},{0,9,3179},{0,10,6544},{0,8,3890},{1,26,1569},{1,20,131},{2,14,69},{1,13,181},{10,3,5419}, +{0,15,3099},{0,12,996},{0,9,3179},{18,1,5419},{0,9,3179},{1,17,65},{1,17,65},{1,17,65},{1,11,69},{0,11,1152},{0,9,445},{0,9,445},{0,6,505},{0,6,1494},{0,5,737},{1,17,1},{1,17,1},{1,17,1},{1,11,5},{6,1,1152},{0,9,445},{0,9,445},{0,6,505},{11,0,1152},{0,6,505},{16,1,1568},{0,21,74},{3,14,5},{0,13,117},{16,1,1568},{26,1,1568},{0,13,117}, +{0,10,1586},{26,1,1568},{0,10,1586},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,25},{0,1,25},{1,29,1715},{1,22,219},{2,15,342},{1,14,262},{0,22,5420},{0,16,2834},{0,13,726},{0,10,2966},{0,11,6916},{0,9,3860},{2,27,1569},{2,21,131},{3,15,69},{2,14,181},{11,4,5419},{0,16,2834},{0,13,726},{0,10,2966},{19,2,5419}, +{0,10,2966},{1,20,146},{1,20,146},{1,20,146},{1,12,146},{0,13,1154},{0,11,337},{0,11,337},{0,7,388},{0,7,1665},{0,6,749},{2,18,1},{2,18,1},{2,18,1},{2,12,5},{8,0,1152},{0,11,337},{0,11,337},{0,7,388},{5,5,1152},{0,7,388},{17,2,1568},{0,23,34},{4,15,5},{0,14,72},{17,2,1568},{27,2,1568},{0,14,72},{0,11,1586},{27,2,1568},{0,11,1586},{1,0,145}, 
+{1,0,145},{1,0,145},{1,0,145},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,2,85},{0,2,85},{2,30,1907},{1,24,398},{2,16,542},{1,15,425},{0,25,5424},{0,18,2630},{0,15,486},{0,11,2771},{0,13,7299},{0,11,3860},{3,28,1569},{3,22,131},{4,16,82},{3,15,181},{15,0,5419},{0,18,2630},{0,15,486},{0,11,2771},{20,3,5419},{0,11,2771},{2,21,338},{2,21,338},{2,21,338}, +{2,13,338},{0,16,1152},{0,13,274},{0,13,274},{0,8,305},{0,8,1856},{0,7,797},{3,19,1},{3,19,1},{3,19,1},{3,13,5},{9,1,1152},{0,13,274},{0,13,274},{0,8,305},{16,0,1152},{0,8,305},{18,3,1568},{0,25,17},{5,16,13},{0,15,45},{18,3,1568},{28,3,1568},{0,15,45},{0,12,1576},{28,3,1568},{0,12,1576},{2,0,337},{2,0,337},{2,0,337},{2,0,337},{0,8,1}, +{0,8,1},{0,8,1},{0,5,0},{0,4,169},{0,4,169},{2,31,2145},{2,25,590},{3,17,862},{2,16,619},{0,27,5420},{0,20,2424},{0,16,282},{0,12,2552},{0,15,7711},{0,11,3908},{4,29,1570},{4,23,149},{4,17,73},{4,16,194},{14,4,5419},{0,20,2424},{0,16,282},{0,12,2552},{24,2,5419},{0,12,2552},{2,23,546},{2,23,546},{2,23,546},{2,15,546},{0,19,1154},{0,15,194},{0,15,194}, +{0,9,218},{0,9,2123},{0,8,865},{4,20,1},{4,20,1},{4,20,1},{4,13,4},{10,2,1152},{0,15,194},{0,15,194},{0,9,218},{17,1,1152},{0,9,218},{20,2,1568},{0,27,5},{6,17,13},{0,16,26},{20,2,1568},{24,7,1568},{0,16,26},{0,13,1576},{24,7,1568},{0,13,1576},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289}, +{0,5,289},{3,31,2596},{3,26,941},{3,19,1289},{3,17,972},{0,30,5420},{0,22,2243},{0,17,145},{0,13,2386},{0,16,8161},{0,13,3986},{5,30,1569},{5,24,131},{6,18,69},{5,17,181},{17,2,5419},{0,22,2243},{0,17,145},{0,13,2386},{27,2,5419},{0,13,2386},{3,24,901},{3,24,901},{3,24,901},{3,16,901},{0,22,1154},{0,17,109},{0,17,109},{0,10,145},{0,11,2441},{0,9,1001},{5,21,1}, 
+{5,21,1},{5,21,1},{5,15,5},{13,0,1152},{0,17,109},{0,17,109},{0,10,145},{20,1,1152},{0,10,145},{23,0,1568},{1,28,2},{7,18,5},{0,18,8},{23,0,1568},{30,5,1568},{0,18,8},{0,14,1586},{30,5,1568},{0,14,1586},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{0,14,1},{0,14,1},{0,14,1},{0,8,1},{0,6,468},{0,6,468},{4,31,3146},{3,28,1262},{4,19,1743}, +{3,18,1297},{1,31,5484},{0,23,2096},{0,19,69},{0,14,2251},{0,18,8669},{0,14,4100},{6,31,1569},{6,25,131},{7,19,69},{6,18,181},{18,3,5419},{0,23,2096},{0,19,69},{0,14,2251},{28,3,5419},{0,14,2251},{3,27,1252},{3,27,1252},{3,27,1252},{3,17,1256},{0,24,1152},{0,18,61},{0,18,61},{0,11,100},{0,12,2859},{0,10,1157},{6,22,1},{6,22,1},{6,22,1},{6,16,5},{14,1,1152}, +{0,18,61},{0,18,61},{0,11,100},{24,0,1152},{0,11,100},{24,1,1568},{2,29,2},{8,19,5},{0,19,5},{24,1,1568},{31,6,1568},{0,19,5},{0,15,1586},{31,6,1568},{0,15,1586},{3,0,1252},{3,0,1252},{3,0,1252},{3,0,1252},{0,16,1},{0,16,1},{0,16,1},{0,10,1},{0,8,657},{0,8,657},{5,31,3716},{4,29,1603},{4,21,2148},{4,19,1631},{2,31,5655},{0,25,2005},{0,20,31}, +{0,15,2138},{0,19,8963},{0,15,4070},{7,31,1587},{7,26,131},{8,20,82},{7,19,181},{20,2,5419},{0,25,2001},{0,20,27},{0,15,2134},{24,7,5419},{0,15,2134},{4,28,1587},{4,28,1587},{4,28,1587},{4,18,1590},{0,27,1158},{0,20,22},{0,20,22},{0,12,62},{0,14,3075},{0,11,1221},{7,23,1},{7,23,1},{7,23,1},{7,17,5},{16,0,1152},{0,20,18},{0,20,18},{0,12,58},{25,1,1152}, +{0,12,58},{25,2,1568},{3,30,2},{9,20,13},{1,20,9},{25,2,1568},{27,10,1568},{1,20,9},{0,16,1576},{27,10,1568},{0,16,1576},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{0,19,4},{0,19,4},{0,19,4},{0,11,8},{0,9,769},{0,9,769},{6,31,3890},{5,30,1603},{5,22,2148},{5,20,1627},{3,31,5748},{0,27,1989},{1,21,31},{0,17,2117},{0,21,8560},{0,16,3545},{9,31,1634}, 
+{8,27,149},{8,21,73},{8,20,194},{21,3,5419},{0,27,1889},{1,21,27},{0,17,2017},{28,6,5419},{0,17,2017},{5,29,1587},{5,29,1587},{5,29,1587},{5,19,1590},{1,28,1158},{1,21,22},{1,21,22},{1,13,62},{0,16,2801},{0,13,949},{8,24,1},{8,24,1},{8,24,1},{8,17,4},{17,1,1152},{0,22,2},{0,22,2},{0,14,26},{26,2,1152},{0,14,26},{26,3,1568},{4,31,5},{10,21,13}, +{2,21,9},{26,3,1568},{28,11,1568},{2,21,9},{0,17,1576},{28,11,1568},{0,17,1576},{5,0,1586},{5,0,1586},{5,0,1586},{5,0,1586},{1,20,4},{1,20,4},{1,20,4},{1,12,8},{0,10,625},{0,10,625},{7,31,4136},{6,31,1589},{7,23,2157},{6,21,1621},{4,31,5895},{1,28,1977},{2,22,33},{1,18,2107},{0,23,8196},{0,17,3043},{10,31,1667},{9,28,131},{10,22,69},{9,21,181},{24,1,5419}, +{0,29,1772},{2,22,24},{0,18,1875},{31,6,5419},{0,18,1875},{6,30,1576},{6,30,1576},{6,30,1576},{6,20,1580},{2,29,1161},{2,22,29},{2,22,29},{2,15,58},{0,17,2529},{0,14,656},{9,25,1},{9,25,1},{9,25,1},{9,19,5},{17,4,1152},{0,24,1},{0,24,1},{0,15,1},{29,2,1152},{0,15,1},{27,4,1568},{6,31,13},{11,22,5},{3,22,5},{27,4,1568},{31,11,1568},{3,22,5}, +{0,18,1586},{31,11,1568},{0,18,1586},{6,0,1576},{6,0,1576},{6,0,1576},{6,0,1576},{2,21,10},{2,21,10},{2,21,10},{2,14,13},{0,12,520},{0,12,520},{8,31,4436},{7,31,1625},{8,23,2175},{7,22,1621},{6,31,6079},{3,28,1973},{3,23,33},{2,19,2107},{0,24,7969},{0,18,2675},{11,31,1745},{10,29,131},{11,23,69},{10,22,181},{25,2,5419},{0,30,1699},{3,23,24},{0,19,1782},{27,10,5419}, +{0,19,1782},{7,31,1576},{7,31,1576},{7,31,1576},{7,21,1580},{3,30,1161},{3,23,29},{3,23,29},{3,16,74},{0,19,2313},{0,15,474},{10,26,1},{10,26,1},{10,26,1},{10,20,5},{18,5,1152},{1,25,1},{1,25,1},{0,16,2},{30,3,1152},{0,16,2},{31,0,1568},{8,31,34},{12,23,5},{4,23,5},{31,0,1568},{30,13,1568},{4,23,5},{0,19,1586},{30,13,1568},{0,19,1586},{7,0,1576}, 
+{7,0,1576},{7,0,1576},{7,0,1576},{3,22,10},{3,22,10},{3,22,10},{3,15,13},{0,14,400},{0,14,400},{9,31,4730},{8,31,1716},{8,25,2148},{8,23,1631},{7,31,6244},{3,30,1977},{4,24,31},{4,19,2138},{0,26,7669},{0,19,2375},{12,31,1832},{11,30,131},{12,24,82},{11,23,181},{26,3,5419},{1,31,1699},{4,24,27},{0,20,1720},{28,11,5419},{0,20,1720},{8,31,1595},{8,31,1595},{8,31,1595}, +{8,22,1590},{4,31,1158},{4,24,22},{4,24,22},{4,16,62},{0,21,2091},{0,17,306},{11,27,1},{11,27,1},{11,27,1},{11,21,5},{20,4,1152},{2,26,1},{2,26,1},{1,17,2},{29,5,1152},{1,17,2},{29,6,1568},{9,31,68},{13,24,13},{5,24,9},{29,6,1568},{31,14,1568},{5,24,9},{0,20,1576},{31,14,1568},{0,20,1576},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{4,23,4}, +{4,23,4},{4,23,4},{4,15,8},{0,16,277},{0,16,277},{11,31,5010},{9,31,1878},{9,26,2148},{9,24,1627},{8,31,6508},{4,31,1989},{5,25,31},{4,21,2117},{0,28,7364},{0,21,2098},{14,31,1952},{12,31,149},{12,25,73},{12,24,194},{28,2,5419},{3,31,1787},{5,25,27},{0,21,1657},{27,13,5419},{0,21,1657},{9,31,1622},{9,31,1622},{9,31,1622},{9,23,1590},{5,31,1164},{5,25,22},{5,25,22}, +{5,17,62},{0,22,1928},{0,18,194},{12,28,1},{12,28,1},{12,28,1},{12,21,4},{24,0,1152},{3,27,1},{3,27,1},{2,18,2},{30,6,1152},{2,18,2},{30,7,1568},{11,31,116},{14,25,13},{6,25,9},{30,7,1568},{30,16,1568},{6,25,9},{0,21,1576},{30,16,1568},{0,21,1576},{9,0,1586},{9,0,1586},{9,0,1586},{9,0,1586},{5,24,4},{5,24,4},{5,24,4},{5,16,8},{0,18,193}, +{0,18,193},{12,31,5316},{11,31,2154},{11,27,2157},{10,25,1621},{10,31,6800},{6,31,1999},{6,26,33},{5,22,2107},{0,29,7068},{0,22,1836},{15,31,2081},{13,31,206},{14,26,69},{13,25,181},{31,0,5419},{5,31,1937},{6,26,24},{0,22,1611},{30,13,5419},{0,22,1611},{10,31,1676},{10,31,1676},{10,31,1676},{10,24,1580},{7,31,1179},{6,26,29},{6,26,29},{6,19,58},{0,24,1798},{0,19,157},{13,29,1}, 
+{13,29,1},{13,29,1},{13,23,5},{24,3,1152},{4,28,1},{4,28,1},{4,19,1},{28,9,1152},{4,19,1},{31,8,1568},{13,31,205},{15,26,5},{7,26,5},{31,8,1568},{28,19,1568},{7,26,5},{0,22,1586},{28,19,1568},{0,22,1586},{10,0,1576},{10,0,1576},{10,0,1576},{10,0,1576},{6,25,10},{6,25,10},{6,25,10},{6,18,13},{0,20,106},{0,20,106},{13,31,5658},{12,31,2435},{12,27,2175}, +{11,26,1621},{11,31,7055},{7,31,2090},{7,27,33},{6,23,2107},{0,31,6820},{0,23,1690},{16,31,2216},{14,31,334},{15,27,69},{14,26,181},{29,6,5419},{7,31,2081},{7,27,24},{0,23,1590},{31,14,5419},{0,23,1590},{11,31,1745},{11,31,1745},{11,31,1745},{11,25,1580},{8,31,1220},{7,27,29},{7,27,29},{7,20,74},{0,26,1650},{1,20,137},{14,30,1},{14,30,1},{14,30,1},{14,24,5},{22,9,1152}, +{5,29,1},{5,29,1},{4,20,2},{29,10,1152},{4,20,2},{29,14,1568},{15,31,289},{16,27,5},{8,27,5},{29,14,1568},{29,20,1568},{8,27,5},{0,23,1586},{29,20,1568},{0,23,1586},{11,0,1576},{11,0,1576},{11,0,1576},{11,0,1576},{7,26,10},{7,26,10},{7,26,10},{7,19,13},{0,22,58},{0,22,58},{14,31,6036},{13,31,2751},{12,29,2148},{12,27,1631},{12,31,7316},{8,31,2228},{8,28,31}, +{8,23,2138},{0,31,6884},{0,24,1613},{17,31,2402},{16,31,500},{16,28,82},{15,27,181},{30,7,5419},{9,31,2195},{8,28,27},{0,24,1577},{30,16,5419},{0,24,1577},{12,31,1811},{12,31,1811},{12,31,1811},{12,26,1590},{9,31,1286},{8,28,22},{8,28,22},{8,20,62},{0,28,1508},{2,21,137},{15,31,1},{15,31,1},{15,31,1},{15,25,5},{24,8,1152},{6,30,1},{6,30,1},{5,21,2},{30,11,1152}, +{5,21,2},{30,15,1568},{17,31,410},{17,28,13},{9,28,9},{30,15,1568},{30,21,1568},{9,28,9},{0,24,1576},{30,21,1568},{0,24,1576},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{8,27,4},{8,27,4},{8,27,4},{8,19,8},{0,24,37},{0,24,37},{15,31,6450},{14,31,3135},{13,30,2148},{13,28,1627},{13,31,7661},{10,31,2448},{9,29,31},{8,25,2117},{2,31,7196},{0,25,1593},{19,31,2594}, 
+{17,31,698},{16,29,73},{16,28,194},{29,11,5419},{11,31,2379},{9,29,27},{1,25,1577},{31,17,5419},{1,25,1577},{13,31,1910},{13,31,1910},{13,31,1910},{13,27,1590},{10,31,1388},{9,29,22},{9,29,22},{9,21,62},{0,30,1416},{3,22,137},{16,31,4},{16,31,4},{16,31,4},{16,25,4},{28,4,1152},{7,31,1},{7,31,1},{6,22,2},{29,13,1152},{6,22,2},{31,16,1568},{19,31,530},{18,29,13}, +{10,29,9},{31,16,1568},{31,22,1568},{10,29,9},{0,25,1576},{31,22,1568},{0,25,1576},{13,0,1586},{13,0,1586},{13,0,1586},{13,0,1586},{9,28,4},{9,28,4},{9,28,4},{9,20,8},{0,25,17},{0,25,17},{16,31,6900},{15,31,3657},{15,31,2157},{14,29,1621},{15,31,8023},{11,31,2845},{10,30,33},{9,26,2107},{5,31,7651},{1,26,1611},{20,31,2866},{18,31,1011},{18,30,69},{17,29,181},{29,14,5419}, +{13,31,2657},{10,30,24},{3,26,1587},{29,20,5419},{3,26,1587},{15,31,2057},{15,31,2057},{15,31,2057},{14,28,1580},{12,31,1476},{10,30,29},{10,30,29},{10,23,58},{0,31,1324},{4,23,157},{17,31,37},{17,31,37},{17,31,37},{17,27,5},{31,2,1152},{9,31,4},{9,31,4},{8,23,1},{27,16,1152},{8,23,1},{31,19,1568},{21,31,637},{19,30,5},{11,30,5},{31,19,1568},{27,26,1568},{11,30,5}, +{0,26,1586},{27,26,1568},{0,26,1586},{14,0,1576},{14,0,1576},{14,0,1576},{14,0,1576},{10,29,10},{10,29,10},{10,29,10},{10,22,13},{1,27,9},{1,27,9},{18,31,7332},{16,31,4196},{16,31,2175},{15,30,1621},{16,31,8348},{13,31,3285},{11,31,33},{10,27,2107},{8,31,8004},{2,27,1611},{21,31,3112},{20,31,1281},{19,31,69},{18,30,181},{30,15,5419},{15,31,2897},{11,31,24},{3,27,1590},{30,21,5419}, +{3,27,1590},{16,31,2171},{16,31,2171},{16,31,2171},{15,29,1580},{13,31,1590},{11,31,29},{11,31,29},{11,24,74},{2,31,1424},{5,24,137},{19,31,65},{19,31,65},{19,31,65},{18,28,5},{26,13,1152},{11,31,20},{11,31,20},{8,24,2},{21,21,1152},{8,24,2},{30,23,1568},{23,31,785},{20,31,5},{12,31,5},{30,23,1568},{28,27,1568},{12,31,5},{0,27,1586},{28,27,1568},{0,27,1586},{15,0,1576}, 
+{15,0,1576},{15,0,1576},{15,0,1576},{11,30,10},{11,30,10},{11,30,10},{11,23,13},{1,28,8},{1,28,8},{19,31,7014},{17,31,4230},{17,31,2294},{16,31,1595},{17,31,7865},{14,31,3114},{12,31,85},{12,27,1706},{8,31,7436},{3,28,1268},{22,31,2794},{21,31,1221},{20,31,113},{19,30,114},{30,17,4803},{17,31,2648},{13,31,61},{4,28,1253},{25,25,4803},{4,28,1253},{17,31,2294},{17,31,2294},{17,31,2294}, +{16,30,1590},{14,31,1740},{12,31,85},{12,31,85},{12,24,62},{3,31,1571},{6,25,137},{20,31,113},{20,31,113},{20,31,113},{19,29,5},{28,12,1152},{13,31,61},{13,31,61},{9,25,2},{22,22,1152},{9,25,2},{31,23,1250},{24,31,680},{21,31,4},{15,31,0},{31,23,1250},{23,31,1250},{15,31,0},{0,28,1252},{23,31,1250},{0,28,1252},{16,0,1586},{16,0,1586},{16,0,1586},{16,0,1586},{12,31,4}, +{12,31,4},{12,31,4},{12,23,8},{2,29,8},{2,29,8},{19,31,6534},{18,31,4116},{18,31,2435},{17,31,1590},{18,31,7164},{15,31,2809},{14,31,161},{12,28,1256},{10,31,6748},{5,28,945},{23,31,2340},{22,31,1065},{21,31,164},{20,31,64},{31,17,4056},{18,31,2211},{15,31,113},{7,28,900},{25,26,4056},{7,28,900},{18,31,2435},{18,31,2435},{18,31,2435},{17,31,1590},{16,31,1923},{14,31,161},{14,31,161}, +{13,25,62},{6,31,1729},{7,26,137},{21,31,164},{21,31,164},{21,31,164},{20,29,4},{29,13,1152},{15,31,113},{15,31,113},{10,26,2},{28,20,1152},{10,26,2},{30,26,882},{25,31,482},{23,31,0},{18,31,1},{30,26,882},{31,27,882},{18,31,1},{0,28,900},{31,27,882},{0,28,900},{17,0,1586},{17,0,1586},{17,0,1586},{17,0,1586},{13,31,13},{13,31,13},{13,31,13},{13,24,8},{3,30,8}, +{3,30,8},{21,31,6091},{20,31,4022},{19,31,2609},{18,31,1640},{19,31,6490},{16,31,2617},{15,31,318},{14,28,835},{11,31,6135},{6,29,598},{24,31,1881},{23,31,931},{22,31,245},{21,31,5},{31,19,3318},{20,31,1733},{17,31,202},{8,29,545},{27,26,3318},{8,29,545},{19,31,2609},{19,31,2609},{19,31,2609},{18,31,1640},{17,31,2086},{15,31,318},{15,31,318},{14,27,58},{8,31,1868},{8,27,157},{22,31,245}, 
+{22,31,245},{22,31,245},{21,31,5},{29,16,1152},{17,31,202},{17,31,202},{12,27,1},{31,20,1152},{12,27,1},{31,26,545},{26,31,305},{25,31,4},{20,31,1},{31,26,545},{29,29,545},{20,31,1},{0,29,545},{29,29,545},{0,29,545},{18,0,1576},{18,0,1576},{18,0,1576},{18,0,1576},{15,31,29},{15,31,29},{15,31,29},{14,26,13},{5,31,9},{5,31,9},{22,31,5719},{21,31,3980},{20,31,2834}, +{19,31,1745},{20,31,6050},{18,31,2457},{16,31,536},{15,29,515},{14,31,5674},{7,30,406},{25,31,1573},{24,31,861},{23,31,338},{22,31,10},{30,22,2753},{21,31,1438},{19,31,290},{11,29,338},{27,27,2753},{11,29,338},{20,31,2834},{20,31,2834},{20,31,2834},{19,31,1745},{18,31,2284},{16,31,536},{16,31,536},{15,28,74},{10,31,2064},{9,28,137},{23,31,338},{23,31,338},{23,31,338},{22,31,10},{30,17,1152}, +{19,31,290},{19,31,290},{12,28,2},{25,25,1152},{12,28,2},{31,27,313},{28,31,181},{26,31,1},{23,31,0},{31,27,313},{30,29,313},{23,31,0},{0,29,337},{30,29,313},{0,29,337},{19,0,1576},{19,0,1576},{19,0,1576},{19,0,1576},{16,31,52},{16,31,52},{16,31,52},{15,27,13},{6,31,25},{6,31,25},{22,31,5399},{22,31,3974},{21,31,3035},{20,31,1875},{21,31,5619},{19,31,2378},{18,31,776}, +{16,30,318},{15,31,5258},{9,30,225},{26,31,1333},{25,31,813},{24,31,425},{23,31,65},{30,24,2273},{23,31,1218},{20,31,353},{12,30,146},{29,27,2273},{12,30,146},{21,31,3035},{21,31,3035},{21,31,3035},{20,31,1875},{19,31,2518},{18,31,776},{18,31,776},{16,28,62},{11,31,2323},{10,29,137},{24,31,425},{24,31,425},{24,31,425},{23,31,65},{31,18,1152},{20,31,353},{20,31,353},{13,29,2},{26,26,1152}, +{13,29,2},{31,28,145},{28,31,85},{28,31,4},{26,31,1},{31,28,145},{30,30,145},{26,31,1},{0,30,145},{30,30,145},{0,30,145},{20,0,1586},{20,0,1586},{20,0,1586},{20,0,1586},{17,31,85},{17,31,85},{17,31,85},{16,27,8},{8,31,40},{8,31,40},{23,31,5143},{23,31,4004},{22,31,3254},{21,31,2070},{22,31,5274},{20,31,2310},{19,31,1062},{17,30,133},{17,31,5011},{10,31,161},{27,31,1161}, 
+{26,31,801},{26,31,545},{25,31,164},{31,24,1878},{24,31,1094},{22,31,461},{14,30,66},{24,31,1878},{14,30,66},{22,31,3254},{22,31,3254},{22,31,3254},{21,31,2070},{20,31,2833},{19,31,1062},{19,31,1062},{17,29,62},{14,31,2577},{11,30,137},{26,31,545},{26,31,545},{26,31,545},{25,31,164},{30,22,1152},{22,31,461},{22,31,461},{14,30,2},{27,27,1152},{14,30,2},{30,31,41},{30,31,25},{29,31,1}, +{28,31,0},{30,31,41},{31,30,41},{28,31,0},{0,30,65},{31,30,41},{0,30,65},{21,0,1586},{21,0,1586},{21,0,1586},{21,0,1586},{18,31,136},{18,31,136},{18,31,136},{17,28,8},{10,31,80},{10,31,80},{24,31,4882},{24,31,4070},{23,31,3532},{22,31,2360},{24,31,4945},{21,31,2422},{20,31,1433},{18,31,58},{20,31,4717},{12,31,157},{28,31,1026},{27,31,835},{27,31,666},{26,31,305},{31,26,1536}, +{26,31,996},{24,31,628},{16,31,1},{29,29,1536},{16,31,1},{23,31,3532},{23,31,3532},{23,31,3532},{22,31,2360},{22,31,3110},{20,31,1433},{20,31,1433},{18,31,58},{16,31,2939},{12,31,157},{27,31,666},{27,31,666},{27,31,666},{26,31,305},{30,25,1152},{24,31,628},{24,31,628},{16,31,1},{30,27,1152},{16,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, +{0,31,0},{31,31,0},{0,31,0},{22,0,1576},{22,0,1576},{22,0,1576},{22,0,1576},{19,31,221},{19,31,221},{19,31,221},{18,30,13},{12,31,157},{12,31,157},{25,31,4212},{24,31,3590},{24,31,3106},{23,31,2201},{24,31,4129},{22,31,2101},{21,31,1301},{19,31,13},{20,31,3869},{14,31,233},{28,31,706},{28,31,562},{28,31,481},{27,31,218},{31,27,1067},{27,31,699},{25,31,442},{18,31,1},{30,29,1067}, +{18,31,1},{24,31,3106},{24,31,3106},{24,31,3106},{23,31,2201},{23,31,2668},{21,31,1301},{21,31,1301},{19,31,13},{18,31,2523},{14,31,233},{28,31,481},{28,31,481},{28,31,481},{27,31,218},{31,25,802},{25,31,442},{25,31,442},{18,31,1},{31,27,802},{18,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{23,0,1576}, 
+{23,0,1576},{23,0,1576},{23,0,1576},{20,31,325},{20,31,325},{20,31,325},{19,31,13},{14,31,233},{14,31,233},{26,31,3642},{25,31,3132},{25,31,2771},{24,31,2070},{25,31,3444},{23,31,1834},{22,31,1205},{20,31,8},{21,31,3219},{16,31,346},{29,31,456},{28,31,370},{28,31,289},{28,31,145},{30,29,683},{28,31,451},{27,31,290},{21,31,1},{29,30,683},{21,31,1},{25,31,2771},{25,31,2771},{25,31,2771}, +{24,31,2070},{24,31,2273},{22,31,1205},{22,31,1205},{20,31,8},{20,31,2121},{16,31,346},{28,31,289},{28,31,289},{28,31,289},{28,31,145},{31,26,512},{27,31,290},{27,31,290},{21,31,1},{29,29,512},{21,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{24,0,1586},{24,0,1586},{24,0,1586},{24,0,1586},{22,31,421}, +{22,31,421},{22,31,421},{20,31,8},{16,31,346},{16,31,346},{26,31,3162},{26,31,2742},{26,31,2486},{25,31,1947},{26,31,2877},{24,31,1641},{23,31,1145},{22,31,52},{22,31,2673},{18,31,458},{29,31,264},{29,31,200},{29,31,164},{28,31,81},{30,30,384},{28,31,243},{28,31,162},{23,31,1},{30,30,384},{23,31,1},{26,31,2486},{26,31,2486},{26,31,2486},{25,31,1947},{24,31,1969},{23,31,1145},{23,31,1145}, +{22,31,52},{20,31,1785},{18,31,458},{29,31,164},{29,31,164},{29,31,164},{28,31,81},{31,27,290},{28,31,162},{28,31,162},{23,31,1},{30,29,290},{23,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{25,0,1586},{25,0,1586},{25,0,1586},{25,0,1586},{23,31,520},{23,31,520},{23,31,520},{22,31,52},{18,31,458}, +{18,31,458},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{0,15,5885},{0,11,4118},{0,10,1800},{0,7,4202},{0,8,6546},{0,7,4643},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{9,0,5885},{0,11,4118},{0,10,1800},{0,7,4202},{15,0,5885},{0,7,4202},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,549},{0,5,289},{0,5,289},{0,3,306},{0,3,630},{0,3,387},{0,11,0}, 
+{0,11,0},{0,11,0},{0,7,4},{2,2,545},{0,5,289},{0,5,289},{0,3,306},{4,1,545},{0,3,306},{13,1,2665},{0,18,680},{0,13,50},{0,11,785},{13,1,2665},{23,0,2665},{0,11,785},{0,8,2689},{23,0,2665},{0,8,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,2665},{0,20,521},{0,14,5}, +{0,12,625},{0,17,6669},{0,13,4529},{0,11,1890},{0,8,4610},{0,9,7494},{0,7,5171},{0,25,2665},{0,20,521},{0,14,5},{0,12,625},{9,2,6669},{0,13,4529},{0,11,1890},{0,8,4610},{17,0,6669},{0,8,4610},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,7,841},{0,6,445},{0,6,445},{0,4,464},{0,3,982},{0,3,595},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{4,0,841}, +{0,6,445},{0,6,445},{0,4,464},{7,0,841},{0,4,464},{14,2,2665},{0,20,521},{0,14,5},{0,12,625},{14,2,2665},{25,0,2665},{0,12,625},{0,9,2689},{25,0,2665},{0,9,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{0,19,7541},{0,14,4934},{0,12,2042}, +{0,9,5045},{0,10,8546},{0,8,5682},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{10,2,7538},{0,14,4934},{0,12,2042},{0,9,5045},{17,1,7538},{0,9,5045},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,8,1201},{0,7,637},{0,7,637},{0,4,656},{0,4,1385},{0,4,800},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{5,0,1201},{0,7,637},{0,7,637},{0,4,656},{8,0,1201}, +{0,4,656},{16,1,2665},{0,22,405},{1,15,5},{0,13,514},{16,1,2665},{28,0,2665},{0,13,514},{0,10,2689},{28,0,2665},{0,10,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,30,2669},{0,23,313},{0,16,68},{0,15,410},{0,20,8498},{0,16,5330},{0,13,2210},{0,10,5530},{0,11,9702},{0,9,6270},{0,30,2669}, +{0,23,313},{0,16,68},{0,15,410},{11,2,8493},{0,16,5330},{0,13,2210},{0,10,5530},{17,2,8493},{0,10,5530},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,9,1629},{0,8,832},{0,8,832},{0,5,881},{0,5,1874},{0,5,1106},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{5,1,1625},{0,8,832},{0,8,832},{0,5,881},{8,1,1625},{0,5,881},{17,2,2665},{0,23,313},{2,16,8}, 
+{0,15,410},{17,2,2665},{29,1,2665},{0,15,410},{0,11,2689},{29,1,2665},{0,11,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,2777},{0,26,232},{0,17,197},{0,16,305},{0,22,9674},{0,17,5849},{0,14,2450},{0,10,6106},{0,12,11199},{0,10,7006},{1,31,2741},{0,26,232},{1,17,146},{0,16,305},{11,4,9669}, +{0,17,5849},{0,14,2450},{0,10,6106},{19,2,9669},{0,10,6106},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,11,2178},{0,10,1125},{0,10,1125},{0,6,1189},{0,6,2520},{0,5,1475},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{6,1,2178},{0,10,1125},{0,10,1125},{0,6,1189},{11,0,2178},{0,6,1189},{20,0,2665},{0,26,232},{3,17,2},{0,16,305},{20,0,2665},{30,2,2665},{0,16,305}, +{0,12,2689},{30,2,2665},{0,12,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,31,2949},{0,28,217},{1,18,261},{0,17,282},{0,25,9670},{0,19,5529},{0,16,1970},{0,12,5738},{0,13,11589},{0,11,6898},{2,31,2789},{0,28,217},{2,18,146},{0,17,282},{15,0,9669},{0,19,5529},{0,16,1970},{0,12,5738},{20,3,9669}, +{0,12,5738},{0,24,64},{0,24,64},{0,24,64},{1,14,64},{0,13,2180},{0,11,949},{0,11,949},{0,7,1018},{0,7,2691},{0,6,1433},{1,23,1},{1,23,1},{1,23,1},{1,14,0},{8,0,2178},{0,11,949},{0,11,949},{0,7,1018},{5,5,2178},{0,7,1018},{21,1,2665},{0,28,153},{4,18,5},{0,17,218},{21,1,2665},{31,3,2665},{0,17,218},{0,13,2689},{31,3,2665},{0,13,2689},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,3,1},{0,3,1},{0,3,1},{0,2,4},{0,1,18},{0,1,18},{2,31,3285},{1,28,273},{2,19,453},{1,18,346},{0,27,9674},{0,20,5170},{0,17,1546},{0,13,5429},{0,15,11993},{0,12,6819},{3,31,2873},{1,28,209},{3,19,146},{1,18,282},{14,4,9669},{0,20,5170},{0,17,1546},{0,13,5429},{24,2,9669},{0,13,5429},{1,25,128},{1,25,128},{1,25,128}, 
+{1,15,137},{0,16,2178},{0,13,832},{0,13,832},{0,8,881},{0,8,2882},{0,7,1427},{2,23,4},{2,23,4},{2,23,4},{2,15,0},{9,1,2178},{0,13,832},{0,13,832},{0,8,881},{16,0,2178},{0,8,881},{23,0,2665},{0,29,85},{5,19,5},{0,18,149},{23,0,2665},{30,5,2665},{0,18,149},{0,14,2689},{30,5,2665},{0,14,2689},{1,0,128},{1,0,128},{1,0,128},{1,0,128},{0,5,1}, +{0,5,1},{0,5,1},{0,3,1},{0,2,72},{0,2,72},{3,31,3785},{1,30,405},{2,21,676},{1,19,469},{0,30,9669},{0,22,4878},{0,18,1190},{0,14,5138},{0,16,12390},{0,13,6789},{4,31,2966},{2,29,209},{4,20,149},{2,19,282},{18,0,9669},{0,22,4878},{0,18,1190},{0,14,5138},{30,0,9669},{0,14,5138},{2,26,320},{2,26,320},{2,26,320},{1,17,320},{0,19,2180},{0,15,680},{0,15,680}, +{0,9,740},{0,9,3149},{0,8,1441},{3,24,1},{3,24,1},{3,24,1},{3,16,1},{10,2,2178},{0,15,680},{0,15,680},{0,9,740},{17,1,2178},{0,9,740},{24,1,2665},{0,31,41},{6,20,8},{0,19,98},{24,1,2665},{31,6,2665},{0,19,98},{0,15,2689},{31,6,2665},{0,15,2689},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,160}, +{0,4,160},{4,31,4514},{2,31,630},{3,22,1027},{2,20,694},{1,31,9738},{0,23,4646},{0,20,849},{0,15,4826},{0,18,12955},{0,14,6798},{6,31,3101},{3,31,218},{5,21,146},{3,20,299},{18,3,9669},{0,23,4646},{0,20,849},{0,15,4826},{28,3,9669},{0,15,4826},{2,29,545},{2,29,545},{2,29,545},{2,18,546},{0,22,2180},{0,17,505},{0,17,505},{0,11,610},{0,11,3467},{0,10,1513},{4,26,1}, +{4,26,1},{4,26,1},{4,17,0},{13,0,2178},{0,17,505},{0,17,505},{0,11,610},{20,1,2178},{0,11,610},{26,1,2665},{2,31,85},{7,21,2},{0,20,65},{26,1,2665},{31,8,2665},{0,20,65},{0,16,2689},{31,8,2665},{0,16,2689},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289},{0,5,289},{4,31,5330},{3,31,1018},{3,23,1430}, 
+{2,21,979},{2,31,9981},{0,26,4406},{0,21,579},{0,16,4610},{0,19,13489},{0,15,6846},{7,31,3233},{4,31,226},{6,22,146},{4,21,282},{20,2,9669},{0,26,4406},{0,21,579},{0,16,4610},{24,7,9669},{0,16,4610},{3,30,865},{3,30,865},{3,30,865},{3,19,866},{0,24,2178},{0,19,389},{0,19,389},{0,12,464},{0,12,3885},{0,11,1603},{5,27,1},{5,27,1},{5,27,1},{5,18,0},{14,1,2178}, +{0,19,389},{0,19,389},{0,12,464},{24,0,2178},{0,12,464},{28,0,2665},{3,31,153},{8,22,5},{0,22,37},{28,0,2665},{30,10,2665},{0,22,37},{0,17,2689},{30,10,2665},{0,17,2689},{3,0,865},{3,0,865},{3,0,865},{3,0,865},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,6,445},{0,6,445},{5,31,6270},{3,31,1626},{4,24,1886},{3,22,1299},{2,31,10381},{0,28,4146},{0,22,377}, +{0,17,4373},{0,20,14006},{0,16,6915},{8,31,3434},{6,31,242},{7,23,146},{5,22,282},{21,3,9669},{0,28,4146},{0,22,377},{0,17,4373},{28,6,9669},{0,17,4373},{3,31,1226},{3,31,1226},{3,31,1226},{3,21,1205},{0,27,2180},{0,21,274},{0,21,274},{0,13,353},{0,14,4269},{0,11,1763},{6,27,4},{6,27,4},{6,27,4},{6,19,0},{16,0,2178},{0,21,274},{0,21,274},{0,13,353},{25,1,2178}, +{0,13,353},{29,1,2665},{5,31,232},{9,23,5},{0,23,10},{29,1,2665},{31,11,2665},{0,23,10},{0,18,2689},{31,11,2665},{0,18,2689},{3,0,1201},{3,0,1201},{3,0,1201},{3,0,1201},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,7,637},{0,7,637},{6,31,7374},{4,31,2339},{4,25,2441},{3,23,1730},{3,31,10950},{0,29,3909},{0,23,243},{0,18,4154},{0,22,14614},{0,17,7029},{9,31,3638}, +{7,31,320},{8,24,149},{6,23,282},{22,4,9669},{0,29,3909},{0,23,243},{0,18,4154},{29,7,9669},{0,18,4154},{4,31,1714},{4,31,1714},{4,31,1714},{3,22,1666},{0,29,2180},{0,23,194},{0,23,194},{0,14,260},{0,15,4686},{0,13,1937},{7,28,1},{7,28,1},{7,28,1},{7,20,1},{17,1,2178},{0,23,194},{0,23,194},{0,14,260},{26,2,2178},{0,14,260},{31,0,2665},{8,31,313},{10,24,8}, 
+{0,24,4},{31,0,2665},{30,13,2665},{0,24,4},{0,19,2689},{30,13,2665},{0,19,2689},{3,0,1665},{3,0,1665},{3,0,1665},{3,0,1665},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,8,832},{0,8,832},{7,31,8807},{5,31,3388},{5,26,3116},{4,24,2243},{4,31,11766},{0,31,3686},{0,25,138},{0,19,3938},{0,23,15369},{0,18,7206},{11,31,3853},{8,31,457},{9,25,146},{7,24,299},{25,2,9669}, +{0,31,3686},{0,25,138},{0,19,3938},{27,10,9669},{0,19,3938},{5,31,2427},{5,31,2427},{5,31,2427},{4,23,2182},{0,31,2210},{0,25,137},{0,25,137},{0,15,181},{0,16,5157},{0,14,2163},{8,30,1},{8,30,1},{8,30,1},{8,21,0},{17,4,2178},{0,25,137},{0,25,137},{0,15,181},{29,2,2178},{0,15,181},{31,3,2665},{9,31,405},{11,25,2},{1,25,2},{31,3,2665},{30,15,2665},{1,25,2}, +{0,20,2689},{30,15,2665},{0,20,2689},{4,0,2178},{4,0,2178},{4,0,2178},{4,0,2178},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,10,1125},{0,10,1125},{7,31,10230},{6,31,4421},{6,27,3739},{4,26,2742},{5,31,12634},{0,31,3719},{0,26,87},{0,20,3771},{0,25,16061},{0,19,7283},{12,31,4050},{10,31,629},{10,26,146},{8,25,282},{26,3,9669},{0,31,3718},{0,26,86},{0,20,3770},{28,11,9669}, +{0,20,3770},{5,31,3050},{5,31,3050},{5,31,3050},{5,24,2690},{1,31,2325},{0,26,86},{0,26,86},{0,16,129},{0,18,5544},{0,15,2318},{9,31,1},{9,31,1},{9,31,1},{9,22,0},{18,5,2178},{0,26,85},{0,26,85},{0,16,128},{30,3,2178},{0,16,128},{31,6,2665},{11,31,521},{12,26,5},{2,26,2},{31,6,2665},{29,17,2665},{2,26,2},{0,21,2689},{29,17,2665},{0,21,2689},{5,0,2689}, +{5,0,2689},{5,0,2689},{5,0,2689},{0,24,1},{0,24,1},{0,24,1},{0,15,5},{0,11,1348},{0,11,1348},{9,31,10738},{7,31,4899},{7,28,3705},{5,27,2742},{6,31,13045},{1,31,4002},{1,27,87},{0,21,3686},{0,27,15601},{0,20,6570},{13,31,4302},{11,31,857},{11,27,146},{9,26,282},{28,2,9669},{2,31,3954},{1,27,86},{0,21,3605},{27,13,9669},{0,21,3605},{6,31,3173},{6,31,3173},{6,31,3173}, 
+{6,25,2690},{2,31,2427},{1,28,83},{1,28,83},{1,17,129},{0,20,5170},{0,16,1856},{10,31,4},{10,31,4},{10,31,4},{10,23,0},{20,4,2178},{0,28,32},{0,28,32},{0,17,89},{29,5,2178},{0,17,89},{31,8,2665},{13,31,680},{13,27,5},{3,27,2},{31,8,2665},{30,18,2665},{3,27,2},{0,22,2689},{30,18,2665},{0,22,2689},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{1,25,1}, +{1,25,1},{1,25,1},{1,15,10},{0,13,1217},{0,13,1217},{10,31,11278},{8,31,5402},{7,29,3750},{6,28,2745},{7,31,13510},{3,31,4314},{2,28,77},{1,22,3686},{0,28,15046},{0,21,5958},{14,31,4590},{12,31,1171},{12,28,149},{10,27,282},{29,3,9669},{3,31,4265},{2,28,76},{0,22,3458},{28,14,9669},{0,22,3458},{7,31,3314},{7,31,3314},{7,31,3314},{7,26,2690},{4,31,2532},{2,28,73},{2,28,73}, +{2,18,129},{0,21,4837},{0,17,1490},{11,31,25},{11,31,25},{11,31,25},{11,24,1},{24,0,2178},{0,30,8},{0,30,8},{0,19,49},{30,6,2178},{0,19,49},{31,11,2665},{15,31,832},{14,28,8},{4,28,4},{31,11,2665},{31,19,2665},{4,28,4},{0,23,2689},{31,19,2665},{0,23,2689},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{2,26,1},{2,26,1},{2,26,1},{2,16,5},{0,14,1037}, +{0,14,1037},{11,31,11942},{10,31,6090},{9,30,3739},{7,29,2751},{9,31,14053},{4,31,4863},{3,29,79},{2,24,3689},{0,30,14558},{0,23,5274},{16,31,4858},{14,31,1556},{13,29,146},{11,28,299},{29,6,9669},{6,31,4594},{3,29,75},{0,24,3265},{31,14,9669},{0,24,3265},{9,31,3505},{9,31,3505},{9,31,3505},{8,27,2693},{5,31,2645},{3,30,72},{3,30,72},{3,19,134},{0,23,4506},{0,19,1109},{12,31,64}, +{12,31,64},{12,31,64},{12,25,0},{24,3,2178},{1,31,10},{1,31,10},{0,20,16},{28,9,2178},{0,20,16},{31,14,2665},{17,31,1053},{15,29,2},{5,29,2},{31,14,2665},{29,22,2665},{5,29,2},{0,24,2689},{29,22,2665},{0,24,2689},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{3,27,5},{3,27,5},{3,27,5},{3,18,8},{0,16,818},{0,16,818},{12,31,12466},{11,31,6718},{10,31,3739}, 
+{8,30,2742},{10,31,14554},{6,31,5363},{4,30,87},{3,24,3654},{0,31,14190},{0,24,4785},{17,31,5158},{15,31,1938},{14,30,146},{12,29,282},{30,7,9669},{8,31,4806},{4,30,86},{0,25,3130},{30,16,9669},{0,25,3130},{10,31,3658},{10,31,3658},{10,31,3658},{9,28,2690},{6,31,2795},{4,30,86},{4,30,86},{4,20,129},{0,25,4315},{0,20,809},{14,31,100},{14,31,100},{14,31,100},{13,26,0},{22,9,2178}, +{3,31,34},{3,31,34},{0,21,1},{29,10,2178},{0,21,1},{30,18,2665},{19,31,1241},{16,30,5},{6,30,2},{30,18,2665},{30,23,2665},{6,30,2},{0,25,2689},{30,23,2665},{0,25,2689},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{4,28,1},{4,28,1},{4,28,1},{4,19,5},{0,18,666},{0,18,666},{14,31,13094},{12,31,7445},{11,31,3830},{9,31,2742},{12,31,14998},{8,31,5926},{5,31,87}, +{4,25,3686},{0,31,14254},{0,25,4323},{18,31,5494},{16,31,2414},{15,31,146},{13,30,282},{29,11,9669},{10,31,5138},{5,31,86},{0,26,3013},{31,17,9669},{0,26,3013},{11,31,3829},{11,31,3829},{11,31,3829},{10,29,2690},{7,31,2981},{5,31,86},{5,31,86},{5,21,129},{0,27,4059},{0,21,597},{15,31,145},{15,31,145},{15,31,145},{14,27,0},{24,8,2178},{5,31,85},{5,31,85},{1,22,1},{30,11,2178}, +{1,22,1},{31,19,2665},{20,31,1378},{17,31,5},{7,31,2},{31,19,2665},{29,25,2665},{7,31,2},{0,26,2689},{29,25,2665},{0,26,2689},{10,0,2689},{10,0,2689},{10,0,2689},{10,0,2689},{5,29,1},{5,29,1},{5,29,1},{5,19,10},{0,20,505},{0,20,505},{15,31,12507},{13,31,7370},{12,31,4001},{11,31,2705},{13,31,14148},{8,31,5491},{6,31,154},{5,26,3063},{1,31,13399},{0,26,3306},{19,31,4949}, +{17,31,2261},{16,31,202},{15,30,185},{29,13,8712},{11,31,4644},{7,31,145},{0,27,2403},{28,20,8712},{0,27,2403},{12,31,4001},{12,31,4001},{12,31,4001},{11,30,2690},{9,31,3204},{6,31,154},{6,31,154},{6,22,129},{0,28,3762},{0,23,425},{16,31,202},{16,31,202},{16,31,202},{15,28,1},{28,4,2178},{7,31,145},{7,31,145},{2,23,1},{29,13,2178},{2,23,1},{30,22,2178},{22,31,1145},{18,31,1}, 
+{10,31,1},{30,22,2178},{27,27,2178},{10,31,1},{0,27,2178},{27,27,2178},{0,27,2178},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{6,30,1},{6,30,1},{6,30,1},{6,20,5},{0,22,389},{0,22,389},{16,31,11658},{14,31,7195},{13,31,4225},{12,31,2693},{14,31,13066},{10,31,5014},{8,31,261},{6,27,2390},{3,31,12366},{0,27,2277},{20,31,4338},{18,31,2037},{17,31,289},{16,30,89},{29,15,7578}, +{13,31,4037},{9,31,202},{0,27,1701},{30,20,7578},{0,27,1701},{13,31,4225},{13,31,4225},{13,31,4225},{12,31,2693},{10,31,3429},{8,31,261},{8,31,261},{7,23,134},{0,30,3509},{0,24,306},{17,31,289},{17,31,289},{17,31,289},{16,29,0},{31,2,2178},{9,31,202},{9,31,202},{3,24,0},{27,16,2178},{3,24,0},{31,22,1625},{23,31,850},{20,31,0},{13,31,1},{31,22,1625},{30,26,1625},{13,31,1}, +{0,27,1665},{30,26,1625},{0,27,1665},{12,0,2689},{12,0,2689},{12,0,2689},{12,0,2689},{7,31,5},{7,31,5},{7,31,5},{7,22,8},{0,24,306},{0,24,306},{16,31,11002},{15,31,7081},{14,31,4450},{13,31,2738},{15,31,12205},{11,31,4663},{9,31,411},{7,27,1845},{4,31,11643},{0,28,1578},{21,31,3802},{20,31,1845},{18,31,388},{17,31,25},{30,15,6661},{15,31,3525},{11,31,290},{0,28,1217},{30,21,6661}, +{0,28,1217},{14,31,4450},{14,31,4450},{14,31,4450},{13,31,2738},{11,31,3675},{9,31,411},{9,31,411},{8,24,129},{0,31,3354},{0,25,244},{18,31,388},{18,31,388},{18,31,388},{17,30,0},{26,13,2178},{11,31,290},{11,31,290},{4,25,1},{21,21,2178},{4,25,1},{31,23,1201},{24,31,653},{22,31,4},{15,31,1},{31,23,1201},{30,27,1201},{15,31,1},{0,28,1201},{30,27,1201},{0,28,1201},{13,0,2689}, +{13,0,2689},{13,0,2689},{13,0,2689},{8,31,17},{8,31,17},{8,31,17},{8,23,5},{0,26,218},{0,26,218},{17,31,10434},{16,31,7010},{15,31,4693},{14,31,2833},{16,31,11374},{12,31,4462},{10,31,629},{8,28,1387},{6,31,10895},{0,28,1002},{22,31,3334},{20,31,1701},{19,31,505},{18,31,0},{30,17,5829},{16,31,3145},{12,31,405},{1,28,866},{25,25,5829},{1,28,866},{15,31,4693},{15,31,4693},{15,31,4693}, 
+{14,31,2833},{12,31,3906},{10,31,629},{10,31,629},{9,25,129},{1,31,3525},{0,27,228},{19,31,505},{19,31,505},{19,31,505},{18,31,0},{28,12,2178},{12,31,405},{12,31,405},{5,26,1},{22,22,2178},{5,26,1},{30,26,841},{25,31,461},{23,31,1},{18,31,0},{30,26,841},{31,27,841},{18,31,0},{0,28,865},{31,27,841},{0,28,865},{14,0,2689},{14,0,2689},{14,0,2689},{14,0,2689},{9,31,50}, +{9,31,50},{9,31,50},{9,23,10},{0,28,137},{0,28,137},{18,31,9934},{17,31,6962},{16,31,4913},{15,31,2978},{17,31,10683},{13,31,4277},{11,31,915},{9,28,994},{8,31,10078},{0,29,630},{23,31,2934},{22,31,1605},{21,31,650},{19,31,25},{31,17,5082},{18,31,2769},{14,31,521},{2,29,546},{25,26,5082},{2,29,546},{16,31,4913},{16,31,4913},{16,31,4913},{15,31,2978},{14,31,4170},{11,31,915},{11,31,915}, +{10,26,129},{3,31,3789},{1,28,226},{21,31,650},{21,31,650},{21,31,650},{19,31,25},{29,13,2178},{14,31,521},{14,31,521},{6,27,1},{28,20,2178},{6,27,1},{31,26,545},{26,31,305},{25,31,4},{20,31,1},{31,26,545},{29,29,545},{20,31,1},{0,29,545},{29,29,545},{0,29,545},{15,0,2689},{15,0,2689},{15,0,2689},{15,0,2689},{11,31,74},{11,31,74},{11,31,74},{10,24,5},{0,29,85}, +{0,29,85},{19,31,9465},{18,31,6955},{17,31,5233},{16,31,3218},{18,31,10003},{14,31,4183},{13,31,1258},{10,29,645},{9,31,9445},{1,30,409},{24,31,2529},{23,31,1525},{22,31,785},{20,31,100},{31,19,4344},{20,31,2345},{16,31,698},{5,29,321},{27,26,4344},{5,29,321},{17,31,5233},{17,31,5233},{17,31,5233},{16,31,3218},{15,31,4491},{13,31,1258},{13,31,1258},{11,27,134},{5,31,4171},{2,29,213},{22,31,785}, +{22,31,785},{22,31,785},{20,31,100},{29,16,2178},{16,31,698},{16,31,698},{7,28,0},{31,20,2178},{7,28,0},{31,27,290},{28,31,162},{26,31,4},{23,31,1},{31,27,290},{30,29,290},{23,31,1},{0,29,320},{30,29,290},{0,29,320},{16,0,2689},{16,0,2689},{16,0,2689},{16,0,2689},{12,31,113},{12,31,113},{12,31,113},{11,26,8},{0,31,45},{0,31,45},{20,31,9219},{19,31,6985},{18,31,5530}, 
+{17,31,3473},{19,31,9496},{15,31,4186},{14,31,1630},{12,29,426},{11,31,8961},{3,30,277},{25,31,2275},{24,31,1509},{23,31,932},{22,31,208},{30,22,3779},{21,31,2086},{18,31,850},{6,30,129},{27,27,3779},{6,30,129},{18,31,5530},{18,31,5530},{18,31,5530},{17,31,3473},{16,31,4770},{14,31,1630},{14,31,1630},{12,28,129},{8,31,4442},{3,30,213},{23,31,932},{23,31,932},{23,31,932},{22,31,208},{30,17,2178}, +{18,31,850},{18,31,850},{8,29,1},{25,25,2178},{8,29,1},{30,30,128},{29,31,72},{28,31,1},{26,31,0},{30,30,128},{30,30,128},{26,31,0},{0,30,128},{30,30,128},{0,30,128},{17,0,2689},{17,0,2689},{17,0,2689},{17,0,2689},{13,31,170},{13,31,170},{13,31,170},{12,27,5},{2,31,89},{2,31,89},{21,31,8929},{20,31,7062},{19,31,5845},{18,31,3778},{20,31,9188},{17,31,4260},{15,31,2070}, +{13,30,234},{13,31,8680},{4,31,228},{26,31,2089},{25,31,1515},{24,31,1073},{23,31,353},{30,24,3299},{22,31,1913},{20,31,965},{9,30,65},{29,27,3299},{9,30,65},{19,31,5845},{19,31,5845},{19,31,5845},{18,31,3778},{17,31,5124},{15,31,2070},{15,31,2070},{13,29,129},{9,31,4725},{4,31,228},{24,31,1073},{24,31,1073},{24,31,1073},{23,31,353},{31,18,2178},{20,31,965},{20,31,965},{9,30,1},{26,26,2178}, +{9,30,1},{31,30,34},{30,31,18},{29,31,4},{28,31,1},{31,30,34},{31,30,34},{28,31,1},{0,30,64},{31,30,34},{0,30,64},{18,0,2689},{18,0,2689},{18,0,2689},{18,0,2689},{14,31,245},{14,31,245},{14,31,245},{13,27,10},{4,31,164},{4,31,164},{22,31,8707},{21,31,7170},{21,31,6209},{19,31,4133},{21,31,8853},{18,31,4387},{17,31,2548},{14,31,170},{14,31,8388},{6,31,244},{27,31,1971}, +{26,31,1557},{25,31,1250},{24,31,565},{31,24,2904},{23,31,1826},{22,31,1145},{10,31,1},{24,31,2904},{10,31,1},{21,31,6209},{21,31,6209},{21,31,6209},{19,31,4133},{19,31,5460},{17,31,2548},{17,31,2548},{14,30,129},{11,31,5085},{6,31,244},{25,31,1250},{25,31,1250},{25,31,1250},{24,31,565},{30,22,2178},{22,31,1145},{22,31,1145},{10,31,1},{27,27,2178},{10,31,1},{31,31,0},{31,31,0},{31,31,0}, 
+{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{19,0,2689},{19,0,2689},{19,0,2689},{19,0,2689},{15,31,338},{15,31,338},{15,31,338},{14,28,5},{6,31,244},{6,31,244},{23,31,7705},{22,31,6418},{21,31,5633},{20,31,3845},{22,31,7654},{19,31,3874},{18,31,2310},{15,31,53},{15,31,7258},{8,31,317},{27,31,1458},{27,31,1186},{26,31,932},{25,31,425},{31,25,2166}, +{25,31,1398},{23,31,850},{13,31,1},{28,29,2166},{13,31,1},{21,31,5633},{21,31,5633},{21,31,5633},{20,31,3845},{19,31,4830},{18,31,2310},{18,31,2310},{15,31,53},{13,31,4506},{8,31,317},{26,31,932},{26,31,932},{26,31,932},{25,31,425},{31,22,1625},{23,31,850},{23,31,850},{13,31,1},{30,26,1625},{13,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, +{0,31,0},{31,31,0},{0,31,0},{20,0,2689},{20,0,2689},{20,0,2689},{20,0,2689},{16,31,449},{16,31,449},{16,31,449},{15,30,8},{8,31,317},{8,31,317},{24,31,6881},{23,31,5814},{22,31,5138},{21,31,3650},{23,31,6713},{20,31,3400},{19,31,2142},{16,31,5},{17,31,6397},{9,31,425},{28,31,1075},{27,31,866},{27,31,697},{26,31,320},{31,26,1601},{26,31,1041},{24,31,653},{15,31,1},{29,29,1601}, +{15,31,1},{22,31,5138},{22,31,5138},{22,31,5138},{21,31,3650},{21,31,4313},{19,31,2142},{19,31,2142},{16,31,5},{14,31,3981},{9,31,425},{27,31,697},{27,31,697},{27,31,697},{26,31,320},{31,23,1201},{24,31,653},{24,31,653},{15,31,1},{30,27,1201},{15,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{21,0,2689}, +{21,0,2689},{21,0,2689},{21,0,2689},{18,31,549},{18,31,549},{18,31,549},{16,31,5},{9,31,425},{9,31,425},{24,31,6097},{24,31,5285},{23,31,4693},{22,31,3473},{23,31,5833},{21,31,3067},{20,31,1988},{17,31,10},{18,31,5571},{11,31,541},{28,31,739},{28,31,595},{27,31,505},{27,31,233},{30,28,1121},{26,31,737},{25,31,461},{18,31,0},{28,30,1121},{18,31,0},{23,31,4693},{23,31,4693},{23,31,4693}, 
+{22,31,3473},{22,31,3845},{20,31,1988},{20,31,1988},{17,31,10},{15,31,3542},{11,31,541},{27,31,505},{27,31,505},{27,31,505},{27,31,233},{30,26,841},{25,31,461},{25,31,461},{18,31,0},{31,27,841},{18,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{22,0,2689},{22,0,2689},{22,0,2689},{22,0,2689},{19,31,666}, +{19,31,666},{19,31,666},{17,31,10},{11,31,541},{11,31,541},{25,31,5427},{24,31,4757},{24,31,4273},{23,31,3314},{24,31,5002},{22,31,2788},{21,31,1898},{18,31,65},{20,31,4714},{13,31,698},{29,31,489},{28,31,387},{28,31,306},{28,31,162},{30,29,726},{27,31,482},{26,31,305},{20,31,1},{29,30,726},{20,31,1},{24,31,4273},{24,31,4273},{24,31,4273},{23,31,3314},{22,31,3429},{21,31,1898},{21,31,1898}, +{18,31,65},{17,31,3213},{13,31,698},{28,31,306},{28,31,306},{28,31,306},{28,31,162},{31,26,545},{26,31,305},{26,31,305},{20,31,1},{29,29,545},{20,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{23,0,2689},{23,0,2689},{23,0,2689},{23,0,2689},{20,31,832},{20,31,832},{20,31,832},{18,31,65},{13,31,698}, +{13,31,698},{4,31,33740},{0,31,5184},{0,22,420},{0,21,4221},{3,31,45594},{0,29,24105},{0,21,8317},{0,18,24790},{0,21,63990},{0,16,38959},{2,31,9704},{0,30,2866},{0,21,389},{0,19,3229},{14,2,18065},{0,20,13257},{0,17,6153},{0,12,13481},{25,0,18065},{0,12,13481},{0,15,1},{0,15,1},{0,15,1},{0,9,1},{0,8,1105},{0,7,585},{0,7,585},{0,4,596},{0,4,1273},{0,4,740},{0,15,1}, +{0,15,1},{0,15,1},{0,9,1},{4,1,1105},{0,7,585},{0,7,585},{0,4,596},{8,0,1105},{0,4,596},{20,4,9248},{0,30,2866},{0,21,389},{0,19,3229},{20,4,9248},{29,5,9248},{0,19,3229},{0,14,9248},{29,5,9248},{0,14,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,38380},{1,31,6614},{0,23,245}, 
+{0,22,3864},{4,31,50747},{0,31,24961},{0,22,8353},{0,19,25735},{0,22,65535},{0,17,41319},{2,31,10152},{0,31,2624},{0,23,229},{0,20,2980},{16,0,19334},{0,20,13769},{0,18,6243},{0,13,14116},{25,1,19334},{0,13,14116},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{0,9,1513},{0,8,772},{0,8,772},{0,5,821},{0,5,1750},{0,4,1028},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{5,1,1513}, +{0,8,772},{0,8,772},{0,5,821},{9,0,1513},{0,5,821},{24,0,9248},{0,31,2624},{0,23,229},{0,20,2980},{24,0,9248},{30,6,9248},{0,20,2980},{0,15,9248},{30,6,9248},{0,15,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,43788},{1,31,8598},{0,24,126},{0,23,3525},{4,31,56155},{0,31,26241},{0,23,8425}, +{0,20,26793},{0,23,65535},{0,18,43819},{3,31,10706},{0,31,2624},{0,24,122},{0,21,2701},{17,0,20689},{0,22,14385},{0,19,6369},{0,13,14756},{25,2,20689},{0,13,14756},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{0,10,1989},{0,9,1018},{0,9,1018},{0,6,1096},{0,5,2294},{0,5,1334},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{5,2,1985},{0,9,1018},{0,9,1018},{0,6,1096},{9,1,1985}, +{0,6,1096},{25,1,9248},{0,31,2624},{0,24,122},{0,21,2701},{25,1,9248},{31,7,9248},{0,21,2701},{0,16,9250},{31,7,9248},{0,16,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,31,49566},{1,31,11350},{0,25,41},{0,24,3109},{4,31,62331},{0,31,28289},{0,24,8585},{0,21,27848},{0,23,65535},{0,19,46459},{4,31,11395}, +{0,31,2880},{0,25,37},{0,22,2440},{17,2,22129},{0,23,15030},{0,20,6509},{0,14,15441},{27,2,22129},{0,14,15441},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{0,12,2525},{0,10,1300},{0,10,1300},{0,6,1384},{0,6,2905},{0,6,1708},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{7,0,2521},{0,10,1300},{0,10,1300},{0,6,1384},{10,1,2521},{0,6,1384},{26,2,9248},{1,31,2866},{0,25,37}, 
+{0,22,2440},{26,2,9248},{27,11,9248},{0,22,2440},{0,17,9250},{27,11,9248},{0,17,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,31,56892},{2,31,15166},{0,26,20},{0,25,2804},{5,31,65535},{0,31,31511},{0,25,8733},{0,22,29095},{0,26,65535},{0,20,49444},{4,31,12385},{1,31,3380},{0,26,4},{0,23,2173},{17,4,23851}, +{0,23,15948},{0,21,6729},{0,15,16274},{29,2,23851},{0,15,16274},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{0,13,3200},{0,11,1665},{0,11,1665},{0,7,1754},{0,7,3691},{0,6,2185},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{2,10,3200},{0,11,1665},{0,11,1665},{0,7,1754},{13,0,3200},{0,7,1754},{24,8,9248},{3,31,3204},{0,26,4},{0,23,2173},{24,8,9248},{30,11,9248},{0,23,2173}, +{0,18,9248},{30,11,9248},{0,18,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,31,63870},{2,31,19230},{0,27,45},{0,27,2520},{5,31,65535},{1,31,35016},{0,26,8925},{0,23,30250},{0,28,65535},{0,21,52374},{5,31,13379},{2,31,4026},{0,27,29},{0,24,1901},{18,4,25472},{0,26,16706},{0,22,6963},{0,16,17124},{29,3,25472}, +{0,16,17124},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{0,14,3874},{0,13,2084},{0,13,2084},{0,8,2165},{0,8,4466},{0,7,2627},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{8,1,3872},{0,13,2084},{0,13,2084},{0,8,2165},{6,5,3872},{0,8,2165},{28,4,9248},{5,31,3589},{1,27,4},{0,24,1901},{28,4,9248},{29,13,9248},{0,24,1901},{0,19,9248},{29,13,9248},{0,19,9248},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,31,65535},{2,31,24002},{0,28,109},{0,27,2268},{6,31,65535},{1,31,38780},{0,27,8825},{0,24,30825},{0,28,65535},{0,22,54996},{6,31,14345},{2,31,4766},{1,28,62},{0,26,1697},{18,6,26744},{0,28,17104},{0,23,6957},{0,17,17625},{31,3,26744},{0,17,17625},{0,31,5},{0,31,5},{0,31,5}, 
+{0,19,5},{0,16,4418},{0,14,2306},{0,14,2306},{0,9,2420},{0,8,5122},{0,8,2997},{0,31,5},{0,31,5},{0,31,5},{0,19,5},{9,1,4418},{0,14,2306},{0,14,2306},{0,9,2420},{16,0,4418},{0,9,2420},{29,5,9248},{8,31,3904},{2,28,1},{0,26,1693},{29,5,9248},{30,14,9248},{0,26,1693},{0,20,9250},{30,14,9248},{0,20,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{7,31,65535},{3,31,29032},{0,29,330},{0,28,2105},{6,31,65535},{2,31,42151},{0,28,7781},{0,25,30108},{0,29,65535},{0,22,56388},{7,31,14819},{3,31,5416},{2,29,62},{0,27,1580},{23,0,26744},{0,29,16547},{0,24,6221},{0,18,17124},{30,5,26744},{0,18,17124},{1,31,84},{1,31,84},{1,31,84},{1,20,72},{0,19,4420},{0,16,2005},{0,16,2005}, +{0,10,2165},{0,9,5389},{0,9,2925},{1,31,20},{1,31,20},{1,31,20},{1,20,8},{10,2,4418},{0,16,2005},{0,16,2005},{0,10,2165},{17,1,4418},{0,10,2165},{30,6,9248},{8,31,4160},{3,29,1},{0,27,1480},{30,6,9248},{31,15,9248},{0,27,1480},{0,21,9250},{31,15,9248},{0,21,9250},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{0,3,1},{0,3,1},{0,3,1},{0,2,0},{0,1,34}, +{0,1,34},{7,31,65535},{3,31,35719},{1,30,717},{0,30,2062},{7,31,65535},{2,31,46660},{0,29,6696},{0,26,29322},{0,31,65535},{0,23,58077},{9,31,15473},{5,31,6173},{3,30,65},{1,28,1601},{23,3,26744},{0,31,15992},{0,26,5346},{0,19,16582},{28,8,26744},{0,19,16582},{2,31,329},{2,31,329},{2,31,329},{1,21,189},{0,22,4420},{0,18,1737},{0,18,1737},{0,11,1898},{0,11,5707},{0,10,2885},{3,31,34}, +{3,31,34},{3,31,34},{2,21,10},{13,0,4418},{0,18,1737},{0,18,1737},{0,11,1898},{20,1,4418},{0,11,1898},{28,12,9248},{10,31,4570},{4,30,4},{0,28,1285},{28,12,9248},{22,22,9248},{0,28,1285},{0,22,9248},{22,22,9248},{0,22,9248},{1,0,185},{1,0,185},{1,0,185},{1,0,185},{0,6,1},{0,6,1},{0,6,1},{0,4,1},{0,3,97},{0,3,97},{7,31,65535},{4,31,40786},{1,31,1122}, 
+{0,30,2138},{7,31,65535},{2,31,49800},{0,30,5634},{0,27,27967},{0,31,65535},{0,24,58770},{10,31,15531},{6,31,6593},{4,31,61},{2,29,1533},{25,2,26259},{0,31,15284},{0,27,4514},{0,20,15812},{27,10,26259},{0,20,15812},{2,31,633},{2,31,633},{2,31,633},{2,22,381},{0,24,4418},{0,20,1480},{0,20,1480},{0,12,1640},{0,12,6125},{0,11,2891},{4,31,61},{4,31,61},{4,31,61},{3,22,10},{14,1,4418}, +{0,20,1480},{0,20,1480},{0,12,1640},{24,0,4418},{0,12,1640},{30,11,8978},{11,31,4744},{5,31,0},{0,29,1040},{30,11,8978},{31,18,8978},{0,29,1040},{0,23,8980},{31,18,8978},{0,23,8980},{2,0,377},{2,0,377},{2,0,377},{2,0,377},{0,9,0},{0,9,0},{0,9,0},{0,5,4},{0,4,193},{0,4,193},{8,31,65535},{4,31,40898},{1,31,1890},{0,31,2125},{7,31,65535},{3,31,47871},{0,30,4194}, +{0,27,24703},{0,31,65535},{0,24,56130},{11,31,14325},{8,31,6051},{5,31,100},{3,29,1218},{22,9,24371},{0,31,13716},{0,28,3402},{0,21,13989},{29,10,24371},{0,21,13989},{3,31,1058},{3,31,1058},{3,31,1058},{2,24,617},{0,27,4420},{0,22,1280},{0,22,1280},{0,13,1445},{0,14,6509},{0,12,2945},{5,31,100},{5,31,100},{5,31,100},{4,23,5},{16,0,4418},{0,22,1280},{0,22,1280},{0,13,1445},{25,1,4418}, +{0,13,1445},{29,14,7938},{13,31,4225},{7,31,4},{0,29,656},{29,14,7938},{29,20,7938},{0,29,656},{0,23,7956},{29,20,7938},{0,23,7956},{2,0,617},{2,0,617},{2,0,617},{2,0,617},{0,11,4},{0,11,4},{0,11,4},{0,7,0},{0,5,325},{0,5,325},{8,31,65535},{4,31,41266},{1,31,2914},{1,31,2170},{7,31,65535},{3,31,46175},{0,30,3010},{0,27,21695},{0,31,65535},{0,25,53636},{12,31,13140}, +{8,31,5571},{6,31,157},{4,29,932},{24,7,22568},{0,31,12404},{0,28,2474},{0,21,12245},{29,11,22568},{0,21,12245},{4,31,1630},{4,31,1630},{4,31,1630},{3,25,937},{0,29,4420},{0,23,1090},{0,23,1090},{0,14,1268},{0,15,6926},{0,13,3029},{6,31,157},{6,31,157},{6,31,157},{5,24,8},{17,1,4418},{0,23,1090},{0,23,1090},{0,14,1268},{26,2,4418},{0,14,1268},{31,12,6962},{14,31,3709},{8,31,1}, 
+{0,30,353},{31,12,6962},{27,22,6962},{0,30,353},{0,24,6970},{27,22,6962},{0,24,6970},{3,0,937},{3,0,937},{3,0,937},{3,0,937},{0,14,0},{0,14,0},{0,14,0},{0,8,4},{0,6,493},{0,6,493},{9,31,65535},{5,31,41956},{2,31,4257},{1,31,2512},{7,31,65535},{3,31,44573},{0,30,1984},{0,28,18569},{0,31,65535},{0,25,51026},{13,31,11930},{10,31,5125},{7,31,250},{5,29,701},{27,4,20642}, +{1,31,11209},{0,29,1634},{0,22,10422},{31,11,20642},{0,22,10422},{4,31,2350},{4,31,2350},{4,31,2350},{3,27,1361},{0,31,4450},{0,25,949},{0,25,949},{0,16,1096},{0,16,7397},{0,14,3171},{7,31,250},{7,31,250},{7,31,250},{6,25,10},{17,4,4418},{0,25,949},{0,25,949},{0,16,1096},{29,2,4418},{0,16,1096},{30,15,5941},{15,31,3176},{10,31,0},{0,30,128},{30,15,5941},{30,21,5941},{0,30,128}, +{0,24,5953},{30,21,5941},{0,24,5953},{3,0,1360},{3,0,1360},{3,0,1360},{3,0,1360},{0,17,0},{0,17,0},{0,17,0},{0,10,1},{0,8,697},{0,8,697},{9,31,65535},{5,31,42660},{2,31,5617},{1,31,3088},{8,31,65535},{3,31,43421},{0,31,1250},{0,28,15865},{0,31,65535},{0,25,48978},{13,31,10922},{11,31,4753},{8,31,360},{6,30,509},{28,4,19021},{2,31,10246},{0,30,1088},{0,23,8945},{29,13,19021}, +{0,23,8945},{5,31,3131},{5,31,3131},{5,31,3131},{4,28,1822},{1,31,4580},{0,28,776},{0,28,776},{0,17,925},{0,18,7893},{0,15,3333},{8,31,360},{8,31,360},{8,31,360},{7,26,10},{18,5,4418},{0,28,776},{0,28,776},{0,17,925},{30,3,4418},{0,17,925},{31,15,5101},{17,31,2777},{11,31,9},{0,31,25},{31,15,5101},{30,22,5101},{0,31,25},{0,25,5105},{30,22,5101},{0,25,5105},{4,0,1818}, +{4,0,1818},{4,0,1818},{4,0,1818},{0,20,1},{0,20,1},{0,20,1},{0,12,1},{0,9,925},{0,9,925},{9,31,65535},{5,31,43620},{2,31,7233},{1,31,3920},{8,31,65535},{3,31,42525},{0,31,738},{0,28,13417},{0,31,65535},{0,25,47186},{14,31,9978},{11,31,4449},{10,31,452},{7,30,354},{31,1,17485},{3,31,9369},{0,30,704},{0,24,7570},{31,13,17485},{0,24,7570},{6,31,4058},{6,31,4058},{6,31,4058}, 
+{4,29,2315},{2,31,4874},{0,29,610},{0,29,610},{0,18,772},{0,20,8427},{0,16,3497},{10,31,452},{10,31,452},{10,31,452},{8,27,5},{20,4,4418},{0,29,610},{0,29,610},{0,18,772},{29,5,4418},{0,18,772},{31,16,4325},{18,31,2357},{13,31,0},{1,31,0},{31,16,4325},{31,22,4325},{1,31,0},{0,25,4337},{31,22,4325},{0,25,4337},{4,0,2314},{4,0,2314},{4,0,2314},{4,0,2314},{0,22,1}, +{0,22,1},{0,22,1},{0,13,4},{0,10,1189},{0,10,1189},{9,31,65535},{5,31,44836},{2,31,9105},{2,31,4905},{8,31,65535},{3,31,41885},{0,31,482},{0,28,11225},{0,31,65535},{0,26,45590},{15,31,9102},{13,31,4161},{11,31,557},{9,30,212},{29,6,16034},{5,31,8602},{0,31,482},{0,24,6242},{31,14,16034},{0,24,6242},{6,31,5066},{6,31,5066},{6,31,5066},{5,30,2907},{2,31,5322},{0,31,482},{0,31,482}, +{0,19,637},{0,20,8939},{0,17,3725},{11,31,557},{11,31,557},{11,31,557},{9,28,8},{24,0,4418},{0,31,482},{0,31,482},{0,19,637},{30,6,4418},{0,19,637},{30,19,3613},{20,31,1940},{15,31,4},{4,31,1},{30,19,3613},{31,23,3613},{4,31,1},{0,26,3617},{31,23,3613},{0,26,3617},{5,0,2906},{5,0,2906},{5,0,2906},{5,0,2906},{0,25,1},{0,25,1},{0,25,1},{0,15,0},{0,11,1489}, +{0,11,1489},{9,31,65535},{5,31,46510},{3,31,11362},{2,31,6237},{9,31,65535},{3,31,41471},{0,31,500},{0,29,8976},{0,31,65535},{0,26,43934},{16,31,8139},{14,31,3853},{12,31,680},{10,30,109},{26,13,14504},{8,31,7667},{0,31,500},{0,25,4979},{21,21,14504},{0,25,4979},{7,31,6337},{7,31,6337},{7,31,6337},{5,31,3642},{3,31,5962},{0,31,500},{0,31,500},{0,20,520},{0,22,9629},{0,18,4035},{12,31,680}, +{12,31,680},{12,31,680},{10,29,10},{24,3,4418},{0,31,500},{0,31,500},{0,20,520},{28,9,4418},{0,20,520},{31,19,2888},{20,31,1517},{16,31,1},{7,31,1},{31,19,2888},{27,26,2888},{7,31,1},{0,26,2906},{27,26,2888},{0,26,2906},{5,0,3617},{5,0,3617},{5,0,3617},{5,0,3617},{0,28,1},{0,28,1},{0,28,1},{0,17,4},{0,11,1930},{0,11,1930},{10,31,65535},{6,31,48082},{3,31,13570}, 
+{2,31,7693},{9,31,65535},{3,31,41375},{0,31,788},{0,29,7120},{0,31,65535},{0,26,42734},{17,31,7409},{15,31,3625},{13,31,821},{11,31,45},{28,11,13235},{8,31,6899},{2,31,628},{0,25,3987},{31,16,13235},{0,25,3987},{7,31,7681},{7,31,7681},{7,31,7681},{6,31,4437},{4,31,6659},{1,31,738},{1,31,738},{0,21,421},{0,23,10286},{0,20,4305},{13,31,821},{13,31,821},{13,31,821},{11,30,10},{22,9,4418}, +{2,31,628},{2,31,628},{0,21,421},{29,10,4418},{0,21,421},{31,20,2312},{21,31,1217},{18,31,1},{9,31,0},{31,20,2312},{30,25,2312},{9,31,0},{0,27,2314},{30,25,2312},{0,27,2314},{6,0,4337},{6,0,4337},{6,0,4337},{6,0,4337},{0,30,1},{0,30,1},{0,30,1},{0,18,1},{0,13,2329},{0,13,2329},{10,31,65535},{6,31,49890},{3,31,16034},{2,31,9405},{9,31,65535},{4,31,41526},{0,31,1332}, +{0,29,5520},{0,31,65535},{0,26,41790},{18,31,6747},{16,31,3459},{14,31,980},{12,31,5},{31,8,12051},{10,31,6275},{4,31,801},{0,26,3066},{28,19,12051},{0,26,3066},{8,31,9062},{8,31,9062},{8,31,9062},{7,31,5410},{4,31,7555},{1,31,1154},{1,31,1154},{0,23,325},{0,23,11118},{0,20,4625},{14,31,980},{14,31,980},{14,31,980},{12,31,5},{24,8,4418},{4,31,801},{4,31,801},{0,23,325},{30,11,4418}, +{0,23,325},{30,23,1800},{23,31,949},{19,31,4},{12,31,1},{30,23,1800},{28,27,1800},{12,31,1},{0,27,1818},{28,27,1800},{0,27,1818},{6,0,5105},{6,0,5105},{6,0,5105},{6,0,5105},{0,31,36},{0,31,36},{0,31,36},{0,20,4},{0,15,2741},{0,15,2741},{10,31,65535},{6,31,51954},{3,31,18754},{3,31,11330},{9,31,65535},{4,31,41798},{1,31,2082},{0,29,4176},{0,31,65535},{0,27,41092},{19,31,6153}, +{17,31,3297},{16,31,1154},{13,31,20},{29,13,10952},{11,31,5708},{6,31,965},{0,27,2291},{28,20,10952},{0,27,2291},{9,31,10545},{9,31,10545},{9,31,10545},{7,31,6482},{5,31,8549},{2,31,1716},{2,31,1716},{0,24,221},{0,26,11876},{0,22,4989},{16,31,1154},{16,31,1154},{16,31,1154},{13,31,20},{28,4,4418},{6,31,965},{6,31,965},{0,24,221},{29,13,4418},{0,24,221},{29,26,1352},{23,31,725},{21,31,0}, 
+{14,31,1},{29,26,1352},{31,26,1352},{14,31,1},{0,28,1360},{31,26,1352},{0,28,1360},{7,0,5953},{7,0,5953},{7,0,5953},{7,0,5953},{1,31,145},{1,31,145},{1,31,145},{0,21,1},{0,16,3130},{0,16,3130},{10,31,65535},{6,31,54582},{4,31,21886},{3,31,13652},{9,31,65535},{4,31,42410},{1,31,3144},{0,30,2841},{0,31,65535},{0,27,40390},{19,31,5649},{18,31,3157},{17,31,1325},{15,31,74},{29,15,9818}, +{13,31,5241},{8,31,1108},{0,27,1589},{30,20,9818},{0,27,1589},{10,31,12376},{10,31,12376},{10,31,12376},{8,31,7844},{6,31,9861},{3,31,2576},{3,31,2576},{0,25,136},{0,28,12696},{0,23,5429},{17,31,1325},{17,31,1325},{17,31,1325},{15,31,74},{31,2,4418},{8,31,1108},{8,31,1108},{0,25,136},{27,16,4418},{0,25,136},{31,24,925},{25,31,505},{23,31,1},{17,31,1},{31,24,925},{31,27,925},{17,31,1}, +{0,28,937},{31,27,925},{0,28,937},{7,0,6970},{7,0,6970},{7,0,6970},{7,0,6970},{1,31,388},{1,31,388},{1,31,388},{0,23,0},{0,17,3665},{0,17,3665},{10,31,65535},{7,31,57052},{4,31,24910},{3,31,15988},{9,31,65535},{4,31,43226},{1,31,4360},{0,30,1833},{0,31,65535},{0,27,40038},{21,31,5202},{19,31,3073},{18,31,1508},{16,31,180},{30,15,8901},{14,31,4814},{10,31,1300},{0,28,1021},{30,21,8901}, +{0,28,1021},{10,31,14136},{10,31,14136},{10,31,14136},{8,31,9252},{7,31,11195},{3,31,3536},{3,31,3536},{0,26,85},{0,29,13491},{0,23,5925},{18,31,1508},{18,31,1508},{18,31,1508},{16,31,180},{26,13,4418},{10,31,1300},{10,31,1300},{0,26,85},{21,21,4418},{0,26,85},{30,27,613},{26,31,337},{24,31,1},{20,31,1},{30,27,613},{30,28,613},{20,31,1},{0,29,617},{30,28,613},{0,29,617},{8,0,7956}, +{8,0,7956},{8,0,7956},{8,0,7956},{2,31,697},{2,31,697},{2,31,697},{0,25,4},{0,18,4181},{0,18,4181},{11,31,65535},{7,31,59708},{4,31,28190},{3,31,18580},{10,31,65535},{5,31,44295},{1,31,5832},{0,30,1081},{0,31,65535},{0,27,39942},{22,31,4818},{20,31,3017},{19,31,1709},{17,31,325},{30,17,8069},{15,31,4473},{11,31,1514},{0,29,602},{25,25,8069},{0,29,602},{11,31,15965},{11,31,15965},{11,31,15965}, 
+{9,31,10757},{7,31,12667},{4,31,4662},{4,31,4662},{0,27,52},{0,30,14340},{0,25,6449},{19,31,1709},{19,31,1709},{19,31,1709},{17,31,325},{28,12,4418},{11,31,1514},{11,31,1514},{0,27,52},{22,22,4418},{0,27,52},{31,27,365},{27,31,205},{26,31,1},{22,31,1},{31,27,365},{30,29,365},{22,31,1},{0,29,377},{30,29,365},{0,29,377},{8,0,8980},{8,0,8980},{8,0,8980},{8,0,8980},{2,31,1097}, +{2,31,1097},{2,31,1097},{0,26,1},{0,20,4682},{0,20,4682},{11,31,65535},{8,31,58981},{5,31,29551},{4,31,19751},{10,31,65535},{5,31,43215},{2,31,6910},{1,30,686},{0,31,65535},{0,28,34909},{23,31,4502},{21,31,3011},{20,31,1973},{18,31,520},{31,17,7322},{17,31,4242},{13,31,1769},{0,29,314},{25,26,7322},{0,29,314},{12,31,16739},{12,31,16739},{12,31,16739},{10,31,11492},{8,31,13636},{5,31,5510},{5,31,5510}, +{1,28,54},{0,31,14139},{0,26,6041},{20,31,1973},{20,31,1973},{20,31,1973},{18,31,520},{29,13,4418},{13,31,1769},{13,31,1769},{0,28,29},{28,20,4418},{0,28,29},{31,28,181},{28,31,97},{27,31,4},{25,31,0},{31,28,181},{31,29,181},{25,31,0},{0,30,185},{31,29,181},{0,30,185},{9,0,9248},{9,0,9248},{9,0,9248},{9,0,9248},{3,31,1348},{3,31,1348},{3,31,1348},{1,27,5},{0,21,4520}, +{0,21,4520},{12,31,65535},{9,31,57270},{6,31,30345},{5,31,20521},{11,31,65535},{6,31,41449},{3,31,8015},{2,31,301},{0,31,65535},{0,28,28330},{24,31,4181},{22,31,3053},{21,31,2248},{20,31,772},{31,19,6584},{20,31,3941},{15,31,2041},{0,30,77},{27,26,6584},{0,30,77},{13,31,17289},{13,31,17289},{13,31,17289},{11,31,12050},{10,31,14315},{7,31,6389},{7,31,6389},{2,29,53},{0,31,13860},{0,28,5286},{21,31,2248}, +{21,31,2248},{21,31,2248},{20,31,772},{29,16,4418},{15,31,2041},{15,31,2041},{0,30,13},{31,20,4418},{0,30,13},{30,31,50},{30,31,34},{29,31,0},{28,31,1},{30,31,50},{31,30,50},{28,31,1},{0,30,68},{31,30,50},{0,30,68},{10,0,9250},{10,0,9250},{10,0,9250},{10,0,9250},{4,31,1549},{4,31,1549},{4,31,1549},{2,28,2},{0,23,4114},{0,23,4114},{13,31,65535},{9,31,55894},{7,31,31068}, 
+{6,31,21256},{12,31,65535},{8,31,39740},{4,31,9073},{3,31,90},{0,31,65535},{0,29,23356},{24,31,3973},{23,31,3125},{23,31,2500},{21,31,1037},{30,22,6019},{20,31,3701},{17,31,2340},{0,31,4},{27,27,6019},{0,31,4},{14,31,17796},{14,31,17796},{14,31,17796},{12,31,12625},{11,31,14957},{8,31,7139},{8,31,7139},{3,30,53},{0,31,14020},{0,29,4652},{23,31,2500},{23,31,2500},{23,31,2500},{21,31,1037},{30,17,4418}, +{17,31,2340},{17,31,2340},{0,31,4},{25,25,4418},{0,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{31,31,4},{30,31,1},{0,31,4},{31,31,4},{0,31,4},{11,0,9250},{11,0,9250},{11,0,9250},{11,0,9250},{6,31,1765},{6,31,1765},{6,31,1765},{3,29,2},{0,25,3877},{0,25,3877},{13,31,65535},{10,31,53236},{8,31,30487},{7,31,21105},{13,31,65535},{8,31,37332},{5,31,9177}, +{4,31,36},{1,31,65535},{0,29,18680},{25,31,3443},{24,31,2741},{23,31,2248},{22,31,980},{29,25,5163},{21,31,3218},{20,31,2117},{3,31,1},{30,26,5163},{3,31,1},{15,31,17289},{15,31,17289},{15,31,17289},{13,31,12512},{12,31,14328},{9,31,7149},{9,31,7149},{4,31,20},{0,31,13376},{0,29,3944},{23,31,2248},{23,31,2248},{23,31,2248},{22,31,980},{31,17,3872},{20,31,2117},{20,31,2117},{3,31,1},{25,26,3872}, +{3,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{12,0,9248},{12,0,9248},{12,0,9248},{12,0,9248},{7,31,1972},{7,31,1972},{7,31,1972},{4,30,5},{0,27,3545},{0,27,3545},{14,31,65535},{11,31,50266},{9,31,29322},{8,31,20567},{13,31,65535},{9,31,35025},{6,31,8985},{5,31,21},{2,31,65535},{0,29,14712},{26,31,2873}, +{25,31,2283},{24,31,1825},{22,31,820},{30,24,4267},{22,31,2657},{20,31,1685},{5,31,1},{29,27,4267},{5,31,1},{16,31,16427},{16,31,16427},{16,31,16427},{14,31,12185},{13,31,13442},{10,31,6915},{10,31,6915},{5,31,5},{1,31,12539},{0,30,3314},{24,31,1825},{24,31,1825},{24,31,1825},{22,31,820},{28,23,3200},{20,31,1685},{20,31,1685},{5,31,1},{28,25,3200},{5,31,1},{31,31,0},{31,31,0},{31,31,0}, 
+{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{13,0,9248},{13,0,9248},{13,0,9248},{13,0,9248},{8,31,2250},{8,31,2250},{8,31,2250},{5,31,5},{0,28,3170},{0,28,3170},{15,31,65535},{12,31,47239},{10,31,28065},{9,31,20104},{14,31,65535},{10,31,32574},{7,31,8839},{6,31,54},{3,31,64890},{0,30,10964},{26,31,2252},{25,31,1806},{25,31,1445},{23,31,650},{30,25,3361}, +{23,31,2091},{21,31,1322},{8,31,0},{30,27,3361},{8,31,0},{17,31,15584},{17,31,15584},{17,31,15584},{15,31,11846},{14,31,12522},{11,31,6697},{11,31,6697},{6,31,50},{3,31,11669},{0,31,2834},{25,31,1445},{25,31,1445},{25,31,1445},{23,31,650},{30,21,2521},{21,31,1322},{21,31,1322},{8,31,0},{31,24,2521},{8,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, +{0,31,0},{31,31,0},{0,31,0},{14,0,9250},{14,0,9250},{14,0,9250},{14,0,9250},{9,31,2525},{9,31,2525},{9,31,2525},{6,31,50},{0,31,2834},{0,31,2834},{16,31,65535},{13,31,44559},{11,31,27000},{10,31,19705},{15,31,64179},{11,31,30525},{8,31,8677},{7,31,149},{3,31,60570},{0,30,8308},{27,31,1782},{26,31,1416},{25,31,1157},{24,31,520},{30,26,2646},{23,31,1691},{22,31,1040},{11,31,1},{31,27,2646}, +{11,31,1},{18,31,14889},{18,31,14889},{18,31,14889},{16,31,11585},{15,31,11778},{12,31,6555},{12,31,6555},{7,31,145},{3,31,11061},{0,31,2610},{25,31,1157},{25,31,1157},{25,31,1157},{24,31,520},{31,21,1985},{22,31,1040},{22,31,1040},{11,31,1},{31,25,1985},{11,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{15,0,9250}, +{15,0,9250},{15,0,9250},{15,0,9250},{10,31,2792},{10,31,2792},{10,31,2792},{7,31,145},{0,31,2610},{0,31,2610},{16,31,63318},{14,31,42019},{12,31,25930},{11,31,19324},{16,31,59178},{11,31,28845},{9,31,8605},{8,31,276},{6,31,56253},{0,30,6420},{27,31,1366},{27,31,1094},{26,31,872},{25,31,397},{30,27,2017},{25,31,1298},{23,31,794},{13,31,1},{30,28,2017},{13,31,1},{19,31,14244},{19,31,14244},{19,31,14244}, 
+{17,31,11312},{16,31,11037},{13,31,6429},{13,31,6429},{8,31,260},{6,31,10457},{0,31,2642},{26,31,872},{26,31,872},{26,31,872},{25,31,397},{31,22,1513},{23,31,794},{23,31,794},{13,31,1},{29,27,1513},{13,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,31,3074}, +{12,31,3074},{12,31,3074},{8,31,260},{0,31,2642},{0,31,2642},{17,31,58848},{15,31,39619},{13,31,24975},{12,31,19007},{16,31,54474},{13,31,27057},{10,31,8569},{9,31,461},{8,31,51302},{0,31,5046},{28,31,979},{27,31,806},{27,31,637},{26,31,292},{31,26,1473},{26,31,953},{24,31,605},{16,31,0},{29,29,1473},{16,31,0},{19,31,13604},{19,31,13604},{19,31,13604},{18,31,11057},{16,31,10429},{14,31,6339},{14,31,6339}, +{10,31,424},{8,31,9713},{1,31,2900},{27,31,637},{27,31,637},{27,31,637},{26,31,292},{30,25,1105},{24,31,605},{24,31,605},{16,31,0},{30,27,1105},{16,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{12,31,3330},{12,31,3330},{12,31,3330},{10,31,424},{1,31,2900}, +{1,31,2900}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_56.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_56.inc new file mode 100644 index 0000000000..2b56c0944c --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_atc_56.inc @@ -0,0 +1,481 @@ +{0,3,20},{0,3,5},{0,2,1},{0,2,9},{0,2,35},{0,2,27},{0,1,17},{0,1,24},{0,1,41},{0,1,25},{0,3,20},{0,3,5},{0,2,1},{0,2,9},{0,2,35},{0,2,27},{0,1,17},{0,1,24},{1,0,35},{0,1,24},{0,1,1},{0,1,1},{0,1,1},{0,1,0},{0,1,2},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, 
+{0,1,1},{0,1,1},{0,1,0},{0,1,2},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{1,0,18},{0,3,5},{0,2,1},{0,2,9},{1,0,18},{1,1,18},{0,2,9},{0,1,20},{1,1,18},{0,1,20},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,9,54},{0,7,37},{0,4,52}, +{0,4,36},{0,7,52},{0,5,21},{0,4,0},{0,3,21},{0,4,88},{0,3,37},{1,5,24},{1,5,9},{1,4,5},{1,4,13},{2,1,51},{0,5,21},{0,4,0},{0,3,21},{3,1,51},{0,3,21},{0,7,36},{0,7,36},{0,7,36},{0,4,36},{0,5,10},{0,4,0},{0,4,0},{0,2,5},{0,3,26},{0,2,14},{1,3,5},{1,3,5},{1,3,5},{1,3,4},{1,2,8}, +{0,4,0},{0,4,0},{0,2,5},{2,1,8},{0,2,5},{2,2,18},{0,7,1},{1,4,1},{0,4,0},{2,2,18},{2,3,18},{0,4,0},{0,3,20},{2,3,18},{0,3,20},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,4,0},{0,4,0},{0,4,0},{0,2,1},{0,2,10},{0,2,10},{1,11,54},{1,9,37},{1,6,52},{1,6,36},{1,9,52},{1,7,21},{1,6,0}, +{1,5,21},{0,7,63},{0,5,25},{2,7,24},{2,7,9},{2,6,5},{2,6,13},{3,3,51},{1,7,21},{1,6,0},{1,5,21},{4,3,51},{1,5,21},{1,9,36},{1,9,36},{1,9,36},{1,6,36},{1,7,10},{1,6,0},{1,6,0},{1,4,5},{0,6,11},{0,5,9},{2,5,5},{2,5,5},{2,5,5},{2,5,4},{3,1,8},{1,6,0},{1,6,0},{1,4,5},{3,3,8}, +{1,4,5},{3,4,18},{1,9,1},{2,6,1},{1,6,0},{3,4,18},{7,0,18},{1,6,0},{0,5,20},{7,0,18},{0,5,20},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,6,0},{1,6,0},{1,6,0},{1,4,1},{0,6,2},{0,6,2},{2,13,54},{2,11,37},{2,8,52},{2,8,36},{2,11,52},{2,9,21},{2,8,0},{2,7,21},{0,11,51},{1,7,25},{3,9,24}, +{3,9,9},{3,8,5},{3,8,13},{5,1,51},{2,9,21},{2,8,0},{2,7,21},{9,0,51},{2,7,21},{2,11,36},{2,11,36},{2,11,36},{2,8,36},{2,9,10},{2,8,0},{2,8,0},{2,6,5},{1,8,11},{1,7,9},{3,7,5},{3,7,5},{3,7,5},{3,7,4},{3,6,8},{2,8,0},{2,8,0},{2,6,5},{8,0,8},{2,6,5},{4,6,18},{2,11,1},{3,8,1}, +{2,8,0},{4,6,18},{8,2,18},{2,8,0},{0,7,20},{8,2,18},{0,7,20},{2,0,36},{2,0,36},{2,0,36},{2,0,36},{2,8,0},{2,8,0},{2,8,0},{2,6,1},{1,8,2},{1,8,2},{3,15,70},{3,13,51},{4,10,69},{3,10,52},{3,14,52},{3,12,25},{3,10,4},{3,9,27},{1,13,53},{2,10,26},{4,12,22},{4,11,12},{4,10,5},{4,10,9},{6,4,51}, 
+{2,13,22},{3,10,3},{3,9,26},{11,1,51},{3,9,26},{3,13,51},{3,13,51},{3,13,51},{3,10,51},{3,12,9},{3,11,2},{3,11,2},{3,9,2},{2,11,9},{3,9,10},{4,10,4},{4,10,4},{4,10,4},{4,9,5},{6,2,8},{3,11,1},{3,11,1},{3,9,1},{11,0,8},{3,9,1},{7,2,18},{3,13,1},{4,10,1},{3,10,2},{7,2,18},{11,2,18},{3,10,2}, +{0,9,26},{11,2,18},{0,9,26},{3,0,50},{3,0,50},{3,0,50},{3,0,50},{3,11,1},{3,11,1},{3,11,1},{3,9,1},{2,10,0},{2,10,0},{4,17,54},{4,15,36},{4,13,54},{4,12,38},{4,15,52},{4,14,24},{4,12,3},{4,11,26},{2,15,53},{3,12,26},{5,14,22},{5,13,12},{5,12,5},{5,12,9},{8,2,51},{3,15,22},{4,12,3},{4,11,26},{12,3,51}, +{4,11,26},{4,15,36},{4,15,36},{4,15,36},{4,12,37},{4,14,8},{4,12,2},{4,12,2},{4,11,1},{3,13,9},{3,11,14},{5,12,4},{5,12,4},{5,12,4},{5,11,5},{8,0,8},{4,12,2},{4,12,2},{4,11,1},{11,3,8},{4,11,1},{9,0,18},{4,15,0},{5,12,1},{4,12,2},{9,0,18},{15,0,18},{4,12,2},{0,11,26},{15,0,18},{0,11,26},{4,0,36}, +{4,0,36},{4,0,36},{4,0,36},{4,12,1},{4,12,1},{4,12,1},{4,11,0},{3,12,0},{3,12,0},{5,19,54},{5,17,37},{5,15,54},{5,14,38},{5,17,52},{5,15,27},{5,14,3},{5,13,26},{3,17,52},{4,14,26},{6,15,24},{6,15,12},{6,14,5},{6,14,9},{10,1,51},{4,17,21},{5,14,3},{5,13,26},{17,0,51},{5,13,26},{5,17,36},{5,17,36},{5,17,36}, +{5,14,37},{5,15,10},{5,14,2},{5,14,2},{5,13,1},{4,15,12},{4,13,11},{6,14,4},{6,14,4},{6,14,4},{6,13,5},{9,2,8},{5,14,2},{5,14,2},{5,13,1},{16,0,8},{5,13,1},{10,2,18},{5,17,1},{6,14,1},{5,14,2},{10,2,18},{16,2,18},{5,14,2},{0,13,26},{16,2,18},{0,13,26},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,14,1}, +{5,14,1},{5,14,1},{5,13,0},{4,14,0},{4,14,0},{6,21,54},{6,19,37},{6,16,52},{6,16,36},{6,19,52},{6,17,21},{6,16,0},{6,15,26},{4,19,51},{5,15,36},{7,17,24},{7,17,9},{7,16,5},{7,16,13},{11,3,51},{6,17,21},{6,16,0},{6,15,26},{18,2,51},{6,15,26},{6,19,36},{6,19,36},{6,19,36},{6,16,36},{6,17,10},{6,16,0},{6,16,0}, 
+{6,15,1},{5,16,11},{5,15,11},{7,15,5},{7,15,5},{7,15,5},{7,15,5},{11,1,8},{6,16,0},{6,16,0},{6,15,1},{17,2,8},{6,15,1},{11,4,18},{6,19,1},{7,16,1},{6,16,0},{11,4,18},{17,4,18},{6,16,0},{0,15,26},{17,4,18},{0,15,26},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,16,0},{6,16,0},{6,16,0},{6,15,0},{5,16,2}, +{5,16,2},{7,23,70},{7,21,51},{8,18,69},{7,18,52},{7,22,52},{7,20,25},{7,18,4},{7,17,27},{5,21,53},{6,18,26},{8,20,22},{8,19,12},{8,18,5},{8,18,9},{13,2,51},{6,21,22},{7,18,3},{7,17,26},{21,2,51},{7,17,26},{7,21,51},{7,21,51},{7,21,51},{7,18,51},{7,20,9},{7,19,2},{7,19,2},{7,17,2},{6,19,9},{7,17,10},{8,18,4}, +{8,18,4},{8,18,4},{8,17,5},{13,0,8},{7,19,1},{7,19,1},{7,17,1},{20,2,8},{7,17,1},{14,0,18},{7,21,1},{8,18,1},{7,18,2},{14,0,18},{20,4,18},{7,18,2},{0,17,26},{20,4,18},{0,17,26},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,19,1},{7,19,1},{7,19,1},{7,17,1},{6,18,0},{6,18,0},{8,25,54},{8,23,36},{8,21,54}, +{8,20,38},{8,24,51},{8,22,24},{8,20,3},{8,19,26},{6,23,53},{7,20,26},{9,22,22},{9,21,12},{9,20,5},{9,20,9},{14,4,51},{7,23,22},{8,20,3},{8,19,26},{25,0,51},{8,19,26},{8,23,36},{8,23,36},{8,23,36},{8,20,37},{8,22,8},{8,20,2},{8,20,2},{8,19,1},{7,21,9},{7,19,14},{9,20,4},{9,20,4},{9,20,4},{9,19,5},{14,2,8}, +{8,20,2},{8,20,2},{8,19,1},{24,0,8},{8,19,1},{15,2,18},{8,23,0},{9,20,1},{8,20,2},{15,2,18},{25,1,18},{8,20,2},{0,19,26},{25,1,18},{0,19,26},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,20,1},{8,20,1},{8,20,1},{8,19,0},{7,20,0},{7,20,0},{9,27,54},{9,25,36},{9,23,54},{9,22,38},{9,26,51},{9,24,24},{9,22,3}, +{9,21,26},{7,25,53},{8,22,26},{10,24,22},{10,23,12},{10,22,5},{10,22,9},{16,2,51},{8,25,19},{9,22,3},{9,21,26},{27,1,51},{9,21,26},{9,25,36},{9,25,36},{9,25,36},{9,22,37},{9,24,8},{9,22,2},{9,22,2},{9,21,1},{8,23,12},{8,21,11},{10,22,4},{10,22,4},{10,22,4},{10,21,5},{16,0,8},{9,22,2},{9,22,2},{9,21,1},{26,1,8}, 
+{9,21,1},{17,0,18},{9,25,0},{10,22,1},{9,22,2},{17,0,18},{26,3,18},{9,22,2},{0,21,26},{26,3,18},{0,21,26},{9,0,36},{9,0,36},{9,0,36},{9,0,36},{9,22,1},{9,22,1},{9,22,1},{9,21,0},{8,22,0},{8,22,0},{10,29,54},{10,27,36},{10,25,54},{10,24,38},{10,28,51},{10,26,24},{10,24,3},{10,23,26},{8,27,52},{9,24,26},{11,26,22}, +{11,25,12},{11,24,5},{11,24,9},{17,4,51},{9,27,19},{10,24,3},{10,23,26},{27,4,51},{10,23,26},{10,27,36},{10,27,36},{10,27,36},{10,24,37},{10,26,8},{10,24,2},{10,24,2},{10,23,1},{9,25,12},{9,23,11},{11,24,4},{11,24,4},{11,24,4},{11,23,5},{17,2,8},{10,24,2},{10,24,2},{10,23,1},{27,3,8},{10,23,1},{18,2,18},{10,27,0},{11,24,1}, +{10,24,2},{18,2,18},{31,0,18},{10,24,2},{0,23,26},{31,0,18},{0,23,26},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,24,1},{10,24,1},{10,24,1},{10,23,0},{9,24,0},{9,24,0},{11,31,70},{11,29,52},{11,27,70},{11,27,54},{11,30,53},{11,28,20},{11,27,5},{11,26,25},{10,28,56},{10,26,22},{12,28,22},{12,27,9},{12,26,8},{12,26,9},{19,3,51}, +{11,28,19},{11,27,4},{10,26,21},{30,4,51},{10,26,21},{11,30,50},{11,30,50},{11,30,50},{11,27,50},{11,28,11},{11,27,1},{11,27,1},{11,25,2},{10,27,11},{11,25,10},{12,26,4},{12,26,4},{12,26,4},{12,25,5},{19,1,8},{11,27,0},{11,27,0},{11,25,1},{29,4,8},{11,25,1},{20,1,18},{11,29,2},{12,26,4},{11,27,4},{20,1,18},{29,6,18},{11,27,4}, +{0,26,20},{29,6,18},{0,26,20},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,27,1},{11,27,1},{11,27,1},{11,25,2},{10,26,2},{10,26,2},{12,33,54},{12,31,37},{12,29,56},{12,29,41},{12,32,51},{12,30,22},{12,29,5},{12,28,24},{11,30,56},{11,28,22},{13,30,22},{13,29,9},{13,28,8},{13,28,9},{20,5,51},{12,30,22},{12,29,5},{11,28,21},{31,6,51}, +{11,28,21},{12,31,37},{12,31,37},{12,31,37},{12,29,37},{12,30,8},{12,29,1},{12,29,1},{12,27,0},{11,29,11},{12,27,9},{13,28,4},{13,28,4},{13,28,4},{13,27,5},{20,3,8},{12,29,1},{12,29,1},{12,27,0},{30,6,8},{12,27,0},{21,3,18},{12,31,1},{13,28,4},{13,28,5},{21,3,18},{30,8,18},{13,28,5},{0,28,20},{30,8,18},{0,28,20},{12,0,36}, 
+{12,0,36},{12,0,36},{12,0,36},{12,29,0},{12,29,0},{12,29,0},{12,27,0},{11,28,2},{11,28,2},{13,35,54},{13,33,36},{13,31,56},{13,31,41},{13,34,51},{13,32,24},{13,31,5},{13,30,24},{11,33,53},{12,30,21},{14,32,22},{14,31,9},{14,30,8},{14,30,9},{23,0,51},{12,33,19},{13,31,5},{12,30,21},{31,9,51},{12,30,21},{13,33,36},{13,33,36},{13,33,36}, +{13,31,37},{13,32,8},{13,31,1},{13,31,1},{13,29,0},{12,31,9},{13,29,9},{14,30,4},{14,30,4},{14,30,4},{14,29,5},{21,5,8},{13,31,1},{13,31,1},{13,29,0},{31,8,8},{13,29,0},{22,5,18},{13,33,0},{14,30,4},{14,30,5},{22,5,18},{31,10,18},{14,30,5},{0,30,20},{31,10,18},{0,30,20},{13,0,36},{13,0,36},{13,0,36},{13,0,36},{13,31,0}, +{13,31,0},{13,31,0},{13,29,0},{12,30,1},{12,30,1},{14,37,54},{14,35,36},{14,33,54},{14,32,38},{14,36,51},{14,34,24},{14,32,3},{14,32,35},{12,35,52},{13,32,26},{15,34,22},{15,33,12},{15,32,5},{15,32,9},{24,2,51},{13,35,19},{14,32,3},{13,32,26},{27,17,51},{13,32,26},{14,35,36},{14,35,36},{14,35,36},{14,32,37},{14,34,8},{14,32,2},{14,32,2}, +{14,31,0},{13,33,12},{14,31,9},{15,32,4},{15,32,4},{15,32,4},{15,31,5},{24,0,8},{14,32,2},{14,32,2},{14,31,0},{31,11,8},{14,31,0},{25,0,18},{14,35,0},{15,32,1},{14,32,2},{25,0,18},{31,13,18},{14,32,2},{0,32,26},{31,13,18},{0,32,26},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,32,1},{14,32,1},{14,32,1},{14,31,0},{13,32,0}, +{13,32,0},{15,40,68},{15,37,52},{15,35,70},{15,35,54},{15,38,53},{15,36,20},{15,35,5},{15,34,25},{14,36,56},{14,34,22},{16,36,22},{16,35,9},{16,34,8},{16,34,9},{26,1,51},{15,36,19},{15,35,4},{14,34,21},{30,17,51},{14,34,21},{15,38,50},{15,38,50},{15,38,50},{15,35,50},{15,36,11},{15,35,1},{15,35,1},{15,33,2},{14,35,11},{15,33,10},{16,34,4}, +{16,34,4},{16,34,4},{16,33,5},{24,6,8},{15,35,0},{15,35,0},{15,33,1},{29,17,8},{15,33,1},{25,6,18},{15,37,2},{16,34,4},{15,35,4},{25,6,18},{29,19,18},{15,35,4},{0,34,20},{29,19,18},{0,34,20},{15,0,50},{15,0,50},{15,0,50},{15,0,50},{15,35,1},{15,35,1},{15,35,1},{15,33,2},{14,34,2},{14,34,2},{16,41,56},{16,39,37},{16,37,56}, 
+{16,37,41},{16,40,51},{16,38,22},{16,37,5},{16,36,24},{15,38,56},{15,36,22},{17,38,22},{17,37,9},{17,36,8},{17,36,9},{27,3,51},{16,38,22},{16,37,5},{15,36,21},{31,19,51},{15,36,21},{16,39,37},{16,39,37},{16,39,37},{16,37,37},{16,38,8},{16,37,1},{16,37,1},{16,35,0},{15,37,11},{16,35,9},{17,36,4},{17,36,4},{17,36,4},{17,35,5},{27,1,8}, +{16,37,1},{16,37,1},{16,35,0},{30,19,8},{16,35,0},{28,1,18},{16,39,1},{17,36,4},{17,36,5},{28,1,18},{30,21,18},{17,36,5},{0,36,20},{30,21,18},{0,36,20},{16,0,36},{16,0,36},{16,0,36},{16,0,36},{16,37,0},{16,37,0},{16,37,0},{16,35,0},{15,36,2},{15,36,2},{17,43,56},{17,41,37},{17,39,56},{17,39,41},{17,42,51},{17,40,22},{17,39,5}, +{17,38,24},{15,42,56},{16,38,21},{18,40,22},{18,39,9},{18,38,8},{18,38,9},{28,5,51},{17,40,22},{17,39,5},{16,38,21},{31,22,51},{16,38,21},{17,41,37},{17,41,37},{17,41,37},{17,39,37},{17,40,8},{17,39,1},{17,39,1},{17,37,0},{16,39,9},{17,37,9},{18,38,4},{18,38,4},{18,38,4},{18,37,5},{28,3,8},{17,39,1},{17,39,1},{17,37,0},{31,21,8}, +{17,37,0},{29,3,18},{17,41,1},{18,38,4},{18,38,5},{29,3,18},{31,23,18},{18,38,5},{0,38,20},{31,23,18},{0,38,20},{17,0,36},{17,0,36},{17,0,36},{17,0,36},{17,39,0},{17,39,0},{17,39,0},{17,37,0},{16,38,1},{16,38,1},{18,45,56},{18,43,37},{18,41,56},{18,41,41},{18,44,51},{18,42,22},{18,41,5},{18,40,24},{16,43,53},{17,40,21},{19,42,22}, +{19,41,9},{19,40,8},{19,40,9},{31,0,51},{18,42,22},{18,41,5},{17,40,21},{28,29,51},{17,40,21},{18,43,37},{18,43,37},{18,43,37},{18,41,37},{18,42,8},{18,41,1},{18,41,1},{18,39,0},{17,41,9},{18,39,9},{19,40,4},{19,40,4},{19,40,4},{19,39,5},{29,5,8},{18,41,1},{18,41,1},{18,39,0},{31,24,8},{18,39,0},{30,5,18},{18,43,1},{19,40,4}, +{19,40,5},{30,5,18},{31,26,18},{19,40,5},{0,40,20},{31,26,18},{0,40,20},{18,0,36},{18,0,36},{18,0,36},{18,0,36},{18,41,0},{18,41,0},{18,41,0},{18,39,0},{17,40,1},{17,40,1},{19,48,68},{19,46,51},{20,43,70},{19,43,51},{19,47,52},{19,44,22},{19,43,3},{19,42,20},{17,46,51},{18,42,23},{20,44,24},{20,44,8},{20,43,6},{20,42,14},{31,6,51}, 
+{19,44,21},{19,43,2},{19,42,19},{31,29,51},{19,42,19},{19,46,50},{19,46,50},{19,46,50},{19,43,50},{19,45,9},{19,43,2},{19,43,2},{19,41,3},{18,43,10},{19,41,6},{20,42,5},{20,42,5},{20,42,5},{20,42,5},{31,4,8},{19,43,1},{19,43,1},{19,41,2},{30,29,8},{19,41,2},{30,11,18},{19,46,1},{20,43,2},{19,43,1},{30,11,18},{30,31,18},{19,43,1}, +{0,42,18},{30,31,18},{0,42,18},{19,0,50},{19,0,50},{19,0,50},{19,0,50},{19,43,2},{19,43,2},{19,43,2},{19,41,2},{18,43,1},{18,43,1},{20,49,56},{20,47,38},{20,45,53},{20,45,37},{20,48,51},{20,46,19},{20,45,1},{20,44,22},{18,48,56},{19,44,23},{21,46,24},{21,46,8},{21,45,6},{21,44,14},{30,15,51},{20,46,19},{20,45,1},{19,44,22},{31,32,51}, +{19,44,22},{20,47,37},{20,47,37},{20,47,37},{20,45,36},{20,46,10},{20,45,0},{20,45,0},{20,43,2},{19,45,10},{20,43,11},{21,44,5},{21,44,5},{21,44,5},{21,44,5},{30,13,8},{20,45,0},{20,45,0},{20,43,2},{31,31,8},{20,43,2},{31,13,18},{20,47,2},{21,45,2},{20,45,1},{31,13,18},{30,34,18},{20,45,1},{0,44,18},{30,34,18},{0,44,18},{20,0,36}, +{20,0,36},{20,0,36},{20,0,36},{20,45,0},{20,45,0},{20,45,0},{20,43,1},{19,45,1},{19,45,1},{21,51,56},{21,49,37},{21,47,53},{21,47,37},{21,50,51},{21,48,22},{21,47,1},{21,46,22},{19,50,56},{20,46,20},{22,48,22},{22,48,13},{22,47,6},{22,46,14},{30,20,51},{21,48,22},{21,47,1},{20,46,19},{31,35,51},{20,46,19},{21,49,37},{21,49,37},{21,49,37}, +{21,47,36},{21,48,8},{21,47,0},{21,47,0},{21,45,2},{20,47,11},{21,45,11},{22,46,5},{22,46,5},{22,46,5},{22,46,5},{31,15,8},{21,47,0},{21,47,0},{21,45,2},{31,34,8},{21,45,2},{31,18,18},{21,49,1},{22,47,2},{21,47,1},{31,18,18},{31,36,18},{21,47,1},{0,46,18},{31,36,18},{0,46,18},{21,0,36},{21,0,36},{21,0,36},{21,0,36},{21,47,0}, 
+{21,47,0},{21,47,0},{21,45,1},{20,46,2},{20,46,2},{22,53,56},{22,51,37},{22,49,56},{22,49,41},{22,52,51},{22,50,22},{22,49,5},{22,48,24},{20,51,53},{21,48,21},{23,50,22},{23,49,9},{23,48,8},{23,48,9},{31,22,51},{22,50,22},{22,49,5},{21,48,21},{28,42,51},{21,48,21},{22,51,37},{22,51,37},{22,51,37},{22,49,37},{22,50,8},{22,49,1},{22,49,1}, +{22,47,2},{21,49,9},{22,47,11},{23,48,4},{23,48,4},{23,48,4},{23,48,5},{31,20,8},{22,49,1},{22,49,1},{22,47,2},{31,37,8},{22,47,2},{31,23,18},{22,51,1},{23,48,4},{23,48,5},{31,23,18},{31,39,18},{23,48,5},{0,48,20},{31,39,18},{0,48,20},{22,0,36},{22,0,36},{22,0,36},{22,0,36},{22,49,0},{22,49,0},{22,49,0},{22,47,1},{21,48,1}, +{21,48,1},{23,56,68},{23,54,51},{24,51,70},{23,51,51},{23,55,52},{23,52,22},{23,51,3},{23,50,20},{21,54,51},{22,50,23},{24,52,24},{24,52,8},{24,51,6},{24,50,14},{31,28,51},{23,52,21},{23,51,2},{23,50,19},{31,42,51},{23,50,19},{23,54,50},{23,54,50},{23,54,50},{23,51,50},{23,53,9},{23,51,2},{23,51,2},{23,49,3},{22,51,10},{23,49,6},{24,50,5}, +{24,50,5},{24,50,5},{24,50,5},{31,26,8},{23,51,1},{23,51,1},{23,49,2},{30,42,8},{23,49,2},{31,29,18},{23,54,1},{24,51,2},{23,51,1},{31,29,18},{30,44,18},{23,51,1},{0,50,18},{30,44,18},{0,50,18},{23,0,50},{23,0,50},{23,0,50},{23,0,50},{23,51,2},{23,51,2},{23,51,2},{23,49,2},{22,51,1},{22,51,1},{24,58,54},{24,55,38},{24,53,53}, +{24,53,37},{24,56,52},{24,54,19},{24,53,1},{24,52,22},{22,56,51},{23,52,23},{25,54,24},{25,54,8},{25,53,6},{25,52,14},{31,33,51},{24,54,19},{24,53,1},{23,52,22},{31,45,51},{23,52,22},{24,56,36},{24,56,36},{24,56,36},{24,53,36},{24,54,10},{24,53,0},{24,53,0},{24,51,2},{23,53,10},{24,51,11},{25,52,5},{25,52,5},{25,52,5},{25,52,5},{31,31,8}, 
+{24,53,0},{24,53,0},{24,51,2},{31,44,8},{24,51,2},{31,34,18},{24,55,2},{25,53,2},{24,53,1},{31,34,18},{31,46,18},{24,53,1},{0,52,18},{31,46,18},{0,52,18},{24,0,36},{24,0,36},{24,0,36},{24,0,36},{24,53,0},{24,53,0},{24,53,0},{24,51,1},{23,53,1},{23,53,1},{25,60,54},{25,57,38},{25,55,53},{25,55,37},{25,58,52},{25,56,19},{25,55,1}, +{25,54,22},{23,58,51},{24,54,20},{26,56,24},{26,56,8},{26,55,6},{26,54,14},{31,38,51},{25,56,19},{25,55,1},{24,54,19},{31,48,51},{24,54,19},{25,58,36},{25,58,36},{25,58,36},{25,55,36},{25,56,10},{25,55,0},{25,55,0},{25,53,2},{24,55,11},{25,53,11},{26,54,5},{26,54,5},{26,54,5},{26,54,5},{31,36,8},{25,55,0},{25,55,0},{25,53,2},{31,47,8}, +{25,53,2},{30,43,18},{25,57,2},{26,55,2},{25,55,1},{30,43,18},{30,50,18},{25,55,1},{0,54,18},{30,50,18},{0,54,18},{25,0,36},{25,0,36},{25,0,36},{25,0,36},{25,55,0},{25,55,0},{25,55,0},{25,53,1},{24,54,2},{24,54,2},{26,62,54},{26,59,38},{26,57,53},{26,57,37},{26,60,52},{26,58,19},{26,57,1},{26,56,22},{25,58,56},{25,56,20},{27,58,24}, +{27,58,8},{27,57,6},{27,56,14},{30,47,51},{26,58,19},{26,57,1},{25,56,19},{31,51,51},{25,56,19},{26,60,36},{26,60,36},{26,60,36},{26,57,36},{26,58,10},{26,57,0},{26,57,0},{26,55,2},{25,57,11},{26,55,11},{27,56,5},{27,56,5},{27,56,5},{27,56,5},{30,45,8},{26,57,0},{26,57,0},{26,55,2},{31,50,8},{26,55,2},{31,45,18},{26,59,2},{27,57,2}, +{26,57,1},{31,45,18},{31,52,18},{26,57,1},{0,56,18},{31,52,18},{0,56,18},{26,0,36},{26,0,36},{26,0,36},{26,0,36},{26,57,0},{26,57,0},{26,57,0},{26,55,1},{25,56,2},{25,56,2},{27,63,76},{27,62,52},{28,59,69},{27,59,51},{27,63,52},{27,60,21},{27,59,3},{27,58,22},{25,62,52},{26,58,28},{28,60,24},{28,60,9},{28,59,5},{28,59,13},{31,49,51}, 
+{27,60,20},{27,59,2},{27,58,21},{30,56,51},{27,58,21},{27,62,51},{27,62,51},{27,62,51},{27,59,51},{27,61,9},{27,59,3},{27,59,3},{27,58,6},{26,60,11},{27,57,12},{28,58,5},{28,58,5},{28,58,5},{28,58,4},{29,54,8},{27,59,2},{27,59,2},{27,58,5},{29,56,8},{27,58,5},{30,54,18},{27,62,2},{28,59,1},{27,59,1},{30,54,18},{29,58,18},{27,59,1}, +{0,58,20},{29,58,18},{0,58,20},{27,0,50},{27,0,50},{27,0,50},{27,0,50},{27,60,1},{27,60,1},{27,60,1},{27,58,2},{26,59,0},{26,59,0},{28,63,86},{28,63,38},{28,61,52},{28,61,36},{28,63,59},{28,62,21},{28,61,0},{28,60,21},{27,62,60},{27,60,28},{29,62,24},{29,62,9},{29,61,5},{29,61,13},{30,58,51},{28,62,21},{28,61,0},{28,60,21},{31,58,51}, +{28,60,21},{28,63,37},{28,63,37},{28,63,37},{28,61,36},{28,62,10},{28,61,0},{28,61,0},{28,59,5},{27,62,11},{27,60,12},{29,60,5},{29,60,5},{29,60,5},{29,60,4},{30,56,8},{28,61,0},{28,61,0},{28,59,5},{30,58,8},{28,59,5},{31,56,18},{28,63,2},{29,61,1},{28,61,0},{31,56,18},{30,60,18},{28,61,0},{0,60,20},{30,60,18},{0,60,20},{28,0,36}, +{28,0,36},{28,0,36},{28,0,36},{28,61,0},{28,61,0},{28,61,0},{28,59,1},{27,61,0},{27,61,0},{30,63,94},{30,63,78},{29,63,52},{29,63,36},{30,63,115},{29,63,36},{29,63,0},{29,62,21},{29,63,88},{28,62,25},{30,63,30},{30,63,14},{30,63,5},{30,63,13},{31,60,51},{29,63,36},{29,63,0},{29,62,21},{31,61,51},{29,62,21},{29,63,52},{29,63,52},{29,63,52}, +{29,63,36},{29,63,16},{29,63,0},{29,63,0},{29,61,5},{28,63,11},{28,62,9},{30,62,5},{30,62,5},{30,62,5},{30,62,4},{31,58,8},{29,63,0},{29,63,0},{29,61,5},{31,60,8},{29,61,5},{31,61,18},{30,63,10},{30,63,1},{29,63,0},{31,61,18},{31,62,18},{29,63,0},{0,62,20},{31,62,18},{0,62,20},{29,0,36},{29,0,36},{29,0,36},{29,0,36},{29,63,0}, 
+{29,63,0},{29,63,0},{29,61,1},{28,63,2},{28,63,2},{31,63,68},{31,63,68},{30,63,61},{30,63,45},{30,63,59},{30,63,27},{30,63,18},{30,63,1},{30,63,28},{30,63,10},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{30,63,1},{31,63,4},{30,63,1},{30,63,61},{30,63,61},{30,63,61},{30,63,45},{30,63,34},{30,63,18},{30,63,18}, +{30,63,1},{30,63,19},{30,63,10},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,62,4},{31,63,4},{31,63,4},{30,63,1},{31,63,4},{30,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{30,0,36},{30,0,36},{30,0,36},{30,0,36},{30,63,9},{30,63,9},{30,63,9},{30,63,1},{30,63,10}, +{30,63,10},{0,7,74},{0,6,20},{0,4,2},{0,4,26},{0,5,153},{0,4,110},{0,3,45},{0,2,115},{0,3,169},{0,2,124},{0,7,74},{0,6,20},{0,4,2},{0,4,26},{0,5,153},{0,4,110},{0,3,45},{0,2,115},{0,3,153},{0,2,115},{0,3,1},{0,3,1},{0,3,1},{0,2,0},{0,2,13},{0,2,9},{0,2,9},{0,1,5},{0,1,14},{0,1,6},{0,3,1}, +{0,3,1},{0,3,1},{0,2,0},{0,2,13},{0,2,9},{0,2,9},{0,1,5},{1,0,13},{0,1,5},{2,1,72},{0,6,20},{0,4,2},{0,4,26},{2,1,72},{3,1,72},{0,4,26},{0,3,74},{3,1,72},{0,3,74},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,13,81},{0,10,13},{0,6,26}, +{0,6,14},{0,9,244},{0,7,129},{0,6,41},{0,4,139},{0,5,300},{0,4,175},{0,13,81},{0,10,13},{0,6,26},{0,6,14},{2,3,243},{0,7,129},{0,6,41},{0,4,139},{4,1,243},{0,4,139},{0,9,9},{0,9,9},{0,9,9},{0,5,10},{0,5,52},{0,5,17},{0,5,17},{0,3,17},{0,3,68},{0,3,33},{0,9,9},{0,9,9},{0,9,9},{0,5,10},{1,2,50}, +{0,5,17},{0,5,17},{0,3,17},{2,1,50},{0,3,17},{3,3,72},{0,10,4},{1,6,2},{0,6,5},{3,3,72},{4,3,72},{0,6,5},{0,5,74},{4,3,72},{0,5,74},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,2},{0,1,2},{1,15,145},{1,12,77},{1,8,90},{1,8,78},{0,15,243},{0,10,96},{0,8,3}, 
+{0,6,106},{0,8,395},{0,6,187},{1,15,81},{1,12,13},{1,8,26},{1,8,14},{4,1,243},{0,10,96},{0,8,3},{0,6,106},{5,3,243},{0,6,106},{1,11,73},{1,11,73},{1,11,73},{1,7,74},{0,11,50},{0,8,2},{0,8,2},{0,5,2},{0,5,131},{0,5,51},{1,11,9},{1,11,9},{1,11,9},{1,7,10},{3,1,50},{0,8,2},{0,8,2},{0,5,2},{3,3,50}, +{0,5,2},{5,1,72},{1,12,4},{2,8,2},{0,8,2},{5,1,72},{9,0,72},{0,8,2},{0,7,74},{9,0,72},{0,7,74},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,7,1},{0,7,1},{0,7,1},{0,4,1},{0,3,32},{0,3,32},{2,17,162},{2,14,94},{2,10,107},{2,10,95},{1,17,244},{1,12,97},{1,10,4},{1,8,107},{0,11,345},{0,8,116},{2,17,81}, +{2,14,13},{2,10,26},{2,10,14},{6,0,243},{0,14,76},{1,10,3},{0,9,83},{10,0,243},{0,9,83},{2,13,90},{2,13,90},{2,13,90},{2,9,91},{1,13,51},{1,10,3},{1,10,3},{1,7,3},{0,9,94},{0,7,14},{2,13,9},{2,13,9},{2,13,9},{2,9,10},{3,6,50},{1,10,2},{1,10,2},{1,7,2},{8,0,50},{1,7,2},{7,0,72},{2,14,4},{3,10,2}, +{1,10,2},{7,0,72},{10,2,72},{1,10,2},{0,9,74},{10,2,72},{0,9,74},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{1,9,2},{1,9,2},{1,9,2},{1,6,2},{0,7,13},{0,7,13},{3,19,154},{3,16,82},{3,13,100},{3,12,85},{2,19,244},{2,15,90},{2,13,5},{2,11,97},{0,15,287},{0,11,73},{3,19,90},{3,16,18},{4,12,29},{3,12,21},{6,6,243}, +{1,16,75},{2,13,5},{0,11,73},{12,1,243},{0,11,73},{3,15,81},{3,15,81},{3,15,81},{3,12,81},{2,15,52},{2,13,1},{2,13,1},{2,9,4},{0,13,61},{0,10,14},{3,15,17},{3,15,17},{3,15,17},{3,12,17},{6,2,50},{2,13,1},{2,13,1},{2,9,4},{11,0,50},{2,9,4},{8,2,72},{3,16,2},{4,12,4},{2,13,4},{8,2,72},{12,3,72},{2,13,4}, +{0,11,72},{12,3,72},{0,11,72},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{2,12,0},{2,12,0},{2,12,0},{2,9,0},{0,11,1},{0,11,1},{4,21,162},{4,18,94},{4,15,103},{4,14,95},{3,21,244},{3,17,88},{3,15,5},{3,13,97},{0,18,260},{1,13,73},{4,21,81},{4,18,13},{4,15,22},{4,14,14},{9,1,243},{2,18,75},{3,15,5},{1,13,73},{13,3,243}, 
+{1,13,73},{4,17,90},{4,17,90},{4,17,90},{4,14,91},{3,17,52},{3,15,1},{3,15,1},{3,11,4},{0,16,52},{1,12,14},{4,17,9},{4,17,9},{4,17,9},{4,14,10},{8,0,50},{3,15,1},{3,15,1},{3,11,4},{11,3,50},{3,11,4},{10,1,72},{3,20,2},{5,14,4},{3,15,4},{10,1,72},{17,0,72},{3,15,4},{0,13,72},{17,0,72},{0,13,72},{4,0,90}, +{4,0,90},{4,0,90},{4,0,90},{3,14,0},{3,14,0},{3,14,0},{3,11,0},{1,13,1},{1,13,1},{5,23,162},{5,20,94},{5,16,107},{5,16,95},{4,23,244},{4,18,97},{4,16,4},{4,15,98},{0,22,244},{2,15,73},{5,23,81},{5,20,13},{5,16,26},{5,16,14},{10,3,243},{3,20,75},{4,16,3},{2,15,73},{18,0,243},{2,15,73},{5,19,90},{5,19,90},{5,19,90}, +{5,15,94},{4,19,51},{4,16,3},{4,16,3},{4,13,5},{1,18,52},{2,14,14},{5,19,9},{5,19,9},{5,19,9},{5,15,13},{9,2,50},{4,16,2},{4,16,2},{4,13,4},{16,0,50},{4,13,4},{11,3,72},{5,20,4},{6,16,2},{4,16,2},{11,3,72},{18,2,72},{4,16,2},{0,15,72},{18,2,72},{0,15,72},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,15,2}, +{4,15,2},{4,15,2},{4,13,1},{2,15,1},{2,15,1},{6,25,162},{6,22,94},{6,18,107},{6,18,95},{5,25,244},{5,20,97},{5,18,4},{5,16,107},{1,24,244},{3,17,74},{6,25,81},{6,22,13},{6,18,26},{6,18,14},{12,1,243},{3,24,75},{5,18,3},{3,17,74},{19,2,243},{3,17,74},{6,21,90},{6,21,90},{6,21,90},{6,17,91},{5,21,51},{5,18,3},{5,18,3}, +{5,15,5},{2,20,52},{3,16,19},{6,21,9},{6,21,9},{6,21,9},{6,17,10},{11,1,50},{5,18,2},{5,18,2},{5,15,4},{17,2,50},{5,15,4},{13,1,72},{6,22,4},{7,18,2},{5,18,2},{13,1,72},{19,4,72},{5,18,2},{0,17,74},{19,4,72},{0,17,74},{6,0,90},{6,0,90},{6,0,90},{6,0,90},{5,17,2},{5,17,2},{5,17,2},{5,15,1},{3,17,0}, +{3,17,0},{7,27,154},{7,24,82},{7,21,100},{7,20,85},{6,27,244},{6,23,90},{6,21,5},{6,19,97},{2,26,244},{4,19,73},{7,27,90},{7,24,18},{8,20,29},{7,20,21},{13,4,243},{5,24,75},{6,21,5},{4,19,73},{22,2,243},{4,19,73},{7,23,81},{7,23,81},{7,23,81},{7,20,81},{6,23,52},{6,21,1},{6,21,1},{6,17,4},{3,23,52},{4,18,14},{7,23,17}, 
+{7,23,17},{7,23,17},{7,20,17},{13,0,50},{6,21,1},{6,21,1},{6,17,4},{20,2,50},{6,17,4},{14,4,72},{7,24,2},{8,20,4},{6,21,4},{14,4,72},{25,0,72},{6,21,4},{0,19,72},{25,0,72},{0,19,72},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{6,20,0},{6,20,0},{6,20,0},{6,17,0},{4,19,1},{4,19,1},{8,29,162},{8,26,92},{8,23,103}, +{8,22,95},{7,29,244},{7,25,90},{7,23,5},{7,21,97},{3,28,244},{5,21,73},{8,29,81},{8,26,11},{8,23,22},{8,22,14},{14,6,243},{6,26,75},{7,23,5},{5,21,73},{26,0,243},{5,21,73},{8,25,90},{8,25,90},{8,25,90},{8,22,91},{7,25,52},{7,23,1},{7,23,1},{7,19,4},{4,24,52},{5,20,14},{8,25,9},{8,25,9},{8,25,9},{8,22,10},{14,2,50}, +{7,23,1},{7,23,1},{7,19,4},{24,0,50},{7,19,4},{16,2,72},{8,26,2},{9,22,4},{7,23,4},{16,2,72},{27,1,72},{7,23,4},{0,21,72},{27,1,72},{0,21,72},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{7,22,0},{7,22,0},{7,22,0},{7,19,0},{5,21,1},{5,21,1},{9,31,162},{9,28,92},{9,25,103},{9,24,95},{8,31,244},{8,26,100},{8,24,9}, +{8,23,98},{4,30,244},{6,23,73},{9,31,81},{9,28,11},{9,25,22},{9,24,14},{16,4,243},{7,28,75},{8,24,8},{6,23,73},{27,2,243},{6,23,73},{9,27,90},{9,27,90},{9,27,90},{9,24,91},{8,27,51},{8,24,5},{8,24,5},{8,21,5},{5,26,52},{6,22,14},{9,27,9},{9,27,9},{9,27,9},{9,24,10},{16,0,50},{8,24,4},{8,24,4},{8,21,4},{26,1,50}, +{8,21,4},{17,4,72},{9,28,2},{10,24,4},{8,25,4},{17,4,72},{27,4,72},{8,25,4},{0,23,72},{27,4,72},{0,23,72},{9,0,90},{9,0,90},{9,0,90},{9,0,90},{8,24,1},{8,24,1},{8,24,1},{8,21,1},{6,23,1},{6,23,1},{10,33,162},{10,30,92},{10,27,103},{10,26,95},{9,33,244},{9,28,100},{9,26,9},{9,25,98},{5,32,244},{7,25,73},{10,33,81}, +{10,30,11},{10,27,22},{10,26,14},{17,6,243},{8,30,75},{9,26,8},{7,25,73},{28,4,243},{7,25,73},{10,29,90},{10,29,90},{10,29,90},{10,26,91},{9,29,51},{9,26,5},{9,26,5},{9,23,5},{6,28,52},{7,24,14},{10,29,9},{10,29,9},{10,29,9},{10,26,10},{17,2,50},{9,26,4},{9,26,4},{9,23,4},{27,3,50},{9,23,4},{18,6,72},{10,30,2},{11,26,4}, 
+{9,27,4},{18,6,72},{28,6,72},{9,27,4},{0,25,72},{28,6,72},{0,25,72},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,26,1},{9,26,1},{9,26,1},{9,23,1},{7,25,1},{7,25,1},{11,35,154},{11,32,82},{11,29,97},{11,29,85},{10,35,244},{10,31,96},{10,29,3},{10,27,90},{6,34,244},{8,27,78},{11,35,90},{11,32,18},{12,29,27},{11,29,21},{19,5,243}, +{9,32,75},{10,29,3},{9,27,75},{31,4,243},{9,27,75},{11,31,81},{11,31,81},{11,31,81},{11,28,80},{10,31,52},{10,29,2},{10,29,2},{10,26,5},{7,31,50},{9,26,11},{11,31,17},{11,31,17},{11,31,17},{11,28,16},{19,1,50},{10,29,2},{10,29,2},{9,26,2},{29,4,50},{9,26,2},{20,5,72},{11,32,2},{12,29,2},{10,29,2},{20,5,72},{31,6,72},{10,29,2}, +{0,27,74},{31,6,72},{0,27,74},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{10,28,1},{10,28,1},{10,28,1},{10,25,1},{8,28,2},{8,28,2},{12,37,162},{12,34,92},{12,31,107},{12,30,99},{11,37,244},{11,33,90},{11,31,3},{11,29,90},{7,36,244},{9,29,78},{12,37,81},{12,34,11},{12,31,26},{12,30,18},{22,0,243},{10,34,75},{11,31,3},{10,29,75},{31,7,243}, +{10,29,75},{12,33,90},{12,33,90},{12,33,90},{12,30,90},{11,33,52},{11,31,2},{11,31,2},{11,28,5},{8,32,52},{10,28,11},{12,33,9},{12,33,9},{12,33,9},{12,30,9},{20,3,50},{11,31,2},{11,31,2},{10,28,2},{30,6,50},{10,28,2},{23,0,72},{12,34,2},{13,31,2},{11,31,2},{23,0,72},{31,9,72},{11,31,2},{0,29,74},{31,9,72},{0,29,74},{12,0,90}, +{12,0,90},{12,0,90},{12,0,90},{11,30,1},{11,30,1},{11,30,1},{11,27,1},{9,30,2},{9,30,2},{13,39,162},{13,36,92},{13,33,103},{13,32,95},{12,39,244},{12,34,100},{12,32,9},{12,31,100},{8,38,244},{10,31,78},{13,39,81},{13,36,11},{13,33,22},{13,32,14},{23,2,243},{11,36,75},{12,32,8},{11,31,75},{28,14,243},{11,31,75},{13,35,90},{13,35,90},{13,35,90}, 
+{13,32,91},{12,35,51},{12,32,5},{12,32,5},{12,30,6},{9,34,52},{11,30,11},{13,35,9},{13,35,9},{13,35,9},{13,32,10},{21,5,50},{12,32,4},{12,32,4},{11,30,2},{31,8,50},{11,30,2},{24,2,72},{13,36,2},{14,32,4},{12,33,4},{24,2,72},{27,17,72},{12,33,4},{0,31,74},{27,17,72},{0,31,74},{13,0,90},{13,0,90},{13,0,90},{13,0,90},{12,32,1}, +{12,32,1},{12,32,1},{12,29,1},{10,31,4},{10,31,4},{14,41,162},{14,38,92},{14,35,103},{14,34,95},{13,41,244},{13,36,100},{13,34,9},{13,33,98},{9,40,244},{11,33,73},{14,41,81},{14,38,11},{14,35,22},{14,34,14},{24,4,243},{12,38,75},{13,34,8},{11,33,73},{28,17,243},{11,33,73},{14,37,90},{14,37,90},{14,37,90},{14,34,91},{13,37,51},{13,34,5},{13,34,5}, +{13,31,10},{10,36,52},{11,32,14},{14,37,9},{14,37,9},{14,37,9},{14,34,10},{24,0,50},{13,34,4},{13,34,4},{12,32,4},{31,11,50},{12,32,4},{25,4,72},{14,38,2},{15,34,4},{13,35,4},{25,4,72},{28,19,72},{13,35,4},{0,33,72},{28,19,72},{0,33,72},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{13,34,1},{13,34,1},{13,34,1},{13,31,1},{11,33,1}, +{11,33,1},{15,44,152},{15,40,84},{15,37,97},{15,37,85},{14,44,243},{14,39,96},{14,37,3},{14,35,90},{10,42,244},{12,35,78},{15,44,88},{15,40,20},{16,37,27},{15,37,21},{27,0,243},{13,41,76},{14,37,3},{13,35,75},{31,17,243},{13,35,75},{15,40,80},{15,40,80},{15,40,80},{15,36,80},{14,40,50},{14,37,2},{14,37,2},{14,34,5},{11,39,50},{13,34,11},{15,40,16}, +{15,40,16},{15,40,16},{15,36,16},{24,6,50},{14,37,2},{14,37,2},{13,34,2},{29,17,50},{13,34,2},{27,3,72},{15,40,4},{16,37,2},{14,37,2},{27,3,72},{31,19,72},{14,37,2},{0,35,74},{31,19,72},{0,35,74},{15,0,80},{15,0,80},{15,0,80},{15,0,80},{14,36,1},{14,36,1},{14,36,1},{14,33,1},{12,36,2},{12,36,2},{16,45,164},{16,42,95},{16,39,107}, 
+{16,38,99},{15,46,243},{15,41,96},{15,39,3},{15,37,90},{11,44,244},{13,37,78},{16,45,83},{16,42,14},{16,39,26},{16,38,18},{27,5,243},{14,43,76},{15,39,3},{14,37,75},{31,20,243},{14,37,75},{16,41,91},{16,41,91},{16,41,91},{16,38,90},{15,42,50},{15,39,2},{15,39,2},{15,36,5},{12,41,51},{14,36,11},{16,41,10},{16,41,10},{16,41,10},{16,38,9},{27,1,50}, +{15,39,2},{15,39,2},{14,36,2},{30,19,50},{14,36,2},{28,5,72},{15,44,4},{17,39,2},{15,39,2},{28,5,72},{31,22,72},{15,39,2},{0,37,74},{31,22,72},{0,37,74},{16,0,90},{16,0,90},{16,0,90},{16,0,90},{15,38,1},{15,38,1},{15,38,1},{15,35,1},{13,38,2},{13,38,2},{17,47,164},{17,44,95},{17,41,107},{17,40,99},{16,47,245},{16,43,91},{16,41,3}, +{16,39,100},{12,46,244},{14,39,78},{17,47,83},{17,44,14},{17,41,26},{17,40,18},{30,0,243},{15,45,76},{16,41,2},{15,39,75},{28,27,243},{15,39,75},{17,43,91},{17,43,91},{17,43,91},{17,40,90},{16,43,53},{16,41,2},{16,41,2},{16,38,6},{13,43,51},{15,38,11},{17,43,10},{17,43,10},{17,43,10},{17,40,9},{28,3,50},{16,41,1},{16,41,1},{15,38,2},{31,21,50}, +{15,38,2},{31,0,72},{16,46,1},{18,41,2},{16,41,1},{31,0,72},{28,29,72},{16,41,1},{0,39,74},{28,29,72},{0,39,74},{17,0,90},{17,0,90},{17,0,90},{17,0,90},{16,40,1},{16,40,1},{16,40,1},{16,37,1},{14,40,2},{14,40,2},{18,49,162},{18,46,95},{18,43,107},{18,42,99},{17,49,244},{17,45,91},{17,43,3},{17,41,100},{13,48,244},{15,41,78},{18,49,81}, +{18,46,14},{18,43,26},{18,42,18},{31,2,243},{15,48,75},{17,43,2},{15,41,78},{29,29,243},{15,41,78},{18,45,91},{18,45,91},{18,45,91},{18,42,90},{17,45,53},{17,43,2},{17,43,2},{17,40,6},{14,45,51},{16,40,18},{18,45,10},{18,45,10},{18,45,10},{18,42,9},{29,5,50},{17,43,1},{17,43,1},{16,40,2},{31,24,50},{16,40,2},{30,9,72},{17,48,2},{19,43,2}, 
+{17,43,1},{30,9,72},{29,31,72},{17,43,1},{0,41,74},{29,31,72},{0,41,74},{18,0,90},{18,0,90},{18,0,90},{18,0,90},{17,42,1},{17,42,1},{17,42,1},{17,39,1},{15,42,2},{15,42,2},{19,52,152},{19,48,84},{19,45,105},{19,45,84},{18,52,243},{18,47,89},{18,45,1},{18,43,96},{14,50,244},{16,44,81},{19,52,88},{19,48,20},{20,45,26},{19,45,20},{31,8,243}, +{17,49,76},{18,45,1},{16,44,80},{31,30,243},{16,44,80},{19,48,80},{19,48,80},{19,48,80},{19,44,81},{18,48,50},{18,45,1},{18,45,1},{18,42,1},{15,47,52},{17,42,13},{19,48,16},{19,48,16},{19,48,16},{19,44,17},{31,4,50},{18,45,1},{18,45,1},{18,42,1},{30,29,50},{18,42,1},{30,15,72},{19,48,4},{20,45,1},{18,45,1},{30,15,72},{31,32,72},{18,45,1}, +{0,43,80},{31,32,72},{0,43,80},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{18,45,0},{18,45,0},{18,45,0},{18,41,1},{16,44,1},{16,44,1},{20,53,164},{20,50,95},{20,47,106},{20,47,94},{19,54,243},{19,49,96},{19,47,1},{19,45,96},{15,52,244},{17,46,81},{20,53,83},{20,50,14},{20,47,25},{20,47,13},{29,20,243},{18,51,76},{19,47,1},{17,46,80},{31,33,243}, +{17,46,80},{20,49,91},{20,49,91},{20,49,91},{20,46,90},{19,50,50},{19,47,1},{19,47,1},{19,44,1},{16,49,51},{18,44,13},{20,49,10},{20,49,10},{20,49,10},{20,46,9},{30,13,50},{19,47,1},{19,47,1},{19,44,1},{31,31,50},{19,44,1},{30,20,72},{19,52,4},{21,47,1},{19,47,1},{30,20,72},{31,35,72},{19,47,1},{0,45,80},{31,35,72},{0,45,80},{20,0,90}, +{20,0,90},{20,0,90},{20,0,90},{19,47,0},{19,47,0},{19,47,0},{19,43,1},{17,46,1},{17,46,1},{21,55,164},{21,52,95},{21,49,107},{21,48,99},{20,55,245},{20,51,91},{20,49,3},{20,47,97},{16,54,244},{18,47,85},{21,55,83},{21,52,14},{21,49,26},{21,48,18},{30,22,243},{19,53,76},{20,49,2},{19,47,81},{28,40,243},{19,47,81},{21,51,91},{21,51,91},{21,51,91}, 
+{21,48,90},{20,51,53},{20,49,2},{20,49,2},{20,46,5},{17,51,51},{19,46,13},{21,51,10},{21,51,10},{21,51,10},{21,48,9},{31,15,50},{20,49,1},{20,49,1},{19,46,4},{31,34,50},{19,46,4},{31,22,72},{20,54,1},{22,49,2},{20,49,1},{31,22,72},{28,42,72},{20,49,1},{0,47,80},{28,42,72},{0,47,80},{21,0,90},{21,0,90},{21,0,90},{21,0,90},{20,48,1}, +{20,48,1},{20,48,1},{20,45,2},{18,48,2},{18,48,2},{22,57,164},{22,54,95},{22,51,107},{22,50,99},{21,57,245},{21,53,91},{21,51,3},{21,49,100},{17,56,244},{19,49,78},{22,57,83},{22,54,14},{22,51,26},{22,50,18},{31,24,243},{19,56,76},{21,51,2},{19,49,78},{29,42,243},{19,49,78},{22,53,91},{22,53,91},{22,53,91},{22,50,90},{21,53,53},{21,51,2},{21,51,2}, +{21,48,6},{18,53,51},{20,48,18},{22,53,10},{22,53,10},{22,53,10},{22,50,9},{31,20,50},{21,51,1},{21,51,1},{20,48,2},{31,37,50},{20,48,2},{31,27,72},{21,56,1},{23,51,2},{21,51,1},{31,27,72},{29,44,72},{21,51,1},{0,49,74},{29,44,72},{0,49,74},{22,0,90},{22,0,90},{22,0,90},{22,0,90},{21,50,1},{21,50,1},{21,50,1},{21,47,2},{19,50,2}, +{19,50,2},{23,60,152},{23,57,81},{23,53,105},{23,53,84},{22,60,243},{22,55,89},{22,53,1},{22,51,96},{18,59,244},{20,52,81},{23,60,88},{23,57,17},{24,53,26},{23,53,20},{31,30,243},{21,57,73},{22,53,1},{20,52,80},{31,43,243},{20,52,80},{23,56,80},{23,56,80},{23,56,80},{23,52,81},{22,56,50},{22,53,1},{22,53,1},{22,50,1},{19,55,52},{21,50,13},{23,56,16}, +{23,56,16},{23,56,16},{23,52,17},{31,26,50},{22,53,1},{22,53,1},{22,50,1},{30,42,50},{22,50,1},{31,33,72},{23,57,1},{24,53,1},{22,53,1},{31,33,72},{31,45,72},{22,53,1},{0,51,80},{31,45,72},{0,51,80},{23,0,80},{23,0,80},{23,0,80},{23,0,80},{22,53,0},{22,53,0},{22,53,0},{22,49,1},{20,52,1},{20,52,1},{24,62,162},{24,58,94},{24,55,106}, 
+{24,55,94},{23,62,243},{23,57,89},{23,55,1},{23,53,96},{19,61,244},{21,54,81},{24,62,81},{24,58,13},{24,55,25},{24,55,13},{31,35,243},{22,59,73},{23,55,1},{21,54,80},{27,51,243},{21,54,80},{24,58,90},{24,58,90},{24,58,90},{24,54,90},{23,58,50},{23,55,1},{23,55,1},{23,52,1},{20,57,50},{22,52,13},{24,58,9},{24,58,9},{24,58,9},{24,54,9},{31,31,50}, +{23,55,1},{23,55,1},{23,52,1},{31,44,50},{23,52,1},{31,38,72},{24,58,4},{25,55,1},{23,55,1},{31,38,72},{31,48,72},{23,55,1},{0,53,80},{31,48,72},{0,53,80},{24,0,90},{24,0,90},{24,0,90},{24,0,90},{23,55,0},{23,55,0},{23,55,0},{23,51,1},{21,54,1},{21,54,1},{25,63,164},{25,60,94},{25,57,106},{25,57,94},{24,63,245},{24,59,97},{24,57,3}, +{24,55,97},{20,63,249},{22,56,81},{25,63,83},{25,60,13},{25,57,25},{25,57,13},{31,40,243},{23,61,73},{24,57,2},{22,56,80},{31,49,243},{22,56,80},{25,60,90},{25,60,90},{25,60,90},{25,56,90},{24,60,51},{24,57,3},{24,57,3},{24,54,5},{21,59,50},{23,54,13},{25,60,9},{25,60,9},{25,60,9},{25,56,9},{31,36,50},{24,57,2},{24,57,2},{23,54,4},{31,47,50}, +{23,54,4},{30,47,72},{25,60,4},{26,57,1},{24,57,1},{30,47,72},{31,51,72},{24,57,1},{0,55,80},{31,51,72},{0,55,80},{25,0,90},{25,0,90},{25,0,90},{25,0,90},{24,56,2},{24,56,2},{24,56,2},{24,53,2},{22,56,1},{22,56,1},{26,63,194},{26,62,94},{26,59,106},{26,59,94},{25,63,284},{25,61,97},{25,59,3},{25,57,97},{22,63,253},{23,58,81},{27,62,99}, +{26,62,13},{26,59,25},{26,59,13},{29,52,243},{24,63,76},{25,59,2},{23,58,80},{28,56,243},{23,58,80},{26,62,90},{26,62,90},{26,62,90},{26,58,90},{25,62,51},{25,59,3},{25,59,3},{25,56,5},{22,61,50},{24,56,10},{26,62,9},{26,62,9},{26,62,9},{26,58,9},{30,45,50},{25,59,2},{25,59,2},{24,56,1},{31,50,50},{24,56,1},{30,52,72},{26,62,4},{27,59,1}, 
+{25,59,1},{30,52,72},{28,58,72},{25,59,1},{0,57,80},{28,58,72},{0,57,80},{26,0,90},{26,0,90},{26,0,90},{26,0,90},{25,58,2},{25,58,2},{25,58,2},{25,55,2},{23,58,1},{23,58,1},{27,63,280},{27,63,120},{27,62,105},{27,61,82},{27,63,328},{26,63,99},{26,61,5},{26,59,99},{24,63,308},{24,60,74},{28,63,105},{28,63,45},{28,61,27},{27,61,18},{31,51,243}, +{26,63,99},{26,61,5},{24,60,74},{31,56,243},{24,60,74},{27,63,84},{27,63,84},{27,63,84},{27,60,81},{26,63,58},{26,62,2},{26,62,2},{26,58,2},{23,63,53},{25,58,9},{27,63,20},{27,63,20},{27,63,20},{27,60,17},{29,54,50},{26,62,2},{26,62,2},{26,58,2},{29,56,50},{26,58,2},{30,58,72},{28,63,20},{28,61,2},{27,61,2},{30,58,72},{31,58,72},{27,61,2}, +{0,60,74},{31,58,72},{0,60,74},{27,0,80},{27,0,80},{27,0,80},{27,0,80},{26,61,0},{26,61,0},{26,61,0},{26,58,1},{24,60,0},{24,60,0},{28,63,331},{28,63,187},{28,63,106},{28,63,94},{28,63,358},{27,63,173},{27,63,4},{27,61,82},{26,63,355},{25,62,65},{29,63,126},{29,63,62},{28,63,25},{28,63,13},{31,56,221},{28,63,121},{27,63,4},{25,62,65},{30,60,221}, +{25,62,65},{28,63,106},{28,63,106},{28,63,106},{28,62,91},{27,63,100},{27,63,4},{27,63,4},{27,60,2},{25,63,72},{26,60,9},{28,63,25},{28,63,25},{28,63,25},{28,62,10},{30,56,50},{27,63,4},{27,63,4},{27,60,2},{30,58,50},{27,60,2},{31,59,61},{29,63,37},{29,63,1},{27,63,4},{31,59,61},{31,61,61},{27,63,4},{0,62,65},{31,61,61},{0,62,65},{28,0,90}, +{28,0,90},{28,0,90},{28,0,90},{27,63,0},{27,63,0},{27,63,0},{27,60,1},{25,62,0},{25,62,0},{29,63,239},{29,63,175},{29,63,139},{29,63,99},{29,63,239},{28,63,122},{28,63,41},{28,62,19},{28,63,233},{26,63,19},{30,63,54},{30,63,38},{30,63,29},{29,63,18},{31,60,93},{29,63,54},{29,63,18},{27,63,9},{31,61,93},{27,63,9},{29,63,139},{29,63,139},{29,63,139}, 
+{29,63,99},{29,63,139},{28,63,41},{28,63,41},{28,62,3},{27,63,116},{27,62,9},{30,63,29},{30,63,29},{30,63,29},{29,63,18},{31,58,50},{29,63,18},{29,63,18},{28,62,2},{31,60,50},{28,62,2},{31,62,5},{31,63,9},{30,63,4},{30,63,0},{31,62,5},{31,62,9},{30,63,0},{0,63,9},{31,62,9},{0,63,9},{29,0,90},{29,0,90},{29,0,90},{29,0,90},{28,63,5}, +{28,63,5},{28,63,5},{28,61,2},{27,62,8},{27,62,8},{30,63,140},{30,63,124},{30,63,115},{30,63,99},{30,63,131},{29,63,98},{29,63,62},{29,63,2},{29,63,122},{28,63,20},{31,63,25},{31,63,25},{31,63,25},{30,63,18},{31,62,17},{30,63,18},{30,63,9},{29,63,1},{31,62,22},{29,63,1},{30,63,115},{30,63,115},{30,63,115},{30,63,99},{30,63,106},{29,63,62},{29,63,62}, +{29,63,2},{29,63,86},{28,63,20},{31,63,25},{31,63,25},{31,63,25},{30,63,18},{31,61,13},{30,63,9},{30,63,9},{29,63,1},{31,62,13},{29,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{30,0,90},{30,0,90},{30,0,90},{30,0,90},{29,63,26},{29,63,26},{29,63,26},{29,63,2},{28,63,20}, +{28,63,20},{0,13,200},{0,10,52},{0,7,2},{0,6,61},{0,9,441},{0,7,308},{0,5,139},{0,4,318},{0,5,491},{0,4,354},{0,13,200},{0,10,52},{0,7,2},{0,6,61},{2,2,441},{0,7,308},{0,5,139},{0,4,318},{2,3,441},{0,4,318},{0,6,0},{0,6,0},{0,6,0},{0,4,1},{0,3,41},{0,3,20},{0,3,20},{0,2,26},{0,2,50},{0,1,30},{0,6,0}, +{0,6,0},{0,6,0},{0,4,1},{1,0,41},{0,3,20},{0,3,20},{0,2,26},{1,1,41},{0,2,26},{3,3,200},{0,10,52},{0,7,2},{0,6,61},{3,3,200},{4,3,200},{0,6,61},{0,5,202},{4,3,200},{0,5,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,18,200},{0,14,20},{0,10,13}, +{0,9,26},{0,12,686},{0,9,419},{0,8,178},{0,5,442},{0,7,789},{0,5,491},{0,18,200},{0,14,20},{0,10,13},{0,9,26},{3,2,686},{0,9,419},{0,8,178},{0,5,442},{6,0,686},{0,5,442},{0,11,1},{0,11,1},{0,11,1},{0,7,0},{0,6,145},{0,5,74},{0,5,74},{0,3,74},{0,3,165},{0,3,90},{0,11,1},{0,11,1},{0,11,1},{0,7,0},{1,2,145}, 
+{0,5,74},{0,5,74},{0,3,74},{3,0,145},{0,3,74},{5,1,200},{0,14,20},{1,9,2},{0,9,26},{5,1,200},{9,0,200},{0,9,26},{0,7,202},{9,0,200},{0,7,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,23,251},{0,17,53},{1,12,78},{0,11,54},{0,17,724},{0,13,362},{0,11,86}, +{0,8,387},{0,9,932},{0,7,498},{1,20,201},{1,16,21},{1,12,14},{1,11,27},{4,4,723},{0,13,362},{0,11,86},{0,8,387},{7,2,723},{0,8,387},{0,17,49},{0,17,49},{0,17,49},{0,10,49},{0,11,162},{0,9,45},{0,9,45},{0,5,50},{0,5,243},{0,5,99},{1,13,2},{1,13,2},{1,13,2},{1,9,1},{3,1,162},{0,9,45},{0,9,45},{0,5,50},{3,3,162}, +{0,5,50},{7,0,200},{0,17,4},{2,11,2},{0,11,5},{7,0,200},{10,2,200},{0,11,5},{0,9,202},{10,2,200},{0,9,202},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,13},{0,2,13},{1,25,315},{1,19,117},{1,14,171},{1,13,118},{0,23,723},{0,16,299},{0,13,18},{0,10,318},{0,12,1087},{0,9,516},{2,22,201}, +{2,17,21},{2,14,14},{2,13,27},{5,6,723},{0,16,299},{0,13,18},{0,10,318},{8,4,723},{0,10,318},{1,19,113},{1,19,113},{1,19,113},{1,12,113},{0,16,162},{0,13,17},{0,13,17},{0,8,26},{0,8,338},{0,7,129},{2,15,2},{2,15,2},{2,15,2},{2,11,1},{3,6,162},{0,13,17},{0,13,17},{0,8,26},{8,0,162},{0,8,26},{7,5,200},{1,19,4},{3,13,2}, +{0,13,2},{7,5,200},{14,0,200},{0,13,2},{0,11,202},{14,0,200},{0,11,202},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,10,0},{0,10,0},{0,10,0},{0,6,0},{0,5,58},{0,5,58},{2,28,408},{2,22,210},{2,16,281},{2,15,213},{0,29,739},{0,20,260},{0,16,29},{0,13,280},{0,15,1143},{0,12,464},{3,25,200},{3,21,16},{3,16,18},{3,15,20},{7,5,723}, +{0,20,244},{0,16,13},{0,13,264},{14,0,723},{0,13,264},{2,21,209},{2,21,209},{2,21,209},{2,14,209},{1,19,178},{0,17,18},{0,17,18},{0,10,21},{0,12,376},{0,9,121},{3,18,0},{3,18,0},{3,18,0},{3,13,1},{6,2,162},{0,17,2},{0,17,2},{0,10,5},{11,0,162},{0,10,5},{10,1,200},{2,22,2},{4,15,5},{2,15,5},{10,1,200},{17,0,200},{2,15,5}, 
+{0,13,200},{17,0,200},{0,13,200},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{1,13,16},{1,13,16},{1,13,16},{1,8,17},{0,8,80},{0,8,80},{3,30,408},{3,24,210},{3,18,281},{3,17,213},{1,31,739},{1,22,260},{2,17,27},{1,15,280},{0,19,1000},{0,14,322},{4,26,201},{4,22,21},{4,18,14},{4,17,27},{10,0,723},{0,23,212},{2,17,11},{0,15,225},{15,2,723}, +{0,15,225},{3,23,209},{3,23,209},{3,23,209},{3,16,209},{2,21,178},{1,19,18},{1,19,18},{1,12,21},{0,15,294},{0,12,44},{4,19,2},{4,19,2},{4,19,2},{4,15,2},{8,0,162},{1,19,2},{1,19,2},{0,13,4},{11,3,162},{0,13,4},{11,3,200},{3,24,2},{5,17,2},{2,17,2},{11,3,200},{18,2,200},{2,17,2},{0,15,200},{18,2,200},{0,15,200},{3,0,208}, +{3,0,208},{3,0,208},{3,0,208},{2,15,16},{2,15,16},{2,15,16},{2,10,17},{0,12,40},{0,12,40},{4,31,420},{4,26,222},{4,20,276},{4,19,223},{3,29,740},{2,24,260},{3,19,27},{2,16,270},{0,23,920},{0,16,234},{5,28,201},{5,24,21},{5,20,14},{5,19,27},{11,2,723},{0,27,200},{3,19,11},{0,17,211},{16,4,723},{0,17,211},{4,25,218},{4,25,218},{4,25,218}, +{4,18,218},{3,23,178},{2,21,18},{2,21,18},{2,14,21},{0,19,228},{0,15,17},{5,21,2},{5,21,2},{5,21,2},{5,17,1},{9,2,162},{2,21,2},{2,21,2},{1,15,4},{16,0,162},{1,15,4},{13,1,200},{3,28,2},{6,19,2},{3,19,2},{13,1,200},{19,4,200},{3,19,2},{0,17,202},{19,4,200},{0,17,202},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{3,17,16}, +{3,17,16},{3,17,16},{3,12,17},{0,15,13},{0,15,13},{5,33,420},{5,28,222},{5,22,276},{5,21,223},{3,34,740},{3,26,260},{3,22,29},{3,18,270},{0,25,844},{0,19,202},{6,30,201},{6,26,21},{6,22,14},{6,21,27},{12,4,723},{1,29,200},{3,22,13},{0,19,202},{22,0,723},{0,19,202},{5,27,218},{5,27,218},{5,27,218},{5,20,218},{3,28,178},{3,23,18},{3,23,18}, +{3,16,18},{0,22,195},{1,17,17},{6,23,2},{6,23,2},{6,23,2},{6,19,1},{11,1,162},{3,23,2},{3,23,2},{3,16,2},{17,2,162},{3,16,2},{15,0,200},{5,28,4},{7,21,2},{4,21,2},{15,0,200},{24,1,200},{4,21,2},{0,19,202},{24,1,200},{0,19,202},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{3,22,16},{3,22,16},{3,22,16},{3,16,17},{0,19,0}, 
+{0,19,0},{6,36,408},{6,30,210},{6,24,276},{6,23,213},{4,37,739},{4,28,260},{4,24,24},{4,21,280},{0,29,780},{1,21,202},{7,33,200},{7,29,17},{7,24,13},{7,23,20},{15,0,723},{2,31,203},{4,24,8},{1,21,201},{24,1,723},{1,21,201},{6,29,209},{6,29,209},{6,29,209},{6,22,209},{5,27,178},{4,25,17},{4,25,17},{4,18,21},{0,26,168},{2,19,14},{7,26,0}, +{7,26,0},{7,26,0},{7,21,1},{13,0,162},{4,25,1},{4,25,1},{3,19,4},{20,2,162},{3,19,4},{16,2,200},{6,30,2},{8,23,5},{6,23,5},{16,2,200},{27,1,200},{6,23,5},{0,21,200},{27,1,200},{0,21,200},{6,0,208},{6,0,208},{6,0,208},{6,0,208},{5,21,16},{5,21,16},{5,21,16},{5,16,17},{1,21,2},{1,21,2},{7,38,408},{7,32,210},{7,26,276}, +{7,25,213},{5,39,739},{5,30,260},{5,26,24},{5,23,280},{0,33,749},{2,23,202},{8,34,201},{8,30,19},{8,26,14},{8,25,21},{16,1,723},{3,33,202},{5,26,8},{2,23,201},{25,3,723},{2,23,201},{7,31,209},{7,31,209},{7,31,209},{7,24,209},{6,29,178},{5,27,17},{5,27,17},{5,20,21},{0,30,164},{3,21,14},{8,28,1},{8,28,1},{8,28,1},{8,23,2},{14,2,162}, +{5,27,1},{5,27,1},{4,21,4},{24,0,162},{4,21,4},{17,4,200},{7,32,2},{9,25,5},{7,25,5},{17,4,200},{27,4,200},{7,25,5},{0,23,200},{27,4,200},{0,23,200},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{6,23,16},{6,23,16},{6,23,16},{6,18,17},{2,23,2},{2,23,2},{8,39,420},{8,34,222},{8,28,286},{8,27,223},{6,41,739},{6,32,260},{6,28,24}, +{6,25,280},{0,36,725},{3,25,202},{9,36,201},{9,32,21},{9,28,14},{9,27,21},{18,0,723},{4,35,200},{6,28,8},{3,25,201},{30,0,723},{3,25,201},{8,33,218},{8,33,218},{8,33,218},{8,26,219},{7,31,178},{6,29,17},{6,29,17},{6,22,21},{2,30,168},{4,23,17},{9,30,1},{9,30,1},{9,30,1},{9,25,2},{16,0,162},{6,29,1},{6,29,1},{5,23,4},{26,1,162}, +{5,23,4},{18,6,200},{7,36,2},{10,27,5},{8,27,5},{18,6,200},{28,6,200},{8,27,5},{0,25,200},{28,6,200},{0,25,200},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{7,25,16},{7,25,16},{7,25,16},{7,20,17},{3,25,2},{3,25,2},{9,41,420},{9,36,222},{9,30,286},{9,29,223},{7,43,739},{7,34,260},{7,30,24},{7,27,280},{1,38,725},{4,27,201},{10,38,201}, 
+{10,34,21},{10,30,14},{10,29,21},{19,2,723},{5,37,200},{7,30,8},{4,27,201},{31,2,723},{4,27,201},{9,35,218},{9,35,218},{9,35,218},{9,28,219},{7,36,178},{7,31,17},{7,31,17},{7,24,21},{2,33,165},{5,25,17},{10,31,2},{10,31,2},{10,31,2},{10,27,2},{17,2,162},{7,31,1},{7,31,1},{6,25,4},{27,3,162},{6,25,4},{21,1,200},{9,36,4},{11,29,5}, +{9,29,5},{21,1,200},{29,8,200},{9,29,5},{0,27,200},{29,8,200},{0,27,200},{9,0,218},{9,0,218},{9,0,218},{9,0,218},{7,30,16},{7,30,16},{7,30,16},{7,24,17},{4,27,1},{4,27,1},{10,44,408},{10,38,210},{10,32,276},{10,31,217},{8,45,739},{8,36,260},{8,32,24},{8,29,267},{2,41,727},{5,29,207},{11,41,200},{11,37,17},{11,32,13},{11,31,18},{21,1,723}, +{6,39,203},{8,32,8},{6,29,203},{29,8,723},{6,29,203},{10,37,209},{10,37,209},{10,37,209},{10,31,208},{9,35,178},{8,33,17},{8,33,17},{8,27,21},{3,36,165},{6,27,18},{11,34,0},{11,34,0},{11,34,0},{11,30,1},{19,1,162},{8,33,1},{8,33,1},{7,27,2},{29,4,162},{7,27,2},{23,0,200},{10,38,2},{12,32,8},{9,32,5},{23,0,200},{31,9,200},{9,32,5}, +{0,29,202},{31,9,200},{0,29,202},{10,0,208},{10,0,208},{10,0,208},{10,0,208},{9,29,16},{9,29,16},{9,29,16},{9,25,16},{5,30,2},{5,30,2},{11,46,408},{11,40,210},{11,34,276},{11,33,213},{9,47,739},{9,38,260},{9,34,24},{9,31,267},{3,43,727},{6,31,207},{12,42,203},{12,38,19},{12,34,14},{12,33,21},{22,3,723},{7,41,203},{9,34,8},{7,31,203},{30,10,723}, +{7,31,203},{11,39,209},{11,39,209},{11,39,209},{11,32,209},{10,37,178},{9,35,17},{9,35,17},{9,29,21},{4,38,164},{7,29,18},{12,36,1},{12,36,1},{12,36,1},{12,31,2},{20,3,162},{9,35,1},{9,35,1},{8,29,2},{30,6,162},{8,29,2},{24,2,200},{11,40,2},{13,33,5},{11,33,5},{24,2,200},{27,17,200},{11,33,5},{0,31,202},{27,17,200},{0,31,202},{11,0,208}, 
+{11,0,208},{11,0,208},{11,0,208},{10,31,16},{10,31,16},{10,31,16},{10,27,16},{6,32,2},{6,32,2},{12,47,420},{12,42,220},{12,36,286},{12,35,223},{10,49,739},{10,40,260},{10,36,24},{10,33,280},{3,46,727},{7,33,202},{13,44,203},{13,40,19},{13,36,14},{13,35,21},{24,1,723},{8,43,202},{10,36,8},{7,33,201},{31,12,723},{7,33,201},{12,41,218},{12,41,218},{12,41,218}, +{12,34,219},{11,39,178},{10,37,17},{10,37,17},{10,31,21},{5,40,164},{8,31,21},{13,38,1},{13,38,1},{13,38,1},{13,33,2},{21,5,162},{10,37,1},{10,37,1},{9,31,2},{31,8,162},{9,31,2},{25,4,200},{12,42,2},{14,35,5},{12,35,5},{25,4,200},{28,19,200},{12,35,5},{0,33,200},{28,19,200},{0,33,200},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{11,33,16}, +{11,33,16},{11,33,16},{11,29,16},{7,33,2},{7,33,2},{13,49,420},{13,44,220},{13,38,286},{13,37,223},{11,51,739},{11,42,260},{11,38,24},{11,35,280},{4,48,729},{8,35,201},{14,46,203},{14,42,19},{14,38,14},{14,37,21},{26,0,723},{9,45,202},{11,38,8},{8,35,201},{31,15,723},{8,35,201},{13,43,218},{13,43,218},{13,43,218},{13,36,219},{11,44,178},{11,39,17},{11,39,17}, +{11,32,21},{6,42,164},{9,33,17},{14,40,1},{14,40,1},{14,40,1},{14,35,2},{24,0,162},{11,39,1},{11,39,1},{10,33,4},{31,11,162},{10,33,4},{26,6,200},{13,44,2},{15,37,5},{13,37,5},{26,6,200},{29,21,200},{13,37,5},{0,35,200},{29,21,200},{0,35,200},{13,0,218},{13,0,218},{13,0,218},{13,0,218},{11,38,16},{11,38,16},{11,38,16},{11,32,17},{8,35,1}, +{8,35,1},{14,52,408},{14,46,212},{14,41,282},{14,39,217},{12,53,739},{12,44,259},{12,40,27},{12,37,267},{6,49,727},{9,37,207},{15,49,200},{15,44,20},{15,40,17},{15,39,18},{26,6,723},{10,48,203},{12,40,11},{10,37,203},{29,21,723},{10,37,203},{14,46,208},{14,46,208},{14,46,208},{14,39,208},{13,43,178},{12,41,18},{12,41,18},{12,35,21},{7,44,163},{10,35,18},{15,42,1}, 
+{15,42,1},{15,42,1},{15,38,1},{24,6,162},{12,41,2},{12,41,2},{11,35,2},{29,17,162},{11,35,2},{28,5,200},{13,48,2},{16,40,5},{13,40,1},{28,5,200},{31,22,200},{13,40,1},{0,37,202},{31,22,200},{0,37,202},{14,0,208},{14,0,208},{14,0,208},{14,0,208},{13,37,16},{13,37,16},{13,37,16},{13,33,16},{9,38,2},{9,38,2},{15,54,408},{15,48,210},{15,43,282}, +{15,41,217},{13,55,739},{13,46,259},{13,42,27},{13,39,267},{7,51,727},{10,39,207},{16,50,203},{16,47,18},{16,42,11},{16,41,26},{29,1,723},{11,49,203},{13,42,11},{11,39,203},{30,23,723},{11,39,203},{15,47,209},{15,47,209},{15,47,209},{15,41,208},{14,45,178},{13,43,18},{13,43,18},{13,37,21},{8,46,164},{11,37,18},{16,44,1},{16,44,1},{16,44,1},{16,39,2},{27,1,162}, +{13,43,2},{13,43,2},{12,37,2},{30,19,162},{12,37,2},{31,0,200},{15,48,2},{17,42,5},{14,42,1},{31,0,200},{28,29,200},{14,42,1},{0,39,202},{28,29,200},{0,39,202},{15,0,208},{15,0,208},{15,0,208},{15,0,208},{14,39,16},{14,39,16},{14,39,16},{14,35,16},{10,40,2},{10,40,2},{16,56,418},{16,50,220},{16,44,283},{16,43,228},{14,57,739},{14,48,260},{14,44,27}, +{14,41,267},{7,54,727},{11,41,207},{17,52,203},{17,48,19},{17,44,11},{17,43,26},{30,3,723},{12,51,202},{14,44,11},{12,41,203},{31,25,723},{12,41,203},{16,49,218},{16,49,218},{16,49,218},{16,43,219},{15,47,178},{14,45,18},{14,45,18},{14,39,21},{9,48,164},{12,39,21},{17,46,1},{17,46,1},{17,46,1},{17,41,2},{28,3,162},{14,45,2},{14,45,2},{13,39,2},{31,21,162}, +{13,39,2},{30,9,200},{16,50,2},{18,44,5},{15,44,1},{30,9,200},{29,31,200},{15,44,1},{0,41,202},{29,31,200},{0,41,202},{16,0,218},{16,0,218},{16,0,218},{16,0,218},{15,41,16},{15,41,16},{15,41,16},{15,37,16},{11,42,2},{11,42,2},{17,58,418},{17,52,220},{17,46,283},{17,45,228},{15,59,739},{15,50,260},{15,46,27},{15,43,267},{8,56,724},{12,43,206},{18,54,203}, 
+{18,50,19},{18,46,11},{18,45,26},{31,5,723},{13,53,202},{15,46,11},{13,43,203},{31,28,723},{13,43,203},{17,51,218},{17,51,218},{17,51,218},{17,45,219},{15,52,178},{15,47,18},{15,47,18},{15,41,21},{10,50,164},{13,41,21},{18,48,1},{18,48,1},{18,48,1},{18,43,2},{29,5,162},{15,47,2},{15,47,2},{14,41,2},{31,24,162},{14,41,2},{31,11,200},{17,52,2},{19,46,5}, +{16,46,1},{31,11,200},{29,34,200},{16,46,1},{0,43,202},{29,34,200},{0,43,202},{17,0,218},{17,0,218},{17,0,218},{17,0,218},{15,46,17},{15,46,17},{15,46,17},{15,40,16},{12,44,2},{12,44,2},{18,60,410},{18,54,212},{18,49,282},{18,48,218},{17,58,739},{16,52,259},{16,48,27},{16,45,273},{10,57,724},{13,46,208},{19,57,200},{19,52,20},{19,48,17},{19,47,25},{31,11,723}, +{14,56,203},{16,48,11},{14,45,208},{29,34,723},{14,45,208},{18,54,208},{18,54,208},{18,54,208},{18,47,208},{17,51,178},{16,49,18},{16,49,18},{16,43,20},{11,52,163},{14,44,24},{19,50,1},{19,50,1},{19,50,1},{19,46,0},{31,4,162},{16,49,2},{16,49,2},{15,43,4},{30,29,162},{15,43,4},{30,20,200},{18,54,4},{20,48,5},{17,48,1},{30,20,200},{31,35,200},{17,48,1}, +{0,45,208},{31,35,200},{0,45,208},{18,0,208},{18,0,208},{18,0,208},{18,0,208},{17,45,17},{17,45,17},{17,45,17},{17,41,16},{13,46,0},{13,46,0},{19,62,410},{19,56,212},{19,51,282},{19,49,217},{18,60,739},{17,54,259},{17,50,27},{17,47,273},{11,59,724},{14,47,218},{20,59,201},{20,55,18},{20,50,11},{20,49,26},{31,16,723},{15,58,203},{17,50,11},{15,47,208},{30,36,723}, +{15,47,208},{19,56,208},{19,56,208},{19,56,208},{19,49,208},{18,53,178},{17,51,18},{17,51,18},{17,45,20},{12,54,164},{15,46,24},{20,52,1},{20,52,1},{20,52,1},{20,48,2},{30,13,162},{17,51,2},{17,51,2},{16,45,1},{31,31,162},{16,45,1},{31,22,200},{19,56,4},{21,50,5},{18,50,1},{31,22,200},{28,42,200},{18,50,1},{0,47,208},{28,42,200},{0,47,208},{19,0,208}, 
+{19,0,208},{19,0,208},{19,0,208},{18,47,17},{18,47,17},{18,47,17},{18,43,16},{14,48,2},{14,48,2},{20,63,426},{20,58,223},{20,52,283},{20,51,228},{19,62,739},{18,56,259},{18,52,27},{18,49,267},{12,61,727},{15,49,207},{21,61,201},{21,57,18},{21,52,11},{21,51,26},{31,21,723},{16,60,203},{18,52,11},{16,49,203},{31,38,723},{16,49,203},{20,57,219},{20,57,219},{20,57,219}, +{20,51,219},{19,55,178},{18,53,18},{18,53,18},{18,47,20},{13,56,164},{16,47,17},{21,54,1},{21,54,1},{21,54,1},{21,49,2},{31,15,162},{18,53,2},{18,53,2},{17,47,1},{31,34,162},{17,47,1},{31,27,200},{19,60,4},{22,52,5},{19,52,1},{31,27,200},{29,44,200},{19,52,1},{0,49,202},{29,44,200},{0,49,202},{20,0,218},{20,0,218},{20,0,218},{20,0,218},{19,49,16}, +{19,49,16},{19,49,16},{19,45,16},{15,50,2},{15,50,2},{21,63,468},{21,60,223},{21,54,283},{21,53,228},{20,63,749},{19,58,259},{19,54,27},{19,51,267},{13,63,727},{16,51,206},{22,63,201},{22,59,18},{22,54,11},{22,53,26},{30,30,723},{17,62,203},{19,54,11},{17,51,203},{31,41,723},{17,51,203},{21,59,219},{21,59,219},{21,59,219},{21,53,219},{19,60,180},{19,55,18},{19,55,18}, +{19,49,21},{14,58,164},{17,49,21},{22,56,1},{22,56,1},{22,56,1},{22,51,2},{31,20,162},{19,55,2},{19,55,2},{18,49,2},{31,37,162},{18,49,2},{31,32,200},{20,62,1},{23,54,5},{20,54,1},{31,32,200},{30,46,200},{20,54,1},{0,51,202},{30,46,200},{0,51,202},{21,0,218},{21,0,218},{21,0,218},{21,0,218},{19,54,17},{19,54,17},{19,54,17},{19,48,16},{16,52,2}, +{16,52,2},{22,63,570},{22,63,209},{22,57,288},{22,56,212},{21,63,804},{20,61,254},{20,56,33},{20,53,273},{15,63,753},{17,54,208},{23,63,232},{23,61,13},{23,56,16},{23,55,25},{31,32,723},{19,62,212},{20,56,17},{18,53,208},{30,46,723},{18,53,208},{22,62,208},{22,62,208},{22,62,208},{22,55,208},{20,63,178},{20,57,17},{20,57,17},{20,51,20},{15,60,163},{18,52,24},{23,59,0}, 
+{23,59,0},{23,59,0},{23,54,0},{31,26,162},{20,57,1},{20,57,1},{19,51,4},{30,42,162},{19,51,4},{31,38,200},{22,63,1},{24,56,1},{21,56,1},{31,38,200},{31,48,200},{21,56,1},{0,53,208},{31,48,200},{0,53,208},{22,0,208},{22,0,208},{22,0,208},{22,0,208},{20,57,16},{20,57,16},{20,57,16},{21,49,16},{17,54,0},{17,54,0},{23,63,696},{23,63,237},{23,59,288}, +{23,58,212},{23,63,888},{21,63,254},{21,58,33},{21,55,273},{17,63,824},{18,56,208},{24,63,273},{24,62,21},{24,58,17},{24,57,26},{30,41,723},{20,63,233},{21,58,17},{19,55,208},{29,50,723},{19,55,208},{23,63,212},{23,63,212},{23,63,212},{23,57,208},{22,61,180},{21,59,17},{21,59,17},{21,53,20},{16,62,163},{19,54,24},{24,60,2},{24,60,2},{24,60,2},{24,56,2},{31,31,162}, +{21,59,1},{21,59,1},{20,53,1},{31,44,162},{20,53,1},{30,47,200},{24,62,20},{25,58,1},{22,58,1},{30,47,200},{31,51,200},{22,58,1},{0,55,208},{31,51,200},{0,55,208},{23,0,208},{23,0,208},{23,0,208},{23,0,208},{21,59,16},{21,59,16},{21,59,16},{22,51,16},{18,56,0},{18,56,0},{25,63,804},{24,63,334},{24,61,283},{24,60,227},{24,63,957},{22,63,297},{22,60,33}, +{22,57,273},{20,63,913},{19,58,208},{26,63,313},{25,63,51},{25,60,17},{25,59,26},{31,43,723},{22,63,281},{22,60,17},{19,58,208},{30,52,723},{19,58,208},{24,63,234},{24,63,234},{24,63,234},{24,59,218},{23,63,180},{22,61,17},{22,61,17},{22,55,20},{18,62,171},{20,55,17},{25,62,2},{25,62,2},{25,62,2},{25,58,2},{31,36,162},{22,61,1},{22,61,1},{21,55,1},{31,47,162}, +{21,55,1},{30,52,200},{25,63,50},{26,60,1},{23,60,1},{30,52,200},{28,58,200},{23,60,1},{0,57,208},{28,58,200},{0,57,208},{24,0,218},{24,0,218},{24,0,218},{24,0,218},{22,61,16},{22,61,16},{22,61,16},{23,53,16},{19,58,0},{19,58,0},{26,63,930},{25,63,492},{25,63,283},{25,62,227},{25,63,1068},{24,63,389},{23,62,33},{23,59,273},{21,63,999},{20,60,209},{27,63,379}, 
+{26,63,149},{26,62,17},{26,61,26},{31,48,723},{24,63,364},{24,62,16},{20,60,208},{31,54,723},{20,60,208},{25,63,267},{25,63,267},{25,63,267},{25,61,218},{24,63,205},{23,63,17},{23,63,17},{23,57,20},{20,63,189},{21,57,17},{26,63,5},{26,63,5},{26,63,5},{26,60,2},{30,45,162},{23,63,1},{23,63,1},{22,57,1},{31,50,162},{22,57,1},{31,54,200},{27,63,90},{27,62,1}, +{24,62,0},{31,54,200},{29,60,200},{24,62,0},{0,59,208},{29,60,200},{0,59,208},{25,0,218},{25,0,218},{25,0,218},{25,0,218},{23,63,16},{23,63,16},{23,63,16},{23,56,17},{20,60,1},{20,60,1},{27,63,877},{26,63,585},{26,63,329},{26,63,209},{26,63,990},{25,63,397},{25,63,36},{24,61,165},{23,63,910},{22,61,122},{28,63,306},{28,63,162},{27,63,36},{27,63,4},{30,56,546}, +{26,63,306},{25,63,20},{22,61,113},{30,58,546},{22,61,113},{26,63,329},{26,63,329},{26,63,329},{26,63,209},{25,63,276},{25,63,36},{25,63,36},{24,59,18},{22,63,230},{22,60,22},{27,63,36},{27,63,36},{27,63,36},{27,62,0},{29,54,162},{25,63,20},{25,63,20},{24,59,2},{29,56,162},{24,59,2},{31,58,113},{29,63,61},{28,63,0},{26,63,1},{31,58,113},{31,60,113},{26,63,1}, +{0,61,113},{31,60,113},{0,61,113},{26,0,208},{26,0,208},{26,0,208},{26,0,208},{25,62,16},{25,62,16},{25,62,16},{25,57,17},{21,62,1},{21,62,1},{28,63,731},{27,63,573},{27,63,404},{27,63,244},{27,63,797},{26,63,354},{26,63,98},{25,62,57},{25,63,737},{23,62,38},{29,63,190},{29,63,126},{28,63,65},{28,63,5},{31,56,333},{28,63,185},{27,63,52},{24,62,26},{30,60,333}, +{24,62,26},{27,63,404},{27,63,404},{27,63,404},{27,63,244},{27,63,356},{26,63,98},{26,63,98},{25,61,18},{24,63,315},{23,62,22},{28,63,65},{28,63,65},{28,63,65},{28,63,5},{30,56,162},{27,63,52},{27,63,52},{25,61,2},{30,58,162},{25,61,2},{31,61,25},{30,63,13},{30,63,4},{29,63,1},{31,61,25},{31,62,25},{29,63,1},{0,62,25},{31,62,25},{0,62,25},{27,0,208}, 
+{27,0,208},{27,0,208},{27,0,208},{26,63,17},{26,63,17},{26,63,17},{26,59,17},{23,63,9},{23,63,9},{29,63,642},{28,63,524},{28,63,443},{28,63,299},{28,63,623},{28,63,335},{27,63,201},{26,63,17},{27,63,610},{24,63,26},{30,63,131},{30,63,115},{29,63,101},{29,63,37},{31,59,193},{29,63,121},{28,63,85},{26,63,1},{30,62,193},{26,63,1},{28,63,443},{28,63,443},{28,63,443}, +{28,63,299},{28,63,398},{27,63,201},{27,63,201},{26,63,17},{26,63,378},{24,63,26},{29,63,101},{29,63,101},{29,63,101},{29,63,37},{31,57,145},{28,63,85},{28,63,85},{26,63,1},{31,60,145},{26,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{28,0,218},{28,0,218},{28,0,218},{28,0,218},{27,63,32}, +{27,63,32},{27,63,32},{27,61,17},{24,63,26},{24,63,26},{29,63,418},{29,63,354},{29,63,318},{29,63,254},{29,63,370},{28,63,223},{28,63,142},{28,63,25},{28,63,358},{26,63,58},{30,63,51},{30,63,35},{30,63,26},{30,63,10},{31,61,54},{30,63,34},{30,63,25},{28,63,0},{31,62,54},{28,63,0},{29,63,318},{29,63,318},{29,63,318},{29,63,254},{29,63,270},{28,63,142},{28,63,142}, +{28,63,25},{27,63,249},{26,63,58},{30,63,26},{30,63,26},{30,63,26},{30,63,10},{31,60,41},{30,63,25},{30,63,25},{28,63,0},{31,61,41},{28,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{29,0,218},{29,0,218},{29,0,218},{29,0,218},{28,63,61},{28,63,61},{28,63,61},{28,63,25},{26,63,58}, +{26,63,58},{0,18,421},{0,15,106},{0,10,8},{0,9,117},{0,12,925},{0,9,650},{0,9,286},{0,6,670},{0,7,1030},{0,5,726},{0,18,421},{0,15,106},{0,10,8},{0,9,117},{3,2,925},{0,9,650},{0,9,286},{0,6,670},{6,0,925},{0,6,670},{0,9,0},{0,9,0},{0,9,0},{0,5,1},{0,4,85},{0,4,45},{0,4,45},{0,2,50},{0,2,98},{0,2,59},{0,9,0}, 
+{0,9,0},{0,9,0},{0,5,1},{1,1,85},{0,4,45},{0,4,45},{0,2,50},{2,0,85},{0,2,50},{5,1,421},{0,15,106},{0,10,8},{0,9,117},{5,1,421},{9,0,421},{0,9,117},{0,7,421},{9,0,421},{0,7,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,24,421},{0,19,53},{0,13,8}, +{0,11,72},{0,16,1261},{0,12,805},{0,11,328},{0,7,822},{0,9,1438},{0,7,922},{0,24,421},{0,19,53},{0,13,8},{0,11,72},{3,6,1261},{0,12,805},{0,11,328},{0,7,822},{8,0,1261},{0,7,822},{0,14,0},{0,14,0},{0,14,0},{0,8,1},{0,7,221},{0,7,116},{0,7,116},{0,4,125},{0,4,257},{0,3,146},{0,14,0},{0,14,0},{0,14,0},{0,8,1},{2,0,221}, +{0,7,116},{0,7,116},{0,4,125},{3,1,221},{0,4,125},{7,0,421},{0,19,53},{1,12,8},{0,11,72},{7,0,421},{11,1,421},{0,11,72},{0,9,421},{11,1,421},{0,9,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,29,430},{0,22,34},{0,15,62},{0,14,49},{0,20,1514},{0,15,866},{0,13,301}, +{0,9,894},{0,11,1797},{0,9,1063},{0,29,430},{0,22,34},{1,15,33},{0,14,49},{6,0,1514},{0,15,866},{0,13,301},{0,9,894},{10,0,1514},{0,9,894},{0,19,10},{0,19,10},{0,19,10},{0,12,10},{0,11,338},{0,9,149},{0,9,149},{0,5,162},{0,5,419},{0,5,211},{0,19,10},{0,19,10},{0,19,10},{0,12,10},{3,1,338},{0,9,149},{0,9,149},{0,5,162},{3,3,338}, +{0,5,162},{8,2,421},{0,22,25},{2,14,8},{0,14,40},{8,2,421},{14,0,421},{0,14,40},{0,11,421},{14,0,421},{0,11,421},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,2},{0,1,2},{1,31,494},{1,24,98},{1,17,131},{1,16,110},{0,25,1517},{0,19,734},{0,15,157},{0,11,789},{0,14,1982},{0,11,1045},{1,31,430}, +{1,24,34},{2,16,35},{1,16,46},{7,2,1514},{0,19,734},{0,15,157},{0,11,789},{11,2,1514},{0,11,789},{1,21,74},{1,21,74},{1,21,74},{1,14,74},{0,16,338},{0,13,89},{0,13,89},{0,8,106},{0,8,514},{0,7,217},{1,21,10},{1,21,10},{1,21,10},{1,14,10},{3,6,338},{0,13,89},{0,13,89},{0,8,106},{8,0,338},{0,8,106},{10,0,421},{0,26,5},{3,16,5}, 
+{0,16,10},{10,0,421},{17,0,421},{0,16,10},{0,13,421},{17,0,421},{0,13,421},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,7,1},{0,7,1},{0,7,1},{0,4,1},{0,3,32},{0,3,32},{1,37,629},{1,28,213},{2,19,340},{1,18,216},{0,31,1517},{0,23,629},{0,18,54},{0,14,686},{0,17,2187},{0,13,1070},{3,30,437},{2,27,33},{3,19,29},{2,18,45},{9,1,1514}, +{0,23,629},{0,18,54},{0,14,686},{13,3,1514},{0,14,686},{1,27,209},{1,27,209},{1,27,209},{1,17,208},{0,22,338},{0,17,34},{0,17,34},{0,11,53},{0,11,666},{0,9,273},{3,21,16},{3,21,16},{3,21,16},{3,15,16},{6,2,338},{0,17,34},{0,17,34},{0,11,53},{11,0,338},{0,11,53},{12,0,421},{0,30,1},{4,18,8},{0,18,5},{12,0,421},{20,0,421},{0,18,5}, +{0,15,421},{20,0,421},{0,15,421},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,6,106},{0,6,106},{2,39,821},{2,30,405},{2,22,557},{2,20,408},{0,36,1517},{0,25,562},{0,21,14},{0,16,589},{0,19,2445},{0,15,1130},{3,35,437},{3,29,33},{4,21,33},{3,20,45},{10,3,1514},{0,25,562},{0,21,14},{0,16,589},{18,0,1514}, +{0,16,589},{2,29,401},{2,29,401},{2,29,401},{2,19,400},{0,27,340},{0,21,10},{0,21,10},{0,13,20},{0,14,851},{0,11,357},{3,26,16},{3,26,16},{3,26,16},{3,18,16},{8,0,338},{0,21,10},{0,21,10},{0,13,20},{11,3,338},{0,13,20},{13,1,421},{1,32,2},{5,20,8},{1,20,5},{13,1,421},{21,2,421},{1,20,5},{0,17,421},{21,2,421},{0,17,421},{2,0,400}, +{2,0,400},{2,0,400},{2,0,400},{0,19,0},{0,19,0},{0,19,0},{0,11,1},{0,9,205},{0,9,205},{3,41,854},{3,32,435},{3,24,590},{3,22,441},{1,38,1518},{0,29,543},{1,23,15},{0,18,575},{0,23,2318},{0,17,906},{4,37,430},{4,30,34},{5,23,33},{4,22,49},{12,1,1514},{0,29,494},{1,23,14},{0,18,526},{19,2,1514},{0,18,526},{3,31,434},{3,31,434},{3,31,434}, +{3,21,433},{1,29,341},{1,23,11},{1,23,11},{1,15,21},{0,17,734},{0,15,230},{4,27,10},{4,27,10},{4,27,10},{4,20,10},{9,2,338},{0,25,2},{0,25,2},{0,15,5},{16,0,338},{0,15,5},{15,0,421},{3,32,2},{6,22,8},{2,22,5},{15,0,421},{25,0,421},{2,22,5},{0,19,421},{25,0,421},{0,19,421},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{1,21,1}, 
+{1,21,1},{1,21,1},{1,13,2},{0,12,157},{0,12,157},{4,42,866},{4,34,450},{4,26,581},{4,24,458},{2,40,1518},{1,31,543},{2,25,15},{1,20,575},{0,27,2166},{0,20,706},{5,39,430},{5,32,34},{6,25,33},{5,24,49},{14,0,1514},{0,32,461},{2,25,14},{0,20,481},{20,4,1514},{0,20,481},{4,33,445},{4,33,445},{4,33,445},{4,23,445},{2,31,341},{2,25,11},{2,25,11}, +{2,17,27},{0,21,626},{0,17,102},{5,29,10},{5,29,10},{5,29,10},{5,22,10},{11,1,338},{1,27,2},{1,27,2},{0,17,2},{17,2,338},{0,17,2},{16,1,421},{3,36,2},{7,24,8},{3,24,5},{16,1,421},{27,1,421},{3,24,5},{0,21,421},{27,1,421},{0,21,421},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{2,23,1},{2,23,1},{2,23,1},{2,15,2},{0,16,97}, +{0,16,97},{5,45,854},{5,36,438},{5,28,579},{5,26,446},{3,43,1514},{3,32,545},{3,27,9},{3,22,582},{0,31,2010},{0,22,546},{7,38,437},{6,35,33},{7,27,26},{6,26,50},{14,6,1514},{0,37,430},{3,27,9},{0,23,446},{26,0,1514},{0,23,446},{5,35,434},{5,35,434},{5,35,434},{5,25,434},{3,34,338},{3,27,8},{3,27,8},{3,19,20},{0,25,525},{0,19,45},{7,29,16}, +{7,29,16},{7,29,16},{7,23,16},{13,0,338},{2,29,0},{2,29,0},{1,20,4},{20,2,338},{1,20,4},{18,1,421},{4,38,1},{8,26,10},{4,27,2},{18,1,421},{29,2,421},{4,27,2},{0,23,421},{29,2,421},{0,23,421},{5,0,433},{5,0,433},{5,0,433},{5,0,433},{3,25,1},{3,25,1},{3,25,1},{3,17,1},{0,19,41},{0,19,41},{6,47,854},{6,38,438},{6,30,579}, +{6,28,446},{4,45,1515},{3,36,553},{4,29,15},{3,24,589},{0,34,1887},{0,25,450},{7,44,437},{7,37,33},{8,29,30},{7,28,50},{16,4,1514},{0,40,422},{4,29,14},{0,25,425},{27,2,1514},{0,25,425},{6,37,434},{6,37,434},{6,37,434},{6,27,434},{4,35,341},{4,29,14},{4,29,14},{4,21,21},{0,29,461},{0,22,41},{7,34,16},{7,34,16},{7,34,16},{7,26,16},{14,2,338}, +{3,31,0},{3,31,0},{2,22,4},{24,0,338},{2,22,4},{19,3,421},{5,40,1},{8,29,5},{5,29,2},{19,3,421},{30,4,421},{5,29,2},{0,25,421},{30,4,421},{0,25,421},{6,0,433},{6,0,433},{6,0,433},{6,0,433},{4,27,1},{4,27,1},{4,27,1},{4,19,2},{0,23,13},{0,23,13},{7,49,854},{7,40,438},{7,32,590},{7,30,446},{5,47,1515},{4,37,543},{5,31,15}, 
+{4,26,591},{0,36,1785},{0,27,422},{8,45,430},{8,38,34},{9,31,30},{8,30,43},{17,6,1514},{1,42,422},{5,31,14},{0,27,422},{28,4,1514},{0,27,422},{7,39,434},{7,39,434},{7,39,434},{7,29,434},{5,37,341},{5,31,14},{5,31,14},{5,23,21},{0,32,404},{1,24,41},{8,35,10},{8,35,10},{8,35,10},{8,28,9},{16,0,338},{4,33,2},{4,33,2},{3,24,4},{26,1,338}, +{3,24,4},{21,1,421},{6,42,1},{9,31,5},{6,31,2},{21,1,421},{31,6,421},{6,31,2},{0,27,421},{31,6,421},{0,27,421},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{5,29,1},{5,29,1},{5,29,1},{5,21,2},{0,27,1},{0,27,1},{8,50,866},{8,42,450},{8,34,581},{8,32,458},{6,48,1518},{5,39,543},{6,33,15},{5,28,591},{0,40,1685},{1,29,422},{9,47,430}, +{9,40,34},{10,33,33},{9,32,49},{20,1,1514},{2,44,422},{6,33,14},{1,29,422},{29,6,1514},{1,29,422},{8,41,445},{8,41,445},{8,41,445},{8,31,446},{6,39,341},{6,33,11},{6,33,11},{6,25,21},{0,35,371},{2,26,41},{9,37,10},{9,37,10},{9,37,10},{9,30,9},{17,2,338},{5,35,2},{5,35,2},{5,25,5},{27,3,338},{5,25,5},{23,0,421},{7,44,1},{11,32,8}, +{7,32,5},{23,0,421},{31,9,421},{7,32,5},{0,29,421},{31,9,421},{0,29,421},{8,0,445},{8,0,445},{8,0,445},{8,0,445},{6,31,1},{6,31,1},{6,31,1},{6,23,2},{1,29,1},{1,29,1},{9,53,854},{9,45,437},{9,36,579},{9,34,446},{7,51,1514},{7,41,546},{7,35,9},{6,31,589},{0,44,1607},{2,31,430},{11,46,437},{10,43,36},{11,35,26},{10,34,50},{22,0,1514}, +{3,47,421},{7,35,9},{3,31,426},{31,7,1514},{3,31,426},{9,43,434},{9,43,434},{9,43,434},{9,33,434},{7,42,338},{7,35,8},{7,35,8},{7,27,18},{0,39,344},{3,29,42},{11,37,16},{11,37,16},{11,37,16},{10,32,16},{19,1,338},{6,37,0},{6,37,0},{5,28,1},{29,4,338},{5,28,1},{24,2,421},{8,46,1},{12,34,10},{8,35,2},{24,2,421},{30,14,421},{8,35,2}, 
+{0,31,425},{30,14,421},{0,31,425},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{7,33,1},{7,33,1},{7,33,1},{7,26,1},{2,32,2},{2,32,2},{10,55,854},{10,47,437},{10,38,579},{10,36,446},{8,53,1515},{7,43,561},{8,37,15},{7,32,589},{0,47,1577},{3,33,425},{11,52,437},{11,45,36},{12,37,30},{11,36,50},{23,2,1514},{4,48,422},{8,37,14},{3,33,425},{28,14,1514}, +{3,33,425},{10,45,434},{10,45,434},{10,45,434},{10,35,434},{8,43,341},{8,37,14},{8,37,14},{8,29,19},{0,43,340},{4,30,38},{11,42,16},{11,42,16},{11,42,16},{11,34,16},{20,3,338},{7,39,0},{7,39,0},{6,30,1},{30,6,338},{6,30,1},{26,1,421},{9,48,1},{12,37,5},{9,37,2},{26,1,421},{30,17,421},{9,37,2},{0,33,421},{30,17,421},{0,33,421},{10,0,433}, +{10,0,433},{10,0,433},{10,0,433},{8,35,1},{8,35,1},{8,35,1},{8,28,2},{3,34,2},{3,34,2},{11,57,854},{11,48,438},{11,40,579},{11,38,446},{9,55,1515},{8,46,555},{9,39,15},{8,34,591},{0,51,1530},{4,35,422},{12,53,430},{12,46,34},{13,39,30},{12,38,43},{24,4,1514},{5,50,422},{9,39,14},{4,35,422},{28,17,1514},{4,35,422},{11,47,434},{11,47,434},{11,47,434}, +{11,37,434},{9,45,341},{9,39,14},{9,39,14},{9,31,19},{1,45,340},{5,32,41},{12,44,9},{12,44,9},{12,44,9},{12,36,9},{21,5,338},{8,41,1},{8,41,1},{7,32,4},{31,8,338},{7,32,4},{27,3,421},{10,50,1},{13,39,5},{10,39,2},{27,3,421},{31,19,421},{10,39,2},{0,35,421},{31,19,421},{0,35,421},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{9,37,1}, +{9,37,1},{9,37,1},{9,30,2},{4,35,1},{4,35,1},{12,58,866},{12,50,450},{12,41,590},{12,40,458},{10,57,1515},{9,48,555},{10,41,15},{9,36,591},{0,54,1518},{5,37,422},{13,55,430},{13,48,34},{14,41,30},{13,40,43},{25,6,1514},{6,52,422},{10,41,14},{5,37,422},{29,19,1514},{5,37,422},{12,49,445},{12,49,445},{12,49,445},{12,39,446},{10,47,341},{10,41,14},{10,41,14}, 
+{10,33,21},{2,47,340},{6,34,41},{13,46,9},{13,46,9},{13,46,9},{13,38,9},{24,0,338},{9,43,1},{9,43,1},{9,33,5},{31,11,338},{9,33,5},{29,1,421},{11,52,1},{14,41,5},{11,41,2},{29,1,421},{31,22,421},{11,41,2},{0,37,421},{31,22,421},{0,37,421},{12,0,445},{12,0,445},{12,0,445},{12,0,445},{10,39,1},{10,39,1},{10,39,1},{10,31,5},{5,37,1}, +{5,37,1},{13,61,854},{13,53,437},{13,44,593},{13,43,442},{11,59,1517},{11,49,546},{11,43,13},{10,39,589},{1,57,1518},{6,39,430},{15,54,437},{14,51,36},{15,43,25},{14,42,48},{27,5,1514},{7,55,421},{11,43,13},{7,39,426},{31,20,1514},{7,39,426},{13,51,434},{13,51,434},{13,51,434},{13,42,434},{11,50,338},{11,43,13},{11,43,13},{11,35,18},{4,47,341},{7,37,42},{15,45,16}, +{15,45,16},{15,45,16},{15,39,17},{24,6,338},{10,45,1},{10,45,1},{9,36,1},{29,17,338},{9,36,1},{31,0,421},{12,54,1},{16,43,4},{12,43,0},{31,0,421},{30,27,421},{12,43,0},{0,39,425},{30,27,421},{0,39,425},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{11,41,1},{11,41,1},{11,41,1},{11,34,1},{6,40,1},{6,40,1},{14,63,854},{14,55,437},{14,46,593}, +{14,45,442},{12,61,1515},{11,51,561},{12,45,19},{11,41,589},{2,59,1518},{7,41,430},{15,60,437},{15,53,36},{16,45,34},{15,44,48},{30,0,1514},{9,55,425},{12,45,18},{8,41,426},{28,27,1514},{8,41,426},{14,53,434},{14,53,434},{14,53,434},{14,44,434},{12,51,341},{12,46,17},{12,46,17},{12,37,19},{4,51,340},{8,38,38},{15,50,16},{15,50,16},{15,50,16},{15,42,17},{27,1,338}, +{11,47,1},{11,47,1},{10,38,1},{30,19,338},{10,38,1},{31,6,421},{13,56,1},{17,45,4},{13,45,0},{31,6,421},{31,29,421},{13,45,0},{0,41,425},{31,29,421},{0,41,425},{14,0,433},{14,0,433},{14,0,433},{14,0,433},{12,43,1},{12,43,1},{12,43,1},{12,36,2},{7,42,1},{7,42,1},{15,63,878},{15,57,437},{15,48,579},{15,47,442},{13,63,1515},{12,54,555},{13,47,19}, 
+{12,43,574},{3,61,1518},{8,43,429},{16,61,430},{16,54,34},{17,47,34},{16,46,41},{31,2,1514},{10,57,425},{13,47,18},{9,43,426},{29,29,1514},{9,43,426},{15,55,434},{15,55,434},{15,55,434},{15,46,434},{13,53,341},{13,47,19},{13,47,19},{13,39,19},{5,53,340},{9,40,38},{16,52,9},{16,52,9},{16,52,9},{16,44,10},{28,3,338},{12,49,1},{12,49,1},{11,40,1},{31,21,338}, +{11,40,1},{31,11,421},{14,58,1},{18,47,4},{14,47,0},{31,11,421},{31,32,421},{14,47,0},{0,43,425},{31,32,421},{0,43,425},{15,0,433},{15,0,433},{15,0,433},{15,0,433},{13,45,1},{13,45,1},{13,45,1},{13,38,2},{8,43,4},{8,43,4},{16,63,926},{16,58,450},{16,49,590},{16,48,458},{14,63,1542},{13,56,555},{14,49,15},{13,45,574},{4,62,1517},{9,45,429},{17,63,430}, +{17,56,34},{18,49,30},{17,48,43},{30,11,1514},{11,59,425},{14,49,14},{10,45,426},{30,31,1514},{10,45,426},{16,57,445},{16,57,445},{16,57,445},{16,47,446},{14,55,341},{14,49,14},{14,49,14},{14,41,19},{6,55,340},{10,42,38},{17,54,9},{17,54,9},{17,54,9},{17,46,10},{29,5,338},{13,51,1},{13,51,1},{12,42,1},{31,24,338},{12,42,1},{31,16,421},{15,60,1},{18,49,5}, +{15,49,2},{31,16,421},{31,35,421},{15,49,2},{0,45,425},{31,35,421},{0,45,425},{16,0,445},{16,0,445},{16,0,445},{16,0,445},{14,47,1},{14,47,1},{14,47,1},{14,40,2},{9,45,4},{9,45,4},{17,63,1034},{17,61,438},{17,52,593},{17,51,442},{16,63,1598},{15,57,554},{15,51,13},{15,47,577},{6,63,1535},{10,48,434},{19,63,437},{18,59,41},{19,51,25},{18,50,48},{29,20,1514}, +{11,63,422},{15,51,13},{11,47,433},{31,33,1514},{11,47,433},{17,60,433},{17,60,433},{17,60,433},{17,50,434},{15,58,338},{15,51,13},{15,51,13},{15,43,20},{7,57,339},{12,44,41},{19,53,16},{19,53,16},{19,53,16},{19,47,17},{31,4,338},{14,53,1},{14,53,1},{13,44,1},{30,29,338},{13,44,1},{31,22,421},{16,63,1},{20,51,4},{16,51,0},{31,22,421},{30,40,421},{16,51,0}, 
+{0,47,433},{30,40,421},{0,47,433},{17,0,433},{17,0,433},{17,0,433},{17,0,433},{15,49,1},{15,49,1},{15,49,1},{15,42,0},{10,48,1},{10,48,1},{18,63,1166},{18,63,438},{18,54,593},{18,53,442},{17,63,1643},{15,60,561},{16,53,19},{15,49,589},{8,63,1566},{11,49,430},{20,63,458},{19,61,41},{20,53,34},{19,52,48},{30,22,1514},{13,63,429},{16,53,18},{12,49,426},{28,40,1514}, +{12,49,426},{18,62,433},{18,62,433},{18,62,433},{18,52,434},{16,60,339},{16,54,17},{16,54,17},{16,45,26},{9,57,341},{13,46,41},{19,58,17},{19,58,17},{19,58,17},{19,50,17},{30,13,338},{15,55,1},{15,55,1},{14,46,1},{31,31,338},{14,46,1},{31,27,421},{18,63,5},{21,53,4},{17,53,0},{31,27,421},{31,42,421},{17,53,0},{0,49,425},{31,42,421},{0,49,425},{18,0,433}, +{18,0,433},{18,0,433},{18,0,433},{16,51,1},{16,51,1},{16,51,1},{16,44,1},{11,50,1},{11,50,1},{20,63,1326},{19,63,470},{19,56,593},{19,55,442},{18,63,1742},{16,62,546},{17,55,19},{16,51,574},{10,63,1638},{12,51,429},{21,63,506},{20,63,29},{21,55,34},{20,54,41},{31,24,1514},{15,63,461},{17,55,18},{13,51,426},{29,42,1514},{13,51,426},{19,63,434},{19,63,434},{19,63,434}, +{19,54,434},{17,62,339},{17,56,17},{17,56,17},{17,47,26},{10,59,341},{13,48,38},{20,60,9},{20,60,9},{20,60,9},{20,52,10},{31,15,338},{16,57,2},{16,57,2},{15,48,1},{31,34,338},{15,48,1},{31,32,421},{20,63,20},{22,55,4},{18,55,0},{31,32,421},{31,45,421},{18,55,0},{0,51,425},{31,45,421},{0,51,425},{19,0,433},{19,0,433},{19,0,433},{19,0,433},{17,53,1}, +{17,53,1},{17,53,1},{17,46,1},{12,51,4},{12,51,4},{21,63,1470},{20,63,561},{20,58,582},{20,57,461},{19,63,1895},{18,62,562},{18,57,19},{17,53,574},{12,63,1761},{13,53,429},{22,63,590},{21,63,59},{22,57,34},{21,56,41},{31,29,1514},{17,63,530},{18,57,18},{14,53,426},{30,44,1514},{14,53,426},{20,63,461},{20,63,461},{20,63,461},{20,55,446},{18,63,341},{18,58,17},{18,58,17}, 
+{18,49,19},{11,61,341},{14,50,38},{21,62,9},{21,62,9},{21,62,9},{21,54,10},{31,20,338},{17,59,2},{17,59,2},{16,50,1},{31,37,338},{16,50,1},{31,38,421},{21,63,50},{23,57,4},{19,57,0},{31,38,421},{31,48,421},{19,57,0},{0,53,425},{31,48,421},{0,53,425},{20,0,445},{20,0,445},{20,0,445},{20,0,445},{18,55,1},{18,55,1},{18,55,1},{18,48,2},{13,53,4}, +{13,53,4},{22,63,1674},{21,63,753},{21,60,586},{21,59,443},{21,63,2046},{19,63,629},{19,60,19},{19,55,577},{15,63,1917},{14,56,426},{24,63,674},{23,63,120},{23,60,29},{22,58,50},{31,35,1514},{19,63,629},{19,60,19},{14,56,425},{27,51,1514},{14,56,425},{21,63,497},{21,63,497},{21,63,497},{21,58,433},{19,63,388},{19,60,10},{19,60,10},{19,51,20},{12,63,347},{16,52,41},{23,61,17}, +{23,61,17},{23,61,17},{23,55,17},{31,26,338},{18,62,2},{18,62,2},{17,52,1},{30,42,338},{17,52,1},{31,44,421},{23,63,104},{24,59,5},{20,59,1},{31,44,421},{31,51,421},{20,59,1},{0,56,425},{31,51,421},{0,56,425},{21,0,433},{21,0,433},{21,0,433},{21,0,433},{19,58,0},{19,58,0},{19,58,0},{19,50,0},{14,56,1},{14,56,1},{23,63,1902},{22,63,995},{22,62,586}, +{22,61,443},{22,63,2235},{20,63,759},{20,61,15},{19,57,578},{17,63,2118},{15,58,426},{25,63,770},{24,63,250},{24,61,35},{23,60,50},{31,40,1514},{21,63,701},{20,61,14},{15,58,425},{31,49,1514},{15,58,425},{22,63,554},{22,63,554},{22,63,554},{22,60,433},{21,63,437},{20,61,14},{20,61,14},{20,53,26},{14,63,379},{17,54,41},{24,63,25},{24,63,25},{24,63,25},{23,59,17},{31,31,338}, +{19,63,4},{19,63,4},{18,54,1},{31,44,338},{18,54,1},{31,49,421},{25,63,169},{25,61,5},{21,61,1},{31,49,421},{30,56,421},{21,61,1},{0,58,425},{30,56,421},{0,58,425},{22,0,433},{22,0,433},{22,0,433},{22,0,433},{20,59,2},{20,59,2},{20,59,2},{20,52,1},{15,58,1},{15,58,1},{24,63,2045},{24,63,1233},{23,63,629},{23,63,442},{24,63,2360},{22,63,914},{21,63,14}, 
+{20,59,549},{19,63,2241},{16,60,401},{26,63,849},{25,63,395},{25,63,34},{24,62,41},{29,52,1459},{23,63,778},{21,63,13},{17,59,400},{28,56,1459},{17,59,400},{23,63,629},{23,63,629},{23,63,629},{23,62,433},{22,63,491},{21,63,14},{21,63,14},{21,55,26},{16,63,446},{18,56,41},{25,63,34},{25,63,34},{25,63,34},{24,60,10},{31,36,338},{21,63,13},{21,63,13},{19,56,1},{31,47,338}, +{19,56,1},{31,54,392},{27,63,218},{26,63,4},{22,63,0},{31,54,392},{29,60,392},{22,63,0},{0,59,400},{29,60,392},{0,59,400},{23,0,433},{23,0,433},{23,0,433},{23,0,433},{21,61,2},{21,61,2},{21,61,2},{21,54,1},{16,60,1},{16,60,1},{25,63,1767},{25,63,1167},{24,63,701},{24,63,449},{24,63,1976},{23,63,747},{22,63,66},{21,60,306},{20,63,1820},{17,61,217},{27,63,611}, +{26,63,317},{26,63,61},{25,63,10},{30,52,1064},{25,63,587},{23,63,41},{18,61,208},{28,58,1064},{18,61,208},{24,63,701},{24,63,701},{24,63,701},{24,63,449},{23,63,581},{22,63,66},{22,63,66},{22,57,26},{18,63,530},{19,58,41},{26,63,61},{26,63,61},{26,63,61},{25,62,10},{30,45,338},{23,63,41},{23,63,41},{20,58,1},{31,50,338},{20,58,1},{30,60,200},{28,63,106},{27,63,1}, +{25,63,1},{30,60,200},{31,59,200},{25,63,1},{0,60,208},{31,59,200},{0,60,208},{24,0,445},{24,0,445},{24,0,445},{24,0,445},{22,63,2},{22,63,2},{22,63,2},{22,56,1},{17,62,1},{17,62,1},{26,63,1542},{26,63,1122},{25,63,833},{25,63,497},{26,63,1647},{24,63,687},{24,63,203},{23,61,122},{22,63,1515},{19,62,78},{28,63,410},{28,63,266},{27,63,116},{27,63,20},{30,56,722}, +{26,63,402},{25,63,100},{20,62,65},{30,58,722},{20,62,65},{25,63,833},{25,63,833},{25,63,833},{25,63,497},{24,63,707},{24,63,203},{24,63,203},{23,60,17},{20,63,619},{19,61,46},{27,63,116},{27,63,116},{27,63,116},{27,63,20},{29,54,338},{25,63,100},{25,63,100},{22,60,2},{29,56,338},{22,60,2},{31,59,61},{29,63,37},{29,63,1},{27,63,4},{31,59,61},{31,61,61},{27,63,4}, 
+{0,62,65},{31,61,61},{0,62,65},{25,0,433},{25,0,433},{25,0,433},{25,0,433},{23,63,25},{23,63,25},{23,63,25},{23,58,1},{19,62,13},{19,62,13},{27,63,1406},{27,63,1134},{26,63,962},{26,63,602},{27,63,1454},{25,63,702},{25,63,341},{24,62,43},{24,63,1378},{20,63,35},{29,63,318},{28,63,250},{28,63,169},{28,63,61},{31,56,509},{28,63,313},{27,63,164},{22,63,10},{30,60,509}, +{22,63,10},{26,63,962},{26,63,962},{26,63,962},{26,63,602},{26,63,827},{25,63,341},{25,63,341},{24,62,27},{22,63,747},{20,63,35},{28,63,169},{28,63,169},{28,63,169},{28,63,61},{30,56,338},{27,63,164},{27,63,164},{23,62,2},{30,58,338},{23,62,2},{31,62,5},{31,63,9},{30,63,4},{30,63,0},{31,62,5},{31,62,9},{30,63,0},{0,63,9},{31,62,9},{0,63,9},{26,0,433}, +{26,0,433},{26,0,433},{26,0,433},{24,63,50},{24,63,50},{24,63,50},{24,60,1},{20,63,26},{20,63,26},{28,63,1135},{28,63,991},{27,63,874},{27,63,602},{28,63,1162},{26,63,618},{26,63,362},{25,63,5},{25,63,1087},{22,63,58},{30,63,219},{29,63,161},{29,63,125},{29,63,61},{30,62,294},{29,63,193},{28,63,117},{24,63,1},{31,60,297},{24,63,1},{27,63,874},{27,63,874},{27,63,874}, +{27,63,602},{27,63,730},{26,63,362},{26,63,362},{25,63,5},{24,63,681},{22,63,58},{29,63,125},{29,63,125},{29,63,125},{29,63,61},{31,56,221},{28,63,117},{28,63,117},{24,63,1},{31,59,221},{24,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{27,0,433},{27,0,433},{27,0,433},{27,0,433},{25,63,101}, +{25,63,101},{25,63,101},{25,62,1},{22,63,58},{22,63,58},{29,63,885},{28,63,751},{28,63,670},{28,63,526},{28,63,778},{27,63,483},{27,63,314},{26,63,10},{26,63,777},{24,63,117},{30,63,75},{30,63,59},{30,63,50},{30,63,34},{31,60,114},{29,63,81},{29,63,45},{27,63,0},{31,61,114},{27,63,0},{28,63,670},{28,63,670},{28,63,670},{28,63,526},{28,63,553},{27,63,314},{27,63,314}, 
+{26,63,10},{25,63,518},{24,63,117},{30,63,50},{30,63,50},{30,63,50},{30,63,34},{31,59,85},{29,63,45},{29,63,45},{27,63,0},{30,62,85},{27,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{28,0,445},{28,0,445},{28,0,445},{28,0,445},{27,63,145},{27,63,145},{27,63,145},{26,63,10},{24,63,117}, +{24,63,117},{0,26,882},{0,21,218},{0,15,16},{0,13,260},{0,17,1899},{0,13,1341},{0,11,593},{0,8,1380},{0,9,2113},{0,7,1513},{0,26,882},{0,21,218},{0,15,16},{0,13,260},{4,4,1896},{0,13,1341},{0,11,593},{0,8,1380},{7,2,1896},{0,8,1380},{0,12,0},{0,12,0},{0,12,0},{0,7,1},{0,6,162},{0,5,85},{0,5,85},{0,3,85},{0,3,186},{0,3,101},{0,12,0}, +{0,12,0},{0,12,0},{0,7,1},{1,3,162},{0,5,85},{0,5,85},{0,3,85},{3,0,162},{0,3,85},{6,6,882},{0,21,218},{0,15,16},{0,13,260},{6,6,882},{12,1,882},{0,13,260},{0,10,884},{12,1,882},{0,10,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,884},{0,25,146},{0,17,2}, +{0,15,185},{0,21,2355},{0,17,1539},{0,13,653},{0,10,1605},{0,11,2667},{0,9,1777},{0,31,884},{0,25,146},{0,17,2},{0,15,185},{5,4,2355},{0,17,1539},{0,13,653},{0,10,1605},{7,4,2355},{0,10,1605},{0,17,1},{0,17,1},{0,17,1},{0,10,1},{0,9,338},{0,8,180},{0,8,180},{0,5,180},{0,5,389},{0,5,229},{0,17,1},{0,17,1},{0,17,1},{0,10,1},{2,2,338}, +{0,8,180},{0,8,180},{0,5,180},{2,3,338},{0,5,180},{9,1,882},{0,25,146},{0,17,2},{0,15,185},{9,1,882},{13,3,882},{0,15,185},{0,12,884},{13,3,882},{0,12,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,36,884},{0,29,90},{0,19,37},{0,17,130},{0,25,2899},{0,19,1764},{0,16,733}, +{0,11,1853},{0,13,3325},{0,11,2109},{0,36,884},{0,29,90},{0,19,37},{0,17,130},{7,1,2899},{0,19,1764},{0,16,733},{0,11,1853},{12,0,2899},{0,11,1853},{0,23,0},{0,23,0},{0,23,0},{0,14,1},{0,11,580},{0,11,305},{0,11,305},{0,6,325},{0,6,667},{0,5,389},{0,23,0},{0,23,0},{0,23,0},{0,14,1},{2,5,578},{0,11,305},{0,11,305},{0,6,325},{5,1,578}, 
+{0,6,325},{10,3,882},{0,29,90},{1,19,2},{0,17,130},{10,3,882},{18,0,882},{0,17,130},{0,14,884},{18,0,882},{0,14,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,42,918},{0,32,81},{1,21,105},{0,20,109},{0,29,3051},{0,21,1707},{0,19,569},{0,13,1800},{0,16,3672},{0,13,2161},{1,38,888}, +{0,32,81},{1,21,41},{0,20,109},{8,2,3048},{0,21,1707},{0,19,569},{0,13,1800},{12,3,3048},{0,13,1800},{0,28,36},{0,28,36},{0,28,36},{0,17,37},{0,16,648},{0,13,269},{0,13,269},{0,9,292},{0,8,824},{0,7,417},{1,25,4},{1,25,4},{1,25,4},{1,16,5},{3,6,648},{0,13,269},{0,13,269},{0,9,292},{8,0,648},{0,9,292},{12,1,882},{0,32,45},{2,21,2}, +{0,20,73},{12,1,882},{19,2,882},{0,20,73},{0,16,890},{19,2,882},{0,16,890},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,4,0},{0,4,0},{0,4,0},{0,2,1},{0,2,10},{0,2,10},{1,44,997},{1,34,154},{1,24,229},{1,22,178},{0,35,3048},{0,25,1528},{0,21,324},{0,16,1605},{0,19,3907},{0,15,2138},{2,41,883},{2,32,86},{2,24,42},{1,22,114},{9,5,3048}, +{0,25,1528},{0,21,324},{0,16,1605},{15,3,3048},{0,16,1605},{1,31,113},{1,31,113},{1,31,113},{1,19,113},{0,22,648},{0,17,164},{0,17,164},{0,11,193},{0,11,976},{0,9,443},{2,27,2},{2,27,2},{2,27,2},{2,18,1},{6,2,648},{0,17,164},{0,17,164},{0,11,193},{11,0,648},{0,11,193},{13,4,882},{0,36,13},{3,23,4},{0,22,32},{13,4,882},{22,2,882},{0,22,32}, +{0,18,884},{22,2,882},{0,18,884},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,10,0},{0,10,0},{0,10,0},{0,6,0},{0,5,58},{0,5,58},{1,49,1173},{1,38,302},{2,26,421},{1,24,321},{0,40,3051},{0,29,1380},{0,23,186},{0,18,1464},{0,21,4201},{0,17,2149},{3,43,883},{3,34,86},{3,26,42},{2,24,114},{12,0,3048},{0,29,1380},{0,23,186},{0,18,1464},{20,0,3048}, 
+{0,18,1464},{1,36,289},{1,36,289},{1,36,289},{1,22,290},{0,27,650},{0,21,100},{0,21,100},{0,13,130},{0,14,1161},{0,13,491},{3,29,2},{3,29,2},{3,29,2},{3,20,1},{8,0,648},{0,21,100},{0,21,100},{0,13,130},{11,3,648},{0,13,130},{14,6,882},{0,40,1},{4,25,1},{0,25,16},{14,6,882},{26,0,882},{0,25,16},{0,20,884},{26,0,882},{0,20,884},{1,0,289}, +{1,0,289},{1,0,289},{1,0,289},{0,15,1},{0,15,1},{0,15,1},{0,9,1},{0,7,136},{0,7,136},{2,51,1365},{2,40,494},{2,28,722},{2,26,513},{0,46,3048},{0,32,1269},{0,27,82},{0,20,1341},{0,25,4525},{0,19,2197},{4,45,886},{3,38,90},{4,28,36},{3,26,114},{13,2,3048},{0,32,1269},{0,27,82},{0,20,1341},{21,2,3048},{0,20,1341},{2,38,481},{2,38,481},{2,38,481}, +{2,24,482},{0,32,650},{0,25,52},{0,25,52},{0,15,85},{0,17,1345},{0,15,569},{4,31,4},{4,31,4},{4,31,4},{4,22,5},{9,2,648},{0,25,52},{0,25,52},{0,15,85},{16,0,648},{0,15,85},{16,4,882},{1,42,1},{5,27,1},{0,27,1},{16,4,882},{27,2,882},{0,27,1},{0,22,884},{27,2,882},{0,22,884},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{0,21,0}, +{0,21,0},{0,21,0},{0,12,1},{0,9,250},{0,9,250},{2,57,1667},{2,44,786},{3,30,1042},{2,29,801},{0,51,3048},{0,36,1157},{0,29,20},{0,22,1236},{0,27,4891},{0,21,2281},{5,47,886},{4,41,86},{5,30,36},{4,28,116},{14,4,3048},{0,36,1157},{0,29,20},{0,22,1236},{25,0,3048},{0,22,1236},{2,43,785},{2,43,785},{2,43,785},{2,27,786},{0,38,648},{0,29,20},{0,29,20}, +{0,18,40},{0,19,1594},{0,16,677},{5,33,4},{5,33,4},{5,33,4},{5,24,5},{11,1,648},{0,29,20},{0,29,20},{0,18,40},{17,2,648},{0,18,40},{17,6,882},{2,44,1},{6,29,1},{1,29,1},{17,6,882},{28,4,882},{1,29,1},{0,24,884},{28,4,882},{0,24,884},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{0,26,0},{0,26,0},{0,26,0},{0,16,1},{0,11,400}, 
+{0,11,400},{3,59,1784},{3,46,901},{4,32,1195},{3,31,910},{1,53,3055},{0,40,1094},{1,31,23},{0,25,1175},{0,31,4840},{0,23,2054},{6,49,883},{6,41,81},{6,32,42},{5,30,123},{14,10,3048},{0,40,1058},{1,31,19},{0,25,1139},{28,0,3048},{0,25,1139},{3,46,900},{3,46,900},{3,46,900},{3,30,900},{1,40,654},{1,31,22},{1,31,22},{1,20,38},{0,23,1560},{0,19,533},{6,35,2}, +{6,35,2},{6,35,2},{6,26,1},{13,0,648},{0,33,5},{0,33,5},{0,20,13},{20,2,648},{0,20,13},{19,5,882},{3,46,1},{7,31,5},{2,31,1},{19,5,882},{31,4,882},{2,31,1},{0,26,882},{31,4,882},{0,26,882},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{1,29,4},{1,29,4},{1,29,4},{1,18,4},{0,15,377},{0,15,377},{4,61,1772},{4,48,891},{5,34,1195}, +{4,33,906},{2,55,3055},{1,42,1094},{2,33,29},{1,27,1175},{0,34,4609},{0,26,1716},{7,51,883},{7,43,81},{7,34,42},{6,32,114},{18,1,3048},{0,44,990},{2,33,25},{0,27,1058},{29,2,3048},{0,27,1058},{4,47,891},{4,47,891},{4,47,891},{4,32,891},{2,42,654},{2,33,29},{2,33,29},{2,22,38},{0,27,1396},{0,21,347},{7,37,2},{7,37,2},{7,37,2},{7,28,1},{14,2,648}, +{0,37,1},{0,37,1},{0,23,5},{24,0,648},{0,23,5},{22,0,882},{4,48,1},{8,33,1},{3,33,0},{22,0,882},{31,7,882},{3,33,0},{0,28,882},{31,7,882},{0,28,882},{4,0,890},{4,0,890},{4,0,890},{4,0,890},{2,31,4},{2,31,4},{2,31,4},{2,20,4},{0,19,260},{0,19,260},{5,63,1772},{5,50,891},{6,36,1195},{5,35,906},{3,57,3055},{2,44,1094},{3,35,29}, +{2,29,1175},{0,38,4381},{0,29,1436},{8,53,886},{7,47,89},{8,36,36},{7,34,114},{19,3,3048},{0,47,949},{3,35,25},{0,29,995},{30,4,3048},{0,29,995},{5,49,890},{5,49,890},{5,49,890},{5,33,891},{3,44,654},{3,35,29},{3,35,29},{3,24,38},{0,29,1251},{0,24,221},{8,39,4},{8,39,4},{8,39,4},{8,30,4},{16,0,648},{1,39,1},{1,39,1},{0,25,2},{26,1,648}, 
+{0,25,2},{23,2,882},{5,50,1},{9,35,1},{4,35,1},{23,2,882},{28,14,882},{4,35,1},{0,30,882},{28,14,882},{0,30,882},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{3,32,5},{3,32,5},{3,32,5},{3,22,4},{0,23,180},{0,23,180},{6,63,1790},{6,52,891},{7,38,1195},{6,37,906},{4,59,3057},{3,46,1094},{4,37,29},{3,31,1175},{0,42,4185},{0,31,1206},{9,55,886}, +{8,49,86},{9,38,36},{8,36,116},{20,5,3048},{0,51,907},{4,37,20},{0,31,950},{31,6,3048},{0,31,950},{6,51,890},{6,51,890},{6,51,890},{6,35,891},{4,46,657},{4,37,29},{4,37,29},{3,26,49},{0,34,1121},{0,27,117},{9,41,4},{9,41,4},{9,41,4},{9,32,5},{17,2,648},{2,41,1},{2,41,1},{1,27,2},{27,3,648},{1,27,2},{24,4,882},{6,52,1},{10,37,1}, +{5,37,1},{24,4,882},{28,17,882},{5,37,1},{0,32,884},{28,17,882},{0,32,884},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{4,34,9},{4,34,9},{4,34,9},{4,24,10},{0,27,116},{0,27,116},{8,63,1844},{7,54,901},{8,40,1188},{7,39,910},{5,62,3052},{4,48,1094},{5,39,23},{4,33,1175},{0,44,3969},{0,33,1039},{10,57,885},{10,49,81},{10,40,35},{9,38,123},{23,1,3048}, +{0,55,888},{5,39,19},{0,34,907},{29,12,3048},{0,34,907},{7,54,900},{7,54,900},{7,54,900},{7,38,900},{5,48,654},{5,39,22},{5,39,22},{5,28,45},{0,38,990},{0,29,80},{10,43,2},{10,43,2},{10,43,2},{10,34,1},{19,1,648},{3,43,1},{3,43,1},{3,29,1},{29,4,648},{3,29,1},{27,0,882},{7,54,1},{11,39,5},{6,39,1},{27,0,882},{31,17,882},{6,39,1}, +{0,34,882},{31,17,882},{0,34,882},{7,0,900},{7,0,900},{7,0,900},{7,0,900},{5,37,4},{5,37,4},{5,37,4},{5,26,4},{0,31,58},{0,31,58},{9,63,1886},{8,56,892},{9,42,1188},{8,41,900},{6,63,3055},{5,50,1094},{6,41,23},{5,35,1175},{0,49,3804},{0,36,935},{11,59,885},{11,51,81},{11,42,35},{10,40,123},{23,6,3048},{0,58,883},{6,41,19},{0,36,886},{30,14,3048}, 
+{0,36,886},{8,55,891},{8,55,891},{8,55,891},{8,40,891},{6,50,654},{6,41,22},{6,41,22},{6,30,45},{0,40,889},{1,31,80},{11,45,2},{11,45,2},{11,45,2},{11,36,1},{20,3,648},{5,43,2},{5,43,2},{4,31,1},{30,6,648},{4,31,1},{27,5,882},{8,56,2},{12,41,2},{7,41,1},{27,5,882},{31,20,882},{7,41,1},{0,36,882},{31,20,882},{0,36,882},{8,0,890}, +{8,0,890},{8,0,890},{8,0,890},{6,39,4},{6,39,4},{6,39,4},{6,28,4},{0,34,25},{0,34,25},{10,63,1964},{9,58,892},{10,44,1188},{9,43,900},{7,63,3100},{6,52,1094},{7,43,23},{6,37,1175},{0,51,3640},{0,38,887},{12,61,886},{11,55,89},{12,44,38},{11,42,123},{26,1,3048},{1,60,883},{7,43,19},{0,38,883},{30,17,3048},{0,38,883},{9,57,891},{9,57,891},{9,57,891}, +{9,42,891},{7,52,654},{7,43,22},{7,43,22},{7,32,38},{0,44,801},{2,34,86},{12,47,4},{12,47,4},{12,47,4},{12,38,4},{21,5,648},{6,45,2},{6,45,2},{4,33,2},{31,8,648},{4,33,2},{30,0,882},{9,58,2},{13,43,2},{8,43,1},{30,0,882},{28,27,882},{8,43,1},{0,38,882},{28,27,882},{0,38,882},{9,0,890},{9,0,890},{9,0,890},{9,0,890},{7,41,4}, +{7,41,4},{7,41,4},{7,30,4},{0,38,5},{0,38,5},{11,63,2078},{10,60,892},{11,46,1188},{10,45,900},{9,63,3181},{7,54,1094},{8,45,35},{7,39,1175},{0,55,3496},{1,40,887},{13,63,886},{12,56,88},{13,46,38},{12,44,110},{27,3,3048},{2,62,883},{7,46,25},{1,40,883},{31,19,3048},{1,40,883},{10,59,891},{10,59,891},{10,59,891},{10,44,891},{8,54,657},{8,45,34},{8,45,34}, +{7,34,49},{0,48,756},{3,36,86},{13,49,4},{13,49,4},{13,49,4},{13,40,4},{24,0,648},{6,49,1},{6,49,1},{5,35,2},{31,11,648},{5,35,2},{31,2,882},{10,60,2},{14,45,2},{9,45,1},{31,2,882},{29,29,882},{9,45,1},{0,40,882},{29,29,882},{0,40,882},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{8,42,9},{8,42,9},{8,42,9},{8,32,10},{0,42,1}, 
+{0,42,1},{12,63,2228},{11,63,902},{12,48,1188},{11,47,908},{10,63,3256},{8,57,1095},{9,47,33},{8,41,1164},{0,59,3364},{2,42,889},{14,63,915},{14,57,90},{14,48,35},{13,47,117},{29,2,3048},{4,63,886},{8,48,25},{3,42,885},{29,25,3048},{3,42,885},{11,62,900},{11,62,900},{11,62,900},{11,46,900},{9,57,652},{9,47,29},{9,47,29},{9,36,45},{0,51,691},{4,37,80},{14,51,2}, +{14,51,2},{14,51,2},{14,42,2},{24,6,648},{7,51,1},{7,51,1},{7,37,1},{29,17,648},{7,37,1},{31,8,882},{11,63,2},{15,47,8},{10,47,5},{31,8,882},{31,30,882},{10,47,5},{0,42,884},{31,30,882},{0,42,884},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{9,45,4},{9,45,4},{9,45,4},{9,34,4},{1,44,1},{1,44,1},{13,63,2414},{12,63,907},{13,50,1188}, +{12,49,900},{11,63,3391},{9,59,1095},{10,49,23},{9,43,1164},{0,63,3276},{3,44,889},{15,63,981},{15,59,90},{15,50,35},{14,48,123},{31,1,3048},{6,63,906},{10,49,19},{4,44,885},{30,27,3048},{4,44,885},{12,63,891},{12,63,891},{12,63,891},{12,48,891},{10,59,652},{10,49,22},{10,49,22},{10,38,45},{0,55,659},{5,39,80},{15,53,2},{15,53,2},{15,53,2},{15,44,2},{27,1,648}, +{9,51,2},{9,51,2},{8,39,1},{30,19,648},{8,39,1},{29,20,882},{12,63,17},{16,49,2},{11,49,1},{29,20,882},{31,33,882},{11,49,1},{0,44,884},{31,33,882},{0,44,884},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{10,47,4},{10,47,4},{10,47,4},{10,36,4},{2,46,1},{2,46,1},{15,63,2606},{13,63,987},{14,52,1188},{13,51,900},{13,63,3517},{10,61,1095},{11,51,23}, +{10,45,1164},{1,63,3300},{4,46,892},{17,63,1014},{15,63,94},{16,52,38},{15,50,123},{31,6,3048},{8,63,936},{11,51,19},{5,46,885},{31,29,3048},{5,46,885},{13,63,906},{13,63,906},{13,63,906},{13,50,891},{11,61,652},{11,51,22},{11,51,22},{11,40,45},{0,59,651},{6,41,80},{16,55,4},{16,55,4},{16,55,4},{16,46,5},{28,3,648},{10,53,2},{10,53,2},{9,41,1},{31,21,648}, 
+{9,41,1},{30,22,882},{14,63,37},{17,51,2},{12,51,1},{30,22,882},{28,40,882},{12,51,1},{0,46,884},{28,40,882},{0,46,884},{13,0,890},{13,0,890},{13,0,890},{13,0,890},{11,49,4},{11,49,4},{11,49,4},{11,38,4},{3,48,0},{3,48,0},{16,63,2792},{15,63,1079},{15,54,1188},{14,53,900},{14,63,3652},{11,63,1095},{12,53,35},{11,47,1164},{3,63,3436},{5,48,887},{18,63,1080}, +{17,62,102},{17,54,38},{16,52,110},{30,15,3048},{10,63,996},{11,54,25},{5,48,883},{31,32,3048},{5,48,883},{14,63,939},{14,63,939},{14,63,939},{14,52,891},{12,62,657},{12,53,34},{12,53,34},{12,42,50},{1,61,651},{7,43,80},{17,57,4},{17,57,4},{17,57,4},{17,48,4},{29,5,648},{11,55,2},{11,55,2},{10,43,1},{31,24,648},{10,43,1},{31,24,882},{16,63,80},{18,53,2}, +{13,53,1},{31,24,882},{29,42,882},{13,53,1},{0,48,882},{29,42,882},{0,48,882},{14,0,890},{14,0,890},{14,0,890},{14,0,890},{12,50,9},{12,50,9},{12,50,9},{12,40,9},{4,50,1},{4,50,1},{17,63,3038},{16,63,1268},{16,57,1186},{15,55,908},{15,63,3879},{12,63,1146},{13,55,33},{12,49,1164},{5,63,3667},{6,50,889},{19,63,1205},{18,63,147},{18,56,41},{17,55,117},{31,17,3048}, +{12,63,1110},{12,56,20},{7,50,885},{29,38,3048},{7,50,885},{16,63,979},{16,63,979},{16,63,979},{15,54,900},{13,63,670},{13,55,29},{13,55,29},{13,45,41},{2,63,648},{8,46,81},{18,60,1},{18,60,1},{18,60,1},{18,50,2},{31,4,648},{11,59,4},{11,59,4},{11,45,1},{30,29,648},{11,45,1},{31,30,882},{18,63,146},{19,56,5},{14,55,5},{31,30,882},{31,43,882},{14,55,5}, +{0,50,884},{31,43,882},{0,50,884},{15,0,900},{15,0,900},{15,0,900},{15,0,900},{13,53,4},{13,53,4},{13,53,4},{13,42,5},{5,52,1},{5,52,1},{18,63,3308},{17,63,1502},{17,59,1186},{16,57,898},{17,63,4077},{14,63,1230},{14,57,33},{13,51,1164},{8,63,3820},{7,52,889},{21,63,1368},{19,63,261},{19,58,41},{18,57,117},{30,26,3048},{14,63,1226},{13,58,20},{8,52,885},{30,40,3048}, 
+{8,52,885},{17,63,1018},{17,63,1018},{17,63,1018},{16,56,890},{14,63,724},{14,57,29},{14,57,29},{14,47,41},{4,63,665},{9,48,88},{19,62,1},{19,62,1},{19,62,1},{19,52,2},{30,13,648},{12,61,1},{12,61,1},{12,47,1},{31,31,648},{12,47,1},{31,35,882},{20,63,193},{20,58,4},{15,57,5},{31,35,882},{27,51,882},{15,57,5},{0,52,884},{27,51,882},{0,52,884},{16,0,890}, +{16,0,890},{16,0,890},{16,0,890},{14,55,4},{14,55,4},{14,55,4},{14,44,5},{6,54,1},{6,54,1},{19,63,3614},{18,63,1804},{18,61,1186},{17,59,898},{18,63,4284},{15,63,1417},{15,59,33},{14,53,1164},{9,63,4036},{8,54,892},{22,63,1494},{20,63,405},{20,60,33},{19,59,117},{31,28,3048},{16,63,1395},{14,60,20},{9,54,885},{31,42,3048},{9,54,885},{18,63,1075},{18,63,1075},{18,63,1075}, +{17,58,890},{16,63,787},{15,59,29},{15,59,29},{15,48,45},{6,63,705},{10,49,80},{20,63,5},{20,63,5},{20,63,5},{20,54,5},{31,15,648},{13,63,1},{13,63,1},{13,49,1},{31,34,648},{13,49,1},{31,40,882},{22,63,277},{21,60,4},{16,59,5},{31,40,882},{31,49,882},{16,59,5},{0,54,884},{31,49,882},{0,54,884},{17,0,890},{17,0,890},{17,0,890},{17,0,890},{15,57,4}, +{15,57,4},{15,57,4},{15,46,5},{7,56,1},{7,56,1},{20,63,4014},{19,63,2174},{19,63,1186},{18,61,898},{19,63,4545},{17,63,1725},{16,62,39},{15,55,1164},{11,63,4300},{9,56,892},{23,63,1656},{22,63,585},{21,62,33},{20,60,108},{31,33,3048},{18,63,1563},{15,62,20},{10,56,885},{31,45,3048},{10,56,885},{19,63,1150},{19,63,1150},{19,63,1150},{18,60,890},{17,63,841},{16,62,35},{16,62,35}, +{16,50,50},{8,63,747},{11,51,80},{21,63,20},{21,63,20},{21,63,20},{21,56,5},{31,20,648},{15,63,5},{15,63,5},{14,51,1},{31,37,648},{14,51,1},{29,52,882},{23,63,397},{22,62,4},{17,61,5},{29,52,882},{28,56,882},{17,61,5},{0,56,884},{28,56,882},{0,56,884},{18,0,890},{18,0,890},{18,0,890},{18,0,890},{16,58,10},{16,58,10},{16,58,10},{16,48,9},{8,58,0}, 
+{8,58,0},{22,63,4123},{21,63,2404},{20,63,1278},{19,63,901},{20,63,4626},{18,63,1849},{17,63,38},{16,57,1006},{14,63,4330},{10,58,771},{24,63,1629},{23,63,715},{22,63,65},{22,62,81},{31,38,2814},{20,63,1505},{17,63,34},{11,58,761},{31,48,2814},{11,58,761},{20,63,1278},{20,63,1278},{20,63,1278},{19,62,901},{18,63,948},{17,63,38},{17,63,38},{17,53,41},{10,63,840},{12,54,81},{22,63,65}, +{22,63,65},{22,63,65},{22,59,2},{31,26,648},{17,63,34},{17,63,34},{15,53,1},{30,42,648},{15,53,1},{31,50,761},{25,63,425},{23,63,9},{19,63,1},{31,50,761},{31,55,761},{19,63,1},{0,58,761},{31,55,761},{0,58,761},{19,0,900},{19,0,900},{19,0,900},{19,0,900},{17,61,4},{17,61,4},{17,61,4},{17,50,5},{9,60,2},{9,60,2},{23,63,3735},{22,63,2314},{21,63,1395}, +{20,63,899},{22,63,4090},{19,63,1618},{18,63,104},{17,58,678},{15,63,3826},{11,59,507},{25,63,1285},{24,63,609},{23,63,122},{23,62,26},{30,45,2249},{21,63,1186},{19,63,74},{13,59,482},{31,50,2249},{13,59,482},{21,63,1395},{21,63,1395},{21,63,1395},{20,63,899},{19,63,1086},{18,63,104},{18,63,104},{18,55,41},{12,63,969},{13,56,81},{23,63,122},{23,63,122},{23,63,122},{23,61,2},{31,31,648}, +{19,63,74},{19,63,74},{16,55,1},{31,44,648},{16,55,1},{31,53,481},{27,63,269},{25,63,0},{21,63,0},{31,53,481},{31,57,481},{21,63,0},{0,59,481},{31,57,481},{0,59,481},{20,0,890},{20,0,890},{20,0,890},{20,0,890},{18,63,4},{18,63,4},{18,63,4},{18,52,5},{10,62,2},{10,62,2},{23,63,3399},{23,63,2260},{22,63,1530},{21,63,954},{23,63,3639},{20,63,1402},{19,63,238}, +{18,59,405},{17,63,3443},{13,60,297},{26,63,1009},{25,63,525},{25,63,164},{24,63,5},{29,52,1769},{23,63,918},{21,63,113},{14,60,266},{28,56,1769},{14,60,266},{22,63,1530},{22,63,1530},{22,63,1530},{21,63,954},{21,63,1251},{19,63,238},{19,63,238},{19,57,41},{14,63,1105},{14,58,81},{25,63,164},{25,63,164},{25,63,164},{24,62,5},{31,36,648},{21,63,113},{21,63,113},{17,57,1},{31,47,648}, 
+{17,57,1},{31,55,265},{28,63,145},{27,63,4},{24,63,1},{31,55,265},{30,60,265},{24,63,1},{0,60,265},{30,60,265},{0,60,265},{21,0,890},{21,0,890},{21,0,890},{21,0,890},{19,63,13},{19,63,13},{19,63,13},{19,54,5},{12,62,8},{12,62,8},{24,63,3069},{24,63,2257},{23,63,1683},{23,63,1054},{24,63,3258},{22,63,1330},{21,63,378},{19,61,213},{20,63,3102},{14,61,166},{27,63,801}, +{26,63,477},{26,63,221},{25,63,20},{30,52,1374},{24,63,758},{23,63,181},{16,61,114},{28,58,1374},{16,61,114},{23,63,1683},{23,63,1683},{23,63,1683},{23,63,1054},{22,63,1401},{21,63,378},{21,63,378},{19,59,46},{16,63,1296},{15,60,81},{26,63,221},{26,63,221},{26,63,221},{25,63,20},{30,45,648},{23,63,181},{23,63,181},{18,59,1},{31,50,648},{18,59,1},{31,58,113},{29,63,61},{28,63,0}, +{26,63,1},{31,58,113},{31,60,113},{26,63,1},{0,61,113},{31,60,113},{0,61,113},{22,0,890},{22,0,890},{22,0,890},{22,0,890},{20,63,45},{20,63,45},{20,63,45},{20,56,10},{13,63,25},{13,63,25},{25,63,2860},{25,63,2260},{24,63,1854},{24,63,1210},{25,63,2932},{23,63,1310},{22,63,609},{20,62,81},{21,63,2731},{16,62,94},{28,63,630},{27,63,475},{27,63,306},{26,63,101},{30,56,1032}, +{26,63,612},{25,63,290},{18,62,21},{30,58,1032},{18,62,21},{24,63,1854},{24,63,1854},{24,63,1854},{24,63,1210},{23,63,1620},{22,63,609},{22,63,609},{21,61,44},{18,63,1515},{16,62,78},{27,63,306},{27,63,306},{27,63,306},{26,63,101},{29,54,648},{25,63,290},{25,63,290},{19,61,5},{29,56,648},{19,61,5},{31,61,18},{30,63,10},{30,63,1},{29,63,0},{31,61,18},{31,62,18},{29,63,0}, +{0,62,20},{31,62,18},{0,62,20},{23,0,900},{23,0,900},{23,0,900},{23,0,900},{21,63,104},{21,63,104},{21,63,104},{21,59,5},{15,63,65},{15,63,65},{26,63,2626},{26,63,2206},{25,63,1915},{25,63,1315},{26,63,2641},{24,63,1333},{23,63,789},{22,63,40},{22,63,2445},{17,63,116},{29,63,524},{28,63,406},{28,63,325},{27,63,170},{31,56,771},{27,63,507},{26,63,320},{20,63,0},{30,60,771}, 
+{20,63,0},{25,63,1915},{25,63,1915},{25,63,1915},{25,63,1315},{24,63,1661},{23,63,789},{23,63,789},{22,62,29},{20,63,1517},{17,63,116},{28,63,325},{28,63,325},{28,63,325},{27,63,170},{31,52,578},{26,63,320},{26,63,320},{20,63,0},{28,60,578},{20,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{24,0,890}, +{24,0,890},{24,0,890},{24,0,890},{23,63,164},{23,63,164},{23,63,164},{22,61,5},{17,63,116},{17,63,116},{27,63,2156},{27,63,1884},{26,63,1630},{26,63,1210},{26,63,2081},{25,63,1108},{24,63,705},{23,63,5},{23,63,1927},{19,63,180},{29,63,300},{29,63,236},{29,63,200},{28,63,85},{31,57,451},{28,63,283},{27,63,194},{23,63,1},{29,62,451},{23,63,1},{26,63,1630},{26,63,1630},{26,63,1630}, +{26,63,1210},{25,63,1347},{24,63,705},{24,63,705},{23,63,5},{22,63,1229},{19,63,180},{29,63,200},{29,63,200},{29,63,200},{28,63,85},{30,58,338},{27,63,194},{27,63,194},{23,63,1},{31,58,338},{23,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{25,0,890},{25,0,890},{25,0,890},{25,0,890},{24,63,221}, +{24,63,221},{24,63,221},{23,63,5},{19,63,180},{19,63,180},{28,63,1782},{27,63,1564},{27,63,1395},{27,63,1123},{27,63,1620},{26,63,937},{25,63,651},{24,63,25},{24,63,1560},{21,63,233},{30,63,150},{30,63,134},{29,63,104},{29,63,40},{31,59,216},{29,63,136},{28,63,90},{25,63,1},{30,62,216},{25,63,1},{27,63,1395},{27,63,1395},{27,63,1395},{27,63,1123},{26,63,1101},{25,63,651},{25,63,651}, +{24,63,25},{23,63,998},{21,63,233},{29,63,104},{29,63,104},{29,63,104},{29,63,40},{31,57,162},{28,63,90},{28,63,90},{25,63,1},{29,62,162},{25,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{26,0,890},{26,0,890},{26,0,890},{26,0,890},{25,63,290},{25,63,290},{25,63,290},{24,63,25},{21,63,233}, 
+{21,63,233},{0,34,1570},{0,27,400},{0,19,25},{0,16,481},{0,23,3372},{0,17,2355},{0,15,1053},{0,11,2425},{0,13,3753},{0,11,2681},{0,34,1570},{0,27,400},{0,19,25},{0,16,481},{7,0,3371},{0,17,2355},{0,15,1053},{0,11,2425},{10,2,3371},{0,11,2425},{0,16,0},{0,16,0},{0,16,0},{0,10,1},{0,8,288},{0,7,149},{0,7,149},{0,5,160},{0,4,332},{0,4,200},{0,16,0}, +{0,16,0},{0,16,0},{0,10,1},{1,5,288},{0,7,149},{0,7,149},{0,5,160},{4,0,288},{0,5,160},{10,1,1568},{0,27,400},{0,19,25},{0,16,481},{10,1,1568},{17,0,1568},{0,16,481},{0,13,1568},{17,0,1568},{0,13,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,40,1568},{0,31,296},{0,22,2}, +{0,20,373},{0,27,3968},{0,21,2627},{0,17,1107},{0,13,2720},{0,15,4479},{0,11,3065},{0,40,1568},{0,31,296},{0,22,2},{0,20,373},{7,3,3968},{0,21,2627},{0,17,1107},{0,13,2720},{13,0,3968},{0,13,2720},{0,21,1},{0,21,1},{0,21,1},{0,13,0},{0,11,512},{0,9,269},{0,9,269},{0,5,288},{0,5,593},{0,5,337},{0,21,1},{0,21,1},{0,21,1},{0,13,0},{3,1,512}, +{0,9,269},{0,9,269},{0,5,288},{3,3,512},{0,5,288},{11,3,1568},{0,31,296},{0,22,2},{0,20,373},{11,3,1568},{18,2,1568},{0,20,373},{0,15,1568},{18,2,1568},{0,15,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,45,1568},{0,34,221},{0,24,17},{0,22,274},{0,30,4651},{0,23,2924},{0,19,1209}, +{0,15,3065},{0,17,5292},{0,13,3465},{0,45,1568},{0,34,221},{0,24,17},{0,22,274},{8,3,4651},{0,23,2924},{0,19,1209},{0,15,3065},{14,1,4651},{0,15,3065},{0,27,0},{0,27,0},{0,27,0},{0,16,0},{0,13,802},{0,12,424},{0,12,424},{0,7,433},{0,7,918},{0,7,533},{0,27,0},{0,27,0},{0,27,0},{0,16,0},{4,0,800},{0,12,424},{0,12,424},{0,7,433},{6,1,800}, 
+{0,7,433},{13,1,1568},{0,34,221},{1,24,2},{0,22,274},{13,1,1568},{19,4,1568},{0,22,274},{0,17,1570},{19,4,1568},{0,17,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,50,1568},{0,38,145},{0,26,82},{0,24,193},{0,34,5419},{0,25,3275},{0,21,1347},{0,16,3410},{0,17,6220},{0,15,3933},{0,50,1568}, +{0,38,145},{1,26,81},{0,24,193},{10,0,5419},{0,25,3275},{0,21,1347},{0,16,3410},{15,2,5419},{0,16,3410},{0,32,0},{0,32,0},{0,32,0},{0,19,0},{0,16,1152},{0,15,605},{0,15,605},{0,9,628},{0,8,1328},{0,7,789},{0,32,0},{0,32,0},{0,32,0},{0,19,0},{3,6,1152},{0,15,605},{0,15,605},{0,9,628},{8,0,1152},{0,9,628},{15,0,1568},{0,38,145},{2,26,2}, +{0,24,193},{15,0,1568},{24,1,1568},{0,24,193},{0,19,1570},{24,1,1568},{0,19,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,53,1633},{0,42,166},{1,29,162},{0,26,209},{0,40,5419},{0,29,3012},{0,25,964},{0,18,3152},{0,21,6513},{0,17,3861},{1,53,1569},{1,42,138},{2,28,74},{1,26,186},{11,3,5419}, +{0,29,3012},{0,25,964},{0,18,3152},{18,2,5419},{0,18,3152},{1,34,66},{1,34,66},{1,34,66},{1,21,66},{0,22,1152},{0,19,442},{0,19,442},{0,11,493},{0,11,1480},{0,11,749},{1,34,2},{1,34,2},{1,34,2},{1,21,2},{6,2,1152},{0,19,442},{0,19,442},{0,11,493},{11,0,1152},{0,11,493},{16,2,1568},{0,42,85},{3,28,0},{0,26,128},{16,2,1568},{27,1,1568},{0,26,128}, +{0,21,1568},{27,1,1568},{0,21,1568},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,6,0},{0,6,0},{0,6,0},{0,4,1},{0,3,20},{0,3,20},{1,58,1713},{1,44,230},{2,31,354},{1,28,273},{0,45,5419},{0,32,2817},{0,27,682},{0,20,2945},{0,25,6853},{0,19,3825},{2,55,1569},{2,44,138},{3,30,74},{2,28,186},{13,1,5419},{0,32,2817},{0,27,682},{0,20,2945},{19,4,5419}, 
+{0,20,2945},{1,40,145},{1,40,145},{1,40,145},{1,25,146},{0,27,1154},{0,23,338},{0,23,338},{0,13,394},{0,14,1665},{0,13,755},{2,36,2},{2,36,2},{2,36,2},{2,23,2},{8,0,1152},{0,23,338},{0,23,338},{0,13,394},{11,3,1152},{0,13,394},{17,4,1568},{0,46,41},{4,30,1},{0,29,80},{17,4,1568},{27,4,1568},{0,29,80},{0,23,1568},{27,4,1568},{0,23,1568},{1,0,145}, +{1,0,145},{1,0,145},{1,0,145},{0,11,1},{0,11,1},{0,11,1},{0,7,0},{0,5,74},{0,5,74},{2,60,1905},{1,49,393},{2,33,531},{1,31,433},{0,50,5419},{0,36,2609},{0,29,468},{0,22,2756},{0,27,7195},{0,21,3825},{3,57,1569},{3,46,138},{4,32,81},{3,30,186},{15,0,5419},{0,36,2609},{0,29,468},{0,22,2756},{24,1,5419},{0,22,2756},{2,42,337},{2,42,337},{2,42,337}, +{2,27,338},{0,32,1154},{0,25,244},{0,25,244},{0,16,289},{0,17,1849},{0,15,797},{3,38,2},{3,38,2},{3,38,2},{3,25,2},{9,2,1152},{0,25,244},{0,25,244},{0,16,289},{16,0,1152},{0,16,289},{18,6,1568},{0,51,16},{5,32,2},{0,31,41},{18,6,1568},{28,6,1568},{0,31,41},{0,25,1568},{28,6,1568},{0,25,1568},{2,0,337},{2,0,337},{2,0,337},{2,0,337},{0,17,0}, +{0,17,0},{0,17,0},{0,10,0},{0,7,164},{0,7,164},{2,63,2145},{2,51,585},{3,35,851},{2,33,618},{0,55,5420},{0,40,2425},{0,32,274},{0,24,2585},{0,29,7609},{0,23,3861},{4,58,1570},{4,47,136},{5,34,81},{4,32,193},{16,1,5419},{0,40,2425},{0,32,274},{0,24,2585},{25,3,5419},{0,24,2585},{2,47,545},{2,47,545},{2,47,545},{2,30,545},{0,38,1152},{0,29,164},{0,29,164}, +{0,18,208},{0,19,2098},{0,17,869},{4,40,0},{4,40,0},{4,40,0},{4,27,1},{11,1,1152},{0,29,164},{0,29,164},{0,18,208},{17,2,1152},{0,18,208},{21,1,1568},{0,54,2},{6,34,2},{0,33,13},{21,1,1568},{29,8,1568},{0,33,13},{0,27,1568},{29,8,1568},{0,27,1568},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,22,0},{0,22,0},{0,22,0},{0,13,1},{0,10,289}, 
+{0,10,289},{3,63,2596},{3,53,934},{3,37,1277},{2,35,964},{0,61,5420},{0,44,2242},{0,35,141},{0,28,2402},{0,32,8131},{0,25,3956},{5,61,1569},{5,50,138},{6,36,74},{5,34,186},{17,4,5419},{0,44,2242},{0,35,141},{0,28,2402},{27,4,5419},{0,28,2402},{3,49,901},{3,49,901},{3,49,901},{3,32,900},{0,44,1152},{0,34,97},{0,34,97},{0,20,145},{0,23,2436},{0,19,989},{5,43,1}, +{5,43,1},{5,43,1},{5,30,2},{13,0,1152},{0,34,97},{0,34,97},{0,20,145},{20,2,1152},{0,20,145},{23,0,1568},{1,57,2},{7,36,0},{0,36,1},{23,0,1568},{31,9,1568},{0,36,1},{0,29,1570},{31,9,1568},{0,29,1570},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{0,28,0},{0,28,0},{0,28,0},{0,17,1},{0,13,461},{0,13,461},{4,63,3146},{3,57,1262},{4,40,1731}, +{3,37,1284},{1,63,5484},{0,46,2129},{0,38,53},{0,30,2243},{0,36,8615},{0,27,4070},{6,63,1569},{6,52,138},{7,38,74},{6,36,186},{18,6,5419},{0,46,2129},{0,38,53},{0,30,2243},{28,6,5419},{0,30,2243},{3,55,1252},{3,55,1252},{3,55,1252},{3,35,1253},{0,49,1152},{0,38,53},{0,38,53},{0,23,89},{0,25,2763},{0,21,1139},{6,45,1},{6,45,1},{6,45,1},{6,32,2},{14,2,1152}, +{0,38,53},{0,38,53},{0,23,89},{24,0,1152},{0,23,89},{24,2,1568},{2,59,2},{8,38,1},{1,38,1},{24,2,1568},{27,17,1568},{1,38,1},{0,31,1570},{27,17,1568},{0,31,1570},{3,0,1252},{3,0,1252},{3,0,1252},{3,0,1252},{0,33,0},{0,33,0},{0,33,0},{0,20,0},{0,15,653},{0,15,653},{5,63,3716},{4,58,1599},{4,42,2134},{4,39,1627},{2,63,5655},{0,51,1983},{0,40,33}, +{0,32,2133},{0,38,8925},{0,30,4044},{7,63,1587},{7,54,138},{8,40,80},{7,38,186},{21,1,5419},{0,51,1979},{0,40,29},{0,32,2129},{29,8,5419},{0,32,2129},{4,56,1587},{4,56,1587},{4,56,1587},{4,37,1586},{0,54,1156},{0,41,29},{0,41,29},{0,25,54},{0,27,2988},{0,23,1193},{7,47,1},{7,47,1},{7,47,1},{7,33,2},{16,0,1152},{0,41,25},{0,41,25},{0,25,50},{26,1,1152}, 
+{0,25,50},{25,4,1568},{3,61,2},{9,40,1},{2,40,1},{25,4,1568},{28,19,1568},{2,40,1},{0,33,1568},{28,19,1568},{0,33,1568},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{0,38,5},{0,38,5},{0,38,5},{0,23,4},{0,17,754},{0,17,754},{6,63,3890},{5,60,1599},{5,44,2134},{5,41,1627},{3,63,5748},{0,55,1975},{1,42,33},{0,33,2085},{0,42,8569},{0,32,3525},{9,63,1634}, +{8,55,136},{9,42,80},{8,40,208},{22,3,5419},{0,55,1875},{1,42,29},{0,33,1985},{30,10,5419},{0,33,1985},{5,58,1587},{5,58,1587},{5,58,1587},{5,39,1586},{1,56,1156},{1,43,29},{1,43,29},{1,27,54},{0,31,2736},{0,25,907},{8,48,0},{8,48,0},{8,48,0},{8,35,1},{17,2,1152},{0,45,5},{0,45,5},{0,28,17},{27,3,1152},{0,28,17},{26,6,1568},{4,62,1},{10,42,1}, +{3,42,1},{26,6,1568},{29,21,1568},{3,42,1},{0,35,1568},{29,21,1568},{0,35,1568},{5,0,1586},{5,0,1586},{5,0,1586},{5,0,1586},{1,40,5},{1,40,5},{1,40,5},{1,25,4},{0,21,610},{0,21,610},{7,63,4136},{6,63,1589},{7,46,2141},{6,44,1613},{4,63,5895},{2,55,1973},{2,45,43},{2,35,2100},{0,46,8199},{0,34,3051},{10,63,1667},{9,57,131},{10,45,74},{9,43,195},{24,2,5419}, +{0,59,1772},{2,45,34},{0,36,1874},{27,17,5419},{0,36,1874},{6,61,1576},{6,61,1576},{6,61,1576},{6,41,1577},{2,59,1161},{2,46,26},{2,46,26},{2,30,49},{0,36,2505},{0,29,616},{9,51,1},{9,51,1},{9,51,1},{9,38,2},{19,1,1152},{0,49,2},{0,49,2},{0,30,4},{29,4,1152},{0,30,4},{28,5,1568},{6,63,13},{11,44,1},{4,44,1},{28,5,1568},{31,22,1568},{4,44,1}, +{0,37,1570},{31,22,1568},{0,37,1570},{6,0,1576},{6,0,1576},{6,0,1576},{6,0,1576},{2,43,9},{2,43,9},{2,43,9},{2,27,10},{0,25,468},{0,25,468},{8,63,4436},{7,63,1625},{7,49,2161},{7,46,1613},{6,63,6079},{3,57,1973},{3,47,43},{3,37,2100},{0,49,7908},{0,37,2671},{11,63,1745},{10,59,131},{11,47,74},{10,45,195},{25,4,5419},{0,61,1699},{3,47,34},{0,38,1787},{28,19,5419}, 
+{0,38,1787},{7,63,1576},{7,63,1576},{7,63,1576},{7,43,1577},{3,61,1161},{3,47,34},{3,47,34},{3,32,59},{0,38,2294},{0,31,422},{10,53,1},{10,53,1},{10,53,1},{10,40,2},{20,3,1152},{2,49,2},{2,49,2},{1,32,2},{30,6,1152},{1,32,2},{31,0,1568},{8,63,34},{12,46,1},{5,46,1},{31,0,1568},{28,29,1568},{5,46,1},{0,39,1570},{28,29,1568},{0,39,1570},{7,0,1576}, +{7,0,1576},{7,0,1576},{7,0,1576},{3,45,9},{3,45,9},{3,45,9},{3,29,10},{0,29,356},{0,29,356},{9,63,4730},{8,63,1716},{8,50,2134},{8,47,1627},{7,63,6244},{3,61,1977},{4,48,33},{3,40,2100},{0,53,7620},{0,40,2343},{12,63,1832},{11,61,131},{12,48,80},{11,47,195},{26,6,5419},{1,63,1699},{4,48,29},{0,41,1714},{29,21,5419},{0,41,1714},{8,63,1595},{8,63,1595},{8,63,1595}, +{8,45,1587},{4,62,1158},{4,49,29},{4,49,29},{4,33,54},{0,42,2098},{0,34,282},{11,55,1},{11,55,1},{11,55,1},{11,42,2},{21,5,1152},{3,51,2},{3,51,2},{2,34,2},{31,8,1152},{2,34,2},{30,9,1568},{9,63,68},{13,48,1},{6,48,1},{30,9,1568},{29,31,1568},{6,48,1},{0,41,1570},{29,31,1568},{0,41,1570},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{4,47,4}, +{4,47,4},{4,47,4},{4,31,5},{0,32,269},{0,32,269},{11,63,5010},{9,63,1878},{9,52,2134},{9,49,1627},{8,63,6508},{4,62,1965},{5,50,33},{4,42,2079},{0,55,7360},{0,42,2067},{14,63,1952},{12,63,149},{13,50,80},{12,48,208},{29,1,5419},{3,63,1787},{5,50,29},{0,43,1651},{30,23,5419},{0,43,1651},{9,63,1622},{9,63,1622},{9,63,1622},{9,47,1587},{5,63,1164},{5,51,29},{5,51,29}, +{5,35,54},{0,46,1926},{0,36,186},{12,56,0},{12,56,0},{12,56,0},{12,44,1},{24,0,1152},{3,55,2},{3,55,2},{3,36,2},{31,11,1152},{3,36,2},{31,11,1568},{11,63,116},{14,50,1},{7,50,1},{31,11,1568},{29,34,1568},{7,50,1},{0,43,1570},{29,34,1568},{0,43,1570},{9,0,1586},{9,0,1586},{9,0,1586},{9,0,1586},{5,48,5},{5,48,5},{5,48,5},{5,33,4},{0,36,185}, 
+{0,36,185},{12,63,5316},{11,63,2154},{11,54,2141},{10,52,1613},{10,63,6800},{6,62,1995},{6,53,43},{5,44,2085},{0,59,7068},{0,44,1845},{15,63,2081},{13,63,206},{14,53,74},{13,51,195},{31,0,5419},{5,63,1937},{6,53,34},{0,45,1601},{28,29,5419},{0,45,1601},{10,63,1676},{10,63,1676},{10,63,1676},{10,49,1577},{7,63,1179},{6,54,26},{6,54,26},{6,38,49},{0,49,1764},{0,40,149},{13,59,1}, +{13,59,1},{13,59,1},{13,46,1},{24,6,1152},{4,57,1},{4,57,1},{4,38,4},{29,17,1152},{4,38,4},{30,20,1568},{13,63,205},{15,52,1},{8,52,1},{30,20,1568},{31,35,1568},{8,52,1},{0,45,1576},{31,35,1568},{0,45,1576},{10,0,1576},{10,0,1576},{10,0,1576},{10,0,1576},{6,51,9},{6,51,9},{6,51,9},{6,35,10},{0,40,113},{0,40,113},{13,63,5658},{12,63,2435},{12,56,2148}, +{11,54,1613},{11,63,7055},{7,63,2090},{7,55,43},{6,46,2085},{0,63,6820},{0,47,1701},{16,63,2216},{14,63,334},{15,55,74},{14,53,195},{30,9,5419},{7,63,2081},{7,55,34},{0,47,1580},{29,31,5419},{0,47,1580},{11,63,1745},{11,63,1745},{11,63,1745},{11,51,1577},{8,63,1220},{7,56,26},{7,56,26},{7,40,49},{0,53,1624},{0,42,145},{14,61,1},{14,61,1},{14,61,1},{14,48,2},{27,1,1152}, +{5,59,1},{5,59,1},{5,40,4},{30,19,1152},{5,40,4},{31,22,1568},{15,63,289},{16,54,1},{9,54,1},{31,22,1568},{28,42,1568},{9,54,1},{0,47,1576},{28,42,1568},{0,47,1576},{11,0,1576},{11,0,1576},{11,0,1576},{11,0,1576},{7,53,9},{7,53,9},{7,53,9},{7,37,10},{0,44,61},{0,44,61},{14,63,6036},{13,63,2751},{12,58,2119},{12,55,1627},{12,63,7316},{8,63,2228},{8,57,31}, +{7,48,2100},{0,63,6884},{0,49,1606},{17,63,2402},{16,63,500},{16,57,82},{15,55,195},{31,11,5419},{9,63,2195},{8,57,27},{0,49,1570},{29,34,5419},{0,49,1570},{12,63,1811},{12,63,1811},{12,63,1811},{12,53,1587},{9,63,1286},{8,57,22},{8,57,22},{8,41,56},{0,57,1508},{1,44,145},{15,63,1},{15,63,1},{15,63,1},{15,50,2},{28,3,1152},{6,61,1},{6,61,1},{6,42,4},{31,21,1152}, 
+{6,42,4},{31,27,1568},{17,63,410},{17,56,1},{10,56,1},{31,27,1568},{29,44,1568},{10,56,1},{0,49,1570},{29,44,1568},{0,49,1570},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{8,55,4},{8,55,4},{8,55,4},{8,39,5},{0,48,34},{0,48,34},{15,63,6450},{14,63,3135},{13,60,2119},{13,57,1627},{13,63,7661},{10,63,2448},{9,59,31},{8,50,2079},{2,63,7196},{0,51,1580},{19,63,2594}, +{17,63,698},{16,59,73},{16,57,194},{31,16,5419},{11,63,2379},{9,59,27},{1,51,1570},{30,36,5419},{1,51,1570},{13,63,1910},{13,63,1910},{13,63,1910},{13,55,1587},{10,63,1388},{9,59,22},{9,59,22},{9,43,56},{0,61,1416},{2,46,145},{16,63,4},{16,63,4},{16,63,4},{16,52,1},{29,5,1152},{7,63,1},{7,63,1},{7,44,4},{31,24,1152},{7,44,4},{31,32,1568},{19,63,530},{18,58,1}, +{11,58,1},{31,32,1568},{30,46,1568},{11,58,1},{0,51,1570},{30,46,1568},{0,51,1570},{13,0,1586},{13,0,1586},{13,0,1586},{13,0,1586},{9,57,4},{9,57,4},{9,57,4},{9,41,5},{0,51,10},{0,51,10},{16,63,6900},{15,63,3657},{15,62,2128},{14,60,1616},{15,63,8023},{11,63,2845},{10,61,33},{9,52,2085},{5,63,7651},{1,54,1584},{20,63,2866},{18,63,1011},{18,61,69},{17,59,181},{31,22,5419}, +{13,63,2657},{10,61,24},{2,53,1577},{28,42,5419},{2,53,1577},{15,63,2057},{15,63,2057},{15,63,2057},{14,58,1577},{12,63,1476},{10,61,29},{10,61,29},{10,46,54},{0,63,1324},{4,47,137},{17,63,37},{17,63,37},{17,63,37},{17,54,1},{31,4,1152},{9,63,4},{9,63,4},{7,47,2},{30,29,1152},{7,47,2},{31,38,1568},{21,63,637},{19,60,5},{12,60,4},{31,38,1568},{31,48,1568},{12,60,4}, +{0,53,1576},{31,48,1568},{0,53,1576},{14,0,1576},{14,0,1576},{14,0,1576},{14,0,1576},{10,59,10},{10,59,10},{10,59,10},{10,44,10},{0,56,0},{0,56,0},{18,63,7332},{16,63,4196},{16,63,2175},{15,62,1616},{16,63,8348},{13,63,3285},{11,63,33},{10,54,2085},{8,63,8004},{2,56,1584},{21,63,3112},{20,63,1281},{19,63,69},{18,61,181},{31,27,5419},{15,63,2897},{11,63,24},{3,55,1577},{29,44,5419}, 
+{3,55,1577},{16,63,2171},{16,63,2171},{16,63,2171},{15,60,1577},{13,63,1590},{11,63,29},{11,63,29},{11,48,49},{2,63,1424},{4,50,145},{19,63,65},{19,63,65},{19,63,65},{18,56,1},{30,13,1152},{11,63,20},{11,63,20},{9,48,4},{31,31,1152},{9,48,4},{30,47,1568},{23,63,785},{20,62,4},{13,62,4},{30,47,1568},{31,51,1568},{13,62,4},{0,55,1576},{31,51,1568},{0,55,1576},{15,0,1576}, +{15,0,1576},{15,0,1576},{15,0,1576},{11,61,10},{11,61,10},{11,61,10},{11,46,10},{1,58,0},{1,58,0},{19,63,7014},{17,63,4230},{17,63,2294},{16,63,1595},{17,63,7865},{14,63,3114},{12,63,85},{12,55,1713},{8,63,7436},{3,57,1268},{22,63,2794},{21,63,1221},{20,63,113},{19,61,114},{31,31,4803},{17,63,2648},{13,63,61},{4,57,1253},{31,44,4803},{4,57,1253},{17,63,2294},{17,63,2294},{17,63,2294}, +{16,62,1587},{14,63,1740},{12,63,85},{12,63,85},{12,49,56},{3,63,1571},{5,52,145},{20,63,113},{20,63,113},{20,63,113},{19,58,1},{31,15,1152},{13,63,61},{13,63,61},{10,50,4},{31,34,1152},{10,50,4},{31,47,1250},{24,63,680},{21,63,4},{15,63,0},{31,47,1250},{31,53,1250},{15,63,0},{0,57,1252},{31,53,1250},{0,57,1252},{16,0,1586},{16,0,1586},{16,0,1586},{16,0,1586},{12,63,4}, +{12,63,4},{12,63,4},{12,47,8},{2,60,0},{2,60,0},{19,63,6534},{18,63,4116},{18,63,2435},{17,63,1590},{18,63,7164},{15,63,2809},{14,63,161},{12,57,1256},{10,63,6748},{5,58,909},{23,63,2340},{22,63,1065},{21,63,164},{20,62,41},{31,34,4056},{18,63,2211},{15,63,113},{6,58,884},{31,46,4056},{6,58,884},{18,63,2435},{18,63,2435},{18,63,2435},{17,63,1590},{16,63,1923},{14,63,161},{14,63,161}, +{13,51,56},{6,63,1729},{6,54,145},{21,63,164},{21,63,164},{21,63,164},{20,60,0},{31,20,1152},{15,63,113},{15,63,113},{11,52,4},{31,37,1152},{11,52,4},{31,49,882},{25,63,482},{23,63,0},{18,63,1},{31,49,882},{30,56,882},{18,63,1},{0,58,884},{30,56,882},{0,58,884},{17,0,1586},{17,0,1586},{17,0,1586},{17,0,1586},{13,63,13},{13,63,13},{13,63,13},{13,49,5},{3,62,0}, 
+{3,62,0},{21,63,6091},{20,63,4022},{19,63,2609},{18,63,1640},{19,63,6490},{16,63,2617},{15,63,318},{14,58,834},{11,63,6135},{6,59,598},{24,63,1881},{23,63,931},{22,63,245},{21,63,5},{31,38,3318},{20,63,1733},{17,63,202},{8,59,545},{31,48,3318},{8,59,545},{19,63,2609},{19,63,2609},{19,63,2609},{18,63,1640},{17,63,2086},{15,63,318},{15,63,318},{14,54,54},{8,63,1868},{8,55,137},{22,63,245}, +{22,63,245},{22,63,245},{21,62,2},{31,26,1152},{17,63,202},{17,63,202},{11,55,2},{30,42,1152},{11,55,2},{31,52,545},{26,63,305},{25,63,4},{20,63,1},{31,52,545},{30,58,545},{20,63,1},{0,59,545},{30,58,545},{0,59,545},{18,0,1576},{18,0,1576},{18,0,1576},{18,0,1576},{15,63,29},{15,63,29},{15,63,29},{14,52,10},{5,62,5},{5,62,5},{22,63,5719},{21,63,3980},{20,63,2834}, +{19,63,1745},{20,63,6050},{18,63,2457},{16,63,536},{15,59,515},{14,63,5674},{8,60,385},{25,63,1573},{24,63,861},{23,63,338},{22,63,10},{30,45,2753},{21,63,1438},{19,63,290},{10,60,313},{31,50,2753},{10,60,313},{20,63,2834},{20,63,2834},{20,63,2834},{19,63,1745},{18,63,2284},{16,63,536},{16,63,536},{15,56,54},{10,63,2064},{9,57,137},{23,63,338},{23,63,338},{23,63,338},{22,63,10},{31,31,1152}, +{19,63,290},{19,63,290},{12,57,2},{31,44,1152},{12,57,2},{31,55,313},{28,63,181},{26,63,1},{23,63,0},{31,55,313},{31,58,313},{23,63,0},{0,60,313},{31,58,313},{0,60,313},{19,0,1576},{19,0,1576},{19,0,1576},{19,0,1576},{16,63,52},{16,63,52},{16,63,52},{15,54,10},{7,62,25},{7,62,25},{22,63,5399},{22,63,3974},{21,63,3035},{20,63,1875},{21,63,5619},{19,63,2378},{18,63,776}, +{16,60,294},{15,63,5258},{9,61,225},{26,63,1333},{25,63,813},{24,63,425},{23,63,65},{29,52,2273},{23,63,1218},{20,63,353},{12,61,146},{28,56,2273},{12,61,146},{21,63,3035},{21,63,3035},{21,63,3035},{20,63,1875},{19,63,2518},{18,63,776},{18,63,776},{16,58,49},{11,63,2323},{10,59,137},{24,63,425},{24,63,425},{24,63,425},{23,63,65},{31,36,1152},{20,63,353},{20,63,353},{13,59,2},{31,47,1152}, 
+{13,59,2},{31,57,145},{28,63,85},{28,63,4},{26,63,1},{31,57,145},{31,60,145},{26,63,1},{0,61,145},{31,60,145},{0,61,145},{20,0,1586},{20,0,1586},{20,0,1586},{20,0,1586},{17,63,85},{17,63,85},{17,63,85},{16,56,5},{8,63,40},{8,63,40},{23,63,5143},{23,63,4004},{22,63,3254},{21,63,2070},{22,63,5274},{20,63,2310},{19,63,1062},{17,61,133},{17,63,5011},{10,63,161},{27,63,1161}, +{26,63,801},{26,63,545},{25,63,164},{30,52,1878},{24,63,1094},{22,63,461},{13,62,42},{28,58,1878},{13,62,42},{22,63,3254},{22,63,3254},{22,63,3254},{21,63,2070},{20,63,2833},{19,63,1062},{19,63,1062},{17,60,49},{14,63,2577},{11,61,137},{26,63,545},{26,63,545},{26,63,545},{25,63,164},{30,45,1152},{22,63,461},{22,63,461},{14,61,2},{31,50,1152},{14,61,2},{31,60,41},{30,63,25},{29,63,1}, +{28,63,0},{31,60,41},{31,61,41},{28,63,0},{0,62,41},{31,61,41},{0,62,41},{21,0,1586},{21,0,1586},{21,0,1586},{21,0,1586},{18,63,136},{18,63,136},{18,63,136},{17,58,5},{10,63,80},{10,63,80},{24,63,4882},{24,63,4070},{23,63,3532},{22,63,2360},{24,63,4945},{21,63,2422},{20,63,1433},{18,63,58},{20,63,4717},{12,63,157},{28,63,1026},{27,63,835},{27,63,666},{26,63,305},{30,56,1536}, +{26,63,996},{24,63,628},{16,63,1},{30,58,1536},{16,63,1},{23,63,3532},{23,63,3532},{23,63,3532},{22,63,2360},{22,63,3110},{20,63,1433},{20,63,1433},{18,62,50},{16,63,2939},{12,63,157},{27,63,666},{27,63,666},{27,63,666},{26,63,305},{29,54,1152},{24,63,628},{24,63,628},{16,63,1},{29,56,1152},{16,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0}, +{0,63,0},{31,63,0},{0,63,0},{22,0,1576},{22,0,1576},{22,0,1576},{22,0,1576},{19,63,221},{19,63,221},{19,63,221},{18,60,9},{12,63,157},{12,63,157},{25,63,4212},{24,63,3590},{24,63,3106},{23,63,2201},{24,63,4129},{22,63,2101},{21,63,1301},{19,63,13},{20,63,3869},{14,63,233},{28,63,706},{28,63,562},{28,63,481},{27,63,218},{30,58,1067},{27,63,699},{25,63,442},{18,63,1},{31,58,1067}, 
+{18,63,1},{24,63,3106},{24,63,3106},{24,63,3106},{23,63,2201},{23,63,2668},{21,63,1301},{21,63,1301},{19,63,13},{18,63,2523},{14,63,233},{28,63,481},{28,63,481},{28,63,481},{27,63,218},{31,50,800},{25,63,442},{25,63,442},{18,63,1},{31,55,800},{18,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{23,0,1576}, +{23,0,1576},{23,0,1576},{23,0,1576},{20,63,325},{20,63,325},{20,63,325},{19,62,9},{14,63,233},{14,63,233},{26,63,3642},{25,63,3132},{25,63,2771},{24,63,2070},{25,63,3444},{23,63,1834},{22,63,1205},{20,63,8},{21,63,3219},{16,63,346},{29,63,456},{28,63,370},{28,63,289},{28,63,145},{31,56,683},{28,63,451},{27,63,290},{21,63,1},{30,60,683},{21,63,1},{25,63,2771},{25,63,2771},{25,63,2771}, +{24,63,2070},{24,63,2273},{22,63,1205},{22,63,1205},{20,63,8},{20,63,2121},{16,63,346},{28,63,289},{28,63,289},{28,63,289},{28,63,145},{30,56,512},{27,63,290},{27,63,290},{21,63,1},{30,58,512},{21,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{24,0,1586},{24,0,1586},{24,0,1586},{24,0,1586},{22,63,421}, +{22,63,421},{22,63,421},{20,63,8},{16,63,346},{16,63,346},{26,63,3162},{26,63,2742},{26,63,2486},{25,63,1947},{26,63,2877},{24,63,1641},{23,63,1145},{22,63,52},{22,63,2673},{18,63,458},{29,63,264},{29,63,200},{29,63,164},{28,63,81},{31,58,384},{28,63,243},{28,63,162},{23,63,1},{31,60,384},{23,63,1},{26,63,2486},{26,63,2486},{26,63,2486},{25,63,1947},{24,63,1969},{23,63,1145},{23,63,1145}, +{22,63,52},{20,63,1785},{18,63,458},{29,63,164},{29,63,164},{29,63,164},{28,63,81},{31,55,288},{28,63,162},{28,63,162},{23,63,1},{28,62,288},{23,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{25,0,1586},{25,0,1586},{25,0,1586},{25,0,1586},{23,63,520},{23,63,520},{23,63,520},{22,63,52},{18,63,458}, 
+{18,63,458},{0,46,2665},{0,36,666},{0,26,37},{0,22,773},{0,31,5885},{0,23,4085},{0,21,1802},{0,15,4214},{0,17,6543},{0,13,4662},{0,46,2665},{0,36,666},{0,26,37},{0,22,773},{9,0,5885},{0,23,4085},{0,21,1802},{0,15,4214},{15,0,5885},{0,15,4214},{0,22,0},{0,22,0},{0,22,0},{0,13,1},{0,11,545},{0,10,289},{0,10,289},{0,6,306},{0,6,630},{0,5,362},{0,22,0}, +{0,22,0},{0,22,0},{0,13,1},{3,1,545},{0,10,289},{0,10,289},{0,6,306},{5,1,545},{0,6,306},{13,2,2665},{0,36,666},{0,26,37},{0,22,773},{13,2,2665},{23,0,2665},{0,22,773},{0,17,2665},{23,0,2665},{0,17,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,51,2665},{0,40,530},{0,28,2}, +{0,24,650},{0,34,6670},{0,27,4445},{0,23,1886},{0,16,4577},{0,19,7493},{0,15,5130},{0,51,2665},{0,40,530},{0,28,2},{0,24,650},{10,1,6669},{0,27,4445},{0,23,1886},{0,16,4577},{17,0,6669},{0,16,4577},{0,27,1},{0,27,1},{0,27,1},{0,16,1},{0,14,841},{0,13,442},{0,13,442},{0,7,458},{0,7,965},{0,7,558},{0,27,1},{0,27,1},{0,27,1},{0,16,1},{4,0,841}, +{0,13,442},{0,13,442},{0,7,458},{7,0,841},{0,7,458},{15,1,2665},{0,40,530},{0,28,2},{0,24,650},{15,1,2665},{25,0,2665},{0,24,650},{0,19,2665},{25,0,2665},{0,19,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,56,2665},{0,44,410},{0,30,17},{0,28,522},{0,38,7538},{0,29,4826},{0,25,2006}, +{0,18,5002},{0,21,8547},{0,16,5681},{0,56,2665},{0,44,410},{0,30,17},{0,28,522},{11,1,7538},{0,29,4826},{0,25,2006},{0,18,5002},{17,2,7538},{0,18,5002},{0,32,1},{0,32,1},{0,32,1},{0,19,1},{0,16,1201},{0,15,628},{0,15,628},{0,9,653},{0,9,1382},{0,9,822},{0,32,1},{0,32,1},{0,32,1},{0,19,1},{5,0,1201},{0,15,628},{0,15,628},{0,9,653},{8,0,1201}, 
+{0,9,653},{16,2,2665},{0,44,410},{1,30,2},{0,28,522},{16,2,2665},{28,0,2665},{0,28,522},{0,21,2665},{28,0,2665},{0,21,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,62,2665},{0,49,325},{0,33,65},{0,30,405},{0,41,8493},{0,31,5261},{0,27,2162},{0,20,5477},{0,23,9705},{0,18,6259},{0,62,2665}, +{0,49,325},{0,33,65},{0,30,405},{11,4,8493},{0,31,5261},{0,27,2162},{0,20,5477},{17,4,8493},{0,20,5477},{0,38,0},{0,38,0},{0,38,0},{0,23,1},{0,19,1625},{0,17,821},{0,17,821},{0,11,898},{0,10,1874},{0,9,1094},{0,38,0},{0,38,0},{0,38,0},{0,23,1},{5,2,1625},{0,17,821},{0,17,821},{0,11,898},{8,2,1625},{0,11,898},{18,1,2665},{0,49,325},{2,32,1}, +{0,30,405},{18,1,2665},{29,2,2665},{0,30,405},{0,23,2665},{29,2,2665},{0,23,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,63,2777},{0,51,221},{0,35,178},{0,32,306},{0,45,9669},{0,34,5810},{0,29,2382},{0,22,6054},{0,25,11123},{0,20,6989},{1,63,2741},{0,51,221},{1,35,137},{0,32,306},{13,1,9669}, +{0,34,5810},{0,29,2382},{0,22,6054},{19,4,9669},{0,22,6054},{0,44,0},{0,44,0},{0,44,0},{0,26,1},{0,22,2178},{0,19,1108},{0,19,1108},{0,11,1213},{0,11,2506},{0,11,1469},{0,44,0},{0,44,0},{0,44,0},{0,26,1},{6,2,2178},{0,19,1108},{0,19,1108},{0,11,1213},{11,0,2178},{0,11,1213},{20,0,2665},{0,51,221},{3,34,1},{0,32,306},{20,0,2665},{31,3,2665},{0,32,306}, +{0,25,2669},{31,3,2665},{0,25,2669},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,63,2949},{0,55,209},{1,37,242},{0,35,285},{0,50,9669},{0,38,5486},{0,32,1938},{0,24,5733},{0,27,11477},{0,22,6887},{2,63,2789},{0,55,209},{2,37,137},{0,35,285},{15,0,9669},{0,38,5486},{0,32,1938},{0,24,5733},{24,1,9669}, 
+{0,24,5733},{1,46,64},{1,46,64},{1,46,64},{1,28,65},{0,27,2180},{0,23,932},{0,23,932},{0,15,1037},{0,14,2691},{0,13,1421},{1,46,0},{1,46,0},{1,46,0},{1,28,1},{8,0,2178},{0,23,932},{0,23,932},{0,15,1037},{11,3,2178},{0,15,1037},{21,2,2665},{0,55,145},{4,36,2},{0,35,221},{21,2,2665},{31,6,2665},{0,35,221},{0,27,2669},{31,6,2665},{0,27,2669},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,5,1},{0,5,1},{0,5,1},{0,3,1},{0,3,17},{0,3,17},{2,63,3285},{1,57,273},{2,39,434},{1,37,349},{0,56,9670},{0,42,5186},{0,34,1530},{0,26,5430},{0,29,11903},{0,24,6821},{3,63,2873},{1,57,209},{3,39,137},{1,37,285},{16,1,9669},{0,42,5186},{0,34,1530},{0,26,5430},{25,3,9669},{0,26,5430},{1,51,128},{1,51,128},{1,51,128}, +{1,31,129},{0,32,2180},{0,27,772},{0,27,772},{0,16,865},{0,17,2875},{0,15,1409},{2,47,1},{2,47,1},{2,47,1},{2,30,1},{9,2,2178},{0,27,772},{0,27,772},{0,16,865},{16,0,2178},{0,16,865},{23,0,2665},{0,59,85},{5,38,2},{0,37,146},{23,0,2665},{31,9,2665},{0,37,146},{0,29,2669},{31,9,2665},{0,29,2669},{1,0,128},{1,0,128},{1,0,128},{1,0,128},{0,11,0}, +{0,11,0},{0,11,0},{0,6,1},{0,5,65},{0,5,65},{3,63,3785},{1,61,405},{2,41,653},{1,39,466},{0,61,9669},{0,44,4909},{0,36,1190},{0,28,5145},{0,32,12358},{0,26,6791},{4,63,2966},{2,59,209},{4,41,154},{2,39,285},{18,0,9669},{0,44,4909},{0,36,1190},{0,28,5145},{30,0,9669},{0,28,5145},{2,53,320},{2,53,320},{2,53,320},{2,33,321},{0,38,2178},{0,31,628},{0,31,628}, +{0,18,730},{0,19,3124},{0,17,1427},{3,49,1},{3,49,1},{3,49,1},{3,32,0},{11,1,2178},{0,31,628},{0,31,628},{0,18,730},{17,2,2178},{0,18,730},{24,2,2665},{0,63,41},{6,40,2},{0,39,89},{24,2,2665},{30,14,2665},{0,39,89},{0,31,2669},{30,14,2665},{0,31,2669},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,16,0},{0,16,0},{0,16,0},{0,10,1},{0,7,149}, 
+{0,7,149},{4,63,4514},{2,63,630},{3,45,1013},{2,41,681},{1,63,9738},{0,49,4610},{0,40,833},{0,30,4849},{0,36,12905},{0,29,6798},{6,63,3101},{4,60,208},{5,43,149},{3,41,286},{18,6,9669},{0,49,4610},{0,40,833},{0,30,4849},{28,6,9669},{0,30,4849},{2,59,545},{2,59,545},{2,59,545},{2,37,545},{0,44,2178},{0,34,493},{0,34,493},{0,22,584},{0,23,3462},{0,19,1493},{4,52,0}, +{4,52,0},{4,52,0},{4,34,1},{13,0,2178},{0,34,493},{0,34,493},{0,22,584},{20,2,2178},{0,22,584},{26,1,2665},{2,63,85},{7,42,4},{0,41,52},{26,1,2665},{31,16,2665},{0,41,52},{0,33,2669},{31,16,2665},{0,33,2669},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,22,0},{0,22,0},{0,22,0},{0,13,1},{0,10,289},{0,10,289},{4,63,5330},{3,63,1018},{3,47,1406}, +{2,43,966},{2,63,9981},{0,53,4366},{0,42,585},{0,33,4609},{0,38,13451},{0,31,6834},{7,63,3233},{5,62,208},{6,45,149},{4,42,285},{21,1,9669},{0,53,4366},{0,42,585},{0,33,4609},{29,8,9669},{0,33,4609},{3,61,865},{3,61,865},{3,61,865},{3,39,865},{0,49,2178},{0,38,377},{0,38,377},{0,24,461},{0,25,3789},{0,21,1589},{5,54,0},{5,54,0},{5,54,0},{5,36,1},{14,2,2178}, +{0,38,377},{0,38,377},{0,24,461},{24,0,2178},{0,24,461},{28,0,2665},{3,63,153},{8,44,5},{0,44,20},{28,0,2665},{31,19,2665},{0,44,20},{0,35,2669},{31,19,2665},{0,35,2669},{3,0,865},{3,0,865},{3,0,865},{3,0,865},{0,27,1},{0,27,1},{0,27,1},{0,16,1},{0,13,442},{0,13,442},{5,63,6270},{3,63,1626},{4,49,1866},{3,45,1286},{2,63,10381},{0,55,4133},{0,45,401}, +{0,35,4366},{0,42,14023},{0,32,6917},{8,63,3434},{6,63,242},{7,47,149},{5,44,285},{22,3,9669},{0,55,4133},{0,45,401},{0,35,4366},{30,10,9669},{0,35,4366},{3,63,1226},{3,63,1226},{3,63,1226},{3,42,1201},{0,54,2178},{0,42,277},{0,42,277},{0,26,356},{0,27,4170},{0,23,1721},{6,56,0},{6,56,0},{6,56,0},{6,38,1},{16,0,2178},{0,42,277},{0,42,277},{0,26,356},{26,1,2178}, 
+{0,26,356},{29,2,2665},{5,63,232},{9,46,5},{0,46,5},{29,2,2665},{31,22,2665},{0,46,5},{0,37,2669},{31,22,2665},{0,37,2669},{3,0,1201},{3,0,1201},{3,0,1201},{3,0,1201},{0,32,1},{0,32,1},{0,32,1},{0,19,1},{0,15,628},{0,15,628},{6,63,7374},{4,63,2339},{4,51,2411},{3,48,1715},{3,63,10950},{0,59,3909},{0,47,257},{0,37,4141},{0,44,14641},{0,34,7031},{9,63,3638}, +{7,63,320},{8,49,154},{6,46,285},{24,1,9669},{0,59,3909},{0,47,257},{0,37,4141},{31,12,9669},{0,37,4141},{4,63,1714},{4,63,1714},{4,63,1714},{4,44,1665},{0,59,2180},{0,46,193},{0,46,193},{0,28,269},{0,31,4582},{0,25,1889},{7,58,0},{7,58,0},{7,58,0},{7,40,1},{17,2,2178},{0,46,193},{0,46,193},{0,28,269},{27,3,2178},{0,28,269},{31,0,2665},{8,63,313},{10,48,2}, +{1,48,2},{31,0,2665},{30,27,2665},{1,48,2},{0,39,2669},{30,27,2665},{0,39,2669},{3,0,1665},{3,0,1665},{3,0,1665},{3,0,1665},{0,38,0},{0,38,0},{0,38,0},{0,23,1},{0,17,821},{0,17,821},{7,63,8807},{5,63,3388},{5,53,3082},{4,49,2230},{4,63,11766},{0,63,3686},{0,51,134},{0,39,3909},{0,46,15438},{0,37,7214},{11,63,3853},{8,63,457},{9,51,149},{7,49,286},{25,4,9669}, +{0,63,3686},{0,51,134},{0,39,3909},{28,19,9669},{0,39,3909},{5,63,2427},{5,63,2427},{5,63,2427},{4,47,2179},{0,63,2210},{0,49,128},{0,49,128},{0,31,193},{0,34,5117},{0,29,2123},{8,60,0},{8,60,0},{8,60,0},{8,42,1},{19,1,2178},{0,49,128},{0,49,128},{0,31,193},{29,4,2178},{0,31,193},{31,6,2665},{9,63,405},{11,50,4},{2,50,4},{31,6,2665},{31,29,2665},{2,50,4}, +{0,42,2669},{31,29,2665},{0,42,2669},{4,0,2178},{4,0,2178},{4,0,2178},{4,0,2178},{0,44,0},{0,44,0},{0,44,0},{0,26,1},{0,19,1108},{0,19,1108},{7,63,10230},{6,63,4421},{6,55,3705},{4,52,2725},{5,63,12634},{0,63,3719},{0,53,77},{0,41,3727},{0,51,15978},{0,39,7289},{12,63,4050},{10,63,629},{10,53,149},{8,50,285},{26,6,9669},{0,63,3718},{0,53,76},{0,41,3726},{29,21,9669}, 
+{0,41,3726},{5,63,3050},{5,63,3050},{5,63,3050},{5,49,2690},{1,63,2325},{0,53,73},{0,53,73},{0,33,118},{0,36,5499},{0,31,2266},{9,62,0},{9,62,0},{9,62,0},{9,44,1},{20,3,2178},{0,53,72},{0,53,72},{0,33,117},{30,6,2178},{0,33,117},{31,12,2665},{11,63,521},{12,52,5},{3,52,4},{31,12,2665},{31,32,2665},{3,52,4},{0,44,2669},{31,32,2665},{0,44,2669},{5,0,2689}, +{5,0,2689},{5,0,2689},{5,0,2689},{0,49,1},{0,49,1},{0,49,1},{0,29,2},{0,21,1341},{0,21,1341},{9,63,10738},{7,63,4899},{7,57,3705},{5,54,2725},{6,63,13045},{1,63,4002},{1,55,77},{0,43,3642},{0,53,15510},{0,42,6577},{13,63,4302},{11,63,857},{11,55,149},{9,52,285},{29,1,9669},{2,63,3954},{1,55,76},{0,43,3561},{30,23,9669},{0,43,3561},{6,63,3173},{6,63,3173},{6,63,3173}, +{6,51,2690},{2,63,2427},{1,55,73},{1,55,73},{1,35,118},{0,40,5171},{0,32,1846},{10,63,4},{10,63,4},{10,63,4},{10,46,1},{21,5,2178},{0,57,32},{0,57,32},{0,35,72},{31,8,2178},{0,35,72},{31,17,2665},{13,63,680},{13,54,5},{3,55,4},{31,17,2665},{31,35,2665},{3,55,4},{0,46,2669},{31,35,2665},{0,46,2669},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{1,51,1}, +{1,51,1},{1,51,1},{1,31,2},{0,25,1145},{0,25,1145},{10,63,11278},{8,63,5402},{8,58,3742},{6,56,2725},{7,63,13510},{3,63,4314},{2,57,77},{1,45,3642},{0,57,15046},{0,44,5927},{14,63,4590},{12,63,1171},{12,57,149},{10,54,285},{30,3,9669},{3,63,4265},{2,57,76},{0,45,3414},{31,25,9669},{0,45,3414},{7,63,3314},{7,63,3314},{7,63,3314},{7,53,2690},{4,63,2532},{2,57,73},{2,57,73}, +{2,37,118},{0,44,4867},{0,36,1470},{11,63,25},{11,63,25},{11,63,25},{11,48,1},{24,0,2178},{0,61,8},{0,61,8},{0,38,40},{31,11,2178},{0,38,40},{31,22,2665},{15,63,832},{14,56,5},{4,57,4},{31,22,2665},{30,40,2665},{4,57,4},{0,47,2677},{30,40,2665},{0,47,2677},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{2,53,1},{2,53,1},{2,53,1},{2,33,2},{0,29,965}, 
+{0,29,965},{11,63,11942},{10,63,6090},{9,62,3723},{8,58,2734},{9,63,14053},{4,63,4863},{3,59,79},{2,48,3633},{0,61,14558},{0,46,5283},{16,63,4858},{14,63,1556},{13,60,138},{11,57,299},{30,9,9669},{6,63,4594},{3,59,75},{0,48,3233},{29,31,9669},{0,48,3233},{9,63,3505},{9,63,3505},{9,63,3505},{8,55,2690},{5,63,2645},{3,61,72},{3,61,72},{3,39,117},{0,46,4539},{0,38,1091},{12,63,64}, +{12,63,64},{12,63,64},{12,50,1},{24,6,2178},{1,63,10},{1,63,10},{0,40,13},{29,17,2178},{0,40,13},{31,28,2665},{17,63,1053},{15,59,2},{5,59,2},{31,28,2665},{31,42,2665},{5,59,2},{0,50,2669},{31,42,2665},{0,50,2669},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{3,55,5},{3,55,5},{3,55,5},{3,36,5},{0,34,794},{0,34,794},{12,63,12466},{11,63,6718},{10,62,3738}, +{8,60,2723},{10,63,14554},{6,63,5363},{4,61,87},{3,50,3633},{0,63,14190},{0,49,4774},{17,63,5158},{15,63,1938},{14,62,138},{12,59,282},{31,11,9669},{8,63,4806},{4,61,86},{0,50,3110},{29,34,9669},{0,50,3110},{10,63,3658},{10,63,3658},{10,63,3658},{9,57,2690},{6,63,2795},{4,62,66},{4,62,66},{4,41,131},{0,51,4269},{0,40,833},{14,63,100},{14,63,100},{14,63,100},{13,52,1},{27,1,2178}, +{3,63,34},{3,63,34},{0,43,2},{30,19,2178},{0,43,2},{31,33,2665},{19,63,1241},{16,61,5},{6,61,2},{31,33,2665},{31,45,2665},{6,61,2},{0,52,2669},{31,45,2665},{0,52,2669},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{4,57,1},{4,57,1},{4,57,1},{4,37,2},{0,38,650},{0,38,650},{14,63,13094},{12,63,7445},{11,63,3830},{9,62,2723},{12,63,14998},{8,63,5926},{5,63,87}, +{4,51,3642},{0,63,14254},{0,51,4306},{18,63,5494},{16,63,2414},{15,63,146},{13,61,282},{31,16,9669},{10,63,5138},{5,63,86},{0,52,3005},{30,36,9669},{0,52,3005},{11,63,3829},{11,63,3829},{11,63,3829},{10,59,2690},{7,63,2981},{5,63,86},{5,63,86},{5,43,131},{0,53,4014},{0,44,601},{15,63,145},{15,63,145},{15,63,145},{14,54,1},{28,3,2178},{5,63,85},{5,63,85},{1,45,2},{31,21,2178}, 
+{1,45,2},{31,38,2665},{20,63,1378},{17,63,5},{7,63,2},{31,38,2665},{31,48,2665},{7,63,2},{0,54,2669},{31,48,2665},{0,54,2669},{10,0,2689},{10,0,2689},{10,0,2689},{10,0,2689},{5,59,1},{5,59,1},{5,59,1},{5,39,2},{0,40,520},{0,40,520},{15,63,12507},{13,63,7370},{12,63,4001},{11,63,2705},{13,63,14148},{8,63,5491},{6,63,154},{5,53,3033},{1,63,13399},{0,53,3297},{19,63,4949}, +{17,63,2261},{16,63,202},{15,61,185},{31,20,8712},{11,63,4644},{7,63,145},{0,54,2365},{31,37,8712},{0,54,2365},{12,63,4001},{12,63,4001},{12,63,4001},{11,61,2690},{9,63,3204},{6,63,154},{6,63,154},{6,45,131},{0,57,3762},{0,46,419},{16,63,202},{16,63,202},{16,63,202},{15,56,1},{29,5,2178},{7,63,145},{7,63,145},{2,47,2},{31,24,2178},{2,47,2},{30,45,2178},{22,63,1145},{18,63,1}, +{10,63,1},{30,45,2178},{31,50,2178},{10,63,1},{0,55,2180},{31,50,2178},{0,55,2180},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{6,61,1},{6,61,1},{6,61,1},{6,41,2},{0,44,400},{0,44,400},{16,63,11658},{14,63,7195},{13,63,4225},{12,63,2693},{14,63,13066},{10,63,5014},{8,63,261},{6,54,2366},{3,63,12366},{0,55,2274},{20,63,4338},{18,63,2037},{17,63,289},{16,62,80},{31,24,7578}, +{13,63,4037},{9,63,202},{0,55,1698},{29,42,7578},{0,55,1698},{13,63,4225},{13,63,4225},{13,63,4225},{12,63,2693},{10,63,3429},{8,63,261},{8,63,261},{7,47,132},{0,61,3509},{0,49,290},{17,63,289},{17,63,289},{17,63,289},{16,59,0},{31,4,2178},{9,63,202},{9,63,202},{3,49,2},{30,29,2178},{3,49,2},{31,44,1625},{23,63,850},{20,63,0},{13,63,1},{31,44,1625},{31,52,1625},{13,63,1}, +{0,56,1625},{31,52,1625},{0,56,1625},{12,0,2689},{12,0,2689},{12,0,2689},{12,0,2689},{7,63,5},{7,63,5},{7,63,5},{7,44,4},{0,49,289},{0,49,289},{16,63,11002},{15,63,7081},{14,63,4450},{13,63,2738},{15,63,12205},{11,63,4663},{9,63,411},{7,55,1813},{4,63,11643},{0,56,1550},{21,63,3802},{20,63,1845},{18,63,388},{17,62,25},{31,27,6661},{15,63,3525},{11,63,290},{0,57,1217},{29,44,6661}, 
+{0,57,1217},{14,63,4450},{14,63,4450},{14,63,4450},{13,63,2738},{11,63,3675},{9,63,411},{9,63,411},{8,49,131},{0,63,3354},{0,51,222},{18,63,388},{18,63,388},{18,63,388},{17,61,0},{30,13,2178},{11,63,290},{11,63,290},{4,51,2},{31,31,2178},{4,51,2},{31,47,1201},{24,63,653},{22,63,4},{15,63,1},{31,47,1201},{31,53,1201},{15,63,1},{0,57,1201},{31,53,1201},{0,57,1201},{13,0,2689}, +{13,0,2689},{13,0,2689},{13,0,2689},{8,63,17},{8,63,17},{8,63,17},{8,46,2},{0,53,205},{0,53,205},{17,63,10434},{16,63,7010},{15,63,4693},{14,63,2833},{16,63,11374},{12,63,4462},{10,63,629},{8,56,1358},{6,63,10895},{0,57,1002},{22,63,3334},{20,63,1701},{19,63,505},{18,63,0},{31,31,5829},{16,63,3145},{12,63,405},{1,58,842},{31,44,5829},{1,58,842},{15,63,4693},{15,63,4693},{15,63,4693}, +{14,63,2833},{12,63,3906},{10,63,629},{10,63,629},{9,51,131},{1,63,3525},{0,54,218},{19,63,505},{19,63,505},{19,63,505},{18,63,0},{31,15,2178},{12,63,405},{12,63,405},{5,53,2},{31,34,2178},{5,53,2},{31,49,841},{25,63,461},{23,63,1},{18,63,0},{31,49,841},{31,55,841},{18,63,0},{0,58,841},{31,55,841},{0,58,841},{14,0,2689},{14,0,2689},{14,0,2689},{14,0,2689},{9,63,50}, +{9,63,50},{9,63,50},{9,48,2},{0,57,137},{0,57,137},{18,63,9934},{17,63,6962},{16,63,4913},{15,63,2978},{17,63,10683},{13,63,4277},{11,63,915},{9,58,974},{8,63,10078},{0,59,630},{23,63,2934},{22,63,1605},{21,63,650},{19,63,25},{31,34,5082},{18,63,2769},{14,63,521},{2,59,546},{31,46,5082},{2,59,546},{16,63,4913},{16,63,4913},{16,63,4913},{15,63,2978},{14,63,4170},{11,63,915},{11,63,915}, +{10,53,131},{3,63,3789},{1,56,218},{21,63,650},{21,63,650},{21,63,650},{19,63,25},{31,20,2178},{14,63,521},{14,63,521},{6,55,2},{31,37,2178},{6,55,2},{31,52,545},{26,63,305},{25,63,4},{20,63,1},{31,52,545},{30,58,545},{20,63,1},{0,59,545},{30,58,545},{0,59,545},{15,0,2689},{15,0,2689},{15,0,2689},{15,0,2689},{11,63,74},{11,63,74},{11,63,74},{10,49,2},{0,59,85}, 
+{0,59,85},{19,63,9465},{18,63,6955},{17,63,5233},{16,63,3218},{18,63,10003},{14,63,4183},{13,63,1258},{11,58,645},{9,63,9445},{1,61,409},{24,63,2529},{23,63,1525},{22,63,785},{20,63,100},{31,38,4344},{20,63,2345},{16,63,698},{4,60,289},{31,48,4344},{4,60,289},{17,63,5233},{17,63,5233},{17,63,5233},{16,63,3218},{15,63,4491},{13,63,1258},{13,63,1258},{11,56,120},{5,63,4171},{2,59,213},{22,63,785}, +{22,63,785},{22,63,785},{20,63,100},{31,26,2178},{16,63,698},{16,63,698},{7,57,0},{30,42,2178},{7,57,0},{31,55,288},{28,63,162},{26,63,4},{23,63,1},{31,55,288},{28,62,288},{23,63,1},{0,60,288},{28,62,288},{0,60,288},{16,0,2689},{16,0,2689},{16,0,2689},{16,0,2689},{12,63,113},{12,63,113},{12,63,113},{11,52,4},{0,63,45},{0,63,45},{20,63,9219},{19,63,6985},{18,63,5530}, +{17,63,3473},{19,63,9496},{15,63,4186},{14,63,1630},{11,60,404},{11,63,8961},{3,61,277},{25,63,2275},{24,63,1509},{23,63,932},{22,63,208},{30,45,3779},{21,63,2086},{18,63,850},{6,61,129},{31,50,3779},{6,61,129},{18,63,5530},{18,63,5530},{18,63,5530},{17,63,3473},{16,63,4770},{14,63,1630},{14,63,1630},{12,57,129},{8,63,4442},{3,61,213},{23,63,932},{23,63,932},{23,63,932},{22,63,208},{31,31,2178}, +{18,63,850},{18,63,850},{8,59,1},{31,44,2178},{8,59,1},{31,58,128},{29,63,72},{28,63,1},{26,63,0},{31,58,128},{31,60,128},{26,63,0},{0,61,128},{31,60,128},{0,61,128},{17,0,2689},{17,0,2689},{17,0,2689},{17,0,2689},{13,63,170},{13,63,170},{13,63,170},{12,54,2},{2,63,89},{2,63,89},{21,63,8929},{20,63,7062},{19,63,5845},{18,63,3778},{20,63,9188},{17,63,4260},{15,63,2070}, +{13,61,234},{13,63,8680},{4,62,212},{26,63,2089},{25,63,1515},{24,63,1073},{23,63,353},{29,52,3299},{22,63,1913},{20,63,965},{8,62,32},{28,56,3299},{8,62,32},{19,63,5845},{19,63,5845},{19,63,5845},{18,63,3778},{17,63,5124},{15,63,2070},{15,63,2070},{13,59,129},{9,63,4725},{4,62,196},{24,63,1073},{24,63,1073},{24,63,1073},{23,63,353},{31,36,2178},{20,63,965},{20,63,965},{9,61,1},{31,47,2178}, 
+{9,61,1},{31,60,34},{30,63,18},{29,63,4},{28,63,1},{31,60,34},{31,61,34},{28,63,1},{0,62,32},{31,61,34},{0,62,32},{18,0,2689},{18,0,2689},{18,0,2689},{18,0,2689},{14,63,245},{14,63,245},{14,63,245},{13,56,2},{4,63,164},{4,63,164},{22,63,8707},{21,63,7170},{21,63,6209},{19,63,4133},{21,63,8853},{18,63,4387},{17,63,2548},{14,62,154},{14,63,8388},{6,63,244},{27,63,1971}, +{26,63,1557},{25,63,1250},{24,63,565},{30,52,2904},{23,63,1826},{22,63,1145},{10,63,1},{28,58,2904},{10,63,1},{21,63,6209},{21,63,6209},{21,63,6209},{19,63,4133},{19,63,5460},{17,63,2548},{17,63,2548},{14,61,129},{11,63,5085},{6,63,244},{25,63,1250},{25,63,1250},{25,63,1250},{24,63,565},{30,45,2178},{22,63,1145},{22,63,1145},{10,63,1},{31,50,2178},{10,63,1},{31,63,0},{31,63,0},{31,63,0}, +{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{19,0,2689},{19,0,2689},{19,0,2689},{19,0,2689},{15,63,338},{15,63,338},{15,63,338},{14,58,2},{6,63,244},{6,63,244},{23,63,7705},{22,63,6418},{21,63,5633},{20,63,3845},{22,63,7654},{19,63,3874},{18,63,2310},{15,63,53},{15,63,7258},{8,63,317},{27,63,1458},{27,63,1186},{26,63,932},{25,63,425},{30,54,2166}, +{25,63,1398},{23,63,850},{13,63,1},{29,58,2166},{13,63,1},{21,63,5633},{21,63,5633},{21,63,5633},{20,63,3845},{19,63,4830},{18,63,2310},{18,63,2310},{15,62,45},{13,63,4506},{8,63,317},{26,63,932},{26,63,932},{26,63,932},{25,63,425},{31,44,1625},{23,63,850},{23,63,850},{13,63,1},{31,52,1625},{13,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0}, +{0,63,0},{31,63,0},{0,63,0},{20,0,2689},{20,0,2689},{20,0,2689},{20,0,2689},{16,63,449},{16,63,449},{16,63,449},{15,60,4},{8,63,317},{8,63,317},{24,63,6881},{23,63,5814},{22,63,5138},{21,63,3650},{23,63,6713},{20,63,3400},{19,63,2142},{16,63,5},{17,63,6397},{9,63,425},{28,63,1075},{27,63,866},{27,63,697},{26,63,320},{30,56,1601},{26,63,1041},{24,63,653},{15,63,1},{30,58,1601}, 
+{15,63,1},{22,63,5138},{22,63,5138},{22,63,5138},{21,63,3650},{21,63,4313},{19,63,2142},{19,63,2142},{16,63,5},{14,63,3981},{9,63,425},{27,63,697},{27,63,697},{27,63,697},{26,63,320},{31,47,1201},{24,63,653},{24,63,653},{15,63,1},{31,53,1201},{15,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{21,0,2689}, +{21,0,2689},{21,0,2689},{21,0,2689},{18,63,549},{18,63,549},{18,63,549},{16,62,1},{9,63,425},{9,63,425},{24,63,6097},{24,63,5285},{23,63,4693},{22,63,3473},{23,63,5833},{21,63,3067},{20,63,1988},{17,63,10},{18,63,5571},{11,63,541},{28,63,739},{28,63,595},{27,63,505},{27,63,233},{31,54,1121},{26,63,737},{25,63,461},{18,63,0},{29,60,1121},{18,63,0},{23,63,4693},{23,63,4693},{23,63,4693}, +{22,63,3473},{22,63,3845},{20,63,1988},{20,63,1988},{17,63,10},{15,63,3542},{11,63,541},{27,63,505},{27,63,505},{27,63,505},{27,63,233},{31,49,841},{25,63,461},{25,63,461},{18,63,0},{31,55,841},{18,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{22,0,2689},{22,0,2689},{22,0,2689},{22,0,2689},{19,63,666}, +{19,63,666},{19,63,666},{17,63,10},{11,63,541},{11,63,541},{25,63,5427},{24,63,4757},{24,63,4273},{23,63,3314},{24,63,5002},{22,63,2788},{21,63,1898},{18,63,65},{20,63,4714},{13,63,698},{29,63,489},{28,63,387},{28,63,306},{28,63,162},{31,56,726},{27,63,482},{26,63,305},{20,63,1},{30,60,726},{20,63,1},{24,63,4273},{24,63,4273},{24,63,4273},{23,63,3314},{22,63,3429},{21,63,1898},{21,63,1898}, +{18,63,65},{17,63,3213},{13,63,698},{28,63,306},{28,63,306},{28,63,306},{28,63,162},{31,52,545},{26,63,305},{26,63,305},{20,63,1},{30,58,545},{20,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{23,0,2689},{23,0,2689},{23,0,2689},{23,0,2689},{20,63,832},{20,63,832},{20,63,832},{18,63,65},{13,63,698}, 
+{13,63,698},{4,63,33740},{0,63,5184},{0,45,446},{0,43,4126},{3,63,45594},{0,59,24105},{0,42,8295},{0,37,24703},{0,44,64117},{0,34,38807},{2,63,9704},{0,61,2866},{0,44,386},{0,37,3205},{14,4,18065},{0,38,13219},{0,34,6147},{0,24,13496},{25,0,18065},{0,24,13496},{0,31,1},{0,31,1},{0,31,1},{0,19,1},{0,16,1105},{0,15,584},{0,15,584},{0,9,605},{0,8,1273},{0,7,750},{0,31,1}, +{0,31,1},{0,31,1},{0,19,1},{4,2,1105},{0,15,584},{0,15,584},{0,9,605},{8,0,1105},{0,9,605},{21,5,9248},{0,61,2866},{0,44,386},{0,37,3205},{21,5,9248},{31,8,9248},{0,37,3205},{0,28,9256},{31,8,9248},{0,28,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,63,38380},{1,63,6614},{0,47,261}, +{0,45,3769},{4,63,50747},{0,63,24961},{0,44,8337},{0,39,25658},{0,46,65535},{0,36,41267},{2,63,10152},{0,63,2624},{0,46,221},{0,41,2929},{16,0,19334},{0,42,13795},{0,36,6237},{0,26,14121},{26,1,19334},{0,26,14121},{0,36,1},{0,36,1},{0,36,1},{0,22,0},{0,18,1513},{0,17,769},{0,17,769},{0,9,845},{0,9,1742},{0,9,1014},{0,36,1},{0,36,1},{0,36,1},{0,22,0},{5,1,1513}, +{0,17,769},{0,17,769},{0,9,845},{9,0,1513},{0,9,845},{24,0,9248},{0,63,2624},{0,46,221},{0,41,2929},{24,0,9248},{31,11,9248},{0,41,2929},{0,30,9256},{31,11,9248},{0,30,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,63,43788},{1,63,8598},{0,49,121},{0,47,3430},{4,63,56155},{0,63,26241},{0,46,8415}, +{0,41,26663},{0,46,65535},{0,36,43795},{3,63,10706},{0,63,2624},{0,48,116},{0,43,2650},{17,0,20689},{0,44,14404},{0,38,6363},{0,28,14796},{26,3,20689},{0,28,14796},{0,42,0},{0,42,0},{0,42,0},{0,25,0},{0,21,1985},{0,19,1009},{0,19,1009},{0,11,1090},{0,11,2281},{0,11,1346},{0,42,0},{0,42,0},{0,42,0},{0,25,0},{6,1,1985},{0,19,1009},{0,19,1009},{0,11,1090},{9,2,1985}, 
+{0,11,1090},{25,2,9248},{0,63,2624},{0,48,116},{0,43,2650},{25,2,9248},{27,19,9248},{0,43,2650},{0,32,9250},{27,19,9248},{0,32,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,63,49566},{1,63,11350},{0,51,36},{0,50,3105},{4,63,62331},{0,63,28289},{0,48,8549},{0,43,27718},{0,49,65535},{0,38,46395},{4,63,11395}, +{0,63,2880},{0,51,36},{0,45,2389},{17,4,22129},{0,46,15067},{0,40,6525},{0,28,15500},{27,4,22129},{0,28,15500},{0,47,0},{0,47,0},{0,47,0},{0,28,1},{0,24,2521},{0,21,1285},{0,21,1285},{0,13,1385},{0,12,2905},{0,11,1714},{0,47,0},{0,47,0},{0,47,0},{0,28,1},{7,0,2521},{0,21,1285},{0,21,1285},{0,13,1385},{11,1,2521},{0,13,1385},{26,4,9248},{1,63,2866},{0,51,36}, +{0,45,2389},{26,4,9248},{28,21,9248},{0,45,2389},{0,34,9250},{28,21,9248},{0,34,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,63,56892},{2,63,15166},{0,53,5},{0,52,2726},{5,63,65535},{0,63,31511},{0,51,8735},{0,45,28953},{0,53,65535},{0,40,49505},{4,63,12385},{1,63,3380},{0,53,1},{0,47,2120},{19,1,23851}, +{0,49,15876},{0,42,6761},{0,30,16331},{29,4,23851},{0,30,16331},{0,53,0},{0,53,0},{0,53,0},{0,32,1},{0,27,3200},{0,23,1640},{0,23,1640},{0,15,1769},{0,13,3689},{0,13,2169},{0,53,0},{0,53,0},{0,53,0},{0,32,1},{7,3,3200},{0,23,1640},{0,23,1640},{0,15,1769},{13,0,3200},{0,15,1769},{28,3,9248},{3,63,3204},{0,53,1},{0,47,2120},{28,3,9248},{31,21,9248},{0,47,2120}, +{0,36,9256},{31,21,9248},{0,36,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,63870},{2,63,19230},{0,55,30},{0,54,2429},{5,63,65535},{1,63,35016},{0,53,8921},{0,47,30108},{0,55,65535},{0,42,52385},{5,63,13379},{2,63,4026},{0,56,18},{0,50,1885},{20,1,25472},{0,53,16616},{0,46,6989},{0,33,17105},{29,6,25472}, 
+{0,33,17105},{0,58,0},{0,58,0},{0,58,0},{0,35,0},{0,29,3874},{0,25,1994},{0,25,1994},{0,16,2129},{0,15,4454},{0,15,2637},{0,58,0},{0,58,0},{0,58,0},{0,35,0},{8,2,3872},{0,25,1994},{0,25,1994},{0,16,2129},{12,3,3872},{0,16,2129},{29,5,9248},{5,63,3589},{1,55,1},{0,50,1885},{29,5,9248},{31,24,9248},{0,50,1885},{0,38,9256},{31,24,9248},{0,38,9256},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,65535},{2,63,24002},{0,57,109},{0,56,2154},{6,63,65535},{1,63,38780},{0,55,8815},{0,48,30807},{0,57,65535},{0,44,54965},{6,63,14345},{2,63,4766},{1,58,54},{0,52,1670},{20,5,26744},{0,55,17059},{0,46,7005},{0,33,17609},{31,6,26744},{0,33,17609},{0,63,5},{0,63,5},{0,63,5}, +{0,38,4},{0,32,4420},{0,29,2210},{0,29,2210},{0,18,2378},{0,17,5115},{0,16,2981},{0,63,5},{0,63,5},{0,63,5},{0,38,4},{9,2,4418},{0,29,2210},{0,29,2210},{0,18,2378},{16,0,4418},{0,18,2378},{30,7,9248},{8,63,3904},{2,57,1},{0,52,1666},{30,7,9248},{28,31,9248},{0,52,1666},{0,40,9256},{28,31,9248},{0,40,9256},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,1,0},{0,1,1},{0,1,1},{7,63,65535},{3,63,29032},{0,60,314},{0,58,1989},{6,63,65535},{2,63,42151},{0,57,7781},{0,50,30102},{0,59,65535},{0,46,56345},{7,63,14819},{3,63,5416},{2,60,54},{0,54,1565},{23,0,26744},{0,59,16547},{0,49,6177},{0,37,17105},{31,9,26744},{0,37,17105},{1,63,84},{1,63,84},{1,63,84},{1,40,68},{0,38,4418},{0,31,1972},{0,31,1972}, +{0,20,2129},{0,19,5364},{0,18,2915},{1,63,20},{1,63,20},{1,63,20},{1,40,4},{11,1,4418},{0,31,1972},{0,31,1972},{0,20,2129},{17,2,4418},{0,20,2129},{31,9,9248},{8,63,4160},{3,59,1},{0,54,1465},{31,9,9248},{28,34,9248},{0,54,1465},{0,42,9256},{28,34,9248},{0,42,9256},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{0,7,0},{0,7,0},{0,7,0},{0,4,0},{0,3,25}, 
+{0,3,25},{7,63,65535},{3,63,35719},{1,62,657},{0,60,1985},{7,63,65535},{2,63,46660},{0,59,6696},{0,52,29368},{0,63,65535},{0,49,58301},{9,63,15473},{5,63,6173},{3,62,45},{1,56,1566},{23,6,26744},{0,63,15992},{0,53,5318},{0,39,16547},{30,14,26744},{0,39,16547},{2,63,329},{2,63,329},{2,63,329},{1,44,186},{0,44,4418},{0,36,1709},{0,36,1709},{0,22,1872},{0,23,5702},{0,20,2885},{3,63,34}, +{3,63,34},{3,63,34},{2,42,10},{13,0,4418},{0,36,1709},{0,36,1709},{0,22,1872},{20,2,4418},{0,22,1872},{31,15,9248},{10,63,4570},{4,61,4},{0,56,1268},{31,15,9248},{31,34,9248},{0,56,1268},{0,45,9250},{31,34,9248},{0,45,9250},{1,0,185},{1,0,185},{1,0,185},{1,0,185},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{0,6,97},{0,6,97},{7,63,65535},{4,63,40786},{1,63,1122}, +{0,62,2034},{7,63,65535},{2,63,49800},{0,61,5634},{0,54,27965},{0,63,65535},{0,49,58553},{10,63,15531},{6,63,6593},{4,63,61},{2,58,1482},{25,4,26259},{0,63,15284},{0,55,4484},{0,41,15722},{28,19,26259},{0,41,15722},{2,63,633},{2,63,633},{2,63,633},{2,46,378},{0,49,4418},{0,40,1489},{0,40,1489},{0,24,1665},{0,25,6029},{0,22,2897},{4,63,61},{4,63,61},{4,63,61},{3,44,10},{14,2,4418}, +{0,40,1489},{0,40,1489},{0,24,1665},{24,0,4418},{0,24,1665},{31,19,8978},{11,63,4744},{5,63,0},{0,58,1025},{31,19,8978},{30,38,8978},{0,58,1025},{0,46,8986},{30,38,8978},{0,46,8986},{2,0,377},{2,0,377},{2,0,377},{2,0,377},{0,18,0},{0,18,0},{0,18,0},{0,11,0},{0,8,193},{0,8,193},{8,63,65535},{4,63,40898},{1,63,1890},{1,62,2029},{7,63,65535},{3,63,47871},{0,61,4194}, +{0,56,24760},{0,63,65535},{0,49,55881},{11,63,14325},{8,63,6051},{5,63,100},{3,58,1197},{27,1,24371},{0,63,13716},{0,57,3402},{0,41,13914},{30,19,24371},{0,41,13914},{3,63,1058},{3,63,1058},{3,63,1058},{2,49,618},{0,54,4418},{0,44,1285},{0,44,1285},{0,26,1476},{0,27,6410},{0,25,2937},{5,63,100},{5,63,100},{5,63,100},{4,46,5},{16,0,4418},{0,44,1285},{0,44,1285},{0,26,1476},{26,1,4418}, 
+{0,26,1476},{31,22,7938},{13,63,4225},{7,63,4},{0,60,628},{31,22,7938},{28,42,7938},{0,60,628},{0,47,7946},{28,42,7938},{0,47,7946},{2,0,617},{2,0,617},{2,0,617},{2,0,617},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{0,11,320},{0,11,320},{8,63,65535},{4,63,41266},{1,63,2914},{1,62,2109},{7,63,65535},{3,63,46175},{0,61,3010},{0,56,21624},{0,63,65535},{0,51,53461},{12,63,13140}, +{8,63,5571},{6,63,157},{4,59,932},{28,1,22568},{0,63,12404},{0,57,2474},{0,43,12155},{30,21,22568},{0,43,12155},{4,63,1630},{4,63,1630},{4,63,1630},{3,51,938},{0,59,4420},{0,46,1117},{0,46,1117},{0,30,1280},{0,31,6822},{0,27,3009},{6,63,157},{6,63,157},{6,63,157},{5,48,4},{17,2,4418},{0,46,1117},{0,46,1117},{0,30,1280},{27,3,4418},{0,30,1280},{30,28,6962},{14,63,3709},{8,63,1}, +{0,60,340},{30,28,6962},{31,40,6962},{0,60,340},{0,48,6964},{31,40,6962},{0,48,6964},{3,0,937},{3,0,937},{3,0,937},{3,0,937},{0,29,0},{0,29,0},{0,29,0},{0,17,0},{0,13,482},{0,13,482},{9,63,65535},{5,63,41956},{2,63,4257},{1,62,2505},{7,63,65535},{3,63,44573},{0,62,1944},{0,56,18402},{0,63,65535},{0,51,50815},{13,63,11930},{10,63,5125},{7,63,250},{5,59,701},{28,5,20642}, +{1,63,11209},{0,59,1634},{0,45,10346},{31,22,20642},{0,45,10346},{4,63,2350},{4,63,2350},{4,63,2350},{3,54,1361},{0,63,4450},{0,51,914},{0,51,914},{0,32,1097},{0,34,7357},{0,29,3131},{7,63,250},{7,63,250},{7,63,250},{6,50,10},{19,1,4418},{0,51,914},{0,51,914},{0,32,1097},{29,4,4418},{0,32,1097},{31,27,5941},{15,63,3176},{10,63,0},{0,62,116},{31,27,5941},{31,42,5941},{0,62,116}, +{0,49,5945},{31,42,5941},{0,49,5945},{3,0,1360},{3,0,1360},{3,0,1360},{3,0,1360},{0,34,1},{0,34,1},{0,34,1},{0,21,1},{0,17,706},{0,17,706},{9,63,65535},{5,63,42660},{2,63,5617},{1,63,3088},{8,63,65535},{3,63,43421},{0,62,1240},{0,56,15810},{0,63,65535},{0,51,48735},{13,63,10922},{11,63,4753},{8,63,360},{6,61,509},{29,5,19021},{2,63,10246},{0,61,1088},{0,47,8885},{31,24,19021}, 
+{0,47,8885},{5,63,3131},{5,63,3131},{5,63,3131},{4,56,1818},{1,63,4580},{0,55,754},{0,55,754},{0,35,928},{0,36,7846},{0,31,3281},{8,63,360},{8,63,360},{8,63,360},{7,52,10},{20,3,4418},{0,55,754},{0,55,754},{0,35,928},{30,6,4418},{0,35,928},{31,30,5101},{17,63,2777},{11,63,9},{0,62,20},{31,30,5101},{31,43,5101},{0,62,20},{0,50,5105},{31,43,5101},{0,50,5105},{4,0,1818}, +{4,0,1818},{4,0,1818},{4,0,1818},{0,40,0},{0,40,0},{0,40,0},{0,24,0},{0,17,914},{0,17,914},{9,63,65535},{5,63,43620},{2,63,7233},{1,63,3920},{8,63,65535},{3,63,42525},{0,63,738},{0,58,13413},{0,63,65535},{0,51,46911},{14,63,9978},{11,63,4449},{10,63,452},{8,60,344},{31,2,17485},{3,63,9369},{0,61,704},{0,48,7498},{29,29,17485},{0,48,7498},{6,63,4058},{6,63,4058},{6,63,4058}, +{4,59,2315},{2,63,4874},{0,59,610},{0,59,610},{0,37,769},{0,38,8389},{0,32,3497},{10,63,452},{10,63,452},{10,63,452},{8,54,5},{21,5,4418},{0,59,610},{0,59,610},{0,37,769},{31,8,4418},{0,37,769},{31,32,4325},{18,63,2357},{13,63,0},{1,63,0},{31,32,4325},{31,45,4325},{1,63,0},{0,51,4329},{31,45,4325},{0,51,4329},{4,0,2314},{4,0,2314},{4,0,2314},{4,0,2314},{0,45,0}, +{0,45,0},{0,45,0},{0,27,0},{0,19,1184},{0,19,1184},{9,63,65535},{5,63,44836},{2,63,9105},{2,63,4905},{8,63,65535},{3,63,41885},{0,63,482},{0,58,11125},{0,63,65535},{0,51,45343},{15,63,9102},{13,63,4161},{11,63,557},{9,61,212},{30,9,16034},{5,63,8602},{0,63,482},{0,48,6250},{29,31,16034},{0,48,6250},{6,63,5066},{6,63,5066},{6,63,5066},{5,61,2907},{2,63,5322},{0,63,482},{0,63,482}, +{0,39,628},{0,42,8965},{0,36,3717},{11,63,557},{11,63,557},{11,63,557},{9,56,5},{24,0,4418},{0,63,482},{0,63,482},{0,39,628},{31,11,4418},{0,39,628},{31,35,3613},{20,63,1940},{15,63,4},{4,63,1},{31,35,3613},{28,50,3613},{4,63,1},{0,52,3617},{28,50,3613},{0,52,3617},{5,0,2906},{5,0,2906},{5,0,2906},{5,0,2906},{0,50,0},{0,50,0},{0,50,0},{0,30,1},{0,23,1480}, 
+{0,23,1480},{9,63,65535},{5,63,46510},{3,63,11362},{2,63,6237},{9,63,65535},{3,63,41471},{0,63,500},{0,58,8857},{0,63,65535},{0,53,43697},{16,63,8139},{14,63,3853},{12,63,680},{10,62,89},{30,13,14504},{8,63,7667},{0,63,500},{0,50,4961},{31,31,14504},{0,50,4961},{7,63,6337},{7,63,6337},{7,63,6337},{5,63,3642},{3,63,5962},{0,63,500},{0,63,500},{0,41,493},{0,44,9656},{0,38,3995},{12,63,680}, +{12,63,680},{12,63,680},{10,59,10},{24,6,4418},{0,63,500},{0,63,500},{0,41,493},{29,17,4418},{0,41,493},{31,38,2888},{20,63,1517},{16,63,1},{7,63,1},{31,38,2888},{31,48,2888},{7,63,1},{0,53,2896},{31,48,2888},{0,53,2896},{5,0,3617},{5,0,3617},{5,0,3617},{5,0,3617},{0,56,0},{0,56,0},{0,56,0},{0,34,1},{0,25,1853},{0,25,1853},{10,63,65535},{6,63,48082},{3,63,13570}, +{2,63,7693},{9,63,65535},{3,63,41375},{0,63,788},{0,58,7113},{0,63,65535},{0,53,42465},{17,63,7409},{15,63,3625},{13,63,821},{11,62,34},{31,13,13235},{8,63,6899},{2,63,628},{0,52,3956},{30,34,13235},{0,52,3956},{7,63,7681},{7,63,7681},{7,63,7681},{6,63,4437},{4,63,6659},{1,63,738},{1,63,738},{0,43,394},{0,46,10331},{0,40,4289},{13,63,821},{13,63,821},{13,63,821},{11,61,10},{27,1,4418}, +{2,63,628},{2,63,628},{0,43,394},{30,19,4418},{0,43,394},{31,41,2312},{21,63,1217},{18,63,1},{9,63,0},{31,41,2312},{29,52,2312},{9,63,0},{0,54,2320},{29,52,2312},{0,54,2320},{6,0,4337},{6,0,4337},{6,0,4337},{6,0,4337},{0,61,1},{0,61,1},{0,61,1},{0,37,0},{0,27,2225},{0,27,2225},{10,63,65535},{6,63,49890},{3,63,16034},{2,63,9405},{9,63,65535},{4,63,41526},{0,63,1332}, +{0,59,5520},{0,63,65535},{0,53,41489},{18,63,6747},{16,63,3459},{14,63,980},{12,63,5},{30,20,12051},{10,63,6275},{4,63,801},{0,53,3089},{31,35,12051},{0,53,3089},{8,63,9062},{8,63,9062},{8,63,9062},{7,63,5410},{4,63,7555},{1,63,1154},{1,63,1154},{0,46,306},{0,49,11046},{0,42,4619},{14,63,980},{14,63,980},{14,63,980},{12,62,5},{28,3,4418},{4,63,801},{4,63,801},{0,46,306},{31,21,4418}, 
+{0,46,306},{30,47,1800},{23,63,949},{19,63,4},{12,63,1},{30,47,1800},{31,51,1800},{12,63,1},{0,55,1808},{31,51,1800},{0,55,1808},{6,0,5105},{6,0,5105},{6,0,5105},{6,0,5105},{0,63,36},{0,63,36},{0,63,36},{0,40,0},{0,29,2633},{0,29,2633},{10,63,65535},{6,63,51954},{3,63,18754},{3,63,11330},{9,63,65535},{4,63,41798},{1,63,2082},{0,60,4084},{0,63,65535},{0,53,40769},{19,63,6153}, +{17,63,3297},{16,63,1154},{13,63,20},{31,20,10952},{11,63,5708},{6,63,965},{0,54,2281},{31,37,10952},{0,54,2281},{9,63,10545},{9,63,10545},{9,63,10545},{7,63,6482},{5,63,8549},{2,63,1716},{2,63,1716},{0,48,208},{0,53,11786},{0,44,4985},{16,63,1154},{16,63,1154},{16,63,1154},{13,63,20},{29,5,4418},{6,63,965},{6,63,965},{0,48,208},{31,24,4418},{0,48,208},{31,46,1352},{23,63,725},{21,63,0}, +{14,63,1},{31,46,1352},{30,54,1352},{14,63,1},{0,56,1360},{30,54,1352},{0,56,1360},{7,0,5953},{7,0,5953},{7,0,5953},{7,0,5953},{1,63,145},{1,63,145},{1,63,145},{0,43,1},{0,31,3077},{0,31,3077},{10,63,65535},{6,63,54582},{4,63,21886},{3,63,13652},{9,63,65535},{4,63,42410},{1,63,3144},{0,60,2770},{0,63,65535},{0,55,40127},{19,63,5649},{18,63,3157},{17,63,1325},{15,63,74},{31,24,9818}, +{13,63,5241},{8,63,1108},{0,56,1538},{29,42,9818},{0,56,1538},{10,63,12376},{10,63,12376},{10,63,12376},{8,63,7844},{6,63,9861},{3,63,2576},{3,63,2576},{0,50,145},{0,55,12659},{0,46,5441},{17,63,1325},{17,63,1325},{17,63,1325},{15,63,74},{31,4,4418},{8,63,1108},{8,63,1108},{0,50,145},{30,29,4418},{0,50,145},{31,49,925},{25,63,505},{23,63,1},{17,63,1},{31,49,925},{30,56,925},{17,63,1}, +{0,58,929},{30,56,925},{0,58,929},{7,0,6970},{7,0,6970},{7,0,6970},{7,0,6970},{1,63,388},{1,63,388},{1,63,388},{0,47,0},{0,34,3625},{0,34,3625},{10,63,65535},{7,63,57052},{4,63,24910},{3,63,15988},{9,63,65535},{4,63,43226},{1,63,4360},{0,61,1833},{0,63,65535},{0,55,39743},{21,63,5202},{19,63,3073},{18,63,1508},{16,63,180},{31,27,8901},{14,63,4814},{10,63,1300},{0,57,1021},{29,44,8901}, 
+{0,57,1021},{10,63,14136},{10,63,14136},{10,63,14136},{8,63,9252},{7,63,11195},{3,63,3536},{3,63,3536},{0,53,89},{0,59,13491},{0,49,5921},{18,63,1508},{18,63,1508},{18,63,1508},{16,63,180},{30,13,4418},{10,63,1300},{10,63,1300},{0,53,89},{31,31,4418},{0,53,89},{31,51,613},{26,63,337},{24,63,1},{20,63,1},{31,51,613},{31,56,613},{20,63,1},{0,59,617},{31,56,613},{0,59,617},{8,0,7956}, +{8,0,7956},{8,0,7956},{8,0,7956},{2,63,697},{2,63,697},{2,63,697},{0,50,0},{0,36,4141},{0,36,4141},{11,63,65535},{7,63,59708},{4,63,28190},{3,63,18580},{10,63,65535},{5,63,44295},{1,63,5832},{0,61,1081},{0,63,65535},{0,55,39615},{22,63,4818},{20,63,3017},{19,63,1709},{17,63,325},{31,31,8069},{15,63,4473},{11,63,1514},{0,58,593},{31,44,8069},{0,58,593},{11,63,15965},{11,63,15965},{11,63,15965}, +{9,63,10757},{7,63,12667},{4,63,4662},{4,63,4662},{0,55,50},{0,61,14340},{0,51,6395},{19,63,1709},{19,63,1709},{19,63,1709},{17,63,325},{31,15,4418},{11,63,1514},{11,63,1514},{0,55,50},{31,34,4418},{0,55,50},{31,54,365},{27,63,205},{26,63,1},{22,63,1},{31,54,365},{31,58,365},{22,63,1},{0,60,369},{31,58,365},{0,60,369},{8,0,8980},{8,0,8980},{8,0,8980},{8,0,8980},{2,63,1097}, +{2,63,1097},{2,63,1097},{0,53,0},{0,40,4689},{0,40,4689},{11,63,65535},{8,63,58981},{5,63,29551},{4,63,19751},{10,63,65535},{5,63,43215},{2,63,6910},{1,62,614},{0,63,65535},{0,57,34909},{23,63,4502},{21,63,3011},{20,63,1973},{18,63,520},{31,34,7322},{17,63,4242},{13,63,1769},{0,60,274},{31,46,7322},{0,60,274},{12,63,16739},{12,63,16739},{12,63,16739},{10,63,11492},{8,63,13636},{5,63,5510},{5,63,5510}, +{0,58,53},{0,63,14139},{0,53,5981},{20,63,1973},{20,63,1973},{20,63,1973},{18,63,520},{31,20,4418},{13,63,1769},{13,63,1769},{0,58,17},{31,37,4418},{0,58,17},{31,57,181},{28,63,97},{27,63,4},{25,63,0},{31,57,181},{31,59,181},{25,63,0},{0,61,185},{31,59,181},{0,61,185},{9,0,9248},{9,0,9248},{9,0,9248},{9,0,9248},{3,63,1348},{3,63,1348},{3,63,1348},{1,55,4},{0,42,4545}, 
+{0,42,4545},{12,63,65535},{9,63,57270},{6,63,30345},{5,63,20521},{11,63,65535},{6,63,41449},{3,63,8015},{2,62,242},{0,63,65535},{0,57,28330},{24,63,4181},{22,63,3053},{21,63,2248},{20,63,772},{31,38,6584},{20,63,3941},{15,63,2041},{0,61,77},{31,48,6584},{0,61,77},{13,63,17289},{13,63,17289},{13,63,17289},{11,63,12050},{10,63,14315},{7,63,6389},{7,63,6389},{2,60,41},{0,63,13860},{0,55,5252},{21,63,2248}, +{21,63,2248},{21,63,2248},{20,63,772},{31,26,4418},{15,63,2041},{15,63,2041},{0,60,4},{30,42,4418},{0,60,4},{31,60,50},{30,63,34},{29,63,0},{28,63,1},{31,60,50},{31,61,50},{28,63,1},{0,62,52},{31,61,50},{0,62,52},{10,0,9250},{10,0,9250},{10,0,9250},{10,0,9250},{4,63,1549},{4,63,1549},{4,63,1549},{2,57,2},{0,46,4141},{0,46,4141},{13,63,65535},{9,63,55894},{7,63,31068}, +{6,63,21256},{12,63,65535},{8,63,39740},{4,63,9073},{3,63,90},{0,63,65535},{0,59,23356},{24,63,3973},{23,63,3125},{23,63,2500},{21,63,1037},{30,45,6019},{20,63,3701},{17,63,2340},{0,63,4},{31,50,6019},{0,63,4},{14,63,17796},{14,63,17796},{14,63,17796},{12,63,12625},{11,63,14957},{8,63,7139},{8,63,7139},{3,62,41},{0,63,14020},{0,59,4652},{23,63,2500},{23,63,2500},{23,63,2500},{21,63,1037},{31,31,4418}, +{17,63,2340},{17,63,2340},{1,62,4},{31,44,4418},{1,62,4},{31,62,4},{31,63,4},{31,63,4},{30,63,1},{31,62,4},{31,63,4},{30,63,1},{0,63,4},{31,63,4},{0,63,4},{11,0,9250},{11,0,9250},{11,0,9250},{11,0,9250},{6,63,1765},{6,63,1765},{6,63,1765},{3,59,2},{0,51,3816},{0,51,3816},{13,63,65535},{10,63,53236},{8,63,30487},{7,63,21105},{13,63,65535},{8,63,37332},{5,63,9177}, +{4,63,36},{1,63,65535},{0,59,18680},{25,63,3443},{24,63,2741},{23,63,2248},{22,63,980},{31,44,5163},{21,63,3218},{20,63,2117},{3,63,1},{29,54,5163},{3,63,1},{15,63,17289},{15,63,17289},{15,63,17289},{13,63,12512},{12,63,14328},{9,63,7149},{9,63,7149},{4,63,20},{0,63,13376},{0,59,3944},{23,63,2248},{23,63,2248},{23,63,2248},{22,63,980},{31,34,3872},{20,63,2117},{20,63,2117},{3,63,1},{31,46,3872}, 
+{3,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{12,0,9248},{12,0,9248},{12,0,9248},{12,0,9248},{7,63,1972},{7,63,1972},{7,63,1972},{4,61,5},{0,55,3488},{0,55,3488},{14,63,65535},{11,63,50266},{9,63,29322},{8,63,20567},{13,63,65535},{9,63,35025},{6,63,8985},{5,63,21},{2,63,65535},{0,59,14712},{26,63,2873}, +{25,63,2283},{24,63,1825},{22,63,820},{29,52,4267},{22,63,2657},{20,63,1685},{5,63,1},{28,56,4267},{5,63,1},{16,63,16427},{16,63,16427},{16,63,16427},{14,63,12185},{13,63,13442},{10,63,6915},{10,63,6915},{5,63,5},{1,63,12539},{0,61,3314},{24,63,1825},{24,63,1825},{24,63,1825},{22,63,820},{31,37,3200},{20,63,1685},{20,63,1685},{5,63,1},{27,52,3200},{5,63,1},{31,63,0},{31,63,0},{31,63,0}, +{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{13,0,9248},{13,0,9248},{13,0,9248},{13,0,9248},{8,63,2250},{8,63,2250},{8,63,2250},{5,63,5},{0,57,3170},{0,57,3170},{15,63,65535},{12,63,47239},{10,63,28065},{9,63,20104},{14,63,65535},{10,63,32574},{7,63,8839},{6,63,54},{3,63,64890},{0,61,10964},{26,63,2252},{25,63,1806},{25,63,1445},{23,63,650},{29,54,3361}, +{23,63,2091},{21,63,1322},{8,63,0},{29,56,3361},{8,63,0},{17,63,15584},{17,63,15584},{17,63,15584},{15,63,11846},{14,63,12522},{11,63,6697},{11,63,6697},{6,63,50},{3,63,11669},{0,63,2834},{25,63,1445},{25,63,1445},{25,63,1445},{23,63,650},{31,40,2521},{21,63,1322},{21,63,1322},{8,63,0},{31,49,2521},{8,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0}, +{0,63,0},{31,63,0},{0,63,0},{14,0,9250},{14,0,9250},{14,0,9250},{14,0,9250},{9,63,2525},{9,63,2525},{9,63,2525},{6,63,50},{0,63,2834},{0,63,2834},{16,63,65535},{13,63,44559},{11,63,27000},{10,63,19705},{15,63,64179},{11,63,30525},{8,63,8677},{7,63,149},{3,63,60570},{0,61,8308},{27,63,1782},{26,63,1416},{25,63,1157},{24,63,520},{31,49,2646},{23,63,1691},{22,63,1040},{11,63,1},{30,56,2646}, 
+{11,63,1},{18,63,14889},{18,63,14889},{18,63,14889},{16,63,11585},{15,63,11778},{12,63,6555},{12,63,6555},{7,63,145},{3,63,11061},{0,63,2610},{25,63,1157},{25,63,1157},{25,63,1157},{24,63,520},{31,42,1985},{22,63,1040},{22,63,1040},{11,63,1},{30,52,1985},{11,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{15,0,9250}, +{15,0,9250},{15,0,9250},{15,0,9250},{10,63,2792},{10,63,2792},{10,63,2792},{7,63,145},{0,63,2610},{0,63,2610},{16,63,63318},{14,63,42019},{12,63,25930},{11,63,19324},{16,63,59178},{11,63,28845},{9,63,8605},{8,63,276},{6,63,56253},{0,61,6420},{27,63,1366},{27,63,1094},{26,63,872},{25,63,397},{31,51,2017},{25,63,1298},{23,63,794},{13,63,1},{31,56,2017},{13,63,1},{19,63,14244},{19,63,14244},{19,63,14244}, +{17,63,11312},{16,63,11037},{13,63,6429},{13,63,6429},{8,63,260},{6,63,10457},{0,63,2642},{26,63,872},{26,63,872},{26,63,872},{25,63,397},{31,45,1513},{23,63,794},{23,63,794},{13,63,1},{31,52,1513},{13,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,63,3074}, +{12,63,3074},{12,63,3074},{8,63,260},{0,63,2642},{0,63,2642},{17,63,58848},{15,63,39619},{13,63,24975},{12,63,19007},{16,63,54474},{13,63,27057},{10,63,8569},{9,63,461},{8,63,51302},{0,63,5046},{28,63,979},{27,63,806},{27,63,637},{26,63,292},{30,56,1473},{26,63,953},{24,63,605},{16,63,0},{30,58,1473},{16,63,0},{19,63,13604},{19,63,13604},{19,63,13604},{18,63,11057},{16,63,10429},{14,63,6339},{14,63,6339}, +{10,63,424},{8,63,9713},{1,63,2900},{27,63,637},{27,63,637},{27,63,637},{26,63,292},{31,48,1105},{24,63,605},{24,63,605},{16,63,0},{31,54,1105},{16,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{12,63,3330},{12,63,3330},{12,63,3330},{10,63,424},{1,63,2900}, +{1,63,2900}, diff --git 
a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc index 433b126a71..6669852923 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc @@ -46,4 +46,4 @@ {76,0,3},{255,1,27},{255,7,24},{255,1,27},{179,39,8},{255,22,16},{85,0,3},{255,2,27},{255,22,24},{255,7,27},{187,47,8},{255,47,16},{93,0,3},{255,4,27},{251,100,28},{182,0,7},{195,55,8},{255,71,16},{101,0,3},{255,4,27},{253,108,28},{191,0,7},{203,63,8},{255,95,16},{109,0,3},{255,7,27},{255,118,28},{200,0,7},{212,72,8},{255,123,16},{118,0,3},{246,0,7}, {255,129,28},{209,0,7},{220,80,8},{255,147,16},{126,0,3},{246,0,7},{255,138,28},{218,0,7},{228,88,8},{255,172,16},{134,0,3},{249,3,7},{245,91,8},{228,3,7},{236,96,8},{255,196,16},{142,6,3},{251,14,7},{250,102,8},{237,12,7},{245,105,8},{255,223,16},{151,15,3},{253,22,7},{254,112,8},{245,20,7},{253,113,8},{255,248,16},{159,23,3},{253,31,7},{255,124,8},{249,28,7}, {255,124,8},{255,0,0},{167,31,3},{254,39,7},{255,10,4},{252,37,7},{255,10,4},{255,0,0},{175,39,3},{255,48,7},{255,38,4},{254,48,7},{255,38,4},{255,0,0},{184,48,3},{255,56,7},{255,62,4},{255,56,7},{255,62,4},{255,0,0},{192,56,3},{255,65,7},{255,86,4},{255,65,7},{255,86,4},{255,0,0},{200,64,3},{255,74,7},{255,111,4},{255,77,7},{255,111,4},{255,0,0}, -{208,5,2}, \ No newline at end of file +{208,5,2}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc index 357b14b7a1..c0780988d8 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc @@ -478,4 +478,4 @@ 
{70,127,10779},{68,127,12146},{54,127,6176},{54,127,6176},{34,127,52},{14,127,7281},{2,127,1213},{109,127,937},{109,127,937},{109,127,937},{102,127,281},{127,84,1513},{93,127,565},{93,127,565},{43,127,0},{127,106,1513},{43,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{65,0,9250},{65,0,9250},{65,0,9250},{65,0,9250},{49,127,3656}, {49,127,3656},{49,127,3656},{34,127,52},{2,127,1213},{2,127,1213},{71,127,63180},{60,127,37225},{52,127,26137},{48,127,18128},{68,127,59595},{51,127,22636},{42,127,8480},{37,127,164},{22,127,37455},{0,126,2073},{114,127,1019},{111,127,766},{111,127,666},{105,127,205},{127,102,1473},{102,127,681},{99,127,405},{56,127,0},{127,115,1473},{56,127,0},{79,127,14066},{79,127,14066},{79,127,14066},{73,127,10571},{71,127,11450},{59,127,6166},{59,127,6166}, {37,127,148},{25,127,6914},{5,127,1413},{111,127,666},{111,127,666},{111,127,666},{105,127,205},{127,90,1105},{99,127,405},{99,127,405},{56,127,0},{127,109,1105},{56,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{69,0,9250},{69,0,9250},{69,0,9250},{69,0,9250},{52,127,3940},{52,127,3940},{52,127,3940},{37,127,148},{5,127,1413}, -{5,127,1413}, \ No newline at end of file +{5,127,1413}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_5.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_5.inc index 3e7610ff53..8244550959 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_5.inc +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_5.inc @@ -491,4 +491,4 @@ 
{17,31,10897},{16,31,12077},{13,31,6285},{13,31,6285},{8,31,68},{4,31,7686},{0,31,1341},{27,31,968},{27,31,968},{27,31,968},{25,31,325},{31,21,1513},{23,31,605},{23,31,605},{11,31,0},{31,26,1513},{11,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,31,3626}, {12,31,3626},{12,31,3626},{8,31,68},{0,31,1341},{0,31,1341},{21,31,17476},{20,31,14998},{20,31,14098},{18,31,10672},{20,31,16018},{15,31,8154},{15,31,6218},{9,31,200},{10,31,11338},{0,31,1613},{28,31,1041},{27,31,801},{27,31,680},{26,31,232},{29,29,1473},{26,31,753},{24,31,442},{14,31,0},{31,28,1473},{14,31,0},{20,31,14098},{20,31,14098},{20,31,14098},{18,31,10672},{17,31,11453},{15,31,6218},{15,31,6218}, {9,31,200},{6,31,7270},{0,31,1613},{27,31,680},{27,31,680},{27,31,680},{26,31,232},{28,28,1105},{24,31,442},{24,31,442},{14,31,0},{28,28,1105},{14,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{13,31,3929},{13,31,3929},{13,31,3929},{9,31,200},{0,31,1613}, -{0,31,1613}, \ No newline at end of file +{0,31,1613}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_6.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_6.inc index 2441fbe859..fad45fe22d 100644 --- a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_6.inc +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_dxt1_6.inc @@ -491,4 +491,4 @@ {34,63,10841},{34,63,12089},{26,63,6206},{26,63,6206},{17,63,74},{9,63,7678},{0,63,1341},{54,63,937},{54,63,937},{54,63,937},{51,63,305},{63,43,1513},{47,63,605},{47,63,605},{22,63,1},{62,53,1513},{22,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{32,0,9256},{32,0,9256},{32,0,9256},{32,0,9256},{23,63,3650}, 
{23,63,3650},{23,63,3650},{17,63,74},{0,63,1341},{0,63,1341},{43,63,17392},{40,63,15021},{40,63,14060},{37,63,10673},{40,63,16013},{32,63,8261},{29,63,6166},{19,63,194},{20,63,11338},{1,63,1594},{57,63,1041},{56,63,822},{54,63,697},{52,63,234},{63,51,1473},{51,63,737},{49,63,442},{28,63,1},{63,57,1473},{28,63,1},{40,63,14060},{40,63,14060},{40,63,14060},{37,63,10673},{34,63,11401},{29,63,6166},{29,63,6166}, {19,63,194},{12,63,7270},{1,63,1594},{54,63,697},{54,63,697},{54,63,697},{52,63,234},{63,46,1105},{49,63,442},{49,63,442},{28,63,1},{63,54,1105},{28,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{34,0,9256},{34,0,9256},{34,0,9256},{34,0,9256},{26,63,3898},{26,63,3898},{26,63,3898},{19,63,194},{1,63,1594}, -{1,63,1594}, \ No newline at end of file +{1,63,1594}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc new file mode 100644 index 0000000000..fbaf988d78 --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc @@ -0,0 +1,481 @@ +{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{0,1,52},{0,0,68},{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{0,1,43},{0,1,61},{0,1,1},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,2,20},{0,1,18},{0,1,9},{0,1,36},{0,1,18},{0,1,36},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,4,56},{0,3,38},{0,2,52}, +{0,2,36},{0,4,56},{0,3,35},{0,2,0},{0,2,52},{0,2,88},{0,1,78},{0,4,56},{0,3,38},{0,2,52},{0,2,36},{1,0,52},{0,3,35},{0,2,0},{0,2,52},{1,1,51},{0,2,52},{0,3,37},{0,3,37},{0,3,37},{0,2,36},{0,3,10},{0,2,0},{0,2,0},{0,1,5},{0,1,35},{0,1,14},{0,3,37},{0,3,37},{0,3,37},{0,2,36},{0,3,10}, 
+{0,2,0},{0,2,0},{0,1,5},{1,0,16},{0,1,5},{1,1,18},{0,3,2},{0,2,16},{0,2,0},{1,1,18},{2,0,20},{0,2,0},{0,2,36},{2,0,20},{0,2,36},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{1,4,88},{1,3,78},{1,3,69},{1,3,77},{1,3,115},{0,4,88},{0,3,98}, +{0,3,101},{0,4,72},{0,3,38},{1,4,24},{1,3,14},{1,3,5},{1,3,13},{1,3,51},{0,4,24},{0,3,34},{0,3,37},{3,0,52},{0,3,37},{1,3,69},{1,3,69},{1,3,69},{1,2,72},{1,2,72},{1,2,72},{1,2,72},{1,2,72},{0,3,11},{0,2,24},{1,3,5},{1,3,5},{1,3,5},{1,2,8},{1,2,8},{1,2,8},{1,2,8},{1,2,8},{1,2,8}, +{1,2,8},{0,7,18},{1,3,10},{1,3,1},{0,3,9},{0,7,18},{1,3,18},{0,3,9},{0,3,36},{1,3,18},{0,3,36},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{1,1,65},{1,1,65},{1,1,65},{0,3,65},{0,3,2},{0,3,2},{1,6,56},{1,5,38},{1,4,53},{1,4,37},{1,6,56},{1,5,35},{1,4,1},{1,4,66},{0,5,60},{0,4,70},{1,6,56}, +{1,5,38},{1,4,53},{1,4,37},{0,9,51},{1,5,35},{1,4,1},{0,4,54},{2,3,51},{0,4,54},{1,5,37},{1,5,37},{1,5,37},{1,4,36},{1,5,10},{1,4,0},{1,4,0},{1,3,5},{0,5,11},{1,3,14},{1,5,37},{1,5,37},{1,5,37},{1,4,36},{0,8,8},{1,4,0},{1,4,0},{1,3,5},{4,0,8},{1,3,5},{2,3,18},{1,5,2},{1,4,17}, +{1,4,1},{2,3,18},{4,1,18},{1,4,1},{0,4,50},{4,1,18},{0,4,50},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,4,0},{1,4,0},{1,4,0},{1,3,1},{1,3,10},{1,3,10},{2,6,88},{2,5,78},{2,5,69},{2,5,77},{2,5,115},{2,5,107},{2,5,98},{1,5,117},{1,6,60},{1,5,36},{2,6,24},{2,5,14},{2,5,5},{2,5,13},{3,2,51}, +{1,6,35},{2,5,34},{1,5,36},{1,6,51},{1,5,36},{2,5,69},{2,5,69},{2,5,69},{2,4,72},{2,4,72},{2,4,72},{2,4,72},{2,4,72},{1,5,16},{1,5,36},{2,5,5},{2,5,5},{2,5,5},{2,4,8},{3,1,8},{2,4,8},{2,4,8},{2,4,8},{3,3,8},{2,4,8},{3,3,20},{2,5,10},{2,5,1},{2,5,9},{3,3,20},{2,5,18},{2,5,9}, +{0,5,36},{2,5,18},{0,5,36},{2,0,68},{2,0,68},{2,0,68},{2,0,68},{2,3,68},{2,3,68},{2,3,68},{2,4,68},{1,5,0},{1,5,0},{2,8,56},{2,7,38},{2,6,52},{2,6,36},{2,8,56},{2,7,35},{2,6,0},{2,6,52},{1,7,76},{1,6,70},{2,8,56},{2,7,38},{2,6,52},{2,6,36},{4,1,51},{2,7,35},{2,6,0},{1,6,45},{3,5,51}, 
+{1,6,45},{2,7,37},{2,7,37},{2,7,37},{2,6,36},{2,7,10},{2,6,0},{2,6,0},{2,5,5},{1,7,12},{2,5,14},{2,7,37},{2,7,37},{2,7,37},{2,6,36},{4,0,8},{2,6,0},{2,6,0},{2,5,5},{1,7,8},{2,5,5},{3,5,18},{2,7,2},{2,6,16},{2,6,0},{3,5,18},{0,9,18},{2,6,0},{0,6,36},{0,9,18},{0,6,36},{2,0,36}, +{2,0,36},{2,0,36},{2,0,36},{2,6,0},{2,6,0},{2,6,0},{2,5,1},{1,7,8},{1,7,8},{3,8,88},{3,7,78},{3,7,69},{3,7,77},{3,7,115},{2,8,88},{2,7,98},{2,7,101},{1,9,67},{2,7,38},{3,8,24},{3,7,14},{3,7,5},{3,7,13},{3,7,51},{2,8,24},{2,7,34},{2,7,37},{8,0,51},{2,7,37},{3,7,69},{3,7,69},{3,7,69}, +{3,6,72},{3,6,72},{3,6,72},{3,6,72},{3,6,72},{2,7,11},{2,6,24},{3,7,5},{3,7,5},{3,7,5},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{5,1,18},{3,7,10},{3,7,1},{2,7,9},{5,1,18},{3,7,18},{2,7,9},{0,7,36},{3,7,18},{0,7,36},{3,0,68},{3,0,68},{3,0,68},{3,0,68},{3,5,65}, +{3,5,65},{3,5,65},{2,7,65},{2,7,2},{2,7,2},{3,10,56},{3,9,38},{3,8,53},{3,8,37},{3,10,56},{3,9,35},{3,8,1},{3,8,66},{2,9,60},{2,8,70},{3,10,56},{3,9,38},{3,8,53},{3,8,37},{5,3,51},{3,9,35},{3,8,1},{2,8,54},{4,7,51},{2,8,54},{3,9,37},{3,9,37},{3,9,37},{3,8,36},{3,9,10},{3,8,0},{3,8,0}, +{3,7,5},{2,9,11},{3,7,14},{3,9,37},{3,9,37},{3,9,37},{3,8,36},{5,2,8},{3,8,0},{3,8,0},{3,7,5},{8,1,8},{3,7,5},{4,7,18},{3,9,2},{3,8,17},{3,8,1},{4,7,18},{8,2,18},{3,8,1},{0,8,50},{8,2,18},{0,8,50},{3,0,36},{3,0,36},{3,0,36},{3,0,36},{3,8,0},{3,8,0},{3,8,0},{3,7,1},{3,7,10}, +{3,7,10},{4,10,88},{4,9,78},{4,9,69},{4,9,77},{4,9,115},{4,9,107},{4,9,98},{3,9,117},{3,10,60},{3,9,36},{4,10,24},{4,9,14},{4,9,5},{4,9,13},{5,6,51},{3,10,35},{4,9,34},{3,9,36},{10,1,51},{3,9,36},{4,9,69},{4,9,69},{4,9,69},{4,8,72},{4,8,72},{4,8,72},{4,8,72},{4,8,72},{3,9,16},{3,9,36},{4,9,5}, +{4,9,5},{4,9,5},{4,8,8},{5,5,8},{4,8,8},{4,8,8},{4,8,8},{5,7,8},{4,8,8},{7,0,18},{4,9,10},{4,9,1},{4,9,9},{7,0,18},{4,9,18},{4,9,9},{0,9,36},{4,9,18},{0,9,36},{4,0,68},{4,0,68},{4,0,68},{4,0,68},{4,7,68},{4,7,68},{4,7,68},{4,8,68},{3,9,0},{3,9,0},{4,12,56},{4,11,38},{4,10,52}, 
+{4,10,36},{4,12,56},{4,11,35},{4,10,0},{4,10,52},{3,11,76},{3,10,70},{4,12,56},{4,11,38},{4,10,52},{4,10,36},{7,2,51},{4,11,35},{4,10,0},{3,10,45},{12,0,51},{3,10,45},{4,11,37},{4,11,37},{4,11,37},{4,10,36},{4,11,10},{4,10,0},{4,10,0},{4,9,5},{3,11,12},{4,9,14},{4,11,37},{4,11,37},{4,11,37},{4,10,36},{7,1,8}, +{4,10,0},{4,10,0},{4,9,5},{10,2,8},{4,9,5},{5,9,18},{4,11,2},{4,10,16},{4,10,0},{5,9,18},{10,3,18},{4,10,0},{0,10,36},{10,3,18},{0,10,36},{4,0,36},{4,0,36},{4,0,36},{4,0,36},{4,10,0},{4,10,0},{4,10,0},{4,9,1},{3,11,8},{3,11,8},{5,12,88},{5,11,78},{5,11,69},{5,11,77},{5,11,115},{4,12,88},{4,11,98}, +{4,11,101},{3,13,67},{4,11,38},{5,12,24},{5,11,14},{5,11,5},{5,11,13},{5,11,51},{4,12,24},{4,11,34},{4,11,37},{11,3,51},{4,11,37},{5,11,69},{5,11,69},{5,11,69},{5,10,72},{5,10,72},{5,10,72},{5,10,72},{5,10,72},{4,11,11},{4,10,24},{5,11,5},{5,11,5},{5,11,5},{5,10,8},{5,10,8},{5,10,8},{5,10,8},{5,10,8},{12,1,8}, +{5,10,8},{8,0,18},{5,11,10},{5,11,1},{4,11,9},{8,0,18},{12,2,18},{4,11,9},{0,11,36},{12,2,18},{0,11,36},{5,0,68},{5,0,68},{5,0,68},{5,0,68},{5,9,65},{5,9,65},{5,9,65},{4,11,65},{4,11,2},{4,11,2},{5,14,56},{5,13,38},{5,12,53},{5,12,37},{5,14,56},{5,13,35},{5,12,1},{5,12,66},{4,13,60},{4,12,70},{5,14,56}, +{5,13,38},{5,12,53},{5,12,37},{8,2,51},{5,13,35},{5,12,1},{4,12,54},{13,2,51},{4,12,54},{5,13,37},{5,13,37},{5,13,37},{5,12,36},{5,13,10},{5,12,0},{5,12,0},{5,11,5},{4,13,11},{5,11,14},{5,13,37},{5,13,37},{5,13,37},{5,12,36},{8,1,8},{5,12,0},{5,12,0},{5,11,5},{10,5,8},{5,11,5},{6,11,18},{5,13,2},{5,12,17}, +{5,12,1},{6,11,18},{15,0,18},{5,12,1},{0,12,50},{15,0,18},{0,12,50},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,12,0},{5,12,0},{5,12,0},{5,11,1},{5,11,10},{5,11,10},{6,14,88},{6,13,78},{6,13,69},{6,13,77},{6,13,115},{6,13,107},{6,13,98},{5,13,117},{5,14,60},{5,13,36},{6,14,24},{6,13,14},{6,13,5},{6,13,13},{8,5,51}, 
+{5,14,35},{6,13,34},{5,13,36},{12,5,51},{5,13,36},{6,13,69},{6,13,69},{6,13,69},{6,12,72},{6,12,72},{6,12,72},{6,12,72},{6,12,72},{5,13,16},{5,13,36},{6,13,5},{6,13,5},{6,13,5},{6,12,8},{8,4,8},{6,12,8},{6,12,8},{6,12,8},{14,2,8},{6,12,8},{3,24,18},{6,13,10},{6,13,1},{6,13,9},{3,24,18},{14,3,18},{6,13,9}, +{0,13,36},{14,3,18},{0,13,36},{6,0,68},{6,0,68},{6,0,68},{6,0,68},{6,11,68},{6,11,68},{6,11,68},{6,12,68},{5,13,0},{5,13,0},{6,16,56},{6,15,38},{6,14,52},{6,14,36},{6,16,56},{6,15,35},{6,14,0},{6,14,52},{5,15,76},{5,14,70},{6,16,56},{6,15,38},{6,14,52},{6,14,36},{3,26,51},{6,15,35},{6,14,0},{5,14,45},{15,3,51}, +{5,14,45},{6,15,37},{6,15,37},{6,15,37},{6,14,36},{6,15,10},{6,14,0},{6,14,0},{6,13,5},{5,15,12},{6,13,14},{6,15,37},{6,15,37},{6,15,37},{6,14,36},{3,25,8},{6,14,0},{6,14,0},{6,13,5},{12,6,8},{6,13,5},{9,5,18},{6,15,2},{6,14,16},{6,14,0},{9,5,18},{12,7,18},{6,14,0},{0,14,36},{12,7,18},{0,14,36},{6,0,36}, +{6,0,36},{6,0,36},{6,0,36},{6,14,0},{6,14,0},{6,14,0},{6,13,1},{5,15,8},{5,15,8},{7,16,88},{7,15,78},{7,15,69},{7,15,77},{7,15,115},{6,16,88},{6,15,98},{6,15,101},{5,17,67},{6,15,38},{7,16,24},{7,15,14},{7,15,5},{7,15,13},{11,0,51},{6,16,24},{6,15,34},{6,15,37},{13,7,51},{6,15,37},{7,15,69},{7,15,69},{7,15,69}, +{7,14,72},{7,14,72},{7,14,72},{7,14,72},{7,14,72},{6,15,11},{6,14,24},{7,15,5},{7,15,5},{7,15,5},{7,14,8},{9,6,8},{7,14,8},{7,14,8},{7,14,8},{14,5,8},{7,14,8},{10,4,18},{7,15,10},{7,15,1},{6,15,9},{10,4,18},{14,6,18},{6,15,9},{0,15,36},{14,6,18},{0,15,36},{7,0,68},{7,0,68},{7,0,68},{7,0,68},{7,13,65}, +{7,13,65},{7,13,65},{6,15,65},{6,15,2},{6,15,2},{7,18,56},{7,17,38},{7,16,53},{7,16,37},{7,18,56},{7,17,35},{7,16,1},{7,16,66},{6,17,60},{6,16,70},{7,18,56},{7,17,38},{7,16,53},{7,16,37},{10,6,51},{7,17,35},{7,16,1},{6,16,54},{15,6,51},{6,16,54},{7,17,37},{7,17,37},{7,17,37},{7,16,36},{7,17,10},{7,16,0},{7,16,0}, 
+{7,15,5},{6,17,11},{7,15,14},{7,17,37},{7,17,37},{7,17,37},{7,16,36},{10,5,8},{7,16,0},{7,16,0},{7,15,5},{12,9,8},{7,15,5},{12,0,18},{7,17,2},{7,16,17},{7,16,1},{12,0,18},{12,10,18},{7,16,1},{0,16,50},{12,10,18},{0,16,50},{7,0,36},{7,0,36},{7,0,36},{7,0,36},{7,16,0},{7,16,0},{7,16,0},{7,15,1},{7,15,10}, +{7,15,10},{7,21,326},{7,19,322},{8,17,392},{7,17,322},{7,21,137},{7,18,116},{7,17,133},{7,17,117},{7,18,60},{7,17,36},{8,16,118},{8,16,134},{8,17,136},{8,17,136},{10,9,51},{7,18,35},{7,17,52},{7,17,36},{14,9,51},{7,17,36},{7,20,307},{7,20,307},{7,20,307},{7,18,307},{7,20,91},{7,18,91},{7,18,91},{7,16,110},{7,17,16},{7,17,36},{8,15,101}, +{8,15,101},{8,15,101},{8,16,101},{10,8,8},{7,18,10},{7,18,10},{7,16,29},{11,12,8},{7,16,29},{12,3,18},{7,19,16},{8,17,36},{7,17,16},{12,3,18},{11,13,18},{7,17,16},{0,17,36},{11,13,18},{0,17,36},{7,0,306},{7,0,306},{7,0,306},{7,0,306},{7,19,81},{7,19,81},{7,19,81},{7,17,81},{7,17,0},{7,17,0},{8,19,88},{8,18,78},{8,18,69}, +{8,18,77},{8,18,115},{8,18,107},{8,18,98},{8,17,136},{7,19,76},{7,18,70},{8,19,24},{8,18,14},{8,18,5},{8,18,13},{9,15,51},{8,18,43},{8,18,34},{7,18,45},{5,22,51},{7,18,45},{8,18,69},{8,18,69},{8,18,69},{8,17,72},{8,17,72},{8,17,72},{8,17,72},{8,17,72},{7,19,12},{7,18,70},{8,18,5},{8,18,5},{8,18,5},{8,17,8},{9,14,8}, +{8,17,8},{8,17,8},{8,17,8},{14,10,8},{8,17,8},{11,9,18},{8,18,10},{8,18,1},{8,18,9},{11,9,18},{14,11,18},{8,18,9},{0,18,36},{14,11,18},{0,18,36},{8,0,68},{8,0,68},{8,0,68},{8,0,68},{8,16,65},{8,16,65},{8,16,65},{8,17,68},{7,19,8},{7,19,8},{8,21,56},{8,20,38},{8,19,52},{8,19,36},{8,21,56},{8,20,35},{8,19,0}, +{8,19,52},{7,21,67},{8,18,78},{8,21,56},{8,20,38},{8,19,52},{8,19,36},{13,4,51},{8,20,35},{8,19,0},{8,19,52},{15,11,51},{8,19,52},{8,20,37},{8,20,37},{8,20,37},{8,19,36},{8,20,10},{8,19,0},{8,19,0},{8,18,5},{8,18,35},{8,18,14},{8,20,37},{8,20,37},{8,20,37},{8,19,36},{11,10,8},{8,19,0},{8,19,0},{8,18,5},{5,23,8}, 
+{8,18,5},{12,8,18},{8,20,2},{8,19,16},{8,19,0},{12,8,18},{11,16,18},{8,19,0},{0,19,36},{11,16,18},{0,19,36},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,19,0},{8,19,0},{8,19,0},{8,18,1},{8,18,10},{8,18,10},{9,21,88},{9,20,78},{9,20,70},{9,20,78},{9,20,115},{8,21,88},{8,20,99},{8,20,115},{8,21,72},{8,20,52},{9,21,24}, +{9,20,14},{9,20,6},{9,20,14},{15,0,51},{8,21,24},{8,20,35},{8,20,51},{12,16,51},{8,20,51},{9,20,69},{9,20,69},{9,20,69},{9,19,72},{9,19,72},{9,19,72},{9,19,72},{9,19,72},{8,20,11},{8,19,24},{9,20,5},{9,20,5},{9,20,5},{9,19,8},{12,9,8},{9,19,8},{9,19,8},{9,19,8},{14,13,8},{9,19,8},{14,4,18},{9,20,10},{9,20,2}, +{8,20,10},{14,4,18},{14,14,18},{8,20,10},{0,20,50},{14,14,18},{0,20,50},{9,0,68},{9,0,68},{9,0,68},{9,0,68},{9,18,65},{9,18,65},{9,18,65},{8,20,65},{8,20,2},{8,20,2},{9,24,70},{9,22,58},{9,21,75},{9,21,51},{9,23,52},{9,22,25},{9,21,3},{9,21,46},{8,23,68},{8,21,70},{9,24,69},{9,22,57},{9,21,74},{9,21,50},{15,3,51}, +{9,22,24},{9,21,2},{8,21,45},{11,19,51},{8,21,45},{9,23,51},{9,23,51},{9,23,51},{9,21,51},{9,22,9},{9,21,3},{9,21,3},{9,20,9},{8,22,12},{9,20,12},{9,23,50},{9,23,50},{9,23,50},{9,21,50},{15,2,8},{9,21,2},{9,21,2},{9,20,8},{13,16,8},{9,20,8},{14,7,18},{9,22,8},{9,21,25},{9,21,1},{14,7,18},{13,17,18},{9,21,1}, +{0,21,36},{13,17,18},{0,21,36},{9,0,50},{9,0,50},{9,0,50},{9,0,50},{9,21,2},{9,21,2},{9,21,2},{9,20,5},{9,20,8},{9,20,8},{10,23,88},{10,22,78},{10,22,69},{10,22,77},{10,22,115},{10,22,107},{10,22,98},{9,22,117},{9,23,60},{9,22,36},{10,23,24},{10,22,14},{10,22,5},{10,22,13},{11,19,51},{9,23,35},{10,22,34},{9,22,36},{9,23,51}, +{9,22,36},{10,22,69},{10,22,69},{10,22,69},{10,21,72},{10,21,72},{10,21,72},{10,21,72},{10,21,72},{9,22,16},{9,22,36},{10,22,5},{10,22,5},{10,22,5},{10,21,8},{11,18,8},{10,21,8},{10,21,8},{10,21,8},{10,21,8},{10,21,8},{13,13,18},{10,22,10},{10,22,1},{10,22,9},{13,13,18},{15,16,18},{10,22,9},{0,22,36},{15,16,18},{0,22,36},{10,0,68}, 
+{10,0,68},{10,0,68},{10,0,68},{10,20,65},{10,20,65},{10,20,65},{10,21,68},{9,22,0},{9,22,0},{10,25,56},{10,24,38},{10,23,52},{10,23,36},{10,25,56},{10,24,35},{10,23,0},{10,23,52},{9,24,63},{9,23,70},{10,25,56},{10,24,38},{10,23,52},{10,23,36},{15,8,51},{10,24,35},{10,23,0},{9,23,45},{11,22,51},{9,23,45},{10,24,37},{10,24,37},{10,24,37}, +{10,23,36},{10,24,10},{10,23,0},{10,23,0},{10,22,5},{9,24,14},{10,22,14},{10,24,37},{10,24,37},{10,24,37},{10,23,36},{13,14,8},{10,23,0},{10,23,0},{10,22,5},{8,25,8},{10,22,5},{14,12,18},{10,24,2},{10,23,16},{10,23,0},{14,12,18},{13,20,18},{10,23,0},{0,23,36},{13,20,18},{0,23,36},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,23,0}, +{10,23,0},{10,23,0},{10,22,1},{10,22,10},{10,22,10},{11,25,88},{11,24,78},{11,24,70},{11,24,78},{11,24,115},{10,25,88},{10,24,99},{10,24,115},{9,26,67},{10,24,52},{11,25,24},{11,24,14},{11,24,6},{11,24,14},{14,14,51},{10,25,24},{10,24,35},{10,24,51},{14,20,51},{10,24,51},{11,24,69},{11,24,69},{11,24,69},{11,23,72},{11,23,72},{11,23,72},{11,23,72}, +{11,23,72},{10,24,11},{10,23,24},{11,24,5},{11,24,5},{11,24,5},{11,23,8},{14,13,8},{11,23,8},{11,23,8},{11,23,8},{11,23,8},{11,23,8},{13,18,18},{11,24,10},{11,24,2},{10,24,10},{13,18,18},{11,24,18},{10,24,10},{0,24,50},{11,24,18},{0,24,50},{11,0,68},{11,0,68},{11,0,68},{11,0,68},{11,22,65},{11,22,65},{11,22,65},{10,24,65},{10,24,2}, +{10,24,2},{11,28,70},{11,26,58},{11,25,75},{11,25,51},{11,27,52},{11,26,25},{11,25,3},{11,25,46},{10,27,68},{10,25,70},{11,28,69},{11,26,57},{11,25,74},{11,25,50},{14,17,51},{11,26,24},{11,25,2},{10,25,45},{13,23,51},{10,25,45},{11,27,51},{11,27,51},{11,27,51},{11,25,51},{11,26,9},{11,25,3},{11,25,3},{11,24,9},{10,26,12},{11,24,12},{11,27,50}, 
+{11,27,50},{11,27,50},{11,25,50},{14,16,8},{11,25,2},{11,25,2},{11,24,8},{15,20,8},{11,24,8},{13,21,18},{11,26,8},{11,25,25},{11,25,1},{13,21,18},{15,21,18},{11,25,1},{0,25,36},{15,21,18},{0,25,36},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,25,2},{11,25,2},{11,25,2},{11,24,5},{11,24,8},{11,24,8},{12,27,88},{12,26,78},{12,26,69}, +{12,26,77},{12,26,115},{12,26,107},{12,26,98},{11,26,117},{11,27,60},{11,26,36},{12,27,24},{12,26,14},{12,26,5},{12,26,13},{13,23,51},{11,27,35},{12,26,34},{11,26,36},{11,27,51},{11,26,36},{12,26,69},{12,26,69},{12,26,69},{12,25,72},{12,25,72},{12,25,72},{12,25,72},{12,25,72},{11,26,16},{11,26,36},{12,26,5},{12,26,5},{12,26,5},{12,25,8},{13,22,8}, +{12,25,8},{12,25,8},{12,25,8},{12,25,8},{12,25,8},{15,17,18},{12,26,10},{12,26,1},{12,26,9},{15,17,18},{12,26,18},{12,26,9},{0,26,36},{12,26,18},{0,26,36},{12,0,68},{12,0,68},{12,0,68},{12,0,68},{12,24,65},{12,24,65},{12,24,65},{12,25,68},{11,26,0},{11,26,0},{12,29,56},{12,28,38},{12,27,52},{12,27,36},{12,29,56},{12,28,35},{12,27,0}, +{12,27,52},{11,28,63},{11,27,70},{12,29,56},{12,28,38},{12,27,52},{12,27,36},{15,19,51},{12,28,35},{12,27,0},{11,27,45},{13,26,51},{11,27,45},{12,28,37},{12,28,37},{12,28,37},{12,27,36},{12,28,10},{12,27,0},{12,27,0},{12,26,5},{11,28,14},{12,26,14},{12,28,37},{12,28,37},{12,28,37},{12,27,36},{15,18,8},{12,27,0},{12,27,0},{12,26,5},{10,29,8}, +{12,26,5},{13,26,18},{12,28,2},{12,27,16},{12,27,0},{13,26,18},{15,24,18},{12,27,0},{0,27,36},{15,24,18},{0,27,36},{12,0,36},{12,0,36},{12,0,36},{12,0,36},{12,27,0},{12,27,0},{12,27,0},{12,26,1},{12,26,10},{12,26,10},{13,29,88},{13,28,78},{13,28,70},{13,28,78},{13,28,115},{12,29,88},{12,28,99},{12,28,115},{11,30,67},{12,28,52},{13,29,24}, 
+{13,28,14},{13,28,6},{13,28,14},{13,28,51},{12,29,24},{12,28,35},{12,28,51},{11,30,51},{12,28,51},{13,28,69},{13,28,69},{13,28,69},{13,27,72},{13,27,72},{13,27,72},{13,27,72},{13,27,72},{12,28,11},{12,27,24},{13,28,5},{13,28,5},{13,28,5},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{15,22,18},{13,28,10},{13,28,2}, +{12,28,10},{15,22,18},{13,28,18},{12,28,10},{0,28,50},{13,28,18},{0,28,50},{13,0,68},{13,0,68},{13,0,68},{13,0,68},{13,26,65},{13,26,65},{13,26,65},{12,28,65},{12,28,2},{12,28,2},{13,31,76},{13,30,58},{13,29,75},{13,29,51},{13,31,52},{13,30,25},{13,29,3},{13,29,46},{12,31,68},{12,29,70},{13,31,75},{13,30,57},{13,29,74},{13,29,50},{13,31,51}, +{13,30,24},{13,29,2},{12,29,45},{15,27,51},{12,29,45},{13,31,51},{13,31,51},{13,31,51},{13,29,51},{13,30,9},{13,29,3},{13,29,3},{13,28,9},{12,30,12},{13,28,12},{13,31,50},{13,31,50},{13,31,50},{13,29,50},{13,30,8},{13,29,2},{13,29,2},{13,28,8},{12,30,8},{13,28,8},{15,25,18},{13,30,8},{13,29,25},{13,29,1},{15,25,18},{12,31,18},{13,29,1}, +{0,29,36},{12,31,18},{0,29,36},{13,0,50},{13,0,50},{13,0,50},{13,0,50},{13,29,2},{13,29,2},{13,29,2},{13,28,5},{13,28,8},{13,28,8},{14,31,88},{14,30,78},{14,30,69},{14,30,77},{14,30,115},{14,30,107},{14,30,98},{13,30,117},{13,31,60},{13,30,36},{14,31,24},{14,30,14},{14,30,5},{14,30,13},{15,27,51},{13,31,35},{14,30,34},{13,30,36},{13,31,51}, +{13,30,36},{14,30,69},{14,30,69},{14,30,69},{14,29,72},{14,29,72},{14,29,72},{14,29,72},{14,29,72},{13,30,16},{13,30,36},{14,30,5},{14,30,5},{14,30,5},{14,29,8},{15,26,8},{14,29,8},{14,29,8},{14,29,8},{14,29,8},{14,29,8},{14,31,20},{14,30,10},{14,30,1},{14,30,9},{14,31,20},{14,30,18},{14,30,9},{0,30,36},{14,30,18},{0,30,36},{14,0,68}, 
+{14,0,68},{14,0,68},{14,0,68},{14,28,65},{14,28,65},{14,28,65},{14,29,68},{13,30,0},{13,30,0},{14,31,152},{14,31,88},{14,31,52},{14,31,36},{14,31,116},{14,31,36},{14,31,0},{14,31,52},{14,31,88},{13,31,70},{15,30,118},{14,31,88},{14,31,52},{14,31,36},{15,29,52},{14,31,36},{14,31,0},{13,31,45},{15,30,51},{13,31,45},{14,31,52},{14,31,52},{14,31,52}, +{14,31,36},{14,31,16},{14,31,0},{14,31,0},{14,30,5},{14,30,35},{14,30,14},{14,31,52},{14,31,52},{14,31,52},{14,31,36},{15,28,10},{14,31,0},{14,31,0},{14,30,5},{15,29,16},{14,30,5},{15,30,18},{15,30,34},{14,31,16},{14,31,0},{15,30,18},{15,30,26},{14,31,0},{0,31,36},{15,30,26},{0,31,36},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,31,0}, +{14,31,0},{14,31,0},{14,30,1},{14,30,10},{14,30,10},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{14,31,20},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68}, +{15,31,68},{14,31,56},{14,31,20},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{15,0,68},{15,0,68},{15,0,68},{15,0,68},{15,30,65},{15,30,65},{15,30,65},{15,31,68},{14,31,20}, +{14,31,20},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{0,2,158},{0,2,110},{0,2,62},{0,1,115},{0,1,178},{0,1,124},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{0,2,158},{0,2,110},{0,2,62},{0,1,115},{1,0,158},{0,1,115},{0,2,1},{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{0,0,25},{0,0,25},{0,2,1}, +{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{0,0,25},{0,0,25},{1,0,74},{0,3,20},{0,2,2},{0,2,26},{1,0,74},{1,1,72},{0,2,26},{0,1,90},{1,1,72},{0,1,90},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,6,83},{0,5,13},{0,3,26}, 
+{0,3,14},{0,5,248},{0,3,140},{0,3,41},{0,2,139},{0,2,319},{0,2,175},{0,6,83},{0,5,13},{0,3,26},{0,3,14},{1,1,244},{0,3,140},{0,3,41},{0,2,139},{0,3,248},{0,2,139},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{0,3,52},{0,2,18},{0,2,18},{0,1,29},{0,1,77},{0,1,38},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{0,3,52}, +{0,2,18},{0,2,18},{0,1,29},{1,0,58},{0,1,29},{1,3,72},{0,5,4},{0,3,17},{0,3,5},{1,3,72},{3,0,74},{0,3,5},{0,2,90},{3,0,74},{0,2,90},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{0,9,193},{0,7,125},{0,4,202},{0,4,122},{0,7,244},{0,5,96},{0,4,2}, +{0,3,106},{0,4,395},{0,3,187},{1,6,99},{1,5,45},{1,4,26},{1,4,50},{1,4,243},{0,5,96},{0,4,2},{0,3,106},{2,2,243},{0,3,106},{0,7,121},{0,7,121},{0,7,121},{0,4,122},{0,5,52},{0,4,2},{0,4,2},{0,3,25},{0,3,133},{0,2,62},{1,4,26},{1,4,26},{1,4,26},{1,3,25},{1,2,50},{0,4,2},{0,4,2},{0,3,25},{1,2,50}, +{0,3,25},{0,9,72},{0,7,4},{1,4,1},{0,4,1},{0,9,72},{2,3,72},{0,4,1},{0,3,90},{2,3,72},{0,3,90},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{1,8,164},{1,7,94},{1,5,106},{1,5,94},{0,10,292},{0,7,125},{0,5,81},{0,4,130},{0,6,364},{0,4,106},{1,8,83}, +{1,7,13},{1,5,25},{1,5,13},{3,0,243},{0,7,76},{0,5,32},{0,4,81},{5,0,243},{0,4,81},{1,6,91},{1,6,91},{1,6,91},{1,5,94},{0,8,99},{0,6,51},{0,6,51},{0,4,66},{0,4,107},{0,4,42},{1,6,10},{1,6,10},{1,6,10},{1,5,13},{0,8,50},{0,6,2},{0,6,2},{0,4,17},{4,0,50},{0,4,17},{3,2,74},{1,7,4},{1,5,16}, +{1,5,4},{3,2,74},{5,1,72},{1,5,4},{0,4,80},{5,1,72},{0,4,80},{1,0,90},{1,0,90},{1,0,90},{1,0,90},{0,6,50},{0,6,50},{0,6,50},{0,4,50},{0,3,20},{0,3,20},{1,11,218},{1,9,149},{1,7,242},{1,6,149},{1,10,248},{1,7,99},{1,6,5},{1,5,99},{0,7,308},{0,5,100},{2,8,99},{2,7,45},{2,6,27},{2,6,51},{3,3,243}, 
+{0,9,81},{1,6,5},{0,5,99},{4,3,243},{0,5,99},{1,9,145},{1,9,145},{1,9,145},{1,6,148},{1,8,52},{1,6,4},{1,6,4},{1,5,18},{0,6,72},{0,5,19},{2,6,26},{2,6,26},{2,6,26},{2,5,25},{3,1,50},{1,6,4},{1,6,4},{0,5,18},{3,3,50},{0,5,18},{4,1,72},{1,9,5},{2,6,2},{1,6,5},{4,1,72},{3,5,72},{1,6,5}, +{0,5,90},{3,5,72},{0,5,90},{1,0,144},{1,0,144},{1,0,144},{1,0,144},{1,6,0},{1,6,0},{1,6,0},{1,4,4},{0,6,8},{0,6,8},{2,10,164},{2,9,94},{2,7,107},{2,7,95},{1,12,307},{1,9,137},{1,7,115},{1,6,154},{0,9,253},{1,6,106},{2,10,83},{2,9,13},{2,7,26},{2,7,14},{4,2,243},{1,9,73},{2,7,41},{1,6,90},{6,2,243}, +{1,6,90},{2,8,91},{2,8,91},{2,8,91},{2,7,94},{1,10,116},{1,8,69},{1,8,69},{1,6,73},{0,8,50},{1,6,25},{2,8,10},{2,8,10},{2,8,10},{2,7,13},{4,0,50},{1,8,5},{1,8,5},{1,6,9},{1,7,50},{1,6,9},{3,7,72},{2,9,4},{2,7,17},{2,7,5},{3,7,72},{8,0,72},{2,7,5},{0,6,90},{8,0,72},{0,6,90},{2,0,90}, +{2,0,90},{2,0,90},{2,0,90},{1,9,65},{1,9,65},{1,9,65},{1,6,64},{0,7,10},{0,7,10},{2,13,194},{2,11,126},{2,8,203},{2,8,123},{2,11,245},{2,9,97},{2,8,3},{2,7,107},{0,11,249},{1,7,100},{3,10,99},{3,9,45},{3,8,26},{3,8,50},{3,8,243},{1,11,81},{2,8,2},{1,7,99},{4,6,243},{1,7,99},{2,11,122},{2,11,122},{2,11,122}, +{2,8,123},{2,9,53},{2,8,3},{2,8,3},{2,7,26},{1,8,62},{1,7,19},{3,8,26},{3,8,26},{3,8,26},{3,7,25},{3,6,50},{2,8,2},{2,8,2},{1,7,18},{3,6,50},{1,7,18},{5,3,72},{2,11,4},{3,8,1},{2,8,1},{5,3,72},{4,7,72},{2,8,1},{0,7,90},{4,7,72},{0,7,90},{2,0,122},{2,0,122},{2,0,122},{2,0,122},{2,8,2}, +{2,8,2},{2,8,2},{2,6,2},{1,7,10},{1,7,10},{3,12,164},{3,11,94},{3,9,106},{3,9,94},{2,14,292},{2,11,125},{2,9,81},{2,8,130},{1,11,253},{2,8,106},{3,12,83},{3,11,13},{3,9,25},{3,9,13},{5,4,243},{2,11,76},{2,9,32},{2,8,81},{9,1,243},{2,8,81},{3,10,91},{3,10,91},{3,10,91},{3,9,94},{2,12,99},{2,10,51},{2,10,51}, 
+{2,8,66},{1,10,50},{2,8,42},{3,10,10},{3,10,10},{3,10,10},{3,9,13},{5,2,50},{2,10,2},{2,10,2},{2,8,17},{8,1,50},{2,8,17},{1,19,72},{3,11,4},{3,9,16},{3,9,4},{1,19,72},{9,2,72},{3,9,4},{0,8,80},{9,2,72},{0,8,80},{3,0,90},{3,0,90},{3,0,90},{3,0,90},{2,10,50},{2,10,50},{2,10,50},{2,8,50},{1,9,9}, +{1,9,9},{3,15,218},{3,13,149},{3,11,242},{3,10,149},{3,14,248},{3,11,99},{3,10,5},{3,9,99},{1,13,244},{2,9,100},{4,12,99},{4,11,45},{4,10,27},{4,10,51},{5,7,243},{2,13,81},{3,10,5},{2,9,99},{6,7,243},{2,9,99},{3,13,145},{3,13,145},{3,13,145},{3,10,148},{3,12,52},{3,10,4},{3,10,4},{3,9,18},{1,12,56},{2,9,19},{4,10,26}, +{4,10,26},{4,10,26},{4,9,25},{5,5,50},{3,10,4},{3,10,4},{2,9,18},{5,7,50},{2,9,18},{7,2,72},{3,13,5},{4,10,2},{3,10,5},{7,2,72},{12,0,72},{3,10,5},{0,9,90},{12,0,72},{0,9,90},{3,0,144},{3,0,144},{3,0,144},{3,0,144},{3,10,0},{3,10,0},{3,10,0},{3,8,4},{2,10,8},{2,10,8},{4,14,164},{4,13,94},{4,11,107}, +{4,11,95},{3,16,307},{3,13,137},{3,11,115},{3,10,154},{2,13,253},{3,10,106},{4,14,83},{4,13,13},{4,11,26},{4,11,14},{7,3,243},{3,13,73},{4,11,41},{3,10,90},{11,2,243},{3,10,90},{4,12,91},{4,12,91},{4,12,91},{4,11,94},{3,14,116},{3,12,69},{3,12,69},{3,10,73},{2,12,50},{3,10,25},{4,12,10},{4,12,10},{4,12,10},{4,11,13},{7,1,50}, +{3,12,5},{3,12,5},{3,10,9},{10,2,50},{3,10,9},{5,11,72},{4,13,4},{4,11,17},{4,11,5},{5,11,72},{11,3,72},{4,11,5},{0,10,90},{11,3,72},{0,10,90},{4,0,90},{4,0,90},{4,0,90},{4,0,90},{3,13,65},{3,13,65},{3,13,65},{3,10,64},{2,11,10},{2,11,10},{4,17,194},{4,15,126},{4,12,203},{4,12,123},{4,15,245},{4,13,97},{4,12,3}, +{4,11,107},{2,15,249},{3,11,100},{5,14,99},{5,13,45},{5,12,26},{5,12,50},{5,12,243},{3,15,81},{4,12,2},{3,11,99},{13,1,243},{3,11,99},{4,15,122},{4,15,122},{4,15,122},{4,12,123},{4,13,53},{4,12,3},{4,12,3},{4,11,26},{3,12,62},{3,11,19},{5,12,26},{5,12,26},{5,12,26},{5,11,25},{5,10,50},{4,12,2},{4,12,2},{3,11,18},{12,1,50}, 
+{3,11,18},{8,2,72},{4,15,4},{5,12,1},{4,12,1},{8,2,72},{13,2,72},{4,12,1},{0,11,90},{13,2,72},{0,11,90},{4,0,122},{4,0,122},{4,0,122},{4,0,122},{4,12,2},{4,12,2},{4,12,2},{4,10,2},{3,11,10},{3,11,10},{5,16,164},{5,15,94},{5,13,106},{5,13,94},{4,18,292},{4,15,125},{4,13,81},{4,12,130},{3,15,253},{4,12,106},{5,16,83}, +{5,15,13},{5,13,25},{5,13,13},{8,3,243},{4,15,76},{4,13,32},{4,12,81},{11,5,243},{4,12,81},{5,14,91},{5,14,91},{5,14,91},{5,13,94},{4,16,99},{4,14,51},{4,14,51},{4,12,66},{3,14,50},{4,12,42},{5,14,10},{5,14,10},{5,14,10},{5,13,13},{8,1,50},{4,14,2},{4,14,2},{4,12,17},{10,5,50},{4,12,17},{3,23,72},{5,15,4},{5,13,16}, +{5,13,4},{3,23,72},{11,6,72},{5,13,4},{0,12,80},{11,6,72},{0,12,80},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,14,50},{4,14,50},{4,14,50},{4,12,50},{3,13,9},{3,13,9},{5,19,218},{5,17,149},{5,15,242},{5,14,149},{5,18,248},{5,15,99},{5,14,5},{5,13,99},{3,17,244},{4,13,100},{6,16,99},{6,15,45},{6,14,27},{6,14,51},{8,6,243}, +{4,17,81},{5,14,5},{4,13,99},{15,2,243},{4,13,99},{5,17,145},{5,17,145},{5,17,145},{5,14,148},{5,16,52},{5,14,4},{5,14,4},{5,13,18},{3,16,56},{4,13,19},{6,14,26},{6,14,26},{6,14,26},{6,13,25},{8,4,50},{5,14,4},{5,14,4},{4,13,18},{14,2,50},{4,13,18},{3,26,72},{5,17,5},{6,14,2},{5,14,5},{3,26,72},{15,3,72},{5,14,5}, +{0,13,90},{15,3,72},{0,13,90},{5,0,144},{5,0,144},{5,0,144},{5,0,144},{5,14,0},{5,14,0},{5,14,0},{5,12,4},{4,14,8},{4,14,8},{6,18,164},{6,17,94},{6,15,107},{6,15,95},{5,20,307},{5,17,137},{5,15,115},{5,14,154},{4,17,253},{5,14,106},{6,18,83},{6,17,13},{6,15,26},{6,15,14},{5,20,243},{5,17,73},{6,15,41},{5,14,90},{13,6,243}, +{5,14,90},{6,16,91},{6,16,91},{6,16,91},{6,15,94},{5,18,116},{5,16,69},{5,16,69},{5,14,73},{4,16,50},{5,14,25},{6,16,10},{6,16,10},{6,16,10},{6,15,13},{3,25,50},{5,16,5},{5,16,5},{5,14,9},{12,6,50},{5,14,9},{11,0,72},{6,17,4},{6,15,17},{6,15,5},{11,0,72},{13,7,72},{6,15,5},{0,14,90},{13,7,72},{0,14,90},{6,0,90}, 
+{6,0,90},{6,0,90},{6,0,90},{5,17,65},{5,17,65},{5,17,65},{5,14,64},{4,15,10},{4,15,10},{6,21,194},{6,19,126},{6,16,203},{6,16,123},{6,19,245},{6,17,97},{6,16,3},{6,15,107},{4,19,249},{5,15,100},{7,18,99},{7,17,45},{7,16,26},{7,16,50},{11,1,243},{5,19,81},{6,16,2},{5,15,99},{15,5,243},{5,15,99},{6,19,122},{6,19,122},{6,19,122}, +{6,16,123},{6,17,53},{6,16,3},{6,16,3},{6,15,26},{5,16,62},{5,15,19},{7,16,26},{7,16,26},{7,16,26},{7,15,25},{9,6,50},{6,16,2},{6,16,2},{5,15,18},{14,5,50},{5,15,18},{10,6,72},{6,19,4},{7,16,1},{6,16,1},{10,6,72},{15,6,72},{6,16,1},{0,15,90},{15,6,72},{0,15,90},{6,0,122},{6,0,122},{6,0,122},{6,0,122},{6,16,2}, +{6,16,2},{6,16,2},{6,14,2},{5,15,10},{5,15,10},{7,20,164},{7,19,94},{7,17,106},{7,17,94},{6,22,292},{6,19,125},{6,17,81},{6,16,130},{5,19,253},{6,16,106},{7,20,83},{7,19,13},{7,17,25},{7,17,13},{10,7,243},{6,19,76},{6,17,32},{6,16,81},{13,9,243},{6,16,81},{7,18,91},{7,18,91},{7,18,91},{7,17,94},{6,20,99},{6,18,51},{6,18,51}, +{6,16,66},{5,18,50},{6,16,42},{7,18,10},{7,18,10},{7,18,10},{7,17,13},{10,5,50},{6,18,2},{6,18,2},{6,16,17},{12,9,50},{6,16,17},{12,2,72},{7,19,4},{7,17,16},{7,17,4},{12,2,72},{13,10,72},{7,17,4},{0,16,80},{13,10,72},{0,16,80},{7,0,90},{7,0,90},{7,0,90},{7,0,90},{6,18,50},{6,18,50},{6,18,50},{6,16,50},{5,17,9}, +{5,17,9},{7,23,218},{7,21,149},{7,19,242},{7,18,149},{7,22,248},{7,19,99},{7,18,5},{7,17,99},{5,21,244},{6,17,100},{7,23,218},{7,21,149},{8,18,206},{7,18,149},{13,0,243},{6,21,81},{7,18,5},{6,17,99},{5,21,243},{6,17,99},{7,21,145},{7,21,145},{7,21,145},{7,18,148},{7,20,52},{7,18,4},{7,18,4},{7,17,18},{5,20,56},{6,17,19},{7,21,145}, +{7,21,145},{7,21,145},{7,18,148},{10,8,50},{7,18,4},{7,18,4},{6,17,18},{11,12,50},{6,17,18},{9,15,72},{7,21,5},{8,18,37},{7,18,5},{9,15,72},{5,22,72},{7,18,5},{0,17,90},{5,22,72},{0,17,90},{7,0,144},{7,0,144},{7,0,144},{7,0,144},{7,18,0},{7,18,0},{7,18,0},{7,16,4},{6,18,8},{6,18,8},{8,21,388},{8,20,334},{8,19,316}, 
+{8,19,340},{7,24,307},{7,21,137},{7,19,115},{7,18,154},{6,21,253},{7,18,106},{8,21,99},{8,20,45},{8,19,27},{8,19,51},{8,19,243},{7,21,73},{7,19,51},{7,18,90},{15,10,243},{7,18,90},{8,19,315},{8,19,315},{8,19,315},{8,18,314},{7,22,116},{7,20,69},{7,20,69},{7,18,73},{6,20,50},{7,18,25},{8,19,26},{8,19,26},{8,19,26},{8,18,25},{9,14,50}, +{7,20,5},{7,20,5},{7,18,9},{14,10,50},{7,18,9},{13,4,72},{8,20,20},{8,19,2},{7,19,26},{13,4,72},{15,11,72},{7,19,26},{0,18,90},{15,11,72},{0,18,90},{8,0,314},{8,0,314},{8,0,314},{8,0,314},{7,21,65},{7,21,65},{7,21,65},{7,18,64},{6,19,10},{6,19,10},{8,23,164},{8,22,94},{8,20,106},{8,20,94},{8,22,329},{8,20,221},{8,20,121}, +{8,19,220},{6,23,249},{7,19,100},{8,23,83},{8,22,13},{8,20,25},{8,20,13},{13,5,243},{7,23,81},{8,20,40},{7,19,99},{12,15,243},{7,19,99},{8,21,91},{8,21,91},{8,21,91},{8,20,94},{8,20,133},{8,19,99},{8,19,99},{8,18,110},{7,20,62},{7,19,19},{8,21,10},{8,21,10},{8,21,10},{8,20,13},{11,10,50},{8,19,18},{8,19,18},{7,19,18},{5,23,50}, +{7,19,18},{15,0,72},{8,22,4},{8,20,16},{8,20,4},{15,0,72},{12,16,72},{8,20,4},{0,19,90},{12,16,72},{0,19,90},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{8,18,81},{8,18,81},{8,18,81},{8,18,85},{7,19,10},{7,19,10},{8,26,194},{8,24,131},{8,21,203},{8,21,123},{8,24,245},{8,22,97},{8,21,3},{8,20,97},{7,23,253},{7,20,141},{9,23,99}, +{9,22,45},{9,21,26},{9,21,50},{15,1,243},{8,22,96},{8,21,2},{8,20,96},{15,13,243},{8,20,96},{8,24,122},{8,24,122},{8,24,122},{8,21,123},{8,22,53},{8,21,3},{8,21,3},{8,19,27},{7,22,50},{8,19,75},{9,21,26},{9,21,26},{9,21,26},{9,20,25},{12,9,50},{8,21,2},{8,21,2},{8,19,26},{14,13,50},{8,19,26},{14,6,72},{8,24,9},{9,21,1}, +{8,21,1},{14,6,72},{15,14,72},{8,21,1},{0,20,80},{15,14,72},{0,20,80},{8,0,122},{8,0,122},{8,0,122},{8,0,122},{8,21,2},{8,21,2},{8,21,2},{8,19,2},{7,21,9},{7,21,9},{9,26,154},{9,24,81},{9,22,106},{9,22,82},{9,24,307},{8,24,137},{9,22,91},{8,21,154},{7,25,244},{8,21,106},{9,26,90},{9,24,17},{9,22,42},{9,22,18},{15,4,243}, 
+{8,24,73},{9,22,27},{8,21,90},{9,22,243},{8,21,90},{9,24,81},{9,24,81},{9,24,81},{9,22,81},{9,22,114},{8,23,68},{8,23,68},{8,21,73},{7,24,56},{8,21,25},{9,24,17},{9,24,17},{9,24,17},{9,22,17},{15,2,50},{8,23,4},{8,23,4},{8,21,9},{13,16,50},{8,21,9},{11,19,72},{9,24,1},{9,22,26},{9,22,2},{11,19,72},{9,23,72},{9,22,2}, +{0,21,90},{9,23,72},{0,21,90},{9,0,80},{9,0,80},{9,0,80},{9,0,80},{9,20,65},{9,20,65},{9,20,65},{8,21,64},{8,21,16},{8,21,16},{9,28,216},{9,26,149},{9,23,245},{9,23,149},{9,27,248},{9,24,89},{9,23,5},{9,22,99},{7,27,260},{8,22,100},{10,25,99},{10,24,45},{10,23,27},{10,23,51},{10,23,243},{8,26,81},{9,23,5},{8,22,99},{11,21,243}, +{8,22,99},{9,26,145},{9,26,145},{9,26,145},{9,23,148},{9,25,52},{9,23,4},{9,23,4},{9,22,18},{8,23,72},{8,22,19},{10,23,26},{10,23,26},{10,23,26},{10,22,25},{11,18,50},{9,23,4},{9,23,4},{8,22,18},{10,21,50},{8,22,18},{15,8,72},{9,26,5},{10,23,2},{9,23,5},{15,8,72},{11,22,72},{9,23,5},{0,22,90},{11,22,72},{0,22,90},{9,0,144}, +{9,0,144},{9,0,144},{9,0,144},{9,23,0},{9,23,0},{9,23,0},{9,21,4},{8,23,8},{8,23,8},{10,27,164},{10,26,94},{10,24,106},{10,24,94},{9,29,307},{9,26,137},{9,24,105},{9,23,154},{8,26,253},{9,23,106},{10,27,83},{10,26,13},{10,24,25},{10,24,13},{15,9,243},{9,26,73},{10,24,40},{9,23,90},{14,19,243},{9,23,90},{10,25,91},{10,25,91},{10,25,91}, +{10,24,94},{9,27,116},{9,25,69},{9,25,69},{9,23,73},{8,25,50},{9,23,25},{10,25,10},{10,25,10},{10,25,10},{10,24,13},{13,14,50},{9,25,5},{9,25,5},{9,23,9},{8,25,50},{9,23,9},{14,14,72},{10,26,4},{10,24,16},{10,24,4},{14,14,72},{14,20,72},{10,24,4},{0,23,90},{14,20,72},{0,23,90},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,26,65}, +{9,26,65},{9,26,65},{9,23,64},{8,24,9},{8,24,9},{10,30,194},{10,28,131},{10,25,203},{10,25,123},{10,28,245},{10,26,97},{10,25,3},{10,24,97},{8,28,252},{9,24,85},{11,27,99},{11,26,45},{11,25,26},{11,25,50},{14,15,243},{9,28,80},{10,25,2},{9,24,84},{12,23,243},{9,24,84},{10,28,122},{10,28,122},{10,28,122},{10,25,123},{10,26,53},{10,25,3},{10,25,3}, 
+{10,23,27},{9,25,62},{9,24,21},{11,25,26},{11,25,26},{11,25,26},{11,24,25},{14,13,50},{10,25,2},{10,25,2},{9,24,20},{11,23,50},{9,24,20},{13,20,72},{10,28,9},{11,25,1},{10,25,1},{13,20,72},{13,23,74},{10,25,1},{0,24,80},{13,23,74},{0,24,80},{10,0,122},{10,0,122},{10,0,122},{10,0,122},{10,25,2},{10,25,2},{10,25,2},{10,23,2},{9,24,5}, +{9,24,5},{11,30,154},{11,28,81},{11,26,106},{11,26,82},{11,28,307},{10,28,137},{11,26,91},{10,25,154},{9,28,260},{10,25,106},{11,30,90},{11,28,17},{11,26,42},{11,26,18},{14,18,243},{10,28,73},{11,26,27},{10,25,90},{11,26,243},{10,25,90},{11,28,81},{11,28,81},{11,28,81},{11,26,81},{11,26,114},{10,27,68},{10,27,68},{10,25,73},{9,27,53},{10,25,25},{11,28,17}, +{11,28,17},{11,28,17},{11,26,17},{14,16,50},{10,27,4},{10,27,4},{10,25,9},{15,20,50},{10,25,9},{13,23,72},{11,28,1},{11,26,26},{11,26,2},{13,23,72},{11,27,72},{11,26,2},{0,25,90},{11,27,72},{0,25,90},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{11,24,65},{11,24,65},{11,24,65},{10,25,64},{10,25,16},{10,25,16},{11,31,248},{11,30,149},{11,27,245}, +{11,27,149},{11,31,248},{11,28,89},{11,27,5},{11,26,99},{9,30,244},{10,26,100},{12,29,99},{12,28,45},{12,27,27},{12,27,51},{12,27,243},{10,30,81},{11,27,5},{10,26,99},{13,25,243},{10,26,99},{11,30,145},{11,30,145},{11,30,145},{11,27,148},{11,29,52},{11,27,4},{11,27,4},{11,26,18},{9,29,56},{10,26,19},{12,27,26},{12,27,26},{12,27,26},{12,26,25},{13,22,50}, +{11,27,4},{11,27,4},{10,26,18},{12,25,50},{10,26,18},{15,19,72},{11,30,5},{12,27,2},{11,27,5},{15,19,72},{13,26,72},{11,27,5},{0,26,90},{13,26,72},{0,26,90},{11,0,144},{11,0,144},{11,0,144},{11,0,144},{11,27,0},{11,27,0},{11,27,0},{11,25,4},{10,27,8},{10,27,8},{12,31,164},{12,30,94},{12,28,106},{12,28,94},{12,30,329},{11,30,137},{11,28,105}, 
+{11,27,154},{10,30,253},{11,27,106},{12,31,83},{12,30,13},{12,28,25},{12,28,13},{14,23,243},{11,30,73},{12,28,40},{11,27,90},{11,29,243},{11,27,90},{12,29,91},{12,29,91},{12,29,91},{12,28,94},{11,31,116},{11,29,69},{11,29,69},{11,27,73},{10,29,50},{11,27,25},{12,29,10},{12,29,10},{12,29,10},{12,28,13},{15,18,50},{11,29,5},{11,29,5},{11,27,9},{10,29,50}, +{11,27,9},{13,28,72},{12,30,4},{12,28,16},{12,28,4},{13,28,72},{11,30,72},{12,28,4},{0,27,90},{11,30,72},{0,27,90},{12,0,90},{12,0,90},{12,0,90},{12,0,90},{11,30,65},{11,30,65},{11,30,65},{11,27,64},{10,28,9},{10,28,9},{12,31,356},{12,31,140},{12,29,203},{12,29,123},{12,31,284},{12,30,97},{12,29,3},{12,28,97},{10,31,287},{11,28,85},{13,31,99}, +{13,30,45},{13,29,26},{13,29,50},{13,29,243},{12,30,96},{12,29,2},{11,28,84},{14,27,243},{11,28,84},{12,31,131},{12,31,131},{12,31,131},{12,29,123},{12,30,53},{12,29,3},{12,29,3},{12,27,27},{11,29,62},{11,28,21},{13,29,26},{13,29,26},{13,29,26},{13,28,25},{13,27,50},{12,29,2},{12,29,2},{11,28,20},{13,27,50},{11,28,20},{15,24,72},{12,31,18},{13,29,1}, +{12,29,1},{15,24,72},{15,27,74},{12,29,1},{0,28,80},{15,27,74},{0,28,80},{12,0,122},{12,0,122},{12,0,122},{12,0,122},{12,29,2},{12,29,2},{12,29,2},{12,27,2},{11,28,5},{11,28,5},{13,31,280},{13,31,120},{13,30,106},{13,30,82},{13,31,328},{13,31,200},{13,30,91},{12,29,154},{12,31,344},{12,29,106},{13,31,216},{13,31,56},{13,30,42},{13,30,18},{15,25,244}, +{13,31,136},{13,30,27},{12,29,90},{13,30,243},{12,29,90},{13,31,84},{13,31,84},{13,31,84},{13,30,81},{13,30,114},{12,31,68},{12,31,68},{12,29,73},{11,31,53},{12,29,25},{13,31,20},{13,31,20},{13,31,20},{13,30,17},{13,30,50},{12,31,4},{12,31,4},{12,29,9},{12,30,50},{12,29,9},{15,27,72},{13,31,40},{13,30,26},{13,30,2},{15,27,72},{13,31,72},{13,30,2}, 
+{0,29,90},{13,31,72},{0,29,90},{13,0,80},{13,0,80},{13,0,80},{13,0,80},{13,28,65},{13,28,65},{13,28,65},{12,29,64},{12,29,16},{12,29,16},{14,31,415},{14,31,351},{13,31,244},{13,31,148},{14,31,511},{13,31,173},{13,31,4},{13,30,82},{13,31,381},{12,30,83},{14,31,126},{14,31,62},{14,31,26},{14,31,50},{14,31,222},{13,31,173},{13,31,4},{12,30,82},{14,30,221}, +{12,30,82},{13,31,244},{13,31,244},{13,31,244},{13,31,148},{13,31,100},{13,31,4},{13,31,4},{13,30,18},{12,31,72},{12,30,19},{14,31,26},{14,31,26},{14,31,26},{14,30,25},{15,26,50},{13,31,4},{13,31,4},{12,30,18},{14,29,50},{12,30,18},{15,29,61},{14,31,37},{14,31,1},{13,31,4},{15,29,61},{15,30,61},{13,31,4},{0,30,73},{15,30,61},{0,30,73},{13,0,144}, +{13,0,144},{13,0,144},{13,0,144},{13,31,0},{13,31,0},{13,31,0},{13,29,4},{12,31,8},{12,31,8},{14,31,239},{14,31,175},{14,31,139},{14,31,99},{14,31,239},{14,31,135},{14,31,99},{13,31,73},{13,31,285},{13,31,25},{14,31,158},{14,31,94},{14,31,58},{14,31,18},{15,29,94},{14,31,54},{14,31,18},{13,31,9},{15,30,93},{13,31,9},{14,31,139},{14,31,139},{14,31,139}, +{14,31,99},{14,31,139},{14,31,99},{14,31,99},{13,31,73},{13,31,116},{13,31,25},{14,31,58},{14,31,58},{14,31,58},{14,31,18},{15,28,52},{14,31,18},{14,31,18},{13,31,9},{15,29,58},{13,31,9},{15,30,9},{15,31,9},{15,31,9},{14,31,9},{15,30,9},{15,31,9},{14,31,9},{0,31,9},{15,31,9},{0,31,9},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{14,30,81}, +{14,30,81},{14,30,81},{13,31,64},{13,31,16},{13,31,16},{15,31,314},{14,31,258},{14,31,222},{14,31,158},{14,31,226},{14,31,98},{14,31,62},{14,31,2},{14,31,122},{14,31,50},{15,31,25},{15,31,25},{15,31,25},{15,31,25},{15,30,22},{15,31,25},{15,31,25},{14,31,1},{15,31,25},{14,31,1},{14,31,222},{14,31,222},{14,31,222},{14,31,158},{14,31,126},{14,31,62},{14,31,62}, 
+{14,31,2},{14,31,86},{14,31,50},{15,31,25},{15,31,25},{15,31,25},{15,31,25},{15,30,13},{15,31,25},{15,31,25},{14,31,1},{15,30,25},{14,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{14,0,122},{14,0,122},{14,0,122},{14,0,122},{14,31,26},{14,31,26},{14,31,26},{14,31,2},{14,31,50}, +{14,31,50},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{0,4,442},{0,3,313},{0,3,142},{0,2,318},{0,2,498},{0,2,354},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{1,1,441},{0,3,313},{0,3,142},{0,2,318},{2,0,442},{0,2,318},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,45},{0,1,25},{0,1,25},{0,1,26},{0,1,50},{0,1,35},{0,3,0}, +{0,3,0},{0,3,0},{0,2,1},{0,1,45},{0,1,25},{0,1,25},{0,1,26},{0,1,41},{0,1,26},{1,3,200},{0,5,52},{0,3,25},{0,3,61},{1,3,200},{3,0,202},{0,3,61},{0,2,218},{3,0,202},{0,2,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,9,200},{0,7,20},{0,5,20}, +{0,4,25},{0,6,686},{0,5,433},{0,4,169},{0,3,443},{0,3,794},{0,3,524},{0,9,200},{0,7,20},{0,5,20},{0,4,25},{0,6,686},{0,5,433},{0,4,169},{0,3,443},{3,0,686},{0,3,443},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,3,145},{0,2,85},{0,2,85},{0,2,101},{0,1,178},{0,1,115},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,3,145}, +{0,2,85},{0,2,85},{0,2,101},{0,2,149},{0,2,101},{0,9,200},{0,7,20},{0,5,20},{0,4,25},{0,9,200},{2,3,200},{0,4,25},{0,3,218},{2,3,200},{0,3,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,11,257},{0,9,54},{0,6,114},{0,5,65},{0,9,728},{0,6,371},{0,5,80}, +{0,4,377},{0,5,949},{0,4,521},{0,11,257},{0,9,54},{1,5,97},{0,5,65},{2,2,723},{0,6,371},{0,5,80},{0,4,377},{1,4,723},{0,4,377},{0,8,50},{0,8,50},{0,8,50},{0,5,49},{0,5,164},{0,4,50},{0,4,50},{0,3,65},{0,3,245},{0,2,126},{0,8,50},{0,8,50},{0,8,50},{0,5,49},{1,2,162},{0,4,50},{0,4,50},{0,3,65},{1,2,162}, 
+{0,3,65},{3,2,202},{0,9,5},{1,5,16},{0,5,16},{3,2,202},{5,1,200},{0,5,16},{0,4,208},{5,1,200},{0,4,208},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,2,1},{0,2,1},{0,2,1},{0,1,4},{0,1,13},{0,1,13},{0,14,425},{0,11,234},{1,7,277},{0,7,245},{0,11,724},{0,8,289},{0,6,34},{0,5,308},{0,6,1087},{0,5,533},{1,11,201}, +{1,9,18},{1,7,21},{1,6,26},{1,8,723},{0,8,289},{0,6,34},{0,5,308},{4,2,723},{0,5,308},{0,11,225},{0,11,225},{0,11,225},{0,7,229},{0,8,162},{0,6,18},{0,6,18},{0,4,25},{0,4,338},{0,3,162},{1,8,2},{1,8,2},{1,8,2},{1,5,5},{0,8,162},{0,6,18},{0,6,18},{0,4,25},{4,0,162},{0,4,25},{3,4,200},{0,11,9},{1,7,20}, +{0,7,20},{3,4,200},{7,0,200},{0,7,20},{0,5,208},{7,0,200},{0,5,208},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{1,14,410},{1,11,209},{1,8,288},{1,7,234},{0,14,739},{0,10,254},{0,8,33},{0,6,270},{0,8,1131},{0,6,450},{1,14,266},{1,11,65},{2,7,106},{1,7,90},{3,4,723}, +{0,10,238},{0,8,17},{0,6,254},{7,0,723},{0,6,254},{1,11,209},{1,11,209},{1,11,209},{1,7,209},{0,11,178},{0,8,17},{0,8,17},{0,5,18},{0,6,376},{0,5,123},{1,11,65},{1,11,65},{1,11,65},{1,7,65},{3,1,162},{0,8,1},{0,8,1},{0,5,2},{3,3,162},{0,5,2},{3,7,200},{1,11,1},{2,7,25},{0,8,17},{3,7,200},{8,0,200},{0,8,17}, +{0,6,218},{8,0,200},{0,6,218},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,8,16},{0,8,16},{0,8,16},{0,5,17},{0,4,80},{0,4,80},{1,16,474},{1,13,276},{1,9,457},{1,9,292},{1,13,740},{1,10,298},{1,8,45},{1,7,315},{0,9,1013},{0,7,308},{2,13,201},{2,11,21},{2,9,21},{2,8,26},{5,0,723},{0,12,227},{1,8,29},{0,7,227},{5,4,723}, +{0,7,227},{1,13,272},{1,13,272},{1,13,272},{1,9,276},{1,10,180},{1,8,29},{1,8,29},{1,6,33},{0,8,306},{0,6,41},{2,10,2},{2,10,2},{2,10,2},{2,7,5},{4,0,162},{0,10,5},{0,10,5},{0,6,5},{1,7,162},{0,6,5},{5,3,200},{1,13,4},{2,9,20},{1,9,20},{5,3,200},{4,7,200},{1,9,20},{0,7,218},{4,7,200},{0,7,218},{1,0,272}, 
+{1,0,272},{1,0,272},{1,0,272},{1,7,17},{1,7,17},{1,7,17},{1,5,17},{0,6,40},{0,6,40},{2,15,426},{2,13,223},{2,10,283},{2,9,234},{1,16,739},{1,12,267},{1,10,33},{1,8,273},{0,11,913},{0,8,225},{2,15,257},{2,13,54},{3,9,97},{2,9,65},{4,6,723},{0,13,208},{1,10,17},{0,8,209},{3,8,723},{0,8,209},{2,12,219},{2,12,219},{2,12,219}, +{2,9,218},{1,13,180},{1,10,17},{1,10,17},{1,7,18},{0,9,229},{0,7,27},{2,12,50},{2,12,50},{2,12,50},{2,9,49},{3,6,162},{1,10,1},{1,10,1},{1,7,2},{3,6,162},{1,7,2},{1,19,200},{2,13,5},{3,9,16},{2,9,16},{1,19,200},{9,2,200},{2,9,16},{0,8,208},{9,2,200},{0,8,208},{2,0,218},{2,0,218},{2,0,218},{2,0,218},{1,10,16}, +{1,10,16},{1,10,16},{1,7,17},{0,8,17},{0,8,17},{2,18,450},{2,15,259},{2,11,410},{2,11,270},{2,15,749},{2,12,314},{2,10,59},{2,9,333},{0,13,868},{0,9,213},{3,15,201},{3,13,18},{3,11,21},{3,10,26},{6,2,723},{0,15,204},{2,10,34},{0,9,212},{8,3,723},{0,9,212},{2,15,250},{2,15,250},{2,15,250},{2,11,254},{2,12,187},{2,10,43},{2,10,43}, +{2,8,50},{0,11,189},{1,8,37},{3,12,2},{3,12,2},{3,12,2},{3,9,5},{5,2,162},{1,12,2},{1,12,2},{1,8,1},{8,1,162},{1,8,1},{5,8,200},{2,15,9},{3,11,20},{2,11,20},{5,8,200},{4,10,200},{2,11,20},{0,9,208},{4,10,200},{0,9,208},{2,0,250},{2,0,250},{2,0,250},{2,0,250},{2,9,25},{2,9,25},{2,9,25},{2,7,25},{0,9,5}, +{0,9,5},{3,18,410},{3,15,209},{3,12,288},{3,11,234},{2,18,739},{2,14,254},{2,12,33},{2,10,270},{0,15,804},{1,10,227},{3,18,266},{3,15,65},{4,11,106},{3,11,90},{5,8,723},{1,15,219},{2,12,17},{1,10,218},{4,10,723},{1,10,218},{3,15,209},{3,15,209},{3,15,209},{3,11,209},{2,15,178},{2,12,17},{2,12,17},{2,9,18},{0,13,171},{1,9,26},{3,15,65}, +{3,15,65},{3,15,65},{3,11,65},{5,5,162},{2,12,1},{2,12,1},{2,9,2},{5,7,162},{2,9,2},{5,11,200},{3,15,1},{4,11,25},{2,12,17},{5,11,200},{11,3,200},{2,12,17},{0,10,218},{11,3,200},{0,10,218},{3,0,208},{3,0,208},{3,0,208},{3,0,208},{2,12,16},{2,12,16},{2,12,16},{2,9,17},{1,10,9},{1,10,9},{3,20,474},{3,17,276},{3,13,457}, 
+{3,13,292},{3,17,740},{3,14,298},{3,12,45},{3,11,315},{0,16,747},{1,11,231},{4,17,201},{4,15,21},{4,13,21},{4,12,26},{7,4,723},{1,17,209},{3,12,29},{1,11,227},{9,5,723},{1,11,227},{3,17,272},{3,17,272},{3,17,272},{3,13,276},{3,14,180},{3,12,29},{3,12,29},{3,10,33},{0,15,171},{2,10,41},{4,14,2},{4,14,2},{4,14,2},{4,11,5},{7,1,162}, +{2,14,5},{2,14,5},{2,10,5},{10,2,162},{2,10,5},{8,2,200},{3,17,4},{4,13,20},{3,13,20},{8,2,200},{13,2,200},{3,13,20},{0,11,218},{13,2,200},{0,11,218},{3,0,272},{3,0,272},{3,0,272},{3,0,272},{3,11,17},{3,11,17},{3,11,17},{3,9,17},{1,12,8},{1,12,8},{4,19,426},{4,17,223},{4,14,283},{4,13,234},{3,20,739},{3,16,267},{3,14,33}, +{3,12,273},{0,18,727},{2,12,225},{4,19,257},{4,17,54},{5,13,97},{4,13,65},{6,10,723},{2,17,208},{3,14,17},{2,12,209},{12,3,723},{2,12,209},{4,16,219},{4,16,219},{4,16,219},{4,13,218},{3,17,180},{3,14,17},{3,14,17},{3,11,18},{1,15,171},{2,11,27},{4,16,50},{4,16,50},{4,16,50},{4,13,49},{5,10,162},{3,14,1},{3,14,1},{3,11,2},{12,1,162}, +{3,11,2},{3,23,200},{4,17,5},{5,13,16},{4,13,16},{3,23,200},{11,6,200},{4,13,16},{0,12,208},{11,6,200},{0,12,208},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{3,14,16},{3,14,16},{3,14,16},{3,11,17},{1,13,10},{1,13,10},{4,22,450},{4,19,259},{4,15,410},{4,15,270},{4,19,749},{4,16,314},{4,14,59},{4,13,333},{1,18,747},{2,13,213},{5,19,201}, +{5,17,18},{5,15,21},{5,14,26},{9,1,723},{2,19,204},{4,14,34},{2,13,212},{15,1,723},{2,13,212},{4,19,250},{4,19,250},{4,19,250},{4,15,254},{4,16,187},{4,14,43},{4,14,43},{4,12,50},{1,16,174},{3,12,37},{5,16,2},{5,16,2},{5,16,2},{5,13,5},{8,1,162},{3,16,2},{3,16,2},{3,12,1},{10,5,162},{3,12,1},{9,4,200},{4,19,9},{5,15,20}, +{4,15,20},{9,4,200},{9,10,200},{4,15,20},{0,13,208},{9,10,200},{0,13,208},{4,0,250},{4,0,250},{4,0,250},{4,0,250},{4,13,25},{4,13,25},{4,13,25},{4,11,25},{2,13,5},{2,13,5},{5,22,410},{5,19,209},{5,16,288},{5,15,234},{4,22,739},{4,18,254},{4,16,33},{4,14,270},{1,20,724},{3,14,227},{5,22,266},{5,19,65},{6,15,106},{5,15,90},{9,4,723}, 
+{3,19,219},{4,16,17},{3,14,218},{9,10,723},{3,14,218},{5,19,209},{5,19,209},{5,19,209},{5,15,209},{4,19,178},{4,16,17},{4,16,17},{4,13,18},{2,17,171},{3,13,26},{5,19,65},{5,19,65},{5,19,65},{5,15,65},{8,4,162},{4,16,1},{4,16,1},{4,13,2},{14,2,162},{4,13,2},{11,0,200},{5,19,1},{6,15,25},{4,16,17},{11,0,200},{13,7,200},{4,16,17}, +{0,14,218},{13,7,200},{0,14,218},{5,0,208},{5,0,208},{5,0,208},{5,0,208},{4,16,16},{4,16,16},{4,16,16},{4,13,17},{3,14,9},{3,14,9},{5,24,474},{5,21,276},{5,17,457},{5,17,292},{5,21,740},{5,18,298},{5,16,45},{5,15,315},{1,22,740},{3,15,231},{6,21,201},{6,19,21},{6,17,21},{6,16,26},{10,3,723},{3,21,209},{5,16,29},{3,15,227},{11,9,723}, +{3,15,227},{5,21,272},{5,21,272},{5,21,272},{5,17,276},{5,18,180},{5,16,29},{5,16,29},{5,14,33},{2,19,171},{4,14,41},{6,18,2},{6,18,2},{6,18,2},{6,15,5},{3,25,162},{4,18,5},{4,18,5},{4,14,5},{12,6,162},{4,14,5},{10,6,200},{5,21,4},{6,17,20},{5,17,20},{10,6,200},{15,6,200},{5,17,20},{0,15,218},{15,6,200},{0,15,218},{5,0,272}, +{5,0,272},{5,0,272},{5,0,272},{5,15,17},{5,15,17},{5,15,17},{5,13,17},{3,16,8},{3,16,8},{6,23,426},{6,21,223},{6,18,283},{6,17,234},{5,24,739},{5,20,267},{5,18,33},{5,16,273},{2,22,727},{4,16,225},{6,23,257},{6,21,54},{7,17,97},{6,17,65},{9,9,723},{4,21,208},{5,18,17},{4,16,209},{14,7,723},{4,16,209},{6,20,219},{6,20,219},{6,20,219}, +{6,17,218},{5,21,180},{5,18,17},{5,18,17},{5,15,18},{3,19,171},{4,15,27},{6,20,50},{6,20,50},{6,20,50},{6,17,49},{9,6,162},{5,18,1},{5,18,1},{5,15,2},{14,5,162},{5,15,2},{12,2,200},{6,21,5},{7,17,16},{6,17,16},{12,2,200},{13,10,200},{6,17,16},{0,16,208},{13,10,200},{0,16,208},{6,0,218},{6,0,218},{6,0,218},{6,0,218},{5,18,16}, 
+{5,18,16},{5,18,16},{5,15,17},{3,17,10},{3,17,10},{6,26,450},{6,23,259},{6,19,410},{6,19,270},{6,23,749},{6,20,314},{6,18,59},{6,17,333},{3,22,747},{4,17,213},{7,23,201},{7,21,18},{7,19,21},{7,18,26},{11,5,723},{4,23,204},{6,18,34},{4,17,212},{12,11,723},{4,17,212},{6,23,250},{6,23,250},{6,23,250},{6,19,254},{6,20,187},{6,18,43},{6,18,43}, +{6,16,50},{3,20,174},{5,16,37},{7,20,2},{7,20,2},{7,20,2},{7,17,5},{10,5,162},{5,20,2},{5,20,2},{5,16,1},{12,9,162},{5,16,1},{11,8,200},{6,23,9},{7,19,20},{6,19,20},{11,8,200},{11,14,200},{6,19,20},{0,17,208},{11,14,200},{0,17,208},{6,0,250},{6,0,250},{6,0,250},{6,0,250},{6,17,25},{6,17,25},{6,17,25},{6,15,25},{4,17,5}, +{4,17,5},{7,26,410},{7,23,209},{7,20,288},{7,19,234},{6,26,739},{6,22,254},{6,20,33},{6,18,270},{3,24,724},{5,18,227},{7,26,266},{7,23,65},{7,20,144},{7,19,90},{11,8,723},{5,23,219},{6,20,17},{5,18,218},{11,14,723},{5,18,218},{7,23,209},{7,23,209},{7,23,209},{7,19,209},{6,23,178},{6,20,17},{6,20,17},{6,17,18},{4,21,171},{5,17,26},{7,23,65}, +{7,23,65},{7,23,65},{7,19,65},{10,8,162},{6,20,1},{6,20,1},{6,17,2},{11,12,162},{6,17,2},{13,4,200},{7,23,1},{8,19,50},{6,20,17},{13,4,200},{15,11,200},{6,20,17},{0,18,218},{15,11,200},{0,18,218},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{6,20,16},{6,20,16},{6,20,16},{6,17,17},{5,18,9},{5,18,9},{7,28,474},{7,25,276},{7,21,457}, +{7,21,292},{7,25,740},{7,22,298},{7,20,45},{7,19,315},{3,26,740},{5,19,231},{8,23,283},{8,22,133},{8,20,97},{8,20,133},{12,7,723},{5,25,209},{7,20,29},{5,19,227},{13,13,723},{5,19,227},{7,25,272},{7,25,272},{7,25,272},{7,21,276},{7,22,180},{7,20,29},{7,20,29},{7,18,33},{4,23,171},{6,18,41},{8,20,81},{8,20,81},{8,20,81},{8,19,82},{9,14,162}, 
+{6,22,5},{6,22,5},{6,18,5},{14,10,162},{6,18,5},{15,0,200},{7,25,4},{8,20,16},{7,21,20},{15,0,200},{12,16,200},{7,21,20},{0,19,218},{12,16,200},{0,19,218},{7,0,272},{7,0,272},{7,0,272},{7,0,272},{7,19,17},{7,19,17},{7,19,17},{7,17,17},{5,20,8},{5,20,8},{8,26,642},{8,24,459},{8,22,462},{8,21,467},{7,28,739},{7,24,267},{7,22,33}, +{7,20,273},{4,26,727},{6,20,225},{8,26,201},{8,24,18},{8,22,21},{8,21,26},{14,3,723},{6,25,208},{7,22,17},{6,20,209},{11,17,723},{6,20,209},{8,23,443},{8,23,443},{8,23,443},{8,20,446},{7,25,180},{7,22,17},{7,22,17},{7,19,18},{5,23,171},{6,19,27},{8,23,2},{8,23,2},{8,23,2},{8,20,5},{11,10,162},{7,22,1},{7,22,1},{7,19,2},{5,23,162}, +{7,19,2},{14,6,200},{8,24,17},{8,22,20},{7,22,17},{14,6,200},{15,14,200},{7,22,17},{0,20,208},{15,14,200},{0,20,208},{8,0,442},{8,0,442},{8,0,442},{8,0,442},{7,22,16},{7,22,16},{7,22,16},{7,19,17},{5,21,10},{5,21,10},{8,28,420},{8,26,223},{8,23,283},{8,22,234},{7,31,872},{7,26,371},{7,23,201},{7,21,368},{5,26,747},{6,21,213},{8,28,251}, +{8,26,54},{9,22,97},{8,22,65},{13,9,723},{6,27,204},{7,23,57},{6,21,212},{14,15,723},{6,21,212},{8,25,219},{8,25,219},{8,25,219},{8,22,218},{7,27,308},{7,24,146},{7,24,146},{7,20,145},{5,24,174},{7,20,37},{8,25,50},{8,25,50},{8,25,50},{8,22,49},{12,9,162},{7,24,2},{7,24,2},{7,20,1},{14,13,162},{7,20,1},{13,12,200},{8,26,5},{9,22,16}, +{8,22,16},{13,12,200},{13,18,200},{8,22,16},{0,21,208},{13,18,200},{0,21,208},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{7,24,145},{7,24,145},{7,24,145},{7,20,145},{6,21,5},{6,21,5},{8,31,474},{8,28,276},{9,24,425},{8,24,292},{8,28,740},{8,25,298},{8,23,61},{8,22,315},{5,28,724},{7,22,227},{9,28,200},{9,26,13},{9,24,25},{9,23,29},{13,12,723}, 
+{7,27,219},{8,23,45},{7,22,218},{13,18,723},{7,22,218},{8,28,272},{8,28,272},{8,28,272},{8,24,276},{8,25,180},{8,23,36},{8,23,36},{8,21,33},{6,25,171},{7,21,26},{9,25,0},{9,25,0},{9,25,0},{9,23,4},{15,2,162},{8,23,20},{8,23,20},{8,21,17},{13,16,162},{8,21,17},{15,8,200},{8,28,4},{9,24,25},{8,24,20},{15,8,200},{11,22,200},{8,24,20}, +{0,22,218},{11,22,200},{0,22,218},{8,0,272},{8,0,272},{8,0,272},{8,0,272},{8,22,17},{8,22,17},{8,22,17},{8,20,17},{7,22,9},{7,22,9},{9,31,410},{9,28,212},{9,25,288},{9,24,224},{8,31,739},{8,27,254},{8,25,33},{8,23,270},{5,30,740},{7,23,231},{9,31,266},{9,28,68},{10,24,97},{9,24,80},{14,11,723},{7,29,209},{8,25,17},{7,23,227},{15,17,723}, +{7,23,227},{9,27,212},{9,27,212},{9,27,212},{9,24,208},{8,28,180},{8,25,17},{8,25,17},{8,22,18},{6,27,171},{7,23,62},{9,27,68},{9,27,68},{9,27,68},{9,24,64},{11,18,162},{8,25,1},{8,25,1},{8,22,2},{10,21,162},{8,22,2},{14,14,200},{9,28,4},{10,24,16},{9,24,16},{14,14,200},{14,20,200},{9,24,16},{0,23,218},{14,20,200},{0,23,218},{9,0,208}, +{9,0,208},{9,0,208},{9,0,208},{8,25,16},{8,25,16},{8,25,16},{8,22,17},{7,24,8},{7,24,8},{9,31,570},{9,30,276},{9,26,457},{9,26,292},{9,30,740},{9,27,298},{9,25,45},{9,24,324},{6,30,727},{7,24,280},{10,30,201},{10,28,18},{10,26,21},{10,25,26},{13,17,723},{8,29,227},{9,25,29},{8,24,224},{13,21,723},{8,24,224},{9,30,272},{9,30,272},{9,30,272}, +{9,26,276},{9,27,180},{9,25,29},{9,25,29},{9,23,33},{7,27,171},{8,23,41},{10,27,2},{10,27,2},{10,27,2},{10,24,5},{13,14,162},{8,27,5},{8,27,5},{8,23,5},{8,25,162},{8,23,5},{13,20,200},{9,30,4},{10,26,20},{9,26,20},{13,20,200},{13,23,202},{9,26,20},{0,24,208},{13,23,202},{0,24,208},{9,0,272},{9,0,272},{9,0,272},{9,0,272},{9,24,17}, 
+{9,24,17},{9,24,17},{9,22,17},{7,25,10},{7,25,10},{10,31,468},{10,30,223},{10,27,283},{10,26,234},{9,31,835},{9,29,267},{9,27,33},{9,25,273},{7,30,747},{8,25,225},{11,29,283},{10,30,54},{11,26,97},{10,26,65},{15,13,723},{8,30,208},{9,27,17},{8,25,209},{11,25,723},{8,25,209},{10,29,219},{10,29,219},{10,29,219},{10,26,218},{9,30,180},{9,27,17},{9,27,17}, +{9,24,20},{7,28,174},{8,24,17},{10,29,50},{10,29,50},{10,29,50},{10,26,49},{14,13,162},{9,27,1},{9,27,1},{9,24,4},{11,23,162},{9,24,4},{15,16,200},{10,30,5},{11,26,16},{10,26,16},{15,16,200},{15,22,200},{10,26,16},{0,25,208},{15,22,200},{0,25,208},{10,0,218},{10,0,218},{10,0,218},{10,0,218},{9,27,16},{9,27,16},{9,27,16},{9,24,20},{8,24,17}, +{8,24,17},{11,31,632},{10,31,297},{11,28,425},{10,28,292},{10,31,804},{10,29,298},{10,27,61},{10,26,315},{7,31,823},{8,26,231},{11,31,232},{11,30,13},{11,28,25},{11,27,29},{15,16,723},{9,31,216},{10,27,45},{8,26,227},{15,22,723},{8,26,227},{10,31,288},{10,31,288},{10,31,288},{10,28,276},{10,29,180},{10,27,36},{10,27,36},{10,25,33},{7,30,189},{9,25,30},{11,29,0}, +{11,29,0},{11,29,0},{11,27,4},{14,16,162},{9,29,4},{9,29,4},{9,25,5},{15,20,162},{9,25,5},{15,19,200},{11,30,13},{11,28,25},{10,28,20},{15,19,200},{13,26,200},{10,28,20},{0,26,218},{13,26,200},{0,26,218},{10,0,272},{10,0,272},{10,0,272},{10,0,272},{10,26,17},{10,26,17},{10,26,17},{10,24,17},{8,27,5},{8,27,5},{11,31,696},{11,31,237},{11,29,288}, +{11,28,224},{11,31,888},{10,31,254},{10,29,33},{10,27,270},{8,31,824},{9,27,227},{12,31,283},{11,31,93},{12,28,97},{11,28,80},{13,25,723},{10,31,238},{10,29,17},{9,27,218},{12,27,723},{9,27,218},{11,31,212},{11,31,212},{11,31,212},{11,28,208},{10,31,196},{10,29,17},{10,29,17},{10,26,18},{8,30,171},{9,26,26},{11,31,68},{11,31,68},{11,31,68},{11,28,64},{13,22,162}, 
+{10,29,1},{10,29,1},{10,26,2},{12,25,162},{10,26,2},{13,28,200},{11,31,29},{12,28,16},{11,28,16},{13,28,200},{11,30,200},{11,28,16},{0,27,218},{11,30,200},{0,27,218},{11,0,208},{11,0,208},{11,0,208},{11,0,208},{10,29,16},{10,29,16},{10,29,16},{10,26,17},{9,27,9},{9,27,9},{12,31,804},{12,31,492},{11,30,457},{11,30,292},{11,31,1080},{11,31,298},{11,29,45}, +{11,28,324},{9,31,920},{9,28,218},{12,31,363},{12,31,51},{12,30,21},{12,29,26},{15,21,723},{11,31,282},{11,29,29},{9,28,217},{15,25,723},{9,28,217},{11,31,372},{11,31,372},{11,31,372},{11,30,276},{11,31,180},{11,29,29},{11,29,29},{11,27,33},{8,31,184},{10,27,41},{12,31,2},{12,31,2},{12,31,2},{12,28,5},{15,18,162},{10,31,5},{10,31,5},{10,27,5},{10,29,162}, +{10,27,5},{15,24,200},{12,31,50},{12,30,20},{11,30,20},{15,24,200},{15,27,202},{11,30,20},{0,28,208},{15,27,202},{0,28,208},{11,0,272},{11,0,272},{11,0,272},{11,0,272},{11,28,17},{11,28,17},{11,28,17},{11,26,17},{9,29,8},{9,29,8},{12,31,996},{12,31,492},{12,31,283},{12,30,234},{12,31,1068},{11,31,458},{11,31,33},{11,29,273},{10,31,999},{10,29,225},{13,31,379}, +{13,31,171},{13,30,97},{12,30,65},{14,27,723},{12,31,371},{11,31,17},{10,29,209},{13,29,723},{10,29,209},{12,31,267},{12,31,267},{12,31,267},{12,30,218},{12,30,333},{11,31,17},{11,31,17},{11,28,20},{9,31,212},{10,28,17},{13,30,81},{13,30,81},{13,30,81},{12,30,49},{13,27,162},{11,31,1},{11,31,1},{11,28,4},{13,27,162},{11,28,4},{15,27,202},{13,31,90},{13,30,16}, +{12,30,16},{15,27,202},{15,28,202},{12,30,16},{0,29,208},{15,28,202},{0,29,208},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{11,31,16},{11,31,16},{11,31,16},{11,28,20},{9,30,10},{9,30,10},{13,31,877},{13,31,605},{13,31,436},{12,31,288},{13,31,1021},{12,31,397},{12,31,36},{12,30,210},{11,31,910},{10,30,126},{14,31,414},{13,31,205},{13,31,36},{13,31,4},{15,26,546}, 
+{13,31,317},{12,31,20},{10,30,122},{14,29,546},{10,30,122},{13,31,436},{13,31,436},{13,31,436},{12,31,288},{12,31,276},{12,31,36},{12,31,36},{12,29,33},{10,31,261},{11,29,30},{13,31,36},{13,31,36},{13,31,36},{13,31,4},{13,30,162},{12,31,20},{12,31,20},{11,29,5},{12,30,162},{11,29,5},{15,28,117},{14,31,61},{14,31,25},{13,31,4},{15,28,117},{14,31,117},{13,31,4}, +{0,30,113},{14,31,117},{0,30,113},{12,0,272},{12,0,272},{12,0,272},{12,0,272},{12,30,17},{12,30,17},{12,30,17},{12,28,17},{10,31,5},{10,31,5},{13,31,845},{13,31,573},{13,31,404},{13,31,244},{13,31,797},{13,31,365},{12,31,196},{12,30,82},{12,31,737},{11,31,58},{14,31,190},{14,31,126},{14,31,90},{14,31,82},{14,31,334},{13,31,221},{13,31,52},{11,31,49},{14,30,333}, +{11,31,49},{13,31,404},{13,31,404},{13,31,404},{13,31,244},{13,31,356},{12,31,196},{12,31,196},{12,30,18},{11,31,333},{11,30,26},{14,31,90},{14,31,90},{14,31,90},{14,31,82},{15,26,162},{13,31,52},{13,31,52},{12,30,2},{14,29,162},{12,30,2},{15,30,25},{15,30,41},{14,31,9},{14,31,1},{15,30,25},{15,30,29},{14,31,1},{0,31,49},{15,30,29},{0,31,49},{13,0,208}, +{13,0,208},{13,0,208},{13,0,208},{12,31,52},{12,31,52},{12,31,52},{12,30,17},{11,31,9},{11,31,9},{14,31,642},{14,31,578},{14,31,542},{13,31,441},{14,31,690},{13,31,370},{13,31,201},{13,31,32},{13,31,610},{12,31,40},{14,31,201},{14,31,137},{14,31,101},{14,31,37},{15,29,193},{14,31,121},{14,31,85},{12,31,4},{14,31,193},{12,31,4},{14,31,542},{14,31,542},{14,31,542}, +{13,31,441},{13,31,473},{13,31,201},{13,31,201},{13,31,32},{12,31,401},{12,31,40},{14,31,101},{14,31,101},{14,31,101},{14,31,37},{15,28,145},{14,31,85},{14,31,85},{12,31,4},{15,29,149},{12,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,272},{13,0,272},{13,0,272},{13,0,272},{13,31,32}, 
+{13,31,32},{13,31,32},{13,30,17},{12,31,40},{12,31,40},{14,31,418},{14,31,354},{14,31,318},{14,31,254},{14,31,370},{14,31,242},{14,31,206},{13,31,32},{13,31,418},{13,31,80},{15,31,81},{15,31,81},{15,31,81},{15,31,81},{15,30,54},{14,31,73},{14,31,37},{14,31,9},{15,30,66},{14,31,9},{14,31,318},{14,31,318},{14,31,318},{14,31,254},{14,31,270},{14,31,206},{14,31,206}, +{13,31,32},{13,31,249},{13,31,80},{15,31,81},{15,31,81},{15,31,81},{15,31,81},{15,29,45},{14,31,37},{14,31,37},{14,31,9},{15,30,41},{14,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{14,0,218},{14,0,218},{14,0,218},{14,0,218},{13,31,160},{13,31,160},{13,31,160},{13,31,32},{13,31,80}, +{13,31,80},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{0,6,925},{0,5,658},{0,4,274},{0,3,670},{0,3,1039},{0,3,751},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{0,6,925},{0,5,658},{0,4,274},{0,3,670},{3,0,925},{0,3,670},{0,4,1},{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{0,1,98},{0,1,59},{0,4,1}, +{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{1,0,85},{0,1,50},{1,6,421},{0,7,113},{0,5,5},{0,4,130},{1,6,421},{3,2,421},{0,4,130},{0,3,445},{3,2,421},{0,3,445},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,12,425},{0,9,52},{0,6,10}, +{0,6,82},{0,8,1261},{0,6,805},{0,5,322},{0,4,833},{0,4,1445},{0,4,977},{0,12,425},{0,9,52},{0,6,10},{0,6,82},{0,8,1261},{0,6,805},{0,5,322},{0,4,833},{4,0,1261},{0,4,833},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{0,3,225},{0,3,117},{0,3,117},{0,2,125},{0,2,257},{0,2,161},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{1,0,221}, +{0,3,117},{0,3,117},{0,2,125},{1,1,221},{0,2,125},{3,2,421},{0,9,52},{0,6,10},{0,6,82},{3,2,421},{5,1,421},{0,6,82},{0,4,433},{5,1,421},{0,4,433},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,14,430},{0,11,29},{0,7,74},{0,7,46},{0,10,1514},{0,8,874},{0,6,307}, 
+{0,5,917},{0,5,1814},{0,4,1074},{0,14,430},{0,11,29},{0,7,74},{0,7,46},{3,0,1514},{0,8,874},{0,6,307},{0,5,917},{5,0,1514},{0,5,917},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{0,5,340},{0,5,160},{0,5,160},{0,3,169},{0,3,421},{0,3,250},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{1,2,338},{0,5,160},{0,5,160},{0,3,169},{1,2,338}, +{0,3,169},{4,1,421},{0,11,20},{1,7,5},{0,7,37},{4,1,421},{7,0,421},{0,7,37},{0,5,433},{7,0,421},{0,5,433},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{0,17,542},{0,13,130},{0,9,285},{0,8,137},{0,12,1517},{0,9,737},{0,7,185},{0,6,794},{0,7,1982},{0,5,1062},{1,13,450}, +{1,11,77},{1,8,34},{1,7,122},{3,3,1517},{0,9,737},{0,7,185},{0,6,794},{2,5,1514},{0,6,794},{0,12,121},{0,12,121},{0,12,121},{0,7,125},{0,8,338},{0,6,98},{0,6,98},{0,4,97},{0,4,514},{0,4,241},{1,9,25},{1,9,25},{1,9,25},{1,6,26},{0,8,338},{0,6,98},{0,6,98},{0,4,97},{4,0,338},{0,4,97},{5,0,421},{0,13,9},{1,8,9}, +{0,8,16},{5,0,421},{8,0,421},{0,8,16},{0,6,433},{8,0,421},{0,6,433},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{1,16,697},{1,13,297},{1,9,354},{1,9,309},{0,15,1517},{0,11,630},{0,9,50},{0,7,670},{0,8,2198},{0,6,1109},{1,16,441},{1,13,41},{1,9,98},{1,9,53},{4,2,1514}, +{0,11,630},{0,9,50},{0,7,670},{6,2,1514},{0,7,670},{1,12,273},{1,12,273},{1,12,273},{1,8,273},{0,11,338},{0,8,41},{0,8,41},{0,5,50},{0,6,680},{0,5,275},{1,12,17},{1,12,17},{1,12,17},{1,8,17},{3,1,338},{0,8,41},{0,8,41},{0,5,50},{3,3,338},{0,5,50},{6,0,421},{0,15,1},{2,9,5},{0,9,1},{6,0,421},{5,6,421},{0,9,1}, +{0,7,445},{5,6,421},{0,7,445},{1,0,272},{1,0,272},{1,0,272},{1,0,272},{0,7,1},{0,7,1},{0,7,1},{0,4,0},{0,3,106},{0,3,106},{1,19,821},{1,15,405},{1,11,570},{1,10,410},{0,18,1514},{0,13,577},{0,10,14},{0,8,602},{0,10,2462},{0,7,1175},{2,16,450},{2,13,77},{2,10,35},{2,10,107},{3,8,1514},{0,13,577},{0,10,14},{0,8,602},{4,6,1514}, 
+{0,8,602},{1,14,401},{1,14,401},{1,14,401},{1,9,404},{0,13,340},{0,10,13},{0,10,13},{0,6,29},{0,7,851},{0,6,353},{2,11,25},{2,11,25},{2,11,25},{2,8,26},{4,0,338},{0,10,13},{0,10,13},{0,6,29},{1,7,338},{0,6,29},{5,6,421},{1,15,5},{2,10,10},{0,10,10},{5,6,421},{10,1,421},{0,10,10},{0,8,433},{10,1,421},{0,8,433},{1,0,400}, +{1,0,400},{1,0,400},{1,0,400},{0,9,1},{0,9,1},{0,9,1},{0,6,4},{0,4,208},{0,4,208},{1,22,902},{1,17,485},{2,11,750},{1,11,482},{0,21,1566},{0,15,570},{0,11,95},{0,9,582},{0,11,2337},{0,9,933},{2,18,430},{2,15,29},{2,11,74},{2,11,46},{5,4,1514},{0,15,521},{0,11,46},{0,9,533},{9,1,1514},{0,9,533},{1,17,481},{1,17,481},{1,17,481}, +{1,11,481},{0,16,387},{0,12,51},{0,12,51},{0,7,75},{0,8,755},{0,7,222},{2,14,10},{2,14,10},{2,14,10},{2,10,10},{3,6,338},{0,12,2},{0,12,2},{0,7,26},{3,6,338},{0,7,26},{7,2,421},{1,17,4},{3,11,5},{1,11,1},{7,2,421},{12,0,421},{1,11,1},{0,9,433},{12,0,421},{0,9,433},{1,0,481},{1,0,481},{1,0,481},{1,0,481},{0,12,50}, +{0,12,50},{0,12,50},{0,7,50},{0,6,157},{0,6,157},{2,21,866},{2,17,454},{2,13,609},{2,12,461},{1,20,1515},{1,15,578},{1,12,19},{1,10,603},{0,13,2214},{0,10,707},{3,17,450},{3,15,77},{3,12,34},{3,11,122},{7,0,1514},{0,16,458},{1,12,18},{0,10,482},{4,9,1514},{0,10,482},{2,16,445},{2,16,445},{2,16,445},{2,11,449},{1,15,341},{1,12,19},{1,12,19}, +{1,8,26},{0,10,635},{0,8,106},{3,13,25},{3,13,25},{3,13,25},{3,10,26},{5,2,338},{0,14,2},{0,14,2},{0,9,20},{8,1,338},{0,9,20},{7,4,421},{2,17,9},{3,12,9},{1,12,9},{7,4,421},{11,3,421},{1,12,9},{0,10,433},{11,3,421},{0,10,433},{2,0,445},{2,0,445},{2,0,445},{2,0,445},{1,11,2},{1,11,2},{1,11,2},{1,7,10},{0,8,90}, +{0,8,90},{2,24,902},{2,19,482},{3,13,723},{2,13,482},{1,23,1578},{1,17,566},{1,14,117},{1,11,590},{0,15,2046},{0,11,535},{3,20,441},{3,17,41},{3,13,98},{3,13,53},{7,3,1514},{0,18,429},{2,13,50},{0,11,454},{11,2,1514},{0,11,454},{2,19,481},{2,19,481},{2,19,481},{2,13,481},{1,18,404},{1,14,68},{1,14,68},{1,10,89},{0,12,557},{0,10,49},{3,16,17}, 
+{3,16,17},{3,16,17},{3,12,17},{5,5,338},{1,14,4},{1,14,4},{0,10,13},{5,7,338},{0,10,13},{8,2,421},{2,19,1},{4,13,5},{2,13,1},{8,2,421},{14,1,421},{2,13,1},{0,11,445},{14,1,421},{0,11,445},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{1,14,64},{1,14,64},{1,14,64},{1,9,65},{0,10,40},{0,10,40},{3,23,854},{3,19,438},{3,15,603}, +{3,14,443},{2,22,1515},{2,17,578},{2,14,15},{2,12,603},{0,16,1911},{0,12,458},{4,20,450},{4,17,77},{4,14,35},{4,14,107},{5,12,1514},{0,20,425},{2,14,14},{0,12,433},{13,1,1514},{0,12,433},{3,18,434},{3,18,434},{3,18,434},{3,13,437},{2,17,341},{2,14,14},{2,14,14},{2,10,30},{0,14,477},{0,11,35},{4,15,25},{4,15,25},{4,15,25},{4,12,26},{7,1,338}, +{1,16,5},{1,16,5},{1,11,10},{10,2,338},{1,11,10},{8,5,421},{3,19,5},{4,14,10},{2,14,10},{8,5,421},{12,5,421},{2,14,10},{0,12,433},{12,5,421},{0,12,433},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{2,13,2},{2,13,2},{2,13,2},{2,10,5},{0,12,25},{0,12,25},{3,26,902},{3,21,485},{4,15,750},{3,15,482},{2,25,1566},{2,19,570},{2,15,95}, +{2,13,582},{0,18,1787},{0,13,442},{4,22,430},{4,19,29},{4,15,74},{4,15,46},{8,3,1514},{1,20,425},{2,15,46},{0,13,442},{11,5,1514},{0,13,442},{3,21,481},{3,21,481},{3,21,481},{3,15,481},{2,20,387},{2,16,51},{2,16,51},{2,11,75},{0,16,419},{1,12,69},{4,18,10},{4,18,10},{4,18,10},{4,14,10},{5,10,338},{2,16,2},{2,16,2},{1,12,20},{12,1,338}, +{1,12,20},{9,4,421},{3,21,4},{5,15,5},{3,15,1},{9,4,421},{15,3,421},{3,15,1},{0,13,433},{15,3,421},{0,13,433},{3,0,481},{3,0,481},{3,0,481},{3,0,481},{2,16,50},{2,16,50},{2,16,50},{2,11,50},{0,13,9},{0,13,9},{4,25,866},{4,21,454},{4,17,609},{4,16,461},{3,24,1515},{3,19,578},{3,16,19},{3,14,603},{0,20,1686},{1,14,458},{5,21,450}, +{5,19,77},{5,16,34},{5,15,122},{3,24,1514},{1,22,425},{3,16,18},{1,14,433},{14,3,1514},{1,14,433},{4,20,445},{4,20,445},{4,20,445},{4,15,449},{3,19,341},{3,16,19},{3,16,19},{3,12,26},{0,17,372},{1,13,45},{5,17,25},{5,17,25},{5,17,25},{5,14,26},{8,1,338},{2,18,2},{2,18,2},{2,13,20},{10,5,338},{2,13,20},{11,0,421},{4,21,9},{5,16,9}, 
+{3,16,9},{11,0,421},{13,7,421},{3,16,9},{0,14,433},{13,7,421},{0,14,433},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{3,15,2},{3,15,2},{3,15,2},{3,11,10},{0,15,5},{0,15,5},{4,28,902},{4,23,482},{5,17,723},{4,17,482},{3,27,1578},{3,21,566},{3,18,117},{3,15,590},{0,22,1614},{1,15,462},{5,24,441},{5,21,41},{5,17,98},{5,17,53},{5,20,1514}, +{2,22,429},{4,17,50},{2,15,454},{13,6,1514},{2,15,454},{4,23,481},{4,23,481},{4,23,481},{4,17,481},{3,22,404},{3,18,68},{3,18,68},{3,14,89},{0,19,347},{2,14,49},{5,20,17},{5,20,17},{5,20,17},{5,16,17},{8,4,338},{3,18,4},{3,18,4},{2,14,13},{14,2,338},{2,14,13},{11,3,421},{4,23,1},{6,17,5},{4,17,1},{11,3,421},{15,6,421},{4,17,1}, +{0,15,445},{15,6,421},{0,15,445},{4,0,481},{4,0,481},{4,0,481},{4,0,481},{3,18,64},{3,18,64},{3,18,64},{3,13,65},{1,16,8},{1,16,8},{5,27,854},{5,23,438},{5,19,603},{5,18,443},{4,26,1515},{4,21,578},{4,18,15},{4,16,603},{0,23,1566},{2,16,458},{6,24,450},{6,21,77},{6,18,35},{6,18,107},{11,1,1514},{2,24,425},{4,18,14},{2,16,433},{15,5,1514}, +{2,16,433},{5,22,434},{5,22,434},{5,22,434},{5,17,437},{4,21,341},{4,18,14},{4,18,14},{4,14,30},{0,21,341},{2,15,35},{6,19,25},{6,19,25},{6,19,25},{6,16,26},{3,25,338},{3,20,5},{3,20,5},{3,15,10},{12,6,338},{3,15,10},{12,2,421},{5,23,5},{6,18,10},{4,18,10},{12,2,421},{14,9,421},{4,18,10},{0,16,433},{14,9,421},{0,16,433},{5,0,433}, +{5,0,433},{5,0,433},{5,0,433},{4,17,2},{4,17,2},{4,17,2},{4,14,5},{1,17,10},{1,17,10},{5,30,902},{5,25,485},{6,19,750},{5,19,482},{4,29,1566},{4,23,570},{4,19,95},{4,17,582},{0,25,1533},{2,17,442},{6,26,430},{6,23,29},{6,19,74},{6,19,46},{10,7,1514},{3,24,425},{4,19,46},{2,17,442},{13,9,1514},{2,17,442},{5,25,481},{5,25,481},{5,25,481}, 
+{5,19,481},{4,24,387},{4,20,51},{4,20,51},{4,15,75},{1,21,341},{3,16,69},{6,22,10},{6,22,10},{6,22,10},{6,18,10},{9,6,338},{4,20,2},{4,20,2},{3,16,20},{14,5,338},{3,16,20},{11,8,421},{5,25,4},{7,19,5},{5,19,1},{11,8,421},{11,14,421},{5,19,1},{0,17,433},{11,14,421},{0,17,433},{5,0,481},{5,0,481},{5,0,481},{5,0,481},{4,20,50}, +{4,20,50},{4,20,50},{4,15,50},{2,17,9},{2,17,9},{6,29,866},{6,25,454},{6,21,609},{6,20,461},{5,28,1515},{5,23,578},{5,20,19},{5,18,603},{0,27,1521},{3,18,458},{7,25,450},{7,23,77},{7,20,34},{7,19,122},{12,3,1514},{3,26,425},{5,20,18},{3,18,433},{11,13,1514},{3,18,433},{6,24,445},{6,24,445},{6,24,445},{6,19,449},{5,23,341},{5,20,19},{5,20,19}, +{5,16,26},{1,23,341},{3,17,45},{7,21,25},{7,21,25},{7,21,25},{7,18,26},{10,5,338},{4,22,2},{4,22,2},{4,17,20},{12,9,338},{4,17,20},{13,4,421},{6,25,9},{7,20,9},{5,20,9},{13,4,421},{15,11,421},{5,20,9},{0,18,433},{15,11,421},{0,18,433},{6,0,445},{6,0,445},{6,0,445},{6,0,445},{5,19,2},{5,19,2},{5,19,2},{5,15,10},{2,19,5}, +{2,19,5},{6,31,914},{6,27,482},{7,21,723},{6,21,482},{5,31,1578},{5,25,566},{5,22,117},{5,19,590},{1,27,1535},{3,19,462},{7,28,441},{7,25,41},{7,21,98},{7,21,53},{8,19,1514},{4,26,429},{6,21,50},{4,19,454},{15,10,1514},{4,19,454},{6,27,481},{6,27,481},{6,27,481},{6,21,481},{5,26,404},{5,22,68},{5,22,68},{5,18,89},{2,23,347},{4,18,49},{7,24,17}, +{7,24,17},{7,24,17},{7,20,17},{10,8,338},{5,22,4},{5,22,4},{4,18,13},{11,12,338},{4,18,13},{15,0,421},{6,27,1},{8,21,50},{6,21,1},{15,0,421},{13,15,421},{6,21,1},{0,19,445},{13,15,421},{0,19,445},{6,0,481},{6,0,481},{6,0,481},{6,0,481},{5,22,64},{5,22,64},{5,22,64},{5,17,65},{3,20,8},{3,20,8},{7,31,854},{7,27,438},{7,23,603}, 
+{7,22,443},{6,30,1515},{6,25,578},{6,22,15},{6,20,603},{1,29,1518},{4,20,458},{7,31,565},{7,27,149},{8,22,174},{7,22,154},{13,5,1514},{4,28,425},{6,22,14},{4,20,433},{12,15,1514},{4,20,433},{7,26,434},{7,26,434},{7,26,434},{7,21,437},{6,25,341},{6,22,14},{6,22,14},{6,18,30},{2,25,341},{4,19,35},{7,26,145},{7,26,145},{7,26,145},{7,21,148},{9,14,338}, +{5,24,5},{5,24,5},{5,19,10},{14,10,338},{5,19,10},{15,3,421},{7,27,5},{8,22,5},{6,22,10},{15,3,421},{15,14,421},{6,22,10},{0,20,433},{15,14,421},{0,20,433},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{6,21,2},{6,21,2},{6,21,2},{6,18,5},{3,21,10},{3,21,10},{7,31,1046},{7,29,485},{7,24,770},{7,23,482},{7,29,1598},{6,27,570},{6,23,95}, +{6,21,582},{2,29,1533},{4,21,442},{8,28,450},{8,26,77},{8,23,35},{8,23,107},{15,1,1514},{5,28,425},{6,23,46},{4,21,442},{15,13,1514},{4,21,442},{7,29,481},{7,29,481},{7,29,481},{7,23,481},{6,28,387},{6,24,51},{6,24,51},{6,19,75},{3,25,341},{5,20,69},{8,24,25},{8,24,25},{8,24,25},{8,21,26},{11,10,338},{6,24,2},{6,24,2},{5,20,20},{5,23,338}, +{5,20,20},{13,12,421},{7,29,4},{8,23,10},{7,23,1},{13,12,421},{13,18,421},{7,23,1},{0,21,433},{13,18,421},{0,21,433},{7,0,481},{7,0,481},{7,0,481},{7,0,481},{6,24,50},{6,24,50},{6,24,50},{6,19,50},{4,21,9},{4,21,9},{8,31,1106},{8,28,714},{8,24,749},{8,24,734},{7,31,1542},{7,27,578},{7,24,19},{7,22,603},{2,31,1521},{5,22,458},{8,31,430}, +{8,28,38},{8,24,73},{8,24,58},{14,7,1514},{5,30,425},{7,24,18},{5,22,433},{13,17,1514},{5,22,433},{8,27,686},{8,27,686},{8,27,686},{8,23,686},{7,27,341},{7,24,19},{7,24,19},{7,20,26},{3,27,341},{5,21,45},{8,27,10},{8,27,10},{8,27,10},{8,23,10},{12,9,338},{6,26,2},{6,26,2},{6,21,20},{14,13,338},{6,21,20},{15,8,421},{7,31,20},{9,24,4}, 
+{7,24,9},{15,8,421},{15,17,421},{7,24,9},{0,22,433},{15,17,421},{0,22,433},{8,0,685},{8,0,685},{8,0,685},{8,0,685},{7,23,2},{7,23,2},{7,23,2},{7,19,10},{4,23,5},{4,23,5},{8,31,1034},{8,30,438},{8,26,603},{8,25,443},{8,30,1806},{7,29,566},{7,26,117},{7,23,590},{3,31,1535},{5,23,462},{9,31,437},{9,28,77},{9,25,33},{9,25,98},{10,23,1514}, +{6,30,429},{7,26,53},{6,23,454},{11,21,1514},{6,23,454},{8,29,434},{8,29,434},{8,29,434},{8,24,437},{7,30,404},{7,26,68},{7,26,68},{7,22,89},{4,27,347},{6,22,49},{9,26,17},{9,26,17},{9,26,17},{9,23,20},{15,2,338},{7,26,4},{7,26,4},{6,22,13},{13,16,338},{6,22,13},{15,11,421},{8,30,5},{9,25,17},{8,25,10},{15,11,421},{15,19,421},{8,25,10}, +{0,23,445},{15,19,421},{0,23,445},{8,0,433},{8,0,433},{8,0,433},{8,0,433},{7,26,64},{7,26,64},{7,26,64},{7,21,65},{5,24,8},{5,24,8},{9,31,1174},{8,31,506},{9,26,723},{8,26,482},{8,31,1643},{7,30,717},{8,26,131},{7,24,725},{4,31,1566},{6,24,458},{9,31,549},{9,30,41},{9,26,98},{9,26,53},{15,9,1514},{7,30,461},{8,26,50},{6,24,433},{14,19,1514}, +{6,24,433},{8,31,490},{8,31,490},{8,31,490},{8,26,481},{8,28,421},{8,25,122},{8,25,122},{8,22,131},{4,29,341},{6,23,35},{9,29,17},{9,29,17},{9,29,17},{9,25,17},{11,18,338},{7,28,5},{7,28,5},{7,23,10},{10,21,338},{7,23,10},{14,17,421},{9,30,25},{10,26,5},{8,26,1},{14,17,421},{13,23,421},{8,26,1},{0,24,433},{13,23,421},{0,24,433},{8,0,481}, +{8,0,481},{8,0,481},{8,0,481},{8,24,82},{8,24,82},{8,24,82},{8,21,81},{5,25,10},{5,25,10},{9,31,1334},{9,31,470},{9,28,597},{9,27,443},{9,31,1815},{8,30,578},{8,27,15},{8,25,603},{5,31,1638},{6,25,442},{10,31,506},{10,30,77},{10,27,35},{10,27,107},{14,15,1514},{7,31,506},{8,27,14},{6,25,442},{12,23,1514},{6,25,442},{9,31,434},{9,31,434},{9,31,434}, 
+{9,26,437},{8,30,341},{8,27,14},{8,27,14},{8,23,30},{5,29,341},{7,24,69},{10,28,25},{10,28,25},{10,28,25},{10,25,26},{13,14,338},{8,27,13},{8,27,13},{7,24,20},{8,25,338},{7,24,20},{15,16,421},{9,31,37},{10,27,10},{8,27,10},{15,16,421},{15,22,421},{8,27,10},{0,25,433},{15,22,421},{0,25,433},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{8,26,2}, +{8,26,2},{8,26,2},{8,23,5},{6,25,9},{6,25,9},{10,31,1470},{10,31,735},{10,28,749},{9,28,481},{9,31,1895},{8,31,579},{8,28,89},{8,26,582},{6,31,1761},{7,26,458},{11,31,614},{10,31,59},{10,28,73},{10,28,58},{13,21,1514},{8,31,530},{8,28,40},{7,26,433},{15,21,1514},{7,26,433},{9,31,562},{9,31,562},{9,31,562},{9,28,481},{9,30,421},{8,29,51},{8,29,51}, +{8,24,65},{5,31,341},{7,25,45},{10,31,10},{10,31,10},{10,31,10},{10,27,10},{14,13,338},{8,29,2},{8,29,2},{8,24,16},{11,23,338},{8,24,16},{15,19,421},{10,31,50},{11,28,4},{9,28,0},{15,19,421},{13,26,421},{9,28,0},{0,26,433},{13,26,421},{0,26,433},{9,0,481},{9,0,481},{9,0,481},{9,0,481},{8,29,50},{8,29,50},{8,29,50},{8,24,49},{6,27,5}, +{6,27,5},{11,31,1838},{10,31,753},{10,30,603},{10,29,443},{10,31,2046},{9,31,629},{9,29,21},{9,27,589},{7,31,1935},{7,27,462},{11,31,749},{11,31,120},{11,29,33},{11,29,98},{12,27,1514},{9,31,629},{9,29,21},{7,27,461},{13,25,1514},{7,27,461},{10,31,497},{10,31,497},{10,31,497},{10,28,437},{9,31,388},{9,29,20},{9,29,20},{9,25,29},{6,31,347},{8,25,105},{11,30,17}, +{11,30,17},{11,30,17},{11,27,20},{14,16,338},{8,31,4},{8,31,4},{8,26,10},{15,20,338},{8,26,10},{14,25,421},{11,31,104},{11,29,17},{10,29,10},{14,25,421},{12,29,421},{10,29,10},{0,27,445},{12,29,421},{0,27,445},{10,0,433},{10,0,433},{10,0,433},{10,0,433},{9,28,1},{9,28,1},{9,28,1},{9,25,4},{7,28,8},{7,28,8},{11,31,1902},{11,31,1001},{11,30,723}, 
+{10,30,482},{11,31,2286},{10,31,782},{9,31,117},{9,28,578},{8,31,2118},{7,29,491},{12,31,770},{12,31,338},{11,30,98},{11,30,53},{14,23,1514},{10,31,701},{10,30,50},{8,28,442},{11,29,1514},{8,28,442},{11,31,677},{11,31,677},{11,31,677},{10,30,481},{10,31,437},{9,31,68},{9,31,68},{9,27,89},{7,31,379},{8,27,49},{11,31,52},{11,31,52},{11,31,52},{11,29,17},{13,22,338}, +{9,31,4},{9,31,4},{8,27,13},{12,25,338},{8,27,13},{15,24,421},{12,31,169},{12,30,5},{10,30,1},{15,24,421},{15,27,421},{10,30,1},{0,28,433},{15,27,421},{0,28,433},{10,0,481},{10,0,481},{10,0,481},{10,0,481},{9,31,64},{9,31,64},{9,31,64},{9,26,65},{7,29,10},{7,29,10},{12,31,2151},{11,31,1254},{11,31,629},{11,31,442},{11,31,2393},{10,31,975},{10,31,14}, +{10,29,570},{9,31,2241},{8,29,425},{13,31,931},{12,31,395},{12,31,34},{12,30,105},{13,29,1459},{11,31,778},{10,31,13},{8,29,400},{14,27,1459},{8,29,400},{11,31,629},{11,31,629},{11,31,629},{11,30,437},{10,31,581},{10,31,14},{10,31,14},{10,27,30},{8,31,477},{8,28,45},{12,31,34},{12,31,34},{12,31,34},{12,29,26},{15,18,338},{10,31,13},{10,31,13},{9,28,17},{10,29,338}, +{9,28,17},{15,27,394},{13,31,218},{12,31,9},{10,31,9},{15,27,394},{15,28,394},{10,31,9},{0,29,400},{15,28,394},{0,29,400},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{10,30,2},{10,30,2},{10,30,2},{10,27,5},{8,29,25},{8,29,25},{12,31,1767},{12,31,1167},{12,31,806},{11,31,506},{12,31,1983},{11,31,747},{11,31,122},{10,29,346},{10,31,1836},{8,30,217},{13,31,611}, +{13,31,339},{12,31,130},{12,31,10},{15,24,1064},{12,31,587},{11,31,41},{8,30,217},{15,27,1067},{8,30,217},{12,31,806},{12,31,806},{12,31,806},{11,31,506},{11,31,581},{11,31,122},{11,31,122},{10,28,65},{9,31,557},{9,29,69},{12,31,130},{12,31,130},{12,31,130},{12,31,10},{13,27,338},{11,31,41},{11,31,41},{10,28,16},{13,27,338},{10,28,16},{14,31,200},{14,31,136},{13,31,1}, 
+{12,31,1},{14,31,200},{15,29,200},{12,31,1},{0,30,208},{15,29,200},{0,30,208},{11,0,481},{11,0,481},{11,0,481},{11,0,481},{11,30,82},{11,30,82},{11,30,82},{10,28,49},{8,30,9},{8,30,9},{13,31,1646},{12,31,1194},{12,31,833},{12,31,497},{12,31,1686},{12,31,750},{11,31,221},{11,30,122},{11,31,1541},{9,30,110},{14,31,542},{13,31,285},{13,31,116},{13,31,20},{15,26,722}, +{13,31,429},{12,31,100},{10,30,74},{14,29,722},{10,30,74},{12,31,833},{12,31,833},{12,31,833},{12,31,497},{12,31,725},{11,31,221},{11,31,221},{11,29,29},{10,31,632},{9,30,46},{13,31,116},{13,31,116},{13,31,116},{13,31,20},{13,30,338},{12,31,100},{12,31,100},{10,30,10},{12,30,338},{10,30,10},{15,29,61},{14,31,37},{14,31,1},{13,31,4},{15,29,61},{15,30,61},{13,31,4}, +{0,30,73},{15,30,61},{0,30,73},{12,0,433},{12,0,433},{12,0,433},{12,0,433},{11,31,25},{11,31,25},{11,31,25},{11,29,4},{9,31,17},{9,31,17},{13,31,1406},{13,31,1134},{13,31,965},{12,31,737},{13,31,1454},{12,31,702},{12,31,341},{11,31,89},{11,31,1381},{10,31,49},{14,31,318},{14,31,254},{14,31,218},{13,31,116},{14,31,510},{13,31,333},{13,31,164},{10,31,13},{14,30,509}, +{10,31,13},{13,31,965},{13,31,965},{13,31,965},{12,31,737},{12,31,869},{12,31,341},{12,31,341},{11,31,89},{11,31,756},{10,31,49},{14,31,218},{14,31,218},{14,31,218},{13,31,116},{15,26,338},{13,31,164},{13,31,164},{10,31,13},{14,29,338},{10,31,13},{15,30,9},{15,31,9},{15,31,9},{14,31,9},{15,30,9},{15,31,9},{14,31,9},{0,31,9},{15,31,9},{0,31,9},{12,0,481}, +{12,0,481},{12,0,481},{12,0,481},{12,31,85},{12,31,85},{12,31,85},{11,30,65},{10,31,40},{10,31,40},{13,31,1315},{13,31,1043},{13,31,874},{13,31,602},{13,31,1171},{13,31,627},{13,31,458},{12,31,5},{12,31,1087},{11,31,80},{14,31,225},{14,31,161},{14,31,125},{14,31,61},{15,29,297},{14,31,193},{14,31,157},{12,31,4},{14,31,297},{12,31,4},{13,31,874},{13,31,874},{13,31,874}, 
+{13,31,602},{13,31,730},{13,31,458},{13,31,458},{12,31,5},{11,31,705},{11,31,80},{14,31,125},{14,31,125},{14,31,125},{14,31,61},{14,31,221},{14,31,157},{14,31,157},{12,31,4},{15,29,221},{12,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{12,31,101}, +{12,31,101},{12,31,101},{12,31,5},{11,31,80},{11,31,80},{14,31,885},{14,31,821},{14,31,785},{13,31,650},{14,31,885},{13,31,483},{13,31,314},{13,31,81},{13,31,779},{11,31,144},{15,31,169},{14,31,145},{14,31,109},{14,31,45},{15,30,118},{14,31,81},{14,31,45},{13,31,0},{15,30,114},{13,31,0},{14,31,785},{14,31,785},{14,31,785},{13,31,650},{13,31,586},{13,31,314},{13,31,314}, +{13,31,81},{12,31,518},{11,31,144},{14,31,109},{14,31,109},{14,31,109},{14,31,45},{15,29,85},{14,31,45},{14,31,45},{13,31,0},{14,31,85},{13,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,481},{13,0,481},{13,0,481},{13,0,481},{13,31,145},{13,31,145},{13,31,145},{13,31,81},{11,31,144}, +{11,31,144},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{0,9,1899},{0,7,1355},{0,6,589},{0,4,1354},{0,5,2124},{0,4,1498},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{2,2,1896},{0,7,1355},{0,6,589},{0,4,1354},{1,4,1896},{0,4,1354},{0,6,0},{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{0,2,200},{0,1,134},{0,6,0}, +{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{0,2,164},{0,2,104},{3,3,882},{0,10,225},{0,7,18},{0,6,265},{3,3,882},{4,3,882},{0,6,265},{0,5,890},{4,3,882},{0,5,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,15,884},{0,12,170},{0,8,8}, 
+{0,7,202},{0,10,2360},{0,8,1530},{0,7,643},{0,5,1579},{0,6,2684},{0,5,1804},{0,15,884},{0,12,170},{0,8,8},{0,7,202},{1,7,2355},{0,8,1530},{0,7,643},{0,5,1579},{1,5,2355},{0,5,1579},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{0,4,340},{0,4,180},{0,4,180},{0,2,200},{0,2,392},{0,2,236},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{1,1,338}, +{0,4,180},{0,4,180},{0,2,200},{2,0,340},{0,2,200},{4,2,882},{0,12,170},{0,8,8},{0,7,202},{4,2,882},{6,2,882},{0,7,202},{0,6,890},{6,2,882},{0,6,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{0,12,2899},{0,9,1773},{0,8,725}, +{0,6,1854},{0,7,3348},{0,5,2124},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{1,9,2899},{0,9,1773},{0,8,725},{0,6,1854},{6,0,2899},{0,6,1854},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{0,6,580},{0,5,306},{0,5,306},{0,3,325},{0,3,667},{0,3,406},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{1,2,580},{0,5,306},{0,5,306},{0,3,325},{2,1,578}, +{0,3,325},{3,8,882},{0,14,106},{1,9,13},{0,9,148},{3,8,882},{4,6,882},{0,9,148},{0,7,890},{4,6,882},{0,7,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,21,920},{0,16,89},{0,11,153},{0,10,121},{0,14,3051},{0,11,1709},{0,9,557},{0,7,1795},{0,8,3651},{0,6,2174},{0,21,920}, +{0,16,89},{1,10,108},{0,10,121},{4,1,3048},{0,11,1709},{0,9,557},{0,7,1795},{3,5,3048},{0,7,1795},{0,14,37},{0,14,37},{0,14,37},{0,8,37},{0,8,648},{0,7,274},{0,7,274},{0,4,277},{0,4,824},{0,4,421},{0,14,37},{0,14,37},{0,14,37},{0,8,37},{0,8,648},{0,7,274},{0,7,274},{0,4,277},{4,0,648},{0,4,277},{5,4,882},{0,16,53},{1,10,8}, +{0,10,85},{5,4,882},{9,1,882},{0,10,85},{0,8,900},{9,1,882},{0,8,900},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{0,23,1115},{0,18,242},{1,12,309},{0,11,259},{0,17,3051},{0,13,1579},{0,10,346},{0,8,1630},{0,9,3924},{0,7,2173},{1,20,885},{1,16,90},{1,12,53},{1,11,131},{4,4,3048}, 
+{0,13,1579},{0,10,346},{0,8,1630},{7,2,3048},{0,8,1630},{0,17,226},{0,17,226},{0,17,226},{0,10,225},{0,11,648},{0,9,169},{0,9,169},{0,5,200},{0,6,990},{0,5,425},{1,13,2},{1,13,2},{1,13,2},{1,9,2},{3,1,648},{0,9,169},{0,9,169},{0,5,200},{3,3,648},{0,5,200},{5,7,882},{0,18,17},{2,11,18},{0,11,34},{5,7,882},{6,7,882},{0,11,34}, +{0,9,890},{6,7,882},{0,9,890},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{1,23,1187},{1,18,350},{1,13,422},{1,12,373},{0,20,3048},{0,15,1443},{0,12,204},{0,9,1483},{0,11,4212},{0,8,2174},{1,23,931},{1,18,94},{2,12,108},{1,12,117},{6,0,3048},{0,15,1443},{0,12,204},{0,9,1483},{5,6,3048}, +{0,9,1483},{1,16,305},{1,16,305},{1,16,305},{1,10,309},{0,13,650},{0,11,109},{0,11,109},{0,7,148},{0,7,1161},{0,6,473},{1,16,49},{1,16,49},{1,16,49},{1,10,53},{4,0,648},{0,11,109},{0,11,109},{0,7,148},{1,7,648},{0,7,148},{7,3,882},{0,20,8},{2,12,8},{0,12,8},{7,3,882},{11,2,882},{0,12,8},{0,10,890},{11,2,882},{0,10,890},{1,0,305}, +{1,0,305},{1,0,305},{1,0,305},{0,8,1},{0,8,1},{0,8,1},{0,5,4},{0,3,145},{0,3,145},{1,25,1365},{1,20,497},{1,14,713},{1,13,510},{0,23,3051},{0,16,1278},{0,13,86},{0,10,1354},{0,12,4609},{0,9,2228},{2,22,886},{2,18,110},{2,14,56},{2,13,152},{5,6,3048},{0,16,1278},{0,13,86},{0,10,1354},{10,1,3048},{0,10,1354},{1,19,482},{1,19,482},{1,19,482}, +{1,12,481},{0,16,648},{0,12,72},{0,12,72},{0,8,101},{0,8,1352},{0,7,557},{2,15,5},{2,15,5},{2,15,5},{2,11,5},{3,6,648},{0,12,72},{0,12,72},{0,8,101},{3,6,648},{0,8,101},{5,12,882},{0,21,10},{3,13,13},{0,13,5},{5,12,882},{13,1,882},{0,13,5},{0,11,890},{13,1,882},{0,11,890},{1,0,481},{1,0,481},{1,0,481},{1,0,481},{0,10,1}, 
+{0,10,1},{0,10,1},{0,6,1},{0,5,261},{0,5,261},{1,28,1667},{1,22,793},{1,15,1182},{1,14,793},{0,25,3048},{0,18,1170},{0,14,36},{0,11,1243},{0,14,5005},{0,10,2318},{2,25,920},{2,20,89},{3,14,108},{2,14,121},{7,2,3048},{0,18,1170},{0,14,36},{0,11,1243},{12,0,3048},{0,11,1243},{1,21,786},{1,21,786},{1,21,786},{1,14,789},{0,19,650},{0,14,32},{0,14,32}, +{0,9,50},{0,9,1619},{0,8,661},{2,18,37},{2,18,37},{2,18,37},{2,12,37},{5,2,648},{0,14,32},{0,14,32},{0,9,50},{8,1,648},{0,9,50},{8,3,882},{1,22,8},{3,14,8},{1,14,8},{8,3,882},{11,5,882},{1,14,8},{0,12,900},{11,5,882},{0,12,900},{1,0,785},{1,0,785},{1,0,785},{1,0,785},{0,13,1},{0,13,1},{0,13,1},{0,8,4},{0,6,405}, +{0,6,405},{2,27,1844},{2,22,971},{2,16,1186},{2,15,988},{0,28,3084},{0,20,1095},{0,16,77},{0,12,1159},{0,16,4945},{0,12,2084},{3,24,885},{3,20,90},{3,16,53},{3,15,131},{6,8,3048},{0,20,1059},{0,16,41},{0,12,1123},{4,12,3048},{0,12,1123},{2,21,955},{2,21,955},{2,21,955},{2,14,954},{0,22,686},{0,16,41},{0,16,41},{0,10,49},{0,11,1577},{0,9,545},{3,17,2}, +{3,17,2},{3,17,2},{3,13,2},{5,5,648},{0,16,5},{0,16,5},{0,10,13},{5,7,648},{0,10,13},{8,6,882},{1,24,9},{4,15,18},{1,15,13},{8,6,882},{15,2,882},{1,15,13},{0,13,890},{15,2,882},{0,13,890},{2,0,954},{2,0,954},{2,0,954},{2,0,954},{0,16,37},{0,16,37},{0,16,37},{0,9,40},{0,8,373},{0,8,373},{2,30,1772},{2,24,898},{2,17,1287}, +{2,16,898},{1,27,3055},{1,20,1143},{1,16,33},{1,13,1214},{0,17,4639},{0,13,1730},{3,27,931},{3,22,94},{4,16,108},{3,16,117},{5,14,3048},{0,21,996},{1,16,29},{0,13,1054},{14,1,3048},{0,13,1054},{2,23,891},{2,23,891},{2,23,891},{2,16,894},{1,21,652},{1,16,29},{1,16,29},{1,11,44},{0,13,1452},{0,11,365},{3,20,49},{3,20,49},{3,20,49},{3,14,53},{7,1,648}, 
+{0,18,1},{0,18,1},{0,11,4},{10,2,648},{0,11,4},{5,20,882},{2,24,8},{4,16,8},{2,16,8},{5,20,882},{13,6,882},{2,16,8},{0,14,890},{13,6,882},{0,14,890},{2,0,890},{2,0,890},{2,0,890},{2,0,890},{1,15,4},{1,15,4},{1,15,4},{1,10,5},{0,9,269},{0,9,269},{3,29,1838},{3,24,970},{3,18,1186},{3,17,983},{1,30,3084},{1,22,1095},{1,18,77}, +{1,14,1159},{0,19,4419},{0,14,1444},{4,26,886},{4,22,110},{4,18,56},{4,17,152},{8,5,3048},{0,23,936},{1,18,41},{0,14,1003},{12,5,3048},{0,14,1003},{3,23,955},{3,23,955},{3,23,955},{3,16,954},{1,23,692},{1,18,41},{1,18,41},{1,12,46},{0,15,1296},{0,12,235},{4,19,5},{4,19,5},{4,19,5},{4,15,5},{5,10,648},{1,18,5},{1,18,5},{0,12,10},{12,1,648}, +{0,12,10},{11,1,882},{2,25,10},{5,17,13},{2,17,5},{11,1,882},{15,5,882},{2,17,5},{0,15,890},{15,5,882},{0,15,890},{3,0,954},{3,0,954},{3,0,954},{3,0,954},{1,18,37},{1,18,37},{1,18,37},{1,11,40},{0,11,185},{0,11,185},{3,31,1790},{3,26,898},{3,19,1287},{3,18,898},{2,29,3057},{2,22,1179},{2,18,45},{1,15,1250},{0,20,4156},{0,15,1226},{4,29,920}, +{4,24,89},{5,18,108},{4,18,121},{3,26,3048},{0,25,909},{2,18,36},{0,15,970},{15,3,3048},{0,15,970},{3,25,891},{3,25,891},{3,25,891},{3,18,894},{2,23,659},{2,18,41},{2,18,41},{2,13,59},{0,16,1137},{0,13,137},{4,22,37},{4,22,37},{4,22,37},{4,16,37},{8,1,648},{1,20,2},{1,20,2},{1,13,1},{10,5,648},{1,13,1},{10,7,882},{3,26,8},{5,18,8}, +{3,18,8},{10,7,882},{13,9,882},{3,18,8},{0,16,900},{13,9,882},{0,16,900},{3,0,890},{3,0,890},{3,0,890},{3,0,890},{2,17,10},{2,17,10},{2,17,10},{2,12,13},{0,13,136},{0,13,136},{4,31,1844},{4,26,971},{4,20,1186},{4,19,988},{2,31,3111},{2,24,1095},{2,20,77},{2,16,1159},{0,22,3940},{0,16,1055},{5,28,885},{5,24,90},{5,20,53},{5,19,131},{5,22,3048}, 
+{0,27,886},{2,20,41},{0,17,926},{8,13,3048},{0,17,926},{4,25,955},{4,25,955},{4,25,955},{4,18,954},{2,26,686},{2,20,41},{2,20,41},{2,14,49},{0,18,1002},{0,15,110},{5,21,2},{5,21,2},{5,21,2},{5,17,2},{8,4,648},{2,20,5},{2,20,5},{2,14,13},{14,2,648},{2,14,13},{13,0,882},{3,28,9},{6,19,18},{3,19,13},{13,0,882},{5,21,882},{3,19,13}, +{0,17,890},{5,21,882},{0,17,890},{4,0,954},{4,0,954},{4,0,954},{4,0,954},{2,20,37},{2,20,37},{2,20,37},{2,13,40},{0,15,74},{0,15,74},{4,31,1972},{4,28,898},{4,21,1287},{4,20,898},{3,31,3055},{3,24,1143},{3,20,33},{3,17,1214},{0,23,3820},{0,18,963},{5,31,931},{5,26,94},{6,20,108},{5,20,117},{11,3,3048},{0,28,899},{3,20,29},{0,18,899},{11,11,3048}, +{0,18,899},{4,27,891},{4,27,891},{4,27,891},{4,20,894},{3,25,652},{3,20,29},{3,20,29},{3,15,44},{0,20,876},{0,16,102},{5,24,49},{5,24,49},{5,24,49},{5,18,53},{3,25,648},{2,22,1},{2,22,1},{2,15,4},{12,6,648},{2,15,4},{8,19,882},{4,28,8},{6,20,8},{4,20,8},{8,19,882},{15,10,882},{4,20,8},{0,18,890},{15,10,882},{0,18,890},{4,0,890}, +{4,0,890},{4,0,890},{4,0,890},{3,19,4},{3,19,4},{3,19,4},{3,14,5},{0,17,29},{0,17,29},{5,31,1964},{5,28,970},{5,22,1186},{5,21,983},{4,31,3172},{3,26,1095},{3,22,77},{3,18,1159},{0,25,3679},{0,19,899},{6,30,886},{6,26,110},{6,22,56},{6,21,152},{10,9,3048},{1,29,888},{3,22,41},{0,19,890},{14,9,3048},{0,19,890},{5,27,955},{5,27,955},{5,27,955}, +{5,20,954},{3,27,692},{3,22,41},{3,22,41},{3,16,46},{0,22,800},{1,16,98},{6,23,5},{6,23,5},{6,23,5},{6,19,5},{9,6,648},{3,22,5},{3,22,5},{2,16,10},{14,5,648},{2,16,10},{13,5,882},{4,29,10},{7,21,13},{4,21,5},{13,5,882},{12,15,882},{4,21,5},{0,19,890},{12,15,882},{0,19,890},{5,0,954},{5,0,954},{5,0,954},{5,0,954},{3,22,37}, 
+{3,22,37},{3,22,37},{3,15,40},{0,19,9},{0,19,9},{6,31,2264},{5,30,898},{5,23,1287},{5,22,898},{4,31,3204},{4,26,1179},{4,22,45},{3,19,1250},{0,27,3523},{0,20,908},{6,31,968},{6,28,89},{7,22,108},{6,22,121},{9,15,3048},{1,30,899},{4,22,36},{0,20,904},{5,22,3048},{0,20,904},{5,29,891},{5,29,891},{5,29,891},{5,22,894},{4,27,659},{4,22,41},{4,22,41}, +{4,17,59},{0,23,747},{1,18,102},{6,26,37},{6,26,37},{6,26,37},{6,20,37},{10,5,648},{3,24,2},{3,24,2},{3,17,1},{12,9,648},{3,17,1},{15,1,882},{5,30,8},{7,22,8},{5,22,8},{15,1,882},{15,13,882},{5,22,8},{0,20,900},{15,13,882},{0,20,900},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{4,21,10},{4,21,10},{4,21,10},{4,16,13},{0,20,8}, +{0,20,8},{6,31,2228},{6,30,971},{6,24,1186},{6,23,988},{5,31,3256},{4,28,1095},{4,24,77},{4,20,1159},{0,29,3364},{1,21,894},{7,31,915},{7,28,90},{7,24,53},{7,23,131},{14,1,3048},{2,31,886},{4,24,41},{1,21,890},{10,17,3048},{1,21,890},{6,29,955},{6,29,955},{6,29,955},{6,22,954},{4,30,686},{4,24,41},{4,24,41},{4,18,49},{0,25,705},{2,19,110},{7,25,2}, +{7,25,2},{7,25,2},{7,21,2},{10,8,648},{4,24,5},{4,24,5},{4,18,13},{11,12,648},{4,18,13},{15,4,882},{6,30,17},{7,24,52},{5,23,13},{15,4,882},{9,22,882},{5,23,13},{0,21,890},{9,22,882},{0,21,890},{6,0,954},{6,0,954},{6,0,954},{6,0,954},{4,24,37},{4,24,37},{4,24,37},{4,17,40},{1,21,4},{1,21,4},{7,31,2444},{6,31,907},{6,25,1287}, +{6,24,898},{6,31,3436},{5,28,1143},{5,24,33},{5,21,1214},{0,31,3276},{1,22,908},{8,30,1208},{7,30,94},{7,25,166},{7,24,117},{13,7,3048},{3,31,906},{5,24,29},{1,22,899},{13,15,3048},{1,22,899},{6,31,891},{6,31,891},{6,31,891},{6,24,894},{5,29,652},{5,24,29},{5,24,29},{5,19,44},{0,27,665},{2,20,102},{7,28,49},{7,28,49},{7,28,49},{7,22,53},{9,14,648}, 
+{4,26,1},{4,26,1},{4,19,4},{14,10,648},{4,19,4},{10,23,882},{6,31,17},{8,24,13},{6,24,8},{10,23,882},{11,21,882},{6,24,8},{0,22,890},{11,21,882},{0,22,890},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{5,23,4},{5,23,4},{5,23,4},{5,18,5},{1,23,4},{1,23,4},{7,31,2636},{7,31,991},{7,26,1186},{7,25,983},{6,31,3532},{5,30,1095},{5,26,77}, +{5,22,1159},{0,31,3340},{2,23,899},{8,31,1014},{7,31,262},{8,25,108},{7,25,254},{15,3,3048},{4,31,936},{5,26,41},{2,23,890},{11,19,3048},{2,23,890},{7,31,955},{7,31,955},{7,31,955},{7,24,954},{5,31,692},{5,26,41},{5,26,41},{5,20,46},{0,29,651},{3,20,98},{8,26,101},{8,26,101},{8,26,101},{8,22,101},{11,10,648},{5,26,5},{5,26,5},{4,20,10},{5,23,648}, +{4,20,10},{15,9,882},{7,31,37},{8,25,8},{6,25,5},{15,9,882},{14,19,882},{6,25,5},{0,23,890},{14,19,882},{0,23,890},{7,0,954},{7,0,954},{7,0,954},{7,0,954},{5,26,37},{5,26,37},{5,26,37},{5,19,40},{2,23,9},{2,23,9},{8,31,3110},{7,31,1135},{7,27,1287},{7,26,898},{7,31,3652},{6,30,1179},{6,26,45},{5,23,1250},{1,31,3492},{2,24,908},{8,31,1174}, +{8,31,110},{8,27,56},{8,26,152},{11,19,3048},{5,31,996},{6,26,36},{2,24,904},{9,23,3048},{2,24,904},{7,31,939},{7,31,939},{7,31,939},{7,26,894},{6,31,659},{6,26,41},{6,26,41},{6,21,59},{1,29,659},{3,22,102},{8,28,4},{8,28,4},{8,28,4},{8,24,8},{12,9,648},{5,28,2},{5,28,2},{5,21,1},{14,13,648},{5,21,1},{14,15,882},{8,31,106},{9,26,13}, +{7,26,8},{14,15,882},{12,23,882},{7,26,8},{0,24,900},{12,23,882},{0,24,900},{7,0,890},{7,0,890},{7,0,890},{7,0,890},{6,25,10},{6,25,10},{6,25,10},{6,20,13},{2,24,8},{2,24,8},{8,31,3038},{8,31,1470},{8,28,1391},{7,27,1137},{7,31,4120},{6,31,1146},{6,28,77},{6,24,1159},{3,31,3681},{3,25,894},{9,31,1205},{8,31,245},{9,27,99},{8,27,122},{10,25,3048}, 
+{6,31,1110},{6,28,41},{3,25,890},{12,21,3048},{3,25,890},{8,31,1274},{8,31,1274},{8,31,1274},{7,27,1128},{7,30,750},{6,28,41},{6,28,41},{6,22,49},{1,31,648},{4,23,110},{8,31,49},{8,31,49},{8,31,49},{8,25,53},{15,2,648},{6,28,5},{6,28,5},{6,22,13},{13,16,648},{6,22,13},{14,18,882},{9,31,164},{9,27,18},{7,27,13},{14,18,882},{11,26,882},{7,27,13}, +{0,25,890},{11,26,882},{0,25,890},{7,0,1124},{7,0,1124},{7,0,1124},{7,0,1124},{6,28,37},{6,28,37},{6,28,37},{6,21,40},{3,25,4},{3,25,4},{9,31,3454},{8,31,1502},{8,29,1186},{8,28,983},{8,31,4077},{7,31,1230},{7,28,33},{7,25,1214},{4,31,3820},{3,26,908},{10,31,1368},{9,31,261},{9,29,53},{9,28,126},{15,11,3048},{7,31,1226},{7,28,29},{3,26,899},{15,19,3048}, +{3,26,899},{8,31,1018},{8,31,1018},{8,31,1018},{8,27,954},{7,31,724},{7,28,29},{7,28,29},{7,23,44},{2,31,665},{4,24,102},{9,30,2},{9,30,2},{9,30,2},{9,26,2},{11,18,648},{6,30,1},{6,30,1},{6,23,4},{10,21,648},{6,23,4},{12,27,882},{10,31,225},{10,28,13},{7,28,20},{12,27,882},{13,25,882},{7,28,20},{0,26,890},{13,25,882},{0,26,890},{8,0,954}, +{8,0,954},{8,0,954},{8,0,954},{7,27,4},{7,27,4},{7,27,4},{7,22,5},{3,27,4},{3,27,4},{9,31,3614},{9,31,1886},{8,30,1287},{8,29,898},{8,31,4381},{7,31,1582},{7,30,77},{7,26,1159},{5,31,4036},{4,27,899},{10,31,1560},{10,31,405},{10,29,108},{9,29,117},{14,17,3048},{8,31,1444},{7,30,41},{4,27,890},{13,23,3048},{4,27,890},{8,31,1146},{8,31,1146},{8,31,1146}, +{8,29,894},{8,30,1011},{7,30,41},{7,30,41},{7,24,46},{3,31,705},{5,24,98},{9,31,85},{9,31,85},{9,31,85},{9,27,53},{13,14,648},{7,30,5},{7,30,5},{6,24,10},{8,25,648},{6,24,10},{14,23,882},{10,31,305},{10,29,8},{8,29,8},{14,23,882},{11,29,882},{8,29,8},{0,27,890},{11,29,882},{0,27,890},{8,0,890},{8,0,890},{8,0,890},{8,0,890},{7,30,37}, 
+{7,30,37},{7,30,37},{7,23,40},{4,27,9},{4,27,9},{10,31,4072},{9,31,2174},{9,31,1186},{9,30,983},{9,31,4545},{8,31,1725},{8,30,207},{7,27,1250},{6,31,4339},{4,28,908},{11,31,1656},{10,31,645},{10,31,56},{10,30,152},{13,23,3048},{8,31,1604},{7,31,77},{4,28,904},{11,27,3048},{4,28,904},{9,31,1150},{9,31,1150},{9,31,1150},{9,29,954},{8,31,841},{8,29,193},{8,29,193}, +{7,25,197},{4,31,747},{5,26,102},{10,31,20},{10,31,20},{10,31,20},{10,28,8},{14,13,648},{7,31,41},{7,31,41},{7,25,1},{11,23,648},{7,25,1},{13,29,882},{11,31,397},{11,30,13},{8,30,5},{13,29,882},{14,27,882},{8,30,5},{0,28,900},{14,27,882},{0,28,900},{9,0,954},{9,0,954},{9,0,954},{9,0,954},{8,27,122},{8,27,122},{8,27,122},{8,23,122},{4,28,8}, +{4,28,8},{10,31,4147},{10,31,2404},{9,31,1429},{9,31,901},{10,31,4627},{9,31,1938},{8,31,38},{8,28,1061},{7,31,4330},{5,29,789},{12,31,1701},{11,31,715},{11,31,90},{10,31,113},{15,19,2814},{10,31,1554},{8,31,34},{5,29,785},{13,26,2814},{5,29,785},{9,31,1429},{9,31,1429},{9,31,1429},{9,31,901},{9,31,1022},{8,31,38},{8,31,38},{8,26,44},{5,31,840},{6,27,110},{11,31,90}, +{11,31,90},{11,31,90},{10,29,53},{14,16,648},{8,31,34},{8,31,34},{7,26,20},{15,20,648},{7,26,20},{15,25,761},{12,31,425},{11,31,9},{9,31,1},{15,25,761},{12,31,761},{9,31,1},{0,29,785},{12,31,761},{0,29,785},{9,0,900},{9,0,900},{9,0,900},{9,0,900},{8,30,4},{8,30,4},{8,30,4},{8,25,5},{5,29,4},{5,29,4},{11,31,3735},{10,31,2356},{10,31,1395}, +{10,31,954},{10,31,4099},{9,31,1618},{9,31,174},{8,29,686},{7,31,3930},{6,29,510},{12,31,1285},{12,31,685},{11,31,122},{11,31,37},{13,27,2249},{10,31,1186},{9,31,74},{6,29,485},{13,27,2249},{6,29,485},{10,31,1395},{10,31,1395},{10,31,1395},{10,31,954},{9,31,1086},{9,31,174},{9,31,174},{8,27,49},{6,31,969},{6,28,102},{11,31,122},{11,31,122},{11,31,122},{11,30,2},{13,22,648}, 
+{9,31,74},{9,31,74},{8,27,13},{12,25,648},{8,27,13},{15,26,481},{13,31,269},{12,31,0},{10,31,0},{15,26,481},{15,28,481},{10,31,0},{0,29,481},{15,28,481},{0,29,481},{10,0,954},{10,0,954},{10,0,954},{10,0,954},{8,31,61},{8,31,61},{8,31,61},{8,26,40},{5,31,4},{5,31,4},{11,31,3399},{11,31,2260},{11,31,1635},{10,31,954},{11,31,3639},{10,31,1435},{9,31,238}, +{9,29,430},{8,31,3443},{6,30,314},{13,31,1121},{12,31,525},{12,31,164},{11,31,53},{13,29,1769},{11,31,918},{10,31,113},{7,30,290},{14,27,1769},{7,30,290},{11,31,1635},{11,31,1635},{11,31,1635},{10,31,954},{10,31,1251},{9,31,238},{9,31,238},{9,28,41},{7,31,1105},{7,28,98},{12,31,164},{12,31,164},{12,31,164},{11,31,53},{15,18,648},{10,31,113},{10,31,113},{8,28,1},{10,29,648}, +{8,28,1},{15,27,269},{13,31,173},{13,31,4},{11,31,4},{15,27,269},{14,30,265},{11,31,4},{0,30,289},{14,30,265},{0,30,289},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{9,31,13},{9,31,13},{9,31,13},{9,27,5},{6,31,9},{6,31,9},{12,31,3157},{11,31,2308},{11,31,1683},{11,31,1054},{11,31,3303},{10,31,1339},{10,31,378},{9,30,213},{9,31,3103},{7,30,166},{13,31,801}, +{13,31,529},{12,31,260},{12,31,20},{15,24,1374},{12,31,777},{11,31,181},{8,30,117},{15,27,1377},{8,30,117},{11,31,1683},{11,31,1683},{11,31,1683},{11,31,1054},{10,31,1491},{10,31,378},{10,31,378},{9,29,46},{8,31,1331},{7,30,102},{12,31,260},{12,31,260},{12,31,260},{12,31,20},{13,27,648},{11,31,181},{11,31,181},{8,29,10},{13,27,648},{8,29,10},{15,28,117},{14,31,61},{14,31,25}, +{13,31,4},{15,28,117},{14,31,117},{13,31,4},{0,30,113},{14,31,117},{0,30,113},{11,0,954},{11,0,954},{11,0,954},{11,0,954},{10,31,122},{10,31,122},{10,31,122},{9,28,40},{7,31,29},{7,31,29},{12,31,2860},{12,31,2260},{12,31,1899},{11,31,1261},{12,31,2932},{11,31,1310},{11,31,685},{10,30,108},{10,31,2731},{7,31,173},{13,31,747},{13,31,475},{13,31,306},{13,31,130},{15,26,1032}, 
+{12,31,651},{12,31,290},{8,31,40},{14,29,1032},{8,31,40},{12,31,1899},{12,31,1899},{12,31,1899},{11,31,1261},{11,31,1620},{11,31,685},{11,31,685},{10,30,44},{9,31,1524},{8,30,134},{13,31,306},{13,31,306},{13,31,306},{13,31,130},{13,30,648},{12,31,290},{12,31,290},{9,30,5},{12,30,648},{9,30,5},{15,30,18},{15,30,34},{14,31,16},{14,31,0},{15,30,18},{15,30,26},{14,31,0}, +{0,31,36},{15,30,26},{0,31,36},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{10,31,104},{10,31,104},{10,31,104},{10,29,5},{8,30,130},{8,30,130},{13,31,2732},{12,31,2276},{12,31,1915},{12,31,1315},{12,31,2660},{11,31,1414},{11,31,789},{10,31,45},{11,31,2487},{8,31,116},{14,31,524},{14,31,460},{14,31,424},{13,31,170},{14,31,776},{13,31,507},{13,31,338},{10,31,9},{14,30,771}, +{10,31,9},{12,31,1915},{12,31,1915},{12,31,1915},{12,31,1315},{12,31,1699},{11,31,789},{11,31,789},{10,31,45},{10,31,1546},{8,31,116},{14,31,424},{14,31,424},{14,31,424},{13,31,170},{15,26,580},{13,31,338},{13,31,338},{10,31,9},{14,29,580},{10,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{12,0,954}, +{12,0,954},{12,0,954},{12,0,954},{11,31,164},{11,31,164},{11,31,164},{10,30,40},{8,31,116},{8,31,116},{13,31,2156},{13,31,1884},{13,31,1715},{12,31,1251},{13,31,2132},{12,31,1108},{12,31,747},{11,31,5},{11,31,1927},{9,31,180},{14,31,300},{14,31,236},{14,31,200},{14,31,136},{15,28,451},{14,31,328},{13,31,194},{11,31,1},{15,29,456},{11,31,1},{13,31,1715},{13,31,1715},{13,31,1715}, +{12,31,1251},{12,31,1347},{12,31,747},{12,31,747},{11,31,5},{10,31,1242},{9,31,180},{14,31,200},{14,31,200},{14,31,200},{14,31,136},{15,27,338},{13,31,194},{13,31,194},{11,31,1},{13,31,338},{11,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{11,31,260}, 
+{11,31,260},{11,31,260},{11,31,5},{9,31,180},{9,31,180},{13,31,1836},{13,31,1564},{13,31,1395},{13,31,1123},{13,31,1620},{12,31,1012},{12,31,651},{11,31,85},{12,31,1564},{10,31,233},{14,31,204},{14,31,140},{14,31,104},{14,31,40},{15,29,216},{14,31,136},{14,31,100},{12,31,1},{14,31,216},{12,31,1},{13,31,1395},{13,31,1395},{13,31,1395},{13,31,1123},{13,31,1179},{12,31,651},{12,31,651}, +{11,31,85},{11,31,998},{10,31,233},{14,31,104},{14,31,104},{14,31,104},{14,31,40},{15,28,162},{14,31,100},{14,31,100},{12,31,1},{15,29,164},{12,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,954},{13,0,954},{13,0,954},{13,0,954},{12,31,290},{12,31,290},{12,31,290},{11,31,85},{10,31,233}, +{10,31,233},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{0,11,3379},{0,9,2369},{0,8,1061},{0,5,2435},{0,6,3760},{0,5,2660},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{1,8,3372},{0,9,2369},{0,8,1061},{0,5,2435},{5,1,3371},{0,5,2435},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{0,2,332},{0,2,200},{0,8,0}, +{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{2,0,288},{0,2,164},{3,7,1568},{0,14,442},{0,10,40},{0,8,485},{3,7,1568},{8,0,1568},{0,8,485},{0,6,1586},{8,0,1568},{0,6,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,20,1570},{0,16,325},{0,11,5}, +{0,9,392},{0,13,3968},{0,10,2630},{0,9,1121},{0,6,2710},{0,7,4484},{0,6,3034},{0,20,1570},{0,16,325},{0,11,5},{0,9,392},{1,10,3968},{0,10,2630},{0,9,1121},{0,6,2710},{5,2,3968},{0,6,2710},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{0,5,514},{0,5,274},{0,5,274},{0,3,289},{0,3,595},{0,3,370},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{1,2,512}, 
+{0,5,274},{0,5,274},{0,3,289},{1,2,512},{0,3,289},{5,3,1568},{0,16,325},{0,11,5},{0,9,392},{5,3,1568},{4,7,1568},{0,9,392},{0,7,1586},{4,7,1568},{0,7,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{0,15,4652},{0,11,2945},{0,10,1217}, +{0,7,3035},{0,8,5283},{0,7,3476},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{2,8,4651},{0,11,2945},{0,10,1217},{0,7,3035},{5,3,4651},{0,7,3035},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{0,7,802},{0,6,424},{0,6,424},{0,4,449},{0,3,931},{0,3,562},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{2,0,800},{0,6,424},{0,6,424},{0,4,449},{0,4,800}, +{0,4,449},{1,19,1568},{0,17,225},{0,12,18},{0,11,292},{1,19,1568},{9,2,1568},{0,11,292},{0,8,1576},{9,2,1568},{0,8,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,1570},{0,19,149},{0,13,73},{0,12,194},{0,17,5424},{0,13,3368},{0,11,1349},{0,8,3449},{0,9,6213},{0,7,3956},{0,25,1570}, +{0,19,149},{0,13,73},{0,12,194},{5,0,5419},{0,13,3368},{0,11,1349},{0,8,3449},{5,4,5419},{0,8,3449},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{0,8,1152},{0,7,610},{0,7,610},{0,4,625},{0,4,1328},{0,4,769},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{0,8,1152},{0,7,610},{0,7,610},{0,4,625},{4,0,1152},{0,4,625},{5,8,1568},{0,19,149},{1,13,13}, +{0,12,194},{5,8,1568},{4,10,1568},{0,12,194},{0,9,1576},{4,10,1568},{0,9,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,1651},{0,21,155},{0,14,281},{0,13,198},{0,20,5424},{0,15,3099},{0,12,996},{0,9,3179},{0,10,6544},{0,8,3890},{1,24,1619},{0,21,155},{1,14,69},{0,13,198},{5,3,5419}, +{0,15,3099},{0,12,996},{0,9,3179},{4,7,5419},{0,9,3179},{0,19,82},{0,19,82},{0,19,82},{0,11,82},{0,11,1152},{0,9,445},{0,9,445},{0,6,505},{0,6,1494},{0,5,737},{1,15,50},{1,15,50},{1,15,50},{1,10,49},{3,1,1152},{0,9,445},{0,9,445},{0,6,505},{3,3,1152},{0,6,505},{5,11,1568},{0,21,74},{1,14,20},{0,13,117},{5,11,1568},{11,3,1568},{0,13,117}, 
+{0,10,1586},{11,3,1568},{0,10,1586},{0,0,81},{0,0,81},{0,0,81},{0,0,81},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,25},{0,1,25},{1,27,1825},{0,23,323},{1,15,342},{0,14,361},{0,22,5420},{0,16,2834},{0,13,726},{0,10,2966},{0,11,6916},{0,9,3860},{1,27,1569},{1,21,131},{1,15,86},{1,14,181},{1,19,5419},{0,16,2834},{0,13,726},{0,10,2966},{9,2,5419}, +{0,10,2966},{1,18,257},{1,18,257},{1,18,257},{1,12,261},{0,13,1154},{0,11,337},{0,11,337},{0,7,388},{0,7,1665},{0,6,749},{1,18,1},{1,18,1},{1,18,1},{1,12,5},{4,0,1152},{0,11,337},{0,11,337},{0,7,388},{1,7,1152},{0,7,388},{8,2,1568},{0,23,34},{2,15,5},{0,14,72},{8,2,1568},{13,2,1568},{0,14,72},{0,11,1586},{13,2,1568},{0,11,1586},{1,0,257}, +{1,0,257},{1,0,257},{1,0,257},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,2,85},{0,2,85},{1,30,1907},{1,23,411},{1,16,542},{1,15,454},{0,25,5424},{0,18,2630},{0,15,486},{0,11,2771},{0,13,7299},{0,11,3860},{2,26,1634},{1,23,155},{2,16,82},{1,15,198},{5,8,5419},{0,18,2630},{0,15,486},{0,11,2771},{4,10,5419},{0,11,2771},{1,21,338},{1,21,338},{1,21,338}, +{1,13,338},{0,16,1152},{0,13,274},{0,13,274},{0,8,305},{0,8,1856},{0,7,797},{2,17,64},{2,17,64},{2,17,64},{2,12,65},{3,6,1152},{0,13,274},{0,13,274},{0,8,305},{3,6,1152},{0,8,305},{3,23,1568},{0,25,17},{2,16,18},{0,15,45},{3,23,1568},{11,6,1568},{0,15,45},{0,12,1576},{11,6,1568},{0,12,1576},{1,0,337},{1,0,337},{1,0,337},{1,0,337},{0,8,1}, +{0,8,1},{0,8,1},{0,5,0},{0,4,169},{0,4,169},{1,31,2145},{1,25,590},{1,17,915},{1,16,619},{0,27,5420},{0,20,2424},{0,16,282},{0,12,2552},{0,15,7711},{0,11,3908},{2,29,1570},{2,23,149},{2,17,73},{2,16,194},{7,4,5419},{0,20,2424},{0,16,282},{0,12,2552},{9,5,5419},{0,12,2552},{1,23,546},{1,23,546},{1,23,546},{1,15,546},{0,19,1154},{0,15,194},{0,15,194}, 
+{0,9,218},{0,9,2123},{0,8,865},{2,20,1},{2,20,1},{2,20,1},{2,13,4},{5,2,1152},{0,15,194},{0,15,194},{0,9,218},{8,1,1152},{0,9,218},{9,4,1568},{0,27,5},{3,17,13},{0,16,26},{9,4,1568},{9,10,1568},{0,16,26},{0,13,1576},{9,10,1568},{0,13,1576},{1,0,545},{1,0,545},{1,0,545},{1,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289}, +{0,5,289},{2,31,2746},{1,27,945},{2,18,1370},{1,17,977},{0,30,5420},{0,22,2243},{0,17,145},{0,13,2386},{0,16,8161},{0,13,3986},{3,28,1619},{2,25,155},{3,18,69},{2,17,198},{8,2,5419},{0,22,2243},{0,17,145},{0,13,2386},{13,2,5419},{0,13,2386},{1,26,932},{1,26,932},{1,26,932},{1,16,936},{0,22,1154},{0,17,109},{0,17,109},{0,10,145},{0,11,2441},{0,9,1001},{3,19,50}, +{3,19,50},{3,19,50},{3,14,49},{5,5,1152},{0,17,109},{0,17,109},{0,10,145},{5,7,1152},{0,10,145},{11,0,1568},{0,29,10},{3,18,20},{0,18,8},{11,0,1568},{13,7,1568},{0,18,8},{0,14,1586},{13,7,1568},{0,14,1586},{1,0,932},{1,0,932},{1,0,932},{1,0,932},{0,14,1},{0,14,1},{0,14,1},{0,8,1},{0,6,468},{0,6,468},{2,31,3146},{2,27,1412},{2,19,1743}, +{1,19,1441},{0,31,5515},{0,23,2096},{0,19,69},{0,14,2251},{0,18,8669},{0,14,4100},{3,31,1569},{3,25,131},{3,19,86},{3,18,181},{3,23,5419},{0,23,2096},{0,19,69},{0,14,2251},{11,6,5419},{0,14,2251},{2,25,1379},{2,25,1379},{2,25,1379},{2,17,1379},{0,24,1152},{0,18,61},{0,18,61},{0,11,100},{0,12,2859},{0,10,1157},{3,22,1},{3,22,1},{3,22,1},{3,16,5},{7,1,1152}, +{0,18,61},{0,18,61},{0,11,100},{10,2,1152},{0,11,100},{10,6,1568},{1,29,2},{4,19,5},{0,19,5},{10,6,1568},{15,6,1568},{0,19,5},{0,15,1586},{15,6,1568},{0,15,1586},{2,0,1378},{2,0,1378},{2,0,1378},{2,0,1378},{0,16,1},{0,16,1},{0,16,1},{0,10,1},{0,8,657},{0,8,657},{2,31,3802},{2,29,1603},{2,21,2148},{2,19,1631},{1,31,5655},{0,25,2005},{0,20,31}, 
+{0,15,2138},{0,19,8963},{0,15,4070},{4,30,1634},{3,27,155},{4,20,82},{3,19,198},{9,4,5419},{0,25,2001},{0,20,27},{0,15,2134},{9,10,5419},{0,15,2134},{2,28,1587},{2,28,1587},{2,28,1587},{2,18,1590},{0,27,1158},{0,20,22},{0,20,22},{0,12,62},{0,14,3075},{0,11,1221},{4,21,64},{4,21,64},{4,21,64},{4,16,65},{5,10,1152},{0,20,18},{0,20,18},{0,12,58},{12,1,1152}, +{0,12,58},{12,2,1568},{1,31,10},{4,20,18},{0,20,18},{12,2,1568},{13,10,1568},{0,20,18},{0,16,1576},{13,10,1568},{0,16,1576},{2,0,1586},{2,0,1586},{2,0,1586},{2,0,1586},{0,19,4},{0,19,4},{0,19,4},{0,11,8},{0,9,769},{0,9,769},{3,31,3890},{2,31,1623},{3,21,2180},{2,20,1644},{1,31,5863},{0,27,1989},{1,21,109},{0,17,2117},{0,21,8560},{0,16,3545},{4,31,1640}, +{4,27,149},{4,21,73},{4,20,194},{10,3,5419},{0,27,1889},{0,21,49},{0,17,2017},{11,9,5419},{0,17,2017},{2,30,1619},{2,30,1619},{2,30,1619},{2,20,1619},{1,26,1188},{1,20,86},{1,20,86},{1,13,121},{0,16,2801},{0,13,949},{4,24,1},{4,24,1},{4,24,1},{4,17,4},{8,1,1152},{0,22,2},{0,22,2},{0,14,26},{10,5,1152},{0,14,26},{11,8,1568},{2,31,5},{5,21,13}, +{1,21,9},{11,8,1568},{11,14,1568},{1,21,9},{0,17,1576},{11,14,1568},{0,17,1576},{2,0,1618},{2,0,1618},{2,0,1618},{2,0,1618},{1,18,37},{1,18,37},{1,18,37},{1,12,37},{0,10,625},{0,10,625},{4,31,4308},{3,31,1589},{3,23,2160},{3,21,1621},{2,31,5895},{1,27,1999},{1,22,33},{1,18,2124},{0,23,8196},{0,17,3043},{5,31,1667},{4,29,155},{5,22,69},{4,21,198},{10,6,5419}, +{0,29,1772},{1,22,24},{0,18,1875},{15,6,5419},{0,18,1875},{3,30,1576},{3,30,1576},{3,30,1576},{3,20,1580},{1,29,1161},{1,22,29},{1,22,29},{1,15,58},{0,17,2529},{0,14,656},{5,23,50},{5,23,50},{5,23,50},{5,18,49},{8,4,1152},{0,24,1},{0,24,1},{0,15,1},{14,2,1152},{0,15,1},{13,4,1568},{3,31,13},{5,22,20},{2,22,8},{13,4,1568},{15,11,1568},{2,22,8}, 
+{0,18,1586},{15,11,1568},{0,18,1586},{3,0,1576},{3,0,1576},{3,0,1576},{3,0,1576},{1,21,10},{1,21,10},{1,21,10},{1,14,13},{0,12,520},{0,12,520},{4,31,4436},{3,31,1765},{4,23,2175},{3,23,1669},{3,31,6079},{1,29,1977},{2,23,105},{1,19,2107},{0,24,7969},{0,18,2675},{6,31,1832},{5,29,131},{5,23,86},{5,22,181},{12,2,5419},{0,30,1699},{1,23,62},{0,19,1782},{13,10,5419}, +{0,19,1782},{3,31,1665},{3,31,1665},{3,31,1665},{3,22,1640},{2,28,1188},{2,22,97},{2,22,97},{2,15,136},{0,19,2313},{0,15,474},{5,26,1},{5,26,1},{5,26,1},{5,20,5},{3,25,1152},{1,24,5},{1,24,5},{0,16,2},{12,6,1152},{0,16,2},{15,0,1568},{4,31,34},{6,23,5},{2,23,5},{15,0,1568},{12,16,1568},{2,23,5},{0,19,1586},{12,16,1568},{0,19,1586},{3,0,1640}, +{3,0,1640},{3,0,1640},{3,0,1640},{2,20,37},{2,20,37},{2,20,37},{2,14,37},{0,14,400},{0,14,400},{5,31,4740},{4,31,1716},{4,25,2148},{4,23,1631},{3,31,6351},{2,29,2005},{2,24,31},{2,19,2138},{0,26,7669},{0,19,2375},{6,31,1832},{5,31,155},{6,24,82},{5,23,198},{11,8,5419},{0,31,1712},{2,24,27},{0,20,1720},{11,14,5419},{0,20,1720},{4,31,1595},{4,31,1595},{4,31,1595}, +{4,22,1590},{2,31,1158},{2,24,22},{2,24,22},{2,16,62},{0,21,2091},{0,17,306},{6,25,64},{6,25,64},{6,25,64},{6,20,65},{9,6,1152},{1,26,1},{1,26,1},{1,17,5},{14,5,1152},{1,17,5},{14,6,1568},{5,31,74},{6,24,18},{2,24,18},{14,6,1568},{15,14,1568},{2,24,18},{0,20,1576},{15,14,1568},{0,20,1576},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{2,23,4}, +{2,23,4},{2,23,4},{2,15,8},{0,16,277},{0,16,277},{5,31,5060},{5,31,1980},{5,25,2180},{4,24,1644},{4,31,6508},{2,31,1989},{3,25,109},{2,21,2117},{0,28,7364},{0,21,2098},{7,31,1952},{6,31,149},{6,25,73},{6,24,194},{12,7,5419},{1,31,1804},{2,25,49},{0,21,1657},{13,13,5419},{0,21,1657},{4,31,1739},{4,31,1739},{4,31,1739},{4,24,1619},{3,30,1188},{3,24,86},{3,24,86}, 
+{3,17,121},{0,22,1928},{0,18,194},{6,28,1},{6,28,1},{6,28,1},{6,21,4},{10,5,1152},{2,26,2},{2,26,2},{1,18,2},{12,9,1152},{1,18,2},{13,12,1568},{6,31,149},{7,25,13},{3,25,9},{13,12,1568},{13,18,1568},{3,25,9},{0,21,1576},{13,18,1568},{0,21,1576},{4,0,1618},{4,0,1618},{4,0,1618},{4,0,1618},{3,22,37},{3,22,37},{3,22,37},{3,16,37},{0,18,193}, +{0,18,193},{6,31,5316},{5,31,2160},{5,27,2160},{5,25,1621},{5,31,6800},{3,31,1999},{3,26,33},{3,22,2124},{0,29,7068},{0,22,1836},{7,31,2195},{7,31,270},{7,26,69},{6,25,198},{15,0,5419},{2,31,1970},{3,26,24},{0,22,1611},{12,16,5419},{0,22,1611},{5,31,1676},{5,31,1676},{5,31,1676},{5,24,1580},{3,31,1233},{3,26,29},{3,26,29},{3,19,58},{0,24,1798},{0,19,157},{7,27,50}, +{7,27,50},{7,27,50},{7,22,49},{10,8,1152},{2,28,1},{2,28,1},{2,19,1},{11,12,1152},{2,19,1},{15,8,1568},{7,31,221},{7,26,20},{4,26,8},{15,8,1568},{11,22,1568},{4,26,8},{0,22,1586},{11,22,1568},{0,22,1586},{5,0,1576},{5,0,1576},{5,0,1576},{5,0,1576},{3,25,10},{3,25,10},{3,25,10},{3,18,13},{0,20,106},{0,20,106},{6,31,5828},{6,31,2435},{6,27,2175}, +{5,27,1669},{5,31,7184},{4,31,2132},{4,27,105},{3,23,2107},{0,31,6820},{0,23,1690},{8,31,2306},{7,31,334},{7,27,86},{7,26,181},{14,6,5419},{4,31,2096},{3,27,62},{0,23,1590},{15,14,5419},{0,23,1590},{6,31,1859},{6,31,1859},{6,31,1859},{5,26,1640},{4,31,1220},{4,26,97},{4,26,97},{4,19,136},{0,26,1650},{0,21,161},{7,30,1},{7,30,1},{7,30,1},{7,24,5},{9,14,1152}, +{3,28,5},{3,28,5},{2,20,2},{14,10,1152},{2,20,2},{14,14,1568},{7,31,333},{8,27,40},{4,27,5},{14,14,1568},{14,20,1568},{4,27,5},{0,23,1586},{14,20,1568},{0,23,1586},{5,0,1640},{5,0,1640},{5,0,1640},{5,0,1640},{4,24,37},{4,24,37},{4,24,37},{4,18,37},{0,22,58},{0,22,58},{7,31,6036},{6,31,2835},{6,29,2148},{6,27,1631},{6,31,7316},{4,31,2228},{4,28,31}, 
+{4,23,2138},{0,31,6884},{0,24,1613},{8,31,2402},{8,31,666},{8,28,269},{7,27,198},{13,12,5419},{4,31,2224},{4,28,27},{0,24,1577},{13,18,5419},{0,24,1577},{6,31,1811},{6,31,1811},{6,31,1811},{6,26,1590},{5,31,1356},{4,28,22},{4,28,22},{4,20,62},{0,28,1508},{1,21,137},{7,31,106},{7,31,106},{7,31,106},{7,25,82},{11,10,1152},{3,30,1},{3,30,1},{3,21,5},{5,23,1152}, +{3,21,5},{13,20,1568},{8,31,410},{8,28,13},{4,28,18},{13,20,1568},{13,23,1570},{4,28,18},{0,24,1576},{13,23,1570},{0,24,1576},{6,0,1586},{6,0,1586},{6,0,1586},{6,0,1586},{4,27,4},{4,27,4},{4,27,4},{4,19,8},{0,24,37},{0,24,37},{7,31,6740},{7,31,3135},{7,29,2180},{6,28,1644},{7,31,7676},{5,31,2448},{5,29,109},{4,25,2117},{1,31,7196},{0,25,1593},{9,31,2594}, +{8,31,698},{8,29,82},{8,28,345},{14,11,5419},{5,31,2412},{4,29,49},{1,25,1580},{15,17,5419},{1,25,1580},{7,31,1979},{7,31,1979},{7,31,1979},{6,28,1619},{5,31,1388},{5,28,86},{5,28,86},{5,21,121},{0,30,1416},{1,23,161},{8,30,64},{8,30,64},{8,30,64},{8,25,65},{12,9,1152},{4,30,2},{4,30,2},{3,22,2},{14,13,1152},{3,22,2},{15,16,1568},{9,31,530},{8,29,18}, +{5,29,9},{15,16,1568},{15,22,1568},{5,29,9},{0,25,1576},{15,22,1568},{0,25,1576},{6,0,1618},{6,0,1618},{6,0,1618},{6,0,1618},{5,26,37},{5,26,37},{5,26,37},{5,20,37},{0,25,17},{0,25,17},{8,31,6906},{7,31,3909},{7,31,2160},{7,29,1621},{7,31,8144},{6,31,2902},{5,30,33},{5,26,2124},{2,31,7661},{1,26,1615},{9,31,2945},{9,31,1025},{8,30,86},{8,29,181},{14,14,5419}, +{7,31,2694},{5,30,24},{1,26,1590},{14,20,5419},{1,26,1590},{7,31,2060},{7,31,2060},{7,31,2060},{7,28,1580},{6,31,1476},{5,30,29},{5,30,29},{5,23,58},{0,31,1324},{2,23,157},{8,31,37},{8,31,37},{8,31,37},{8,27,5},{15,2,1152},{5,30,20},{5,30,20},{4,23,1},{13,16,1152},{4,23,1},{15,19,1568},{10,31,637},{9,30,5},{6,30,8},{15,19,1568},{13,26,1568},{6,30,8}, 
+{0,26,1586},{13,26,1568},{0,26,1586},{7,0,1576},{7,0,1576},{7,0,1576},{7,0,1576},{5,29,10},{5,29,10},{5,29,10},{5,22,13},{0,28,10},{0,28,10},{8,31,7386},{8,31,4250},{8,31,2490},{7,31,1669},{8,31,8461},{6,31,3350},{6,31,105},{5,27,2107},{4,31,8004},{1,27,1611},{10,31,3112},{9,31,1361},{9,31,69},{8,30,198},{13,20,5419},{7,31,2950},{5,31,62},{2,27,1590},{11,25,5420}, +{2,27,1590},{8,31,2486},{8,31,2486},{8,31,2486},{7,30,1640},{6,31,1700},{6,30,97},{6,30,97},{6,23,136},{1,31,1424},{2,25,161},{9,31,65},{9,31,65},{9,31,65},{9,27,49},{11,18,1152},{5,31,58},{5,31,58},{4,24,2},{10,21,1152},{4,24,2},{13,28,1568},{11,31,785},{9,31,20},{6,31,5},{13,28,1568},{11,30,1568},{6,31,5},{0,27,1586},{11,30,1568},{0,27,1586},{7,0,1640}, +{7,0,1640},{7,0,1640},{7,0,1640},{6,28,37},{6,28,37},{6,28,37},{6,22,37},{1,28,10},{1,28,10},{9,31,7014},{8,31,4230},{8,31,2294},{8,31,1846},{8,31,7865},{7,31,3114},{6,31,85},{6,27,1706},{4,31,7436},{2,28,1289},{11,31,2852},{10,31,1221},{9,31,145},{9,30,114},{13,22,4803},{8,31,2648},{6,31,81},{2,28,1253},{12,25,4803},{2,28,1253},{8,31,2294},{8,31,2294},{8,31,2294}, +{8,30,1811},{7,31,1740},{6,31,85},{6,31,85},{6,24,62},{2,31,1577},{3,25,137},{9,31,145},{9,31,145},{9,31,145},{9,29,5},{13,14,1152},{6,31,81},{6,31,81},{5,25,5},{8,25,1152},{5,25,5},{15,23,1250},{11,31,689},{10,31,4},{7,31,9},{15,23,1250},{11,31,1250},{7,31,9},{0,28,1252},{11,31,1250},{0,28,1252},{8,0,1810},{8,0,1810},{8,0,1810},{8,0,1810},{6,31,4}, +{6,31,4},{6,31,4},{6,23,8},{1,29,8},{1,29,8},{9,31,6534},{9,31,4134},{8,31,2486},{8,31,1590},{9,31,7237},{7,31,2970},{7,31,161},{6,28,1256},{5,31,6748},{2,29,949},{11,31,2340},{10,31,1125},{10,31,164},{9,31,97},{15,17,4056},{9,31,2244},{7,31,125},{3,28,909},{12,26,4056},{3,28,909},{8,31,2486},{8,31,2486},{8,31,2486},{8,31,1590},{7,31,2156},{7,31,161},{7,31,161}, 
+{7,25,121},{3,31,1729},{3,27,161},{10,31,164},{10,31,164},{10,31,164},{10,29,65},{14,13,1152},{7,31,125},{7,31,125},{5,26,2},{11,23,1152},{5,26,2},{13,31,882},{12,31,482},{11,31,0},{8,31,4},{13,31,882},{15,27,882},{8,31,4},{0,28,900},{15,27,882},{0,28,900},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{7,30,37},{7,30,37},{7,30,37},{7,24,37},{1,31,16}, +{1,31,16},{10,31,6091},{9,31,4053},{9,31,2609},{8,31,1761},{9,31,6490},{8,31,2622},{7,31,458},{7,28,835},{6,31,6162},{3,29,598},{12,31,1989},{11,31,931},{11,31,306},{10,31,5},{15,19,3318},{10,31,1806},{8,31,202},{4,29,545},{13,26,3318},{4,29,545},{9,31,2609},{9,31,2609},{9,31,2609},{8,31,1761},{8,31,2086},{7,31,458},{7,31,458},{7,27,58},{4,31,1868},{4,27,157},{11,31,306}, +{11,31,306},{11,31,306},{10,31,5},{14,16,1152},{8,31,202},{8,31,202},{6,27,1},{15,20,1152},{6,27,1},{15,26,545},{13,31,313},{12,31,4},{10,31,4},{15,26,545},{14,29,545},{10,31,4},{0,29,545},{14,29,545},{0,29,545},{8,0,1640},{8,0,1640},{8,0,1640},{8,0,1640},{7,31,58},{7,31,58},{7,31,58},{7,26,13},{2,31,13},{2,31,13},{10,31,5723},{10,31,3980},{9,31,2945}, +{9,31,1745},{10,31,6083},{8,31,2494},{8,31,558},{7,29,558},{7,31,5674},{4,30,411},{12,31,1573},{11,31,963},{11,31,338},{11,31,49},{13,27,2753},{10,31,1438},{9,31,290},{5,29,341},{13,27,2753},{5,29,341},{9,31,2945},{9,31,2945},{9,31,2945},{9,31,1745},{9,31,2390},{8,31,558},{8,31,558},{7,28,147},{5,31,2064},{4,29,161},{11,31,338},{11,31,338},{11,31,338},{11,31,49},{13,22,1152}, +{9,31,290},{9,31,290},{6,28,2},{12,25,1152},{6,28,2},{15,27,313},{13,31,185},{13,31,16},{11,31,0},{15,27,313},{13,31,313},{11,31,0},{0,29,337},{13,31,313},{0,29,337},{9,0,1576},{9,0,1576},{9,0,1576},{9,0,1576},{8,31,197},{8,31,197},{8,31,197},{7,27,122},{3,31,25},{3,31,25},{11,31,5415},{10,31,3996},{10,31,3035},{10,31,2006},{10,31,5619},{9,31,2378},{8,31,814}, 
+{8,29,414},{7,31,5338},{5,30,251},{12,31,1413},{12,31,813},{12,31,452},{11,31,65},{13,29,2273},{11,31,1218},{10,31,365},{6,30,146},{14,27,2273},{6,30,146},{10,31,3035},{10,31,3035},{10,31,3035},{10,31,2006},{9,31,2518},{8,31,814},{8,31,814},{8,28,121},{6,31,2329},{5,29,137},{12,31,452},{12,31,452},{12,31,452},{11,31,65},{15,18,1152},{10,31,365},{10,31,365},{7,29,5},{10,29,1152}, +{7,29,5},{15,28,145},{14,31,85},{13,31,16},{12,31,4},{15,28,145},{15,29,149},{12,31,4},{0,30,145},{15,29,149},{0,30,145},{9,0,1640},{9,0,1640},{9,0,1640},{9,0,1640},{8,31,85},{8,31,85},{8,31,85},{8,27,37},{4,31,40},{4,31,40},{11,31,5143},{11,31,4004},{11,31,3379},{10,31,2070},{11,31,5287},{10,31,2431},{9,31,1062},{8,30,133},{8,31,5011},{5,31,161},{13,31,1161}, +{13,31,889},{12,31,548},{12,31,164},{15,24,1878},{11,31,1106},{11,31,481},{7,30,66},{15,27,1881},{7,30,66},{11,31,3379},{11,31,3379},{11,31,3379},{10,31,2070},{10,31,2835},{9,31,1062},{9,31,1062},{8,29,62},{7,31,2577},{5,31,161},{12,31,548},{12,31,548},{12,31,548},{12,31,164},{13,27,1152},{11,31,481},{11,31,481},{7,30,2},{13,27,1152},{7,30,2},{15,29,45},{14,31,37},{14,31,1}, +{14,31,9},{15,29,45},{15,30,41},{14,31,9},{0,30,65},{15,30,41},{0,30,65},{10,0,1586},{10,0,1586},{10,0,1586},{10,0,1586},{9,31,221},{9,31,221},{9,31,221},{8,28,8},{5,31,80},{5,31,80},{12,31,4948},{11,31,4157},{11,31,3532},{11,31,2393},{11,31,5008},{10,31,2422},{10,31,1461},{9,31,125},{9,31,4752},{6,31,157},{13,31,1107},{13,31,835},{13,31,666},{12,31,362},{15,26,1536}, +{12,31,1011},{12,31,650},{8,31,16},{14,29,1536},{8,31,16},{11,31,3532},{11,31,3532},{11,31,3532},{11,31,2393},{10,31,3204},{10,31,1461},{10,31,1461},{9,30,114},{8,31,2976},{6,31,157},{13,31,666},{13,31,666},{13,31,666},{12,31,362},{13,30,1152},{12,31,650},{12,31,650},{8,31,16},{12,30,1152},{8,31,16},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0}, 
+{0,31,0},{15,31,0},{0,31,0},{10,0,1640},{10,0,1640},{10,0,1640},{10,0,1640},{9,31,221},{9,31,221},{9,31,221},{9,29,25},{6,31,157},{6,31,157},{12,31,4212},{12,31,3612},{12,31,3251},{11,31,2201},{12,31,4212},{11,31,2154},{10,31,1301},{9,31,13},{10,31,3939},{7,31,233},{14,31,776},{13,31,659},{13,31,490},{13,31,218},{15,27,1067},{13,31,699},{12,31,442},{9,31,4},{13,31,1067}, +{9,31,4},{12,31,3251},{12,31,3251},{12,31,3251},{11,31,2201},{11,31,2668},{10,31,1301},{10,31,1301},{9,31,13},{8,31,2528},{7,31,233},{13,31,490},{13,31,490},{13,31,490},{13,31,218},{15,25,802},{12,31,442},{12,31,442},{9,31,4},{15,27,802},{9,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{11,0,1576}, +{11,0,1576},{11,0,1576},{11,0,1576},{10,31,340},{10,31,340},{10,31,340},{9,31,13},{7,31,233},{7,31,233},{12,31,3732},{12,31,3132},{12,31,2771},{12,31,2171},{12,31,3444},{11,31,1834},{11,31,1209},{10,31,37},{10,31,3219},{8,31,400},{14,31,456},{14,31,392},{14,31,356},{13,31,170},{14,31,684},{13,31,459},{13,31,290},{10,31,1},{14,30,683},{10,31,1},{12,31,2771},{12,31,2771},{12,31,2771}, +{12,31,2171},{11,31,2348},{11,31,1209},{11,31,1209},{10,31,37},{9,31,2156},{8,31,400},{14,31,356},{14,31,356},{14,31,356},{13,31,170},{15,26,512},{13,31,290},{13,31,290},{10,31,1},{14,29,512},{10,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{11,0,1640},{11,0,1640},{11,0,1640},{11,0,1640},{10,31,436}, +{10,31,436},{10,31,436},{10,31,37},{8,31,400},{8,31,400},{13,31,3172},{13,31,2900},{12,31,2547},{12,31,1947},{12,31,2932},{12,31,1732},{11,31,1145},{10,31,53},{11,31,2695},{8,31,464},{14,31,264},{14,31,200},{14,31,164},{14,31,100},{15,28,387},{14,31,268},{13,31,178},{11,31,1},{15,29,396},{11,31,1},{12,31,2547},{12,31,2547},{12,31,2547},{12,31,1947},{12,31,1971},{11,31,1145},{11,31,1145}, 
+{10,31,53},{10,31,1794},{8,31,464},{14,31,164},{14,31,164},{14,31,164},{14,31,100},{15,27,290},{13,31,178},{13,31,178},{11,31,1},{14,30,290},{11,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{11,31,520},{11,31,520},{11,31,520},{10,31,53},{8,31,464}, +{8,31,464},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{0,15,5885},{0,11,4118},{0,10,1800},{0,7,4202},{0,8,6546},{0,7,4643},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{3,5,5885},{0,11,4118},{0,10,1800},{0,7,4202},{0,9,5885},{0,7,4202},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,549},{0,5,289},{0,5,289},{0,3,306},{0,3,630},{0,3,387},{0,11,0}, +{0,11,0},{0,11,0},{0,7,4},{1,2,545},{0,5,289},{0,5,289},{0,3,306},{2,1,545},{0,3,306},{6,3,2665},{0,18,680},{0,13,50},{0,11,785},{6,3,2665},{11,0,2665},{0,11,785},{0,8,2689},{11,0,2665},{0,8,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,2665},{0,20,521},{0,14,5}, +{0,12,625},{0,17,6669},{0,13,4529},{0,11,1890},{0,8,4610},{0,9,7494},{0,7,5171},{0,25,2665},{0,20,521},{0,14,5},{0,12,625},{3,7,6669},{0,13,4529},{0,11,1890},{0,8,4610},{8,0,6669},{0,8,4610},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,7,841},{0,6,445},{0,6,445},{0,4,464},{0,3,982},{0,3,595},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{2,0,841}, +{0,6,445},{0,6,445},{0,4,464},{1,3,841},{0,4,464},{7,2,2665},{0,20,521},{0,14,5},{0,12,625},{7,2,2665},{12,0,2665},{0,12,625},{0,9,2689},{12,0,2665},{0,9,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{0,19,7541},{0,14,4934},{0,12,2042}, +{0,9,5045},{0,10,8546},{0,8,5682},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{5,2,7538},{0,14,4934},{0,12,2042},{0,9,5045},{8,1,7538},{0,9,5045},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,8,1201},{0,7,637},{0,7,637},{0,4,656},{0,4,1385},{0,4,800},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{1,5,1201},{0,7,637},{0,7,637},{0,4,656},{4,0,1201}, 
+{0,4,656},{6,8,2665},{0,22,405},{0,15,10},{0,13,514},{6,8,2665},{11,3,2665},{0,13,514},{0,10,2689},{11,3,2665},{0,10,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,30,2669},{0,23,313},{0,16,68},{0,15,410},{0,20,8498},{0,16,5330},{0,13,2210},{0,10,5530},{0,11,9702},{0,9,6270},{0,30,2669}, +{0,23,313},{0,16,68},{0,15,410},{4,7,8493},{0,16,5330},{0,13,2210},{0,10,5530},{8,2,8493},{0,10,5530},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,9,1629},{0,8,832},{0,8,832},{0,5,881},{0,5,1874},{0,5,1106},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{2,3,1625},{0,8,832},{0,8,832},{0,5,881},{4,1,1625},{0,5,881},{8,2,2665},{0,23,313},{1,16,8}, +{0,15,410},{8,2,2665},{14,1,2665},{0,15,410},{0,11,2689},{14,1,2665},{0,11,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,2777},{0,26,232},{0,17,197},{0,16,305},{0,22,9674},{0,17,5849},{0,14,2450},{0,10,6106},{0,12,11199},{0,10,7006},{0,31,2777},{0,26,232},{0,17,197},{0,16,305},{1,19,9669}, +{0,17,5849},{0,14,2450},{0,10,6106},{9,2,9669},{0,10,6106},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,11,2178},{0,10,1125},{0,10,1125},{0,6,1189},{0,6,2520},{0,5,1475},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{3,1,2178},{0,10,1125},{0,10,1125},{0,6,1189},{3,3,2178},{0,6,1189},{9,2,2665},{0,26,232},{1,17,17},{0,16,305},{9,2,2665},{13,4,2665},{0,16,305}, +{0,12,2689},{13,4,2665},{0,12,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,31,3045},{0,28,217},{0,19,401},{0,17,282},{0,25,9670},{0,19,5529},{0,16,1970},{0,12,5738},{0,13,11589},{0,11,6898},{1,31,2789},{0,28,217},{1,18,146},{0,17,282},{5,8,9669},{0,19,5529},{0,16,1970},{0,12,5738},{4,10,9669}, 
+{0,12,5738},{0,24,64},{0,24,64},{0,24,64},{0,15,68},{0,13,2180},{0,11,949},{0,11,949},{0,7,1018},{0,7,2691},{0,6,1433},{1,21,64},{1,21,64},{1,21,64},{1,13,68},{4,0,2178},{0,11,949},{0,11,949},{0,7,1018},{1,7,2178},{0,7,1018},{10,1,2665},{0,28,153},{2,18,5},{0,17,218},{10,1,2665},{15,3,2665},{0,17,218},{0,13,2689},{15,3,2665},{0,13,2689},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,3,1},{0,3,1},{0,3,1},{0,2,4},{0,1,18},{0,1,18},{1,31,3285},{0,29,341},{1,19,453},{0,18,405},{0,27,9674},{0,20,5170},{0,17,1546},{0,13,5429},{0,15,11993},{0,12,6819},{2,31,2966},{1,28,221},{1,19,197},{1,18,305},{7,4,9669},{0,20,5170},{0,17,1546},{0,13,5429},{9,5,9669},{0,13,5429},{0,27,257},{0,27,257},{0,27,257}, +{1,15,256},{0,16,2178},{0,13,832},{0,13,832},{0,8,881},{0,8,2882},{0,7,1427},{1,23,4},{1,23,4},{1,23,4},{1,15,0},{3,6,2178},{0,13,832},{0,13,832},{0,8,881},{3,6,2178},{0,8,881},{11,0,2665},{0,29,85},{2,19,10},{0,18,149},{11,0,2665},{13,7,2665},{0,18,149},{0,14,2689},{13,7,2665},{0,14,2689},{0,0,256},{0,0,256},{0,0,256},{0,0,256},{0,5,1}, +{0,5,1},{0,5,1},{0,3,1},{0,2,72},{0,2,72},{1,31,3909},{1,29,465},{1,21,676},{1,19,538},{0,30,9669},{0,22,4878},{0,18,1190},{0,14,5138},{0,16,12390},{0,13,6789},{2,31,2966},{1,29,209},{2,20,149},{1,19,282},{6,10,9669},{0,22,4878},{0,18,1190},{0,14,5138},{12,3,9669},{0,14,5138},{1,26,320},{1,26,320},{1,26,320},{1,16,324},{0,19,2180},{0,15,680},{0,15,680}, +{0,9,740},{0,9,3149},{0,8,1441},{1,26,64},{1,26,64},{1,26,64},{1,16,68},{5,2,2178},{0,15,680},{0,15,680},{0,9,740},{8,1,2178},{0,9,740},{11,3,2665},{0,31,41},{3,20,8},{0,19,98},{11,3,2665},{15,6,2665},{0,19,98},{0,15,2689},{15,6,2665},{0,15,2689},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,160}, 
+{0,4,160},{2,31,4514},{1,31,630},{1,22,1110},{1,20,694},{0,31,9789},{0,23,4646},{0,20,849},{0,15,4826},{0,18,12955},{0,14,6798},{3,31,3101},{2,30,232},{2,21,197},{2,20,305},{3,23,9669},{0,23,4646},{0,20,849},{0,15,4826},{11,6,9669},{0,15,4826},{1,29,545},{1,29,545},{1,29,545},{1,18,546},{0,22,2180},{0,17,505},{0,17,505},{0,11,610},{0,11,3467},{0,10,1513},{2,26,1}, +{2,26,1},{2,26,1},{2,17,0},{5,5,2178},{0,17,505},{0,17,505},{0,11,610},{5,7,2178},{0,11,610},{11,6,2665},{1,31,85},{3,21,17},{0,20,65},{11,6,2665},{15,8,2665},{0,20,65},{0,16,2689},{15,8,2665},{0,16,2689},{1,0,545},{1,0,545},{1,0,545},{1,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289},{0,5,289},{2,31,5330},{1,31,1110},{2,23,1490}, +{1,21,979},{1,31,9981},{0,26,4406},{0,21,579},{0,16,4610},{0,19,13489},{0,15,6846},{3,31,3341},{2,31,226},{3,22,146},{2,21,282},{9,4,9669},{0,26,4406},{0,21,579},{0,16,4610},{9,10,9669},{0,16,4610},{1,31,885},{1,31,885},{1,31,885},{1,20,885},{0,24,2178},{0,19,389},{0,19,389},{0,12,464},{0,12,3885},{0,11,1603},{3,25,64},{3,25,64},{3,25,64},{3,17,68},{7,1,2178}, +{0,19,389},{0,19,389},{0,12,464},{10,2,2178},{0,12,464},{13,2,2665},{2,31,162},{4,22,5},{0,22,37},{13,2,2665},{12,13,2665},{0,22,37},{0,17,2689},{12,13,2665},{0,17,2689},{1,0,881},{1,0,881},{1,0,881},{1,0,881},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,6,445},{0,6,445},{3,31,6366},{2,31,1635},{2,24,1886},{1,22,1410},{1,31,10381},{0,28,4146},{0,22,377}, +{0,17,4373},{0,20,14006},{0,16,6915},{4,31,3434},{3,31,242},{3,23,197},{3,22,305},{10,3,9669},{0,28,4146},{0,22,377},{0,17,4373},{11,9,9669},{0,17,4373},{2,31,1346},{2,31,1346},{2,31,1346},{2,20,1345},{0,27,2180},{0,21,274},{0,21,274},{0,13,353},{0,14,4269},{0,11,1763},{3,27,4},{3,27,4},{3,27,4},{3,19,0},{5,10,2178},{0,21,274},{0,21,274},{0,13,353},{12,1,2178}, 
+{0,13,353},{14,1,2665},{3,31,242},{4,23,10},{0,23,10},{14,1,2665},{15,11,2665},{0,23,10},{0,18,2689},{15,11,2665},{0,18,2689},{1,0,1345},{1,0,1345},{1,0,1345},{1,0,1345},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,7,637},{0,7,637},{3,31,7374},{2,31,2339},{2,25,2441},{2,23,1763},{2,31,11019},{0,29,3909},{0,23,243},{0,18,4154},{0,22,14614},{0,17,7029},{5,31,3654}, +{4,31,394},{4,24,149},{3,23,282},{9,9,9669},{0,29,3909},{0,23,243},{0,18,4154},{14,7,9669},{0,18,4154},{2,31,1714},{2,31,1714},{2,31,1714},{2,22,1669},{0,29,2180},{0,23,194},{0,23,194},{0,14,260},{0,15,4686},{0,13,1937},{3,30,64},{3,30,64},{3,30,64},{3,20,68},{8,1,2178},{0,23,194},{0,23,194},{0,14,260},{10,5,2178},{0,14,260},{15,0,2665},{4,31,313},{5,24,8}, +{0,24,4},{15,0,2665},{13,15,2665},{0,24,4},{0,19,2689},{13,15,2665},{0,19,2689},{2,0,1665},{2,0,1665},{2,0,1665},{2,0,1665},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,8,832},{0,8,832},{3,31,8967},{3,31,3510},{3,26,3255},{2,24,2243},{2,31,11766},{0,31,3686},{0,25,138},{0,19,3938},{0,23,15369},{0,18,7206},{5,31,3933},{4,31,457},{4,25,197},{4,24,305},{12,2,9669}, +{0,31,3686},{0,25,138},{0,19,3938},{13,10,9669},{0,19,3938},{2,31,2434},{2,31,2434},{2,31,2434},{2,23,2182},{0,31,2210},{0,25,137},{0,25,137},{0,15,181},{0,16,5157},{0,14,2163},{4,30,1},{4,30,1},{4,30,1},{4,21,0},{8,4,2178},{0,25,137},{0,25,137},{0,15,181},{14,2,2178},{0,15,181},{15,3,2665},{5,31,421},{5,25,17},{1,25,5},{15,3,2665},{12,18,2665},{1,25,5}, +{0,20,2689},{12,18,2665},{0,20,2689},{2,0,2178},{2,0,2178},{2,0,2178},{2,0,2178},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,10,1125},{0,10,1125},{4,31,10234},{3,31,4421},{3,27,3739},{2,26,2742},{2,31,12773},{0,31,3719},{0,26,87},{0,20,3771},{0,25,16061},{0,19,7283},{6,31,4050},{5,31,629},{5,26,146},{4,25,282},{11,8,9669},{0,31,3718},{0,26,86},{0,20,3770},{11,14,9669}, 
+{0,20,3770},{3,31,3125},{3,31,3125},{3,31,3125},{2,25,2706},{1,31,2411},{0,26,86},{0,26,86},{0,16,129},{0,18,5544},{0,15,2318},{5,29,64},{5,29,64},{5,29,64},{5,21,68},{3,25,2178},{0,26,85},{0,26,85},{0,16,128},{12,6,2178},{0,16,128},{15,6,2665},{5,31,565},{6,26,5},{1,26,2},{15,6,2665},{14,17,2665},{1,26,2},{0,21,2689},{14,17,2665},{0,21,2689},{2,0,2705}, +{2,0,2705},{2,0,2705},{2,0,2705},{0,24,1},{0,24,1},{0,24,1},{0,15,5},{0,11,1348},{0,11,1348},{4,31,10874},{4,31,5018},{3,28,3750},{3,26,2754},{3,31,13045},{1,31,4003},{0,27,183},{0,21,3686},{0,27,15601},{0,20,6570},{7,31,4366},{5,31,965},{5,27,197},{5,26,305},{12,7,9669},{1,31,3954},{0,27,102},{0,21,3605},{13,13,9669},{0,21,3605},{3,31,3173},{3,31,3173},{3,31,3173}, +{3,25,2690},{1,31,2427},{0,28,113},{0,28,113},{0,17,170},{0,20,5170},{0,16,1856},{5,31,4},{5,31,4},{5,31,4},{5,23,0},{9,6,2178},{0,28,32},{0,28,32},{0,17,89},{14,5,2178},{0,17,89},{15,8,2665},{6,31,706},{6,27,10},{2,27,10},{15,8,2665},{12,21,2665},{2,27,10},{0,22,2689},{12,21,2665},{0,22,2689},{3,0,2689},{3,0,2689},{3,0,2689},{3,0,2689},{1,23,53}, +{1,23,53},{1,23,53},{1,15,49},{0,13,1217},{0,13,1217},{5,31,11278},{4,31,5402},{4,29,3753},{3,28,2745},{4,31,13566},{1,31,4403},{1,28,77},{0,22,3747},{0,28,15046},{0,21,5958},{7,31,4590},{6,31,1171},{6,28,149},{5,27,282},{14,3,9669},{2,31,4265},{1,28,76},{0,22,3458},{11,17,9669},{0,22,3458},{4,31,3377},{4,31,3377},{4,31,3377},{3,27,2706},{2,31,2532},{1,28,73},{1,28,73}, +{1,18,129},{0,21,4837},{0,17,1490},{6,31,82},{6,31,82},{6,31,82},{5,24,68},{10,5,2178},{0,30,8},{0,30,8},{0,19,49},{12,9,2178},{0,19,49},{15,11,2665},{7,31,850},{7,28,8},{2,28,4},{15,11,2665},{15,19,2665},{2,28,4},{0,23,2689},{15,19,2665},{0,23,2689},{3,0,2705},{3,0,2705},{3,0,2705},{3,0,2705},{1,26,1},{1,26,1},{1,26,1},{1,16,5},{0,14,1037}, 
+{0,14,1037},{6,31,11954},{5,31,6090},{4,30,3794},{4,28,2754},{5,31,14170},{2,31,4863},{2,29,187},{1,24,3689},{0,30,14558},{0,23,5274},{8,31,5030},{7,31,1556},{6,29,197},{6,28,305},{14,6,9669},{3,31,4594},{1,29,101},{0,24,3265},{15,14,9669},{0,24,3265},{4,31,3530},{4,31,3530},{4,31,3530},{4,27,2693},{2,31,2739},{1,30,134},{1,30,134},{1,19,197},{0,23,4506},{0,19,1109},{6,31,64}, +{6,31,64},{6,31,64},{6,25,0},{10,8,2178},{1,30,34},{1,30,34},{0,20,16},{11,12,2178},{0,20,16},{15,14,2665},{8,31,1053},{7,29,17},{3,29,5},{15,14,2665},{14,22,2665},{3,29,5},{0,24,2689},{14,22,2665},{0,24,2689},{4,0,2689},{4,0,2689},{4,0,2689},{4,0,2689},{2,26,50},{2,26,50},{2,26,50},{2,17,49},{0,16,818},{0,16,818},{6,31,12466},{5,31,6794},{5,31,3739}, +{4,30,2742},{5,31,14554},{3,31,5363},{2,30,87},{1,24,3737},{0,31,14190},{0,24,4785},{8,31,5158},{7,31,2036},{7,30,146},{6,29,282},{13,12,9669},{4,31,4806},{2,30,86},{0,25,3130},{13,18,9669},{0,25,3130},{5,31,3658},{5,31,3658},{5,31,3658},{4,29,2706},{3,31,2795},{2,30,86},{2,30,86},{2,20,129},{0,25,4315},{0,20,809},{7,31,100},{7,31,100},{7,31,100},{7,25,68},{9,14,2178}, +{1,31,68},{1,31,68},{0,21,1},{14,10,2178},{0,21,1},{13,23,2665},{9,31,1241},{8,30,50},{3,30,2},{13,23,2665},{11,27,2665},{3,30,2},{0,25,2689},{11,27,2665},{0,25,2689},{4,0,2705},{4,0,2705},{4,0,2705},{4,0,2705},{2,28,1},{2,28,1},{2,28,1},{2,19,5},{0,18,666},{0,18,666},{7,31,13094},{6,31,7445},{5,31,3915},{5,30,2754},{6,31,14998},{4,31,5926},{2,31,183}, +{2,25,3686},{0,31,14254},{0,25,4323},{9,31,5546},{8,31,2478},{7,31,197},{7,30,305},{14,11,9669},{5,31,5138},{2,31,102},{0,26,3013},{15,17,9669},{0,26,3013},{5,31,3914},{5,31,3914},{5,31,3914},{5,29,2690},{4,31,3042},{2,31,182},{2,31,182},{2,21,170},{0,27,4059},{0,21,597},{7,31,196},{7,31,196},{7,31,196},{7,27,0},{11,10,2178},{2,31,101},{2,31,101},{0,22,4},{5,23,2178}, 
+{0,22,4},{15,19,2665},{10,31,1384},{8,31,5},{4,31,10},{15,19,2665},{14,25,2665},{4,31,10},{0,26,2689},{14,25,2665},{0,26,2689},{5,0,2689},{5,0,2689},{5,0,2689},{5,0,2689},{3,27,53},{3,27,53},{3,27,53},{3,19,49},{0,20,505},{0,20,505},{7,31,12517},{6,31,7482},{6,31,4001},{5,31,2706},{6,31,14185},{4,31,5491},{3,31,154},{2,26,3124},{0,31,13437},{0,26,3306},{9,31,4949}, +{8,31,2261},{8,31,325},{7,30,192},{14,13,8712},{6,31,4686},{3,31,153},{0,27,2403},{11,23,8712},{0,27,2403},{6,31,4001},{6,31,4001},{6,31,4001},{5,31,2706},{4,31,3234},{3,31,154},{3,31,154},{3,22,129},{0,28,3762},{0,23,425},{8,31,325},{8,31,325},{8,31,325},{7,28,68},{12,9,2178},{3,31,153},{3,31,153},{1,23,1},{14,13,2178},{1,23,1},{13,27,2178},{10,31,1157},{9,31,16}, +{5,31,1},{13,27,2178},{13,27,2178},{5,31,1},{0,27,2178},{13,27,2178},{0,27,2178},{5,0,2705},{5,0,2705},{5,0,2705},{5,0,2705},{3,30,1},{3,30,1},{3,30,1},{3,20,5},{0,22,389},{0,22,389},{8,31,12034},{7,31,7195},{6,31,4370},{6,31,2693},{7,31,13066},{5,31,5014},{4,31,261},{3,27,2390},{1,31,12394},{0,27,2277},{10,31,4410},{9,31,2045},{8,31,289},{8,30,192},{14,15,7578}, +{7,31,4050},{4,31,212},{0,27,1701},{12,23,7578},{0,27,1701},{6,31,4370},{6,31,4370},{6,31,4370},{6,31,2693},{5,31,3429},{4,31,261},{4,31,261},{3,23,197},{0,30,3509},{0,24,306},{8,31,289},{8,31,289},{8,31,289},{8,28,68},{15,2,2178},{4,31,212},{4,31,212},{1,24,9},{13,16,2178},{1,24,9},{15,22,1625},{11,31,850},{9,31,25},{6,31,4},{15,22,1625},{13,28,1625},{6,31,4}, +{0,27,1665},{13,28,1625},{0,27,1665},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{4,30,50},{4,30,50},{4,30,50},{4,21,49},{0,24,306},{0,24,306},{8,31,11042},{7,31,7259},{7,31,4450},{6,31,2805},{7,31,12298},{5,31,4742},{4,31,501},{4,27,1875},{2,31,11643},{0,28,1578},{10,31,3802},{9,31,1869},{9,31,425},{8,31,25},{13,20,6661},{7,31,3554},{5,31,292},{0,28,1217},{13,23,6662}, 
+{0,28,1217},{7,31,4450},{7,31,4450},{7,31,4450},{6,31,2805},{6,31,3714},{4,31,501},{4,31,501},{4,24,129},{0,31,3354},{0,25,244},{9,31,425},{9,31,425},{9,31,425},{8,30,0},{11,18,2178},{5,31,292},{5,31,292},{2,25,1},{10,21,2178},{2,25,1},{15,23,1201},{11,31,674},{10,31,9},{7,31,16},{15,23,1201},{12,30,1201},{7,31,16},{0,28,1201},{12,30,1201},{0,28,1201},{6,0,2705}, +{6,0,2705},{6,0,2705},{6,0,2705},{4,31,17},{4,31,17},{4,31,17},{4,23,5},{0,26,218},{0,26,218},{8,31,10434},{8,31,7186},{7,31,4898},{7,31,2833},{8,31,11595},{6,31,4462},{5,31,629},{4,28,1387},{3,31,10895},{0,28,1002},{11,31,3446},{10,31,1707},{9,31,505},{8,31,73},{13,22,5829},{8,31,3170},{6,31,405},{1,28,869},{12,25,5829},{1,28,869},{7,31,4898},{7,31,4898},{7,31,4898}, +{7,31,2833},{6,31,3906},{5,31,629},{5,31,629},{4,25,170},{0,31,3546},{0,27,228},{9,31,505},{9,31,505},{9,31,505},{9,30,68},{13,14,2178},{6,31,405},{6,31,405},{2,26,4},{8,25,2178},{2,26,4},{13,31,841},{12,31,461},{11,31,1},{8,31,9},{13,31,841},{15,27,841},{8,31,9},{0,28,865},{15,27,841},{0,28,865},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{5,31,53}, +{5,31,53},{5,31,53},{5,23,49},{0,28,137},{0,28,137},{9,31,10014},{8,31,6962},{8,31,5026},{7,31,3105},{8,31,10683},{7,31,4354},{6,31,933},{5,28,1019},{4,31,10078},{0,29,630},{11,31,2934},{10,31,1611},{10,31,650},{9,31,25},{15,17,5082},{8,31,2786},{7,31,521},{1,29,546},{12,26,5082},{1,29,546},{8,31,5026},{8,31,5026},{8,31,5026},{7,31,3105},{7,31,4170},{6,31,933},{6,31,933}, +{5,26,129},{1,31,3814},{0,28,234},{10,31,650},{10,31,650},{10,31,650},{9,31,25},{14,13,2178},{7,31,521},{7,31,521},{3,27,1},{11,23,2178},{3,27,1},{15,26,545},{13,31,313},{12,31,4},{10,31,4},{15,26,545},{14,29,545},{10,31,4},{0,29,545},{14,29,545},{0,29,545},{7,0,2705},{7,0,2705},{7,0,2705},{7,0,2705},{5,31,101},{5,31,101},{5,31,101},{5,24,5},{0,29,85}, 
+{0,29,85},{9,31,9465},{9,31,7065},{8,31,5233},{8,31,3329},{8,31,10116},{7,31,4183},{6,31,1338},{5,29,645},{5,31,9447},{0,30,441},{11,31,2664},{11,31,1525},{10,31,848},{10,31,113},{15,19,4344},{9,31,2424},{8,31,724},{2,30,321},{13,26,4344},{2,30,321},{8,31,5233},{8,31,5233},{8,31,5233},{8,31,3329},{7,31,4629},{6,31,1338},{6,31,1338},{5,27,197},{2,31,4212},{1,29,213},{10,31,848}, +{10,31,848},{10,31,848},{10,31,113},{14,16,2178},{8,31,724},{8,31,724},{3,28,9},{15,20,2178},{3,28,9},{15,27,290},{13,31,178},{13,31,9},{11,31,1},{15,27,290},{14,30,290},{11,31,1},{0,29,320},{14,30,290},{0,29,320},{8,0,2929},{8,0,2929},{8,0,2929},{8,0,2929},{6,31,113},{6,31,113},{6,31,113},{6,25,49},{0,31,45},{0,31,45},{10,31,9329},{9,31,6985},{9,31,5541}, +{8,31,3473},{9,31,9496},{8,31,4420},{7,31,1630},{6,29,426},{5,31,9031},{1,30,301},{12,31,2275},{11,31,1557},{11,31,932},{10,31,225},{13,27,3779},{10,31,2086},{8,31,884},{3,30,129},{13,27,3779},{3,30,129},{9,31,5541},{9,31,5541},{9,31,5541},{8,31,3473},{8,31,4836},{7,31,1630},{7,31,1630},{6,28,129},{4,31,4442},{1,30,237},{11,31,932},{11,31,932},{11,31,932},{10,31,225},{13,22,2178}, +{8,31,884},{8,31,884},{4,29,1},{12,25,2178},{4,29,1},{15,28,130},{14,31,72},{13,31,25},{12,31,9},{15,28,130},{15,29,136},{12,31,9},{0,30,128},{15,29,136},{0,30,128},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{6,31,257},{6,31,257},{6,31,257},{6,27,5},{1,31,89},{1,31,89},{10,31,8929},{10,31,7186},{9,31,5845},{9,31,3829},{9,31,9208},{8,31,4260},{7,31,2270}, +{6,30,245},{6,31,8708},{2,31,228},{12,31,2115},{12,31,1515},{12,31,1154},{11,31,353},{13,29,3299},{11,31,1938},{10,31,1013},{4,30,68},{14,27,3299},{4,30,68},{9,31,5845},{9,31,5845},{9,31,5845},{9,31,3829},{8,31,5124},{7,31,2270},{7,31,2270},{6,29,170},{4,31,4762},{2,31,228},{12,31,1154},{12,31,1154},{12,31,1154},{11,31,353},{15,18,2178},{10,31,1013},{10,31,1013},{4,30,4},{10,29,2178}, 
+{4,30,4},{15,30,34},{14,31,40},{14,31,4},{14,31,4},{15,30,34},{15,30,34},{14,31,4},{0,30,64},{15,30,34},{0,30,64},{8,0,2705},{8,0,2705},{8,0,2705},{8,0,2705},{7,31,245},{7,31,245},{7,31,245},{7,27,49},{2,31,164},{2,31,164},{11,31,8857},{10,31,7170},{10,31,6209},{9,31,4133},{10,31,8853},{8,31,4484},{8,31,2548},{7,31,170},{7,31,8388},{3,31,244},{13,31,1971}, +{12,31,1611},{12,31,1250},{11,31,625},{15,24,2904},{11,31,1826},{10,31,1157},{5,31,1},{15,27,2907},{5,31,1},{10,31,6209},{10,31,6209},{10,31,6209},{9,31,4133},{9,31,5460},{8,31,2548},{8,31,2548},{7,30,129},{5,31,5126},{3,31,244},{12,31,1250},{12,31,1250},{12,31,1250},{11,31,625},{13,27,2178},{10,31,1157},{10,31,1157},{5,31,1},{13,27,2178},{5,31,1},{15,31,0},{15,31,0},{15,31,0}, +{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{7,31,485},{7,31,485},{7,31,485},{7,28,5},{3,31,244},{3,31,244},{11,31,7705},{11,31,6566},{10,31,5633},{10,31,3890},{10,31,7737},{9,31,3874},{8,31,2386},{7,31,116},{7,31,7398},{4,31,317},{13,31,1458},{13,31,1186},{13,31,1017},{12,31,425},{15,25,2166}, +{12,31,1398},{11,31,850},{6,31,4},{12,31,2166},{6,31,4},{10,31,5633},{10,31,5633},{10,31,5633},{10,31,3890},{9,31,4830},{8,31,2386},{8,31,2386},{7,31,116},{6,31,4509},{4,31,317},{13,31,1017},{13,31,1017},{13,31,1017},{12,31,425},{15,22,1625},{11,31,850},{11,31,850},{6,31,4},{13,28,1625},{6,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0}, +{0,31,0},{15,31,0},{0,31,0},{9,0,2725},{9,0,2725},{9,0,2725},{9,0,2725},{8,31,450},{8,31,450},{8,31,450},{7,30,101},{4,31,317},{4,31,317},{11,31,6953},{11,31,5814},{11,31,5189},{10,31,3650},{11,31,6713},{10,31,3531},{9,31,2142},{8,31,74},{8,31,6397},{5,31,425},{13,31,1138},{13,31,866},{13,31,697},{12,31,361},{15,26,1601},{12,31,1046},{11,31,674},{7,31,16},{14,29,1601}, 
+{7,31,16},{11,31,5189},{11,31,5189},{11,31,5189},{10,31,3650},{10,31,4313},{9,31,2142},{9,31,2142},{8,31,74},{7,31,3981},{5,31,425},{13,31,697},{13,31,697},{13,31,697},{12,31,361},{15,23,1201},{11,31,674},{11,31,674},{7,31,16},{12,30,1201},{7,31,16},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{10,0,2689}, +{10,0,2689},{10,0,2689},{10,0,2689},{8,31,578},{8,31,578},{8,31,578},{8,30,49},{5,31,425},{5,31,425},{12,31,6211},{11,31,5318},{11,31,4693},{11,31,3554},{11,31,5833},{10,31,3067},{10,31,2106},{8,31,10},{9,31,5601},{6,31,580},{14,31,825},{13,31,674},{13,31,505},{13,31,233},{15,27,1122},{13,31,738},{12,31,461},{8,31,9},{13,31,1122},{8,31,9},{11,31,4693},{11,31,4693},{11,31,4693}, +{11,31,3554},{10,31,3849},{10,31,2106},{10,31,2106},{8,31,10},{7,31,3629},{6,31,580},{13,31,505},{13,31,505},{13,31,505},{13,31,233},{13,31,841},{12,31,461},{12,31,461},{8,31,9},{15,27,841},{8,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{10,0,2705},{10,0,2705},{10,0,2705},{10,0,2705},{9,31,666}, +{9,31,666},{9,31,666},{8,31,10},{6,31,580},{6,31,580},{12,31,5427},{12,31,4827},{11,31,4453},{11,31,3314},{12,31,5175},{10,31,2859},{10,31,1898},{9,31,74},{10,31,4842},{7,31,724},{14,31,489},{14,31,425},{14,31,389},{13,31,169},{14,31,729},{13,31,482},{13,31,313},{10,31,4},{14,30,726},{10,31,4},{11,31,4453},{11,31,4453},{11,31,4453},{11,31,3314},{11,31,3445},{10,31,1898},{10,31,1898}, +{9,31,74},{8,31,3213},{7,31,724},{14,31,389},{14,31,389},{14,31,389},{13,31,169},{15,26,545},{13,31,313},{13,31,313},{10,31,4},{14,29,545},{10,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{9,31,890},{9,31,890},{9,31,890},{9,31,74},{7,31,724}, 
+{7,31,724},{2,31,33740},{0,31,5184},{0,22,420},{0,21,4221},{1,31,46089},{0,29,24105},{0,21,8317},{0,18,24790},{0,21,63990},{0,16,38959},{1,31,9704},{0,30,2866},{0,21,389},{0,19,3229},{7,2,18065},{0,20,13257},{0,17,6153},{0,12,13481},{12,0,18065},{0,12,13481},{0,15,1},{0,15,1},{0,15,1},{0,9,1},{0,8,1105},{0,7,585},{0,7,585},{0,4,596},{0,4,1273},{0,4,740},{0,15,1}, +{0,15,1},{0,15,1},{0,9,1},{2,1,1105},{0,7,585},{0,7,585},{0,4,596},{4,0,1105},{0,4,596},{9,6,9248},{0,30,2866},{0,21,389},{0,19,3229},{9,6,9248},{14,5,9248},{0,19,3229},{0,14,9248},{14,5,9248},{0,14,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,38380},{0,31,6720},{0,23,245}, +{0,22,3864},{2,31,50747},{0,31,24961},{0,22,8353},{0,19,25735},{0,22,65535},{0,17,41319},{1,31,10152},{0,31,2624},{0,23,229},{0,20,2980},{5,10,19334},{0,20,13769},{0,18,6243},{0,13,14116},{12,1,19334},{0,13,14116},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{0,9,1513},{0,8,772},{0,8,772},{0,5,821},{0,5,1750},{0,4,1028},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{1,6,1513}, +{0,8,772},{0,8,772},{0,5,821},{3,2,1513},{0,5,821},{10,5,9248},{0,31,2624},{0,23,229},{0,20,2980},{10,5,9248},{12,9,9248},{0,20,2980},{0,15,9248},{12,9,9248},{0,15,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,43788},{0,31,9024},{0,24,126},{0,23,3525},{2,31,56155},{0,31,26241},{0,23,8425}, +{0,20,26793},{0,23,65535},{0,18,43819},{2,31,10787},{0,31,2624},{0,24,122},{0,21,2701},{8,0,20689},{0,22,14385},{0,19,6369},{0,13,14756},{12,2,20689},{0,13,14756},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{0,10,1989},{0,9,1018},{0,9,1018},{0,6,1096},{0,5,2294},{0,5,1334},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{1,7,1985},{0,9,1018},{0,9,1018},{0,6,1096},{1,5,1985}, 
+{0,6,1096},{12,1,9248},{0,31,2624},{0,24,122},{0,21,2701},{12,1,9248},{15,7,9248},{0,21,2701},{0,16,9250},{15,7,9248},{0,16,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,49964},{1,31,11512},{0,25,41},{0,24,3109},{2,31,62331},{0,31,28289},{0,24,8585},{0,21,27848},{0,23,65535},{0,19,46459},{2,31,11395}, +{0,31,2880},{0,25,37},{0,22,2440},{8,2,22129},{0,23,15030},{0,20,6509},{0,14,15441},{13,2,22129},{0,14,15441},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{0,12,2525},{0,10,1300},{0,10,1300},{0,6,1384},{0,6,2905},{0,6,1708},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{3,2,2521},{0,10,1300},{0,10,1300},{0,6,1384},{5,1,2521},{0,6,1384},{11,7,9248},{0,31,2880},{0,25,37}, +{0,22,2440},{11,7,9248},{13,11,9248},{0,22,2440},{0,17,9250},{13,11,9248},{0,17,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,57022},{1,31,15166},{0,26,20},{0,25,2804},{2,31,65535},{0,31,31511},{0,25,8733},{0,22,29095},{0,26,65535},{0,20,49444},{2,31,12385},{0,31,3474},{0,26,4},{0,23,2173},{8,4,23851}, +{0,23,15948},{0,21,6729},{0,15,16274},{14,2,23851},{0,15,16274},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{0,13,3200},{0,11,1665},{0,11,1665},{0,7,1754},{0,7,3691},{0,6,2185},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{1,10,3200},{0,11,1665},{0,11,1665},{0,7,1754},{5,2,3200},{0,7,1754},{11,10,9248},{1,31,3226},{0,26,4},{0,23,2173},{11,10,9248},{5,23,9248},{0,23,2173}, +{0,18,9248},{5,23,9248},{0,18,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,63870},{1,31,19230},{0,27,45},{0,27,2520},{3,31,65535},{0,31,35191},{0,26,8925},{0,23,30250},{0,28,65535},{0,21,52374},{3,31,13449},{1,31,4026},{0,27,29},{0,24,1901},{3,24,25472},{0,26,16706},{0,22,6963},{0,16,17124},{14,3,25472}, 
+{0,16,17124},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{0,14,3874},{0,13,2084},{0,13,2084},{0,8,2165},{0,8,4466},{0,7,2627},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{4,1,3872},{0,13,2084},{0,13,2084},{0,8,2165},{3,5,3872},{0,8,2165},{12,9,9248},{2,31,3593},{0,27,29},{0,24,1901},{12,9,9248},{14,13,9248},{0,24,1901},{0,19,9248},{14,13,9248},{0,19,9248},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,65535},{1,31,24002},{0,28,109},{0,27,2268},{3,31,65535},{1,31,39095},{0,27,8825},{0,24,30825},{0,28,65535},{0,22,54996},{3,31,14345},{1,31,4766},{0,29,102},{0,26,1697},{3,26,26744},{0,28,17104},{0,23,6957},{0,17,17625},{15,3,26744},{0,17,17625},{0,31,5},{0,31,5},{0,31,5}, +{0,19,5},{0,16,4418},{0,14,2306},{0,14,2306},{0,9,2420},{0,8,5122},{0,8,2997},{0,31,5},{0,31,5},{0,31,5},{0,19,5},{3,6,4418},{0,14,2306},{0,14,2306},{0,9,2420},{3,6,4418},{0,9,2420},{14,5,9248},{4,31,3904},{1,28,1},{0,26,1693},{14,5,9248},{12,17,9248},{0,26,1693},{0,20,9250},{12,17,9248},{0,20,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{3,31,65535},{1,31,29442},{0,29,330},{0,28,2105},{3,31,65535},{1,31,42151},{0,28,7781},{0,25,30108},{0,29,65535},{0,22,56388},{4,31,14976},{2,31,5434},{1,29,62},{0,27,1580},{11,0,26744},{0,29,16547},{0,24,6221},{0,18,17124},{13,7,26744},{0,18,17124},{0,31,181},{0,31,181},{0,31,181},{0,20,101},{0,19,4420},{0,16,2005},{0,16,2005}, +{0,10,2165},{0,9,5389},{0,9,2925},{1,31,37},{1,31,37},{1,31,37},{1,19,37},{5,2,4418},{0,16,2005},{0,16,2005},{0,10,2165},{8,1,4418},{0,10,2165},{13,11,9248},{4,31,4160},{1,29,26},{0,27,1480},{13,11,9248},{15,15,9248},{0,27,1480},{0,21,9250},{15,15,9248},{0,21,9250},{0,0,100},{0,0,100},{0,0,100},{0,0,100},{0,3,1},{0,3,1},{0,3,1},{0,2,0},{0,1,34}, 
+{0,1,34},{4,31,65535},{2,31,36070},{0,30,822},{0,30,2062},{3,31,65535},{1,31,46660},{0,29,6696},{0,26,29322},{0,31,65535},{0,23,58077},{4,31,15507},{3,31,6253},{1,31,109},{0,28,1646},{11,3,26744},{0,31,15992},{0,26,5346},{0,19,16582},{11,11,26744},{0,19,16582},{1,31,329},{1,31,329},{1,31,329},{1,21,266},{0,22,4420},{0,18,1737},{0,18,1737},{0,11,1898},{0,11,5707},{0,10,2885},{1,31,73}, +{1,31,73},{1,31,73},{1,21,10},{5,5,4418},{0,18,1737},{0,18,1737},{0,11,1898},{5,7,4418},{0,11,1898},{13,14,9248},{5,31,4570},{2,30,4},{0,28,1285},{13,14,9248},{8,25,9248},{0,28,1285},{0,22,9248},{8,25,9248},{0,22,9248},{1,0,265},{1,0,265},{1,0,265},{1,0,265},{0,6,1},{0,6,1},{0,6,1},{0,4,1},{0,3,97},{0,3,97},{4,31,65535},{2,31,40786},{0,31,1405}, +{0,30,2138},{4,31,65535},{1,31,49800},{0,30,5634},{0,27,27967},{0,31,65535},{0,24,58770},{5,31,15531},{3,31,6593},{2,31,61},{1,29,1533},{12,2,26259},{0,31,15284},{0,27,4514},{0,20,15812},{13,10,26259},{0,20,15812},{1,31,633},{1,31,633},{1,31,633},{1,22,381},{0,24,4418},{0,20,1480},{0,20,1480},{0,12,1640},{0,12,6125},{0,11,2891},{2,31,61},{2,31,61},{2,31,61},{2,21,37},{7,1,4418}, +{0,20,1480},{0,20,1480},{0,12,1640},{10,2,4418},{0,12,1640},{13,16,8978},{6,31,4777},{2,31,25},{0,29,1040},{13,16,8978},{15,18,8978},{0,29,1040},{0,23,8980},{15,18,8978},{0,23,8980},{1,0,377},{1,0,377},{1,0,377},{1,0,377},{0,9,0},{0,9,0},{0,9,0},{0,5,4},{0,4,193},{0,4,193},{4,31,65535},{2,31,40898},{1,31,2217},{0,31,2125},{4,31,65535},{1,31,47976},{0,30,4194}, +{0,27,24703},{0,31,65535},{0,24,56130},{5,31,14379},{4,31,6051},{2,31,173},{2,29,1284},{9,14,24371},{0,31,13716},{0,28,3402},{0,21,13989},{14,10,24371},{0,21,13989},{1,31,1193},{1,31,1193},{1,31,1193},{1,24,617},{0,27,4420},{0,22,1280},{0,22,1280},{0,13,1445},{0,14,6509},{0,12,2945},{2,31,173},{2,31,173},{2,31,173},{2,23,5},{5,10,4418},{0,22,1280},{0,22,1280},{0,13,1445},{12,1,4418}, 
+{0,13,1445},{14,14,7938},{7,31,4253},{3,31,9},{0,29,656},{14,14,7938},{14,20,7938},{0,29,656},{0,23,7956},{14,20,7938},{0,23,7956},{1,0,617},{1,0,617},{1,0,617},{1,0,617},{0,11,4},{0,11,4},{0,11,4},{0,7,0},{0,5,325},{0,5,325},{4,31,65535},{2,31,41266},{1,31,3033},{0,31,2333},{4,31,65535},{1,31,46408},{0,30,3010},{0,27,21695},{0,31,65535},{0,25,53636},{6,31,13140}, +{4,31,5571},{3,31,157},{2,29,932},{11,9,22568},{0,31,12404},{0,28,2474},{0,21,12245},{14,11,22568},{0,21,12245},{2,31,1630},{2,31,1630},{2,31,1630},{1,26,989},{0,29,4420},{0,23,1090},{0,23,1090},{0,14,1268},{0,15,6926},{0,13,3029},{3,31,157},{3,31,157},{3,31,157},{3,23,37},{8,1,4418},{0,23,1090},{0,23,1090},{0,14,1268},{10,5,4418},{0,14,1268},{15,12,6962},{7,31,3709},{4,31,1}, +{0,30,353},{15,12,6962},{13,22,6962},{0,30,353},{0,24,6970},{13,22,6962},{0,24,6970},{1,0,985},{1,0,985},{1,0,985},{1,0,985},{0,14,0},{0,14,0},{0,14,0},{0,8,4},{0,6,493},{0,6,493},{4,31,65535},{2,31,41986},{1,31,4257},{0,31,2873},{4,31,65535},{1,31,44950},{0,30,1984},{0,28,18569},{0,31,65535},{0,25,51026},{6,31,11934},{5,31,5125},{4,31,296},{3,29,706},{13,4,20642}, +{0,31,11234},{0,29,1634},{0,22,10422},{15,11,20642},{0,22,10422},{2,31,2350},{2,31,2350},{2,31,2350},{2,26,1450},{0,31,4450},{0,25,949},{0,25,949},{0,16,1096},{0,16,7397},{0,14,3171},{4,31,296},{4,31,296},{4,31,296},{3,25,10},{8,4,4418},{0,25,949},{0,25,949},{0,16,1096},{14,2,4418},{0,16,1096},{14,17,5941},{7,31,3250},{5,31,0},{0,30,128},{14,17,5941},{13,23,5941},{0,30,128}, +{0,24,5953},{13,23,5941},{0,24,5953},{2,0,1450},{2,0,1450},{2,0,1450},{2,0,1450},{0,17,0},{0,17,0},{0,17,0},{0,10,1},{0,8,697},{0,8,697},{4,31,65535},{2,31,42898},{1,31,5617},{1,31,3337},{4,31,65535},{1,31,43926},{0,31,1250},{0,28,15865},{0,31,65535},{0,25,48978},{7,31,10938},{5,31,4773},{4,31,360},{3,30,509},{12,9,19021},{1,31,10246},{0,30,1088},{0,23,8945},{14,13,19021}, 
+{0,23,8945},{2,31,3262},{2,31,3262},{2,31,3262},{2,28,1822},{1,31,4682},{0,28,776},{0,28,776},{0,17,925},{0,18,7893},{0,15,3333},{4,31,360},{4,31,360},{4,31,360},{4,25,37},{3,25,4418},{0,28,776},{0,28,776},{0,17,925},{12,6,4418},{0,17,925},{15,15,5101},{8,31,2777},{6,31,9},{0,31,25},{15,15,5101},{11,26,5101},{0,31,25},{0,25,5105},{11,26,5101},{0,25,5105},{2,0,1818}, +{2,0,1818},{2,0,1818},{2,0,1818},{0,20,1},{0,20,1},{0,20,1},{0,12,1},{0,9,925},{0,9,925},{4,31,65535},{2,31,44066},{1,31,7233},{1,31,3993},{4,31,65535},{2,31,43110},{0,31,738},{0,28,13417},{0,31,65535},{0,25,47186},{7,31,9978},{6,31,4467},{5,31,452},{4,30,357},{15,1,17485},{2,31,9441},{0,30,704},{0,24,7570},{15,13,17485},{0,24,7570},{3,31,4058},{3,31,4058},{3,31,4058}, +{2,29,2315},{1,31,4874},{0,29,610},{0,29,610},{0,18,772},{0,20,8427},{0,16,3497},{5,31,452},{5,31,452},{5,31,452},{4,27,5},{9,6,4418},{0,29,610},{0,29,610},{0,18,772},{14,5,4418},{0,18,772},{15,16,4325},{8,31,2377},{6,31,25},{0,31,9},{15,16,4325},{15,22,4325},{0,31,9},{0,25,4337},{15,22,4325},{0,25,4337},{2,0,2314},{2,0,2314},{2,0,2314},{2,0,2314},{0,22,1}, +{0,22,1},{0,22,1},{0,13,4},{0,10,1189},{0,10,1189},{5,31,65535},{3,31,45090},{1,31,9105},{1,31,4905},{4,31,65535},{2,31,42326},{0,31,482},{0,28,11225},{0,31,65535},{0,26,45590},{7,31,9274},{6,31,4179},{5,31,612},{4,30,245},{14,6,16034},{3,31,8633},{0,31,482},{0,24,6242},{15,14,16034},{0,24,6242},{3,31,5066},{3,31,5066},{3,31,5066},{2,31,2939},{1,31,5322},{0,31,482},{0,31,482}, +{0,19,637},{0,20,8939},{0,17,3725},{5,31,612},{5,31,612},{5,31,612},{5,27,37},{10,5,4418},{0,31,482},{0,31,482},{0,19,637},{12,9,4418},{0,19,637},{13,24,3613},{9,31,1973},{7,31,9},{2,31,1},{13,24,3613},{15,23,3613},{2,31,1},{0,26,3617},{15,23,3613},{0,26,3617},{2,0,2938},{2,0,2938},{2,0,2938},{2,0,2938},{0,25,1},{0,25,1},{0,25,1},{0,15,0},{0,11,1489}, 
+{0,11,1489},{5,31,65535},{3,31,46530},{1,31,11517},{1,31,6237},{4,31,65535},{2,31,41750},{0,31,500},{0,29,8976},{0,31,65535},{0,26,43934},{8,31,8225},{7,31,3853},{6,31,680},{5,30,109},{11,18,14504},{4,31,7667},{0,31,500},{0,25,4979},{10,21,14504},{0,25,4979},{3,31,6506},{3,31,6506},{3,31,6506},{3,31,3701},{2,31,6019},{0,31,500},{0,31,500},{0,20,520},{0,22,9629},{0,18,4035},{6,31,680}, +{6,31,680},{6,31,680},{5,29,10},{10,8,4418},{0,31,500},{0,31,500},{0,20,520},{11,12,4418},{0,20,520},{15,19,2888},{10,31,1537},{8,31,16},{3,31,4},{15,19,2888},{13,26,2888},{3,31,4},{0,26,2906},{13,26,2888},{0,26,2906},{3,0,3697},{3,0,3697},{3,0,3697},{3,0,3697},{0,28,1},{0,28,1},{0,28,1},{0,17,4},{0,11,1930},{0,11,1930},{5,31,65535},{3,31,48082},{1,31,13933}, +{1,31,7693},{4,31,65535},{2,31,41510},{0,31,788},{0,29,7120},{0,31,65535},{0,26,42734},{8,31,7409},{7,31,3693},{7,31,884},{6,30,116},{13,13,13235},{4,31,6899},{1,31,628},{0,25,3987},{15,16,13235},{0,25,3987},{4,31,7686},{4,31,7686},{4,31,7686},{3,31,4437},{2,31,6659},{0,31,788},{0,31,788},{0,21,421},{0,23,10286},{0,20,4305},{7,31,884},{7,31,884},{7,31,884},{6,29,37},{9,14,4418}, +{1,31,628},{1,31,628},{0,21,421},{14,10,4418},{0,21,421},{15,20,2312},{10,31,1217},{8,31,16},{4,31,9},{15,20,2312},{12,28,2312},{4,31,9},{0,27,2314},{12,28,2312},{0,27,2314},{3,0,4337},{3,0,4337},{3,0,4337},{3,0,4337},{0,30,1},{0,30,1},{0,30,1},{0,18,1},{0,13,2329},{0,13,2329},{5,31,65535},{3,31,49890},{2,31,16310},{1,31,9405},{4,31,65535},{2,31,41526},{0,31,1332}, +{0,29,5520},{0,31,65535},{0,26,41790},{8,31,6849},{8,31,3601},{7,31,980},{6,31,5},{15,8,12051},{5,31,6275},{2,31,801},{0,26,3066},{11,22,12051},{0,26,3066},{4,31,9062},{4,31,9062},{4,31,9062},{3,31,5429},{2,31,7555},{1,31,1172},{1,31,1172},{0,23,325},{0,23,11118},{0,20,4625},{7,31,980},{7,31,980},{7,31,980},{6,31,5},{11,10,4418},{2,31,801},{2,31,801},{0,23,325},{5,23,4418}, 
+{0,23,325},{13,28,1800},{11,31,949},{9,31,4},{6,31,1},{13,28,1800},{11,30,1800},{6,31,1},{0,27,1818},{11,30,1800},{0,27,1818},{3,0,5105},{3,0,5105},{3,0,5105},{3,0,5105},{0,31,36},{0,31,36},{0,31,36},{0,20,4},{0,15,2741},{0,15,2741},{5,31,65535},{3,31,51954},{2,31,18790},{1,31,11373},{5,31,65535},{2,31,41798},{0,31,2132},{0,29,4176},{0,31,65535},{0,27,41092},{9,31,6153}, +{8,31,3297},{7,31,1332},{7,31,37},{14,13,10952},{5,31,5763},{3,31,965},{0,27,2291},{11,23,10952},{0,27,2291},{4,31,10694},{4,31,10694},{4,31,10694},{4,31,6566},{3,31,8619},{1,31,1716},{1,31,1716},{0,24,221},{0,26,11876},{0,22,4989},{7,31,1332},{7,31,1332},{7,31,1332},{7,31,37},{12,9,4418},{3,31,965},{3,31,965},{0,24,221},{14,13,4418},{0,24,221},{14,26,1352},{11,31,725},{10,31,0}, +{7,31,1},{14,26,1352},{15,26,1352},{7,31,1},{0,28,1360},{15,26,1352},{0,28,1360},{3,0,6001},{3,0,6001},{3,0,6001},{3,0,6001},{0,31,196},{0,31,196},{0,31,196},{0,21,1},{0,16,3130},{0,16,3130},{5,31,65535},{3,31,54582},{2,31,21886},{1,31,13893},{5,31,65535},{2,31,42410},{0,31,3338},{0,30,2841},{0,31,65535},{0,27,40390},{9,31,5649},{9,31,3249},{8,31,1325},{7,31,109},{14,15,9818}, +{6,31,5258},{4,31,1108},{0,27,1589},{12,23,9818},{0,27,1589},{5,31,12376},{5,31,12376},{5,31,12376},{4,31,7844},{3,31,9861},{1,31,2634},{1,31,2634},{0,25,136},{0,28,12696},{0,23,5429},{8,31,1325},{8,31,1325},{8,31,1325},{7,31,109},{15,2,4418},{4,31,1108},{4,31,1108},{0,25,136},{13,16,4418},{0,25,136},{15,24,925},{12,31,505},{11,31,1},{8,31,1},{15,24,925},{15,27,925},{8,31,1}, +{0,28,937},{15,27,925},{0,28,937},{4,0,7060},{4,0,7060},{4,0,7060},{4,0,7060},{1,31,425},{1,31,425},{1,31,425},{0,23,0},{0,17,3665},{0,17,3665},{5,31,65535},{3,31,57190},{2,31,24910},{1,31,16405},{5,31,65535},{2,31,43226},{0,31,4682},{0,30,1833},{0,31,65535},{0,27,40038},{10,31,5202},{9,31,3073},{8,31,1565},{8,31,277},{13,20,8901},{7,31,4814},{5,31,1300},{0,28,1021},{13,23,8902}, 
+{0,28,1021},{5,31,14136},{5,31,14136},{5,31,14136},{4,31,9252},{3,31,11237},{2,31,3590},{2,31,3590},{0,26,85},{0,29,13491},{0,23,5925},{8,31,1565},{8,31,1565},{8,31,1565},{8,31,277},{11,18,4418},{5,31,1300},{5,31,1300},{0,26,85},{10,21,4418},{0,26,85},{15,25,617},{13,31,365},{12,31,16},{9,31,9},{15,25,617},{13,30,613},{9,31,9},{0,29,617},{13,30,613},{0,29,617},{4,0,7956}, +{4,0,7956},{4,0,7956},{4,0,7956},{1,31,697},{1,31,697},{1,31,697},{0,25,4},{0,18,4181},{0,18,4181},{5,31,65535},{3,31,60054},{2,31,28190},{2,31,18895},{5,31,65535},{2,31,44298},{1,31,6090},{0,30,1081},{0,31,65535},{0,27,39942},{10,31,4850},{10,31,3107},{9,31,1709},{8,31,325},{13,22,8069},{7,31,4574},{6,31,1553},{0,29,602},{12,25,8069},{0,29,602},{6,31,16067},{6,31,16067},{6,31,16067}, +{5,31,10872},{4,31,12824},{2,31,4662},{2,31,4662},{0,27,52},{0,30,14340},{0,25,6449},{9,31,1709},{9,31,1709},{9,31,1709},{8,31,325},{13,14,4418},{6,31,1553},{6,31,1553},{0,27,52},{8,25,4418},{0,27,52},{15,27,365},{13,31,205},{12,31,16},{11,31,4},{15,27,365},{13,31,365},{11,31,4},{0,29,377},{13,31,365},{0,29,377},{4,0,8980},{4,0,8980},{4,0,8980},{4,0,8980},{1,31,1097}, +{1,31,1097},{1,31,1097},{0,26,1},{0,20,4682},{0,20,4682},{6,31,65535},{4,31,58981},{2,31,29926},{2,31,19751},{5,31,65535},{3,31,43402},{1,31,6910},{0,30,765},{0,31,65535},{0,28,34909},{11,31,4502},{10,31,3011},{9,31,2045},{9,31,557},{15,17,7322},{8,31,4242},{7,31,1781},{0,29,314},{12,26,7322},{0,29,314},{6,31,16739},{6,31,16739},{6,31,16739},{5,31,11492},{4,31,13636},{3,31,5586},{3,31,5586}, +{0,28,65},{0,31,14139},{0,26,6041},{9,31,2045},{9,31,2045},{9,31,2045},{9,31,557},{14,13,4418},{7,31,1781},{7,31,1781},{0,28,29},{11,23,4418},{0,28,29},{15,28,181},{14,31,117},{13,31,4},{12,31,0},{15,28,181},{15,29,181},{12,31,0},{0,30,185},{15,29,181},{0,30,185},{4,0,9376},{4,0,9376},{4,0,9376},{4,0,9376},{2,31,1405},{2,31,1405},{2,31,1405},{0,28,40},{0,21,4520}, 
+{0,21,4520},{6,31,65535},{4,31,57316},{3,31,30345},{3,31,20808},{6,31,65535},{3,31,41449},{2,31,8321},{1,31,301},{0,31,65535},{0,28,28330},{11,31,4232},{11,31,3093},{10,31,2248},{9,31,809},{15,19,6584},{9,31,3992},{7,31,2105},{0,30,77},{13,26,6584},{0,30,77},{7,31,17380},{7,31,17380},{7,31,17380},{6,31,12161},{5,31,14315},{3,31,6405},{3,31,6405},{1,29,53},{0,31,13860},{0,28,5286},{10,31,2248}, +{10,31,2248},{10,31,2248},{9,31,809},{14,16,4418},{7,31,2105},{7,31,2105},{0,30,13},{15,20,4418},{0,30,13},{15,29,52},{14,31,36},{14,31,0},{13,31,9},{15,29,52},{15,30,50},{13,31,9},{0,30,68},{15,30,50},{0,30,68},{5,0,9250},{5,0,9250},{5,0,9250},{5,0,9250},{2,31,1549},{2,31,1549},{2,31,1549},{1,28,2},{0,23,4114},{0,23,4114},{6,31,65535},{5,31,55908},{4,31,31583}, +{3,31,21256},{6,31,65535},{4,31,39740},{2,31,9073},{1,31,285},{0,31,65535},{0,29,23356},{12,31,4011},{11,31,3125},{11,31,2500},{10,31,1037},{13,27,6019},{10,31,3738},{8,31,2340},{0,31,4},{13,27,6019},{0,31,4},{7,31,17796},{7,31,17796},{7,31,17796},{6,31,12625},{6,31,14996},{4,31,7139},{4,31,7139},{1,30,86},{0,31,14020},{0,29,4652},{11,31,2500},{11,31,2500},{11,31,2500},{10,31,1037},{13,22,4418}, +{8,31,2340},{8,31,2340},{0,31,4},{12,25,4418},{0,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{0,31,4},{15,31,4},{0,31,4},{6,0,9376},{6,0,9376},{6,0,9376},{6,0,9376},{3,31,1765},{3,31,1765},{3,31,1765},{1,30,50},{0,25,3877},{0,25,3877},{7,31,65535},{5,31,53236},{4,31,30487},{4,31,21367},{6,31,65535},{4,31,37332},{3,31,9385}, +{2,31,36},{0,31,65535},{0,29,18680},{12,31,3443},{12,31,2843},{11,31,2248},{10,31,997},{14,25,5163},{10,31,3218},{9,31,2120},{1,31,4},{12,29,5163},{1,31,4},{7,31,17504},{7,31,17504},{7,31,17504},{7,31,12569},{6,31,14328},{4,31,7227},{4,31,7227},{2,31,20},{0,31,13376},{0,29,3944},{11,31,2248},{11,31,2248},{11,31,2248},{10,31,997},{15,17,3872},{9,31,2120},{9,31,2120},{1,31,4},{12,26,3872}, 
+{1,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{6,0,9248},{6,0,9248},{6,0,9248},{6,0,9248},{3,31,2005},{3,31,2005},{3,31,2005},{2,30,5},{0,27,3545},{0,27,3545},{7,31,65535},{6,31,50785},{4,31,29687},{4,31,20567},{7,31,65535},{4,31,35412},{3,31,8985},{2,31,196},{1,31,65535},{0,29,14712},{12,31,2883}, +{12,31,2283},{12,31,1922},{11,31,821},{13,29,4267},{11,31,2694},{10,31,1745},{3,31,4},{14,27,4267},{3,31,4},{8,31,16610},{8,31,16610},{8,31,16610},{7,31,12185},{6,31,13528},{5,31,6915},{5,31,6915},{2,31,52},{1,31,12556},{0,30,3314},{12,31,1922},{12,31,1922},{12,31,1922},{11,31,821},{12,28,3200},{10,31,1745},{10,31,1745},{3,31,4},{11,28,3200},{3,31,4},{15,31,0},{15,31,0},{15,31,0}, +{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{6,0,9376},{6,0,9376},{6,0,9376},{6,0,9376},{4,31,2250},{4,31,2250},{4,31,2250},{2,31,52},{0,28,3170},{0,28,3170},{7,31,65535},{6,31,47239},{5,31,28065},{5,31,20409},{7,31,65535},{5,31,32574},{4,31,8965},{3,31,54},{1,31,65206},{0,30,10964},{13,31,2326},{12,31,1806},{12,31,1445},{11,31,650},{13,30,3361}, +{11,31,2091},{10,31,1322},{4,31,0},{12,30,3361},{4,31,0},{8,31,15584},{8,31,15584},{8,31,15584},{7,31,12059},{7,31,12522},{6,31,6811},{6,31,6811},{3,31,50},{1,31,11710},{0,31,2834},{12,31,1445},{12,31,1445},{12,31,1445},{11,31,650},{14,23,2521},{10,31,1322},{10,31,1322},{4,31,0},{15,24,2521},{4,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0}, +{0,31,0},{15,31,0},{0,31,0},{7,0,9250},{7,0,9250},{7,0,9250},{7,0,9250},{5,31,2600},{5,31,2600},{5,31,2600},{3,31,50},{0,31,2834},{0,31,2834},{8,31,65535},{6,31,44903},{5,31,27361},{5,31,19705},{7,31,64494},{5,31,30846},{4,31,8677},{3,31,470},{2,31,60777},{0,30,8308},{13,31,1782},{13,31,1510},{12,31,1157},{12,31,557},{13,31,2646},{11,31,1691},{11,31,1066},{5,31,4},{15,27,2646}, 
+{5,31,4},{8,31,14944},{8,31,14944},{8,31,14944},{8,31,11696},{7,31,11850},{6,31,6555},{6,31,6555},{4,31,164},{2,31,11097},{0,31,2610},{12,31,1157},{12,31,1157},{12,31,1157},{12,31,557},{15,21,1985},{11,31,1066},{11,31,1066},{5,31,4},{15,25,1985},{5,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{7,0,9410}, +{7,0,9410},{7,0,9410},{7,0,9410},{5,31,2792},{5,31,2792},{5,31,2792},{4,31,164},{0,31,2610},{0,31,2610},{8,31,63584},{7,31,42019},{6,31,25930},{5,31,19769},{8,31,60273},{6,31,28860},{5,31,8761},{4,31,276},{3,31,56253},{0,30,6420},{13,31,1366},{13,31,1094},{13,31,925},{12,31,397},{15,25,2018},{12,31,1298},{11,31,794},{7,31,4},{13,30,2017},{7,31,4},{9,31,14244},{9,31,14244},{9,31,14244}, +{8,31,11312},{8,31,11249},{7,31,6499},{7,31,6499},{4,31,260},{3,31,10457},{0,31,2642},{13,31,925},{13,31,925},{13,31,925},{12,31,397},{15,22,1513},{11,31,794},{11,31,794},{7,31,4},{14,27,1513},{7,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{8,0,9376},{8,0,9376},{8,0,9376},{8,0,9376},{6,31,3074}, +{6,31,3074},{6,31,3074},{4,31,260},{0,31,2642},{0,31,2642},{8,31,58848},{7,31,39683},{6,31,25130},{6,31,19007},{8,31,54849},{6,31,27132},{5,31,8569},{4,31,756},{4,31,51302},{0,31,5046},{13,31,1078},{13,31,806},{13,31,637},{12,31,365},{15,26,1473},{12,31,978},{12,31,617},{8,31,9},{14,29,1473},{8,31,9},{9,31,13604},{9,31,13604},{9,31,13604},{8,31,11184},{8,31,10433},{7,31,6339},{7,31,6339}, +{5,31,424},{4,31,9713},{0,31,2930},{13,31,637},{13,31,637},{13,31,637},{12,31,365},{14,27,1105},{12,31,617},{12,31,617},{8,31,9},{13,29,1105},{8,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{8,0,9248},{8,0,9248},{8,0,9248},{8,0,9248},{6,31,3330},{6,31,3330},{6,31,3330},{5,31,424},{0,31,2930}, +{0,31,2930}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc 
b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc new file mode 100644 index 0000000000..3b9d7022e7 --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc @@ -0,0 +1,481 @@ +{0,0,20},{0,0,20},{0,0,97},{0,0,145},{0,0,56},{0,0,104},{0,0,181},{0,0,406},{0,0,204},{0,0,442},{0,0,20},{0,0,20},{0,0,97},{0,0,145},{0,0,56},{0,0,104},{0,0,181},{0,0,406},{0,0,168},{0,0,406},{0,0,16},{0,0,16},{0,0,16},{0,0,64},{0,0,52},{0,0,100},{0,0,100},{0,0,325},{0,0,200},{0,0,361},{0,0,16}, +{0,0,16},{0,0,16},{0,0,64},{0,0,52},{0,0,100},{0,0,100},{0,0,325},{0,0,164},{0,0,325},{0,0,20},{0,0,20},{0,0,97},{0,0,145},{0,0,20},{0,0,68},{0,0,145},{0,0,306},{0,0,68},{0,0,306},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,1,126},{0,0,88},{0,0,53}, +{0,0,37},{0,0,116},{0,0,36},{0,0,1},{0,0,66},{0,0,88},{0,0,102},{0,1,126},{0,0,88},{0,0,53},{0,0,37},{0,0,116},{0,0,36},{0,0,1},{0,0,66},{0,0,52},{0,0,66},{0,0,52},{0,0,52},{0,0,52},{0,0,36},{0,0,16},{0,0,0},{0,0,0},{0,0,65},{0,0,52},{0,0,101},{0,0,52},{0,0,52},{0,0,52},{0,0,36},{0,0,16}, +{0,0,0},{0,0,0},{0,0,65},{0,0,16},{0,0,65},{0,1,90},{0,0,52},{0,0,17},{0,0,1},{0,1,90},{0,0,36},{0,0,1},{0,0,50},{0,0,36},{0,0,50},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,0,0},{0,0,0},{0,0,0},{0,0,16},{0,0,52},{0,0,52},{0,1,286},{0,1,310},{0,0,453},{0,0,373},{0,1,115},{0,1,307},{0,0,241}, +{0,0,130},{0,0,280},{0,0,70},{0,1,222},{0,1,246},{0,0,389},{0,0,309},{0,1,51},{1,0,195},{0,0,177},{0,0,66},{1,0,107},{0,0,66},{0,1,261},{0,1,261},{0,1,261},{0,0,324},{0,1,90},{0,0,192},{0,0,192},{0,0,81},{0,0,84},{0,0,21},{0,1,197},{0,1,197},{0,1,197},{0,0,260},{0,1,26},{0,0,128},{0,0,128},{0,0,17},{0,0,80}, 
+{0,0,17},{0,1,26},{0,1,50},{1,0,130},{1,0,74},{0,1,26},{1,0,26},{1,0,74},{0,0,50},{1,0,26},{0,0,50},{0,0,260},{0,0,260},{0,0,260},{0,0,260},{0,1,89},{0,1,89},{0,1,89},{0,0,80},{0,0,20},{0,0,20},{1,0,494},{1,0,550},{1,0,694},{1,0,702},{0,2,363},{0,1,291},{1,0,583},{1,0,631},{0,1,116},{1,0,428},{1,0,170}, +{1,0,226},{1,0,370},{1,0,378},{1,0,51},{0,1,35},{1,0,259},{1,0,307},{1,0,91},{1,0,307},{1,0,469},{1,0,469},{1,0,469},{1,0,477},{0,1,314},{0,1,290},{0,1,290},{1,0,406},{0,1,115},{1,0,203},{1,0,145},{1,0,145},{1,0,145},{1,0,153},{1,0,26},{1,0,34},{1,0,34},{1,0,82},{1,0,10},{1,0,82},{1,0,26},{0,1,50},{1,0,226}, +{1,0,234},{1,0,26},{0,1,26},{1,0,234},{0,0,306},{0,1,26},{0,0,306},{1,0,468},{1,0,468},{1,0,468},{1,0,468},{0,1,265},{0,1,265},{0,1,265},{1,0,325},{0,1,90},{0,1,90},{1,1,116},{1,1,124},{1,1,215},{1,1,271},{1,1,188},{1,1,252},{1,1,343},{1,1,606},{0,1,152},{0,1,392},{1,1,35},{1,1,43},{1,1,134},{1,1,190},{1,1,107}, +{1,1,171},{0,1,260},{0,1,356},{2,0,51},{0,1,356},{1,1,115},{1,1,115},{1,1,115},{1,1,171},{1,0,161},{1,0,241},{1,0,241},{1,0,469},{0,1,52},{0,1,292},{1,1,34},{1,1,34},{1,1,34},{1,1,90},{0,2,16},{1,0,160},{1,0,160},{0,1,256},{0,1,16},{0,1,256},{1,1,26},{1,1,34},{1,1,125},{0,1,116},{1,1,26},{2,0,26},{0,1,116}, +{0,1,356},{2,0,26},{0,1,356},{1,0,90},{1,0,90},{1,0,90},{1,0,90},{1,0,97},{1,0,97},{1,0,97},{1,0,145},{0,1,36},{0,1,36},{1,1,116},{1,1,60},{1,1,39},{1,1,31},{1,1,92},{1,1,28},{1,1,7},{1,1,94},{1,1,100},{1,1,142},{1,1,115},{1,1,59},{1,1,38},{1,1,30},{0,3,51},{1,1,27},{1,1,6},{1,1,93},{1,1,51}, +{1,1,93},{1,1,35},{1,1,35},{1,1,35},{1,1,27},{1,1,11},{1,1,3},{1,1,3},{1,1,90},{1,1,75},{1,1,138},{1,1,34},{1,1,34},{1,1,34},{1,1,26},{1,1,10},{1,1,2},{1,1,2},{1,1,89},{1,1,26},{1,1,89},{2,0,26},{1,1,34},{1,1,13},{1,1,5},{2,0,26},{3,0,26},{1,1,5},{0,1,68},{3,0,26},{0,1,68},{1,0,26}, 
+{1,0,26},{1,0,26},{1,0,26},{1,1,2},{1,1,2},{1,1,2},{1,1,26},{1,1,74},{1,1,74},{1,2,238},{1,2,286},{1,1,375},{1,1,303},{1,2,105},{1,1,316},{1,1,183},{1,1,94},{0,2,156},{1,1,46},{1,2,189},{1,2,237},{1,1,326},{1,1,254},{1,2,56},{2,1,232},{1,1,134},{1,1,45},{0,2,56},{1,1,45},{1,2,222},{1,2,222},{1,2,222}, +{1,1,267},{1,2,89},{1,1,147},{1,1,147},{1,1,58},{1,1,59},{1,1,10},{1,2,173},{1,2,173},{1,2,173},{1,1,218},{2,0,10},{1,1,98},{1,1,98},{1,1,9},{3,0,10},{1,1,9},{1,2,20},{1,2,68},{2,1,136},{2,1,72},{1,2,20},{0,2,20},{2,1,72},{0,1,36},{0,2,20},{0,1,36},{1,0,218},{1,0,218},{1,0,218},{1,0,218},{1,2,85}, +{1,2,85},{1,2,85},{1,1,58},{1,1,10},{1,1,10},{2,1,550},{2,1,598},{2,1,730},{2,1,730},{1,3,361},{1,2,265},{2,1,597},{1,1,606},{1,2,152},{2,1,408},{2,1,189},{2,1,237},{2,1,369},{2,1,369},{2,1,56},{1,2,40},{2,1,236},{2,1,264},{4,0,56},{2,1,264},{2,1,534},{2,1,534},{2,1,534},{2,1,534},{1,2,265},{1,2,265},{1,2,265}, +{1,1,410},{2,1,152},{2,1,212},{2,1,173},{2,1,173},{2,1,173},{2,1,173},{0,4,8},{2,1,40},{2,1,40},{2,1,68},{2,1,8},{2,1,68},{2,1,20},{1,2,36},{2,1,200},{2,1,200},{2,1,20},{4,0,20},{2,1,200},{0,1,260},{4,0,20},{0,1,260},{2,0,530},{2,0,530},{2,0,530},{2,0,530},{1,2,229},{1,2,229},{1,2,229},{1,2,325},{1,2,116}, +{1,2,116},{2,2,152},{2,2,168},{2,2,273},{2,2,337},{2,2,236},{2,2,316},{2,2,421},{2,2,706},{1,2,116},{1,2,436},{2,2,52},{2,2,68},{2,2,173},{2,2,237},{3,0,56},{1,2,211},{1,2,251},{1,2,411},{3,1,56},{1,2,411},{2,2,152},{2,2,152},{2,2,152},{2,2,216},{2,1,158},{2,1,230},{2,1,230},{2,1,438},{1,2,35},{1,2,315},{2,2,52}, +{2,2,52},{2,2,52},{2,2,116},{1,3,10},{2,1,130},{2,1,130},{1,2,290},{1,2,10},{1,2,290},{3,0,20},{2,2,52},{2,2,157},{1,2,130},{3,0,20},{3,1,20},{1,2,130},{0,2,410},{3,1,20},{0,2,410},{2,0,116},{2,0,116},{2,0,116},{2,0,116},{2,1,109},{2,1,109},{2,1,109},{2,1,149},{1,2,26},{1,2,26},{2,2,88},{2,2,40},{2,2,33}, 
+{2,2,33},{2,2,76},{2,2,28},{2,2,21},{2,2,130},{2,2,120},{2,2,190},{2,2,84},{2,2,36},{2,2,29},{2,2,29},{1,4,56},{2,2,24},{2,2,17},{2,2,126},{2,2,56},{2,2,126},{2,2,24},{2,2,24},{2,2,24},{2,2,24},{2,2,12},{2,2,12},{2,2,12},{2,2,121},{2,2,104},{2,2,181},{2,2,20},{2,2,20},{2,2,20},{2,2,20},{2,2,8}, +{2,2,8},{2,2,8},{2,2,117},{5,0,8},{2,2,117},{1,4,20},{2,2,20},{2,2,13},{2,2,13},{1,4,20},{2,2,20},{2,2,13},{0,2,90},{2,2,20},{0,2,90},{2,0,20},{2,0,20},{2,0,20},{2,0,20},{2,2,8},{2,2,8},{2,2,8},{2,2,40},{2,2,100},{2,2,100},{2,3,198},{2,3,270},{2,2,305},{2,2,241},{2,3,103},{2,2,252},{2,2,133}, +{2,2,66},{1,3,148},{2,2,30},{2,3,162},{2,3,234},{2,2,269},{2,2,205},{2,3,67},{2,2,216},{2,2,97},{2,2,30},{6,0,67},{2,2,30},{2,3,189},{2,3,189},{2,3,189},{2,2,216},{2,3,94},{2,2,108},{2,2,108},{2,2,41},{2,2,40},{2,2,5},{2,3,153},{2,3,153},{2,3,153},{2,2,180},{3,1,8},{2,2,72},{2,2,72},{2,2,5},{4,1,8}, +{2,2,5},{2,3,18},{2,3,90},{2,2,125},{2,2,61},{2,3,18},{6,0,18},{2,2,61},{0,2,26},{6,0,18},{0,2,26},{2,0,180},{2,0,180},{2,0,180},{2,0,180},{2,2,72},{2,2,72},{2,2,72},{2,2,40},{2,2,4},{2,2,4},{3,2,614},{2,3,622},{3,2,774},{3,2,766},{2,3,343},{2,3,247},{3,2,619},{2,2,514},{2,3,196},{2,2,382},{3,2,214}, +{3,2,254},{3,2,374},{3,2,366},{4,0,59},{2,3,51},{3,2,219},{3,2,227},{3,2,59},{3,2,227},{3,2,605},{3,2,605},{3,2,605},{3,2,597},{2,3,222},{2,3,246},{2,3,246},{2,2,345},{3,2,179},{2,2,213},{3,2,205},{3,2,205},{3,2,205},{3,2,197},{4,0,10},{3,2,50},{3,2,50},{3,2,58},{3,2,10},{3,2,58},{3,2,18},{2,3,26},{3,2,178}, +{3,2,170},{3,2,18},{5,1,18},{3,2,170},{0,2,218},{5,1,18},{0,2,218},{2,0,596},{2,0,596},{2,0,596},{2,0,596},{2,3,197},{2,3,197},{2,3,197},{2,2,296},{2,3,146},{2,3,146},{3,3,196},{3,3,220},{3,3,339},{3,3,411},{3,3,292},{3,3,388},{3,3,507},{3,3,814},{2,3,88},{2,3,488},{3,3,75},{3,3,99},{3,3,218},{3,3,290},{4,1,67}, 
+{2,3,168},{2,3,248},{2,3,472},{4,2,67},{2,3,472},{3,2,182},{3,2,182},{3,2,182},{3,2,246},{3,2,161},{3,2,225},{3,2,225},{3,2,413},{2,3,24},{3,2,308},{3,2,61},{3,2,61},{3,2,61},{3,2,125},{2,4,8},{3,2,104},{3,2,104},{3,2,292},{7,0,8},{3,2,292},{4,1,18},{3,3,74},{3,3,193},{2,3,148},{4,1,18},{4,2,18},{2,3,148}, +{0,3,468},{4,2,18},{0,3,468},{3,0,146},{3,0,146},{3,0,146},{3,0,146},{3,2,125},{3,2,125},{3,2,125},{3,2,157},{2,3,20},{2,3,20},{3,3,68},{3,3,28},{3,3,35},{3,3,43},{3,3,68},{3,3,36},{3,3,43},{3,3,174},{3,3,148},{3,3,246},{3,3,59},{3,3,19},{3,3,26},{3,3,34},{3,3,59},{3,3,27},{3,3,34},{2,3,152},{6,1,59}, +{2,3,152},{3,3,19},{3,3,19},{3,3,19},{3,3,27},{3,3,19},{3,3,27},{3,3,27},{3,3,158},{3,3,139},{3,3,230},{3,3,10},{3,3,10},{3,3,10},{3,3,18},{3,3,10},{3,3,18},{3,3,18},{2,3,136},{6,1,10},{2,3,136},{5,0,18},{3,3,10},{3,3,17},{3,3,25},{5,0,18},{3,3,18},{3,3,25},{0,3,116},{3,3,18},{0,3,116},{3,0,18}, +{3,0,18},{3,0,18},{3,0,18},{3,3,18},{3,3,18},{3,3,18},{3,3,58},{3,3,130},{3,3,130},{3,4,166},{3,4,262},{3,3,243},{3,3,187},{3,4,109},{3,3,196},{3,3,91},{3,3,46},{3,3,148},{3,3,22},{3,4,141},{3,4,237},{3,3,218},{3,3,162},{4,2,59},{3,3,171},{3,3,66},{3,3,21},{5,2,59},{3,3,21},{3,4,162},{3,4,162},{3,4,162}, +{3,3,171},{3,4,105},{3,3,75},{3,3,75},{3,3,30},{3,3,27},{3,3,6},{3,4,137},{3,4,137},{3,4,137},{3,3,146},{4,2,10},{3,3,50},{3,3,50},{3,3,5},{5,2,10},{3,3,5},{3,4,20},{3,4,116},{3,3,97},{3,3,41},{3,4,20},{7,1,20},{3,3,41},{0,3,20},{7,1,20},{0,3,20},{3,0,146},{3,0,146},{3,0,146},{3,0,146},{3,3,50}, +{3,3,50},{3,3,50},{3,3,26},{3,3,2},{3,3,2},{3,5,598},{3,4,550},{4,3,826},{4,3,810},{3,4,285},{3,4,237},{4,3,649},{3,3,430},{4,3,248},{3,3,310},{4,3,245},{4,3,277},{4,3,385},{4,3,369},{5,1,52},{3,4,68},{4,3,208},{4,3,196},{4,3,52},{4,3,196},{3,4,546},{3,4,546},{3,4,546},{3,4,594},{3,4,185},{3,4,233},{3,4,233}, 
+{3,3,286},{4,3,212},{3,3,166},{4,3,241},{4,3,241},{4,3,241},{4,3,225},{5,1,16},{4,3,64},{4,3,64},{4,3,52},{7,1,16},{4,3,52},{4,3,20},{3,4,20},{4,3,160},{4,3,144},{4,3,20},{6,2,20},{4,3,144},{0,3,180},{6,2,20},{0,3,180},{3,0,530},{3,0,530},{3,0,530},{3,0,530},{3,4,169},{3,4,169},{3,4,169},{3,3,250},{3,3,130}, +{3,3,130},{4,4,248},{4,4,280},{4,4,413},{4,4,493},{4,3,291},{4,3,451},{4,4,601},{4,3,835},{3,4,68},{3,4,548},{4,4,104},{4,4,136},{4,4,269},{4,4,349},{6,0,59},{3,4,131},{3,4,251},{3,4,539},{3,4,59},{3,4,539},{4,3,205},{4,3,205},{4,3,205},{4,3,261},{4,3,170},{4,3,226},{4,3,226},{4,3,394},{3,4,19},{4,3,275},{4,3,61}, +{4,3,61},{4,3,61},{4,3,117},{6,0,10},{4,3,82},{4,3,82},{4,3,250},{3,4,10},{4,3,250},{5,2,20},{4,4,100},{4,4,233},{3,4,170},{5,2,20},{5,3,20},{3,4,170},{0,4,530},{5,3,20},{0,4,530},{4,0,180},{4,0,180},{4,0,180},{4,0,180},{4,3,145},{4,3,145},{4,3,145},{4,3,169},{3,4,18},{3,4,18},{4,4,56},{4,4,24},{4,4,45}, +{4,4,61},{4,4,68},{4,4,52},{4,4,73},{4,4,226},{4,4,184},{3,4,292},{4,4,40},{4,4,8},{4,4,29},{4,4,45},{4,4,52},{4,4,36},{4,4,57},{3,4,171},{7,2,52},{3,4,171},{4,4,20},{4,4,20},{4,4,20},{4,4,36},{4,4,32},{4,4,48},{4,4,48},{4,4,201},{4,4,180},{3,4,267},{4,4,4},{4,4,4},{4,4,4},{4,4,20},{5,2,16}, +{4,4,32},{4,4,32},{3,4,146},{7,2,16},{3,4,146},{6,1,20},{4,4,4},{4,4,25},{4,4,41},{6,1,20},{4,4,20},{4,4,41},{0,4,146},{4,4,20},{0,4,146},{4,0,20},{4,0,20},{4,0,20},{4,0,20},{4,4,32},{4,4,32},{4,4,32},{4,4,80},{3,4,146},{3,4,146},{4,5,142},{4,5,262},{4,4,189},{4,4,141},{4,5,123},{4,4,148},{4,4,57}, +{4,4,34},{4,4,120},{4,4,22},{4,5,126},{4,5,246},{4,4,173},{4,4,125},{5,3,52},{4,4,132},{4,4,41},{4,4,18},{6,3,52},{4,4,18},{4,5,141},{4,5,141},{4,5,141},{4,4,132},{4,4,96},{4,4,48},{4,4,48},{4,4,25},{4,4,20},{4,4,13},{4,5,125},{4,5,125},{4,5,125},{4,4,116},{6,1,16},{4,4,32},{4,4,32},{4,4,9},{6,3,16}, 
+{4,4,9},{7,0,26},{4,5,146},{4,4,73},{4,4,25},{7,0,26},{3,5,26},{4,4,25},{0,4,18},{3,5,26},{0,4,18},{4,0,116},{4,0,116},{4,0,116},{4,0,116},{4,4,32},{4,4,32},{4,4,32},{4,4,16},{4,4,4},{4,4,4},{4,5,558},{4,5,486},{4,4,845},{4,4,733},{4,5,235},{4,5,235},{4,4,553},{4,4,354},{5,4,276},{4,4,246},{5,4,282}, +{5,4,306},{5,4,402},{5,4,378},{6,2,51},{4,5,91},{5,4,203},{5,4,171},{5,4,51},{5,4,171},{4,5,477},{4,5,477},{4,5,477},{4,5,549},{4,5,154},{4,5,226},{4,5,226},{4,4,233},{3,5,235},{4,4,125},{5,4,281},{5,4,281},{5,4,281},{5,4,257},{7,0,10},{5,4,82},{5,4,82},{5,4,50},{3,5,10},{5,4,50},{6,2,26},{4,5,18},{5,4,146}, +{5,4,122},{6,2,26},{7,3,26},{5,4,122},{0,4,146},{7,3,26},{0,4,146},{4,0,468},{4,0,468},{4,0,468},{4,0,468},{4,5,145},{4,5,145},{4,5,145},{4,4,208},{4,4,100},{4,4,100},{5,5,308},{5,5,348},{5,5,495},{5,5,583},{5,4,285},{5,4,429},{5,4,633},{5,4,781},{4,5,56},{4,5,616},{5,5,139},{5,5,179},{5,5,326},{5,5,414},{7,1,52}, +{4,5,100},{4,5,260},{5,4,612},{4,5,52},{5,4,612},{5,4,234},{5,4,234},{5,4,234},{5,4,282},{5,4,185},{5,4,233},{5,4,233},{5,4,381},{4,5,20},{5,4,248},{5,4,65},{5,4,65},{5,4,65},{5,4,113},{7,1,16},{5,4,64},{5,4,64},{5,4,212},{7,3,16},{5,4,212},{6,3,26},{5,5,130},{5,5,277},{4,5,196},{6,3,26},{6,4,26},{4,5,196}, +{0,4,596},{6,4,26},{0,4,596},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{5,4,169},{5,4,169},{5,4,169},{5,4,185},{4,5,20},{4,5,20},{5,5,52},{5,5,28},{5,5,63},{5,5,87},{5,5,76},{5,5,76},{5,5,111},{5,5,286},{5,5,228},{4,5,296},{5,5,27},{5,5,3},{5,5,38},{5,5,62},{5,5,51},{5,5,51},{5,5,86},{4,5,196},{3,6,51}, +{4,5,196},{5,5,27},{5,5,27},{5,5,27},{5,5,51},{5,5,51},{5,5,75},{5,5,75},{5,5,250},{4,5,180},{4,5,260},{5,5,2},{5,5,2},{5,5,2},{5,5,26},{6,3,10},{5,5,50},{5,5,50},{4,5,160},{6,4,10},{4,5,160},{7,2,26},{5,5,2},{5,5,37},{5,5,61},{7,2,26},{5,5,26},{5,5,61},{0,5,180},{5,5,26},{0,5,180},{5,0,26}, 
+{5,0,26},{5,0,26},{5,0,26},{5,5,50},{5,5,50},{5,5,50},{5,5,106},{4,5,116},{4,5,116},{5,6,126},{5,5,220},{5,5,143},{5,5,103},{5,6,145},{5,5,108},{5,5,31},{5,5,30},{5,5,100},{5,5,30},{5,6,117},{5,5,211},{5,5,134},{5,5,94},{6,4,51},{5,5,99},{5,5,22},{5,5,21},{7,4,51},{5,5,21},{5,6,126},{5,6,126},{5,6,126}, +{5,5,99},{5,5,67},{5,5,27},{5,5,27},{5,5,26},{5,5,19},{5,5,26},{5,6,117},{5,6,117},{5,6,117},{5,5,90},{7,2,10},{5,5,18},{5,5,18},{5,5,17},{5,5,10},{5,5,17},{6,4,26},{5,5,130},{5,5,53},{5,5,13},{6,4,26},{7,4,26},{5,5,13},{0,5,20},{7,4,26},{0,5,20},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{5,5,18}, +{5,5,18},{5,5,18},{5,5,10},{5,5,10},{5,5,10},{5,6,478},{5,6,430},{5,5,735},{5,5,631},{5,6,193},{5,6,241},{5,5,463},{5,5,286},{4,6,268},{5,5,190},{6,5,325},{5,6,309},{6,5,425},{6,5,393},{7,3,56},{6,5,120},{6,5,204},{6,5,152},{6,5,56},{6,5,152},{5,6,414},{5,6,414},{5,6,414},{5,6,510},{5,6,129},{5,6,225},{5,6,225}, +{5,5,186},{5,5,195},{5,5,90},{5,6,293},{5,6,293},{5,6,293},{6,5,293},{5,6,8},{6,5,104},{6,5,104},{6,5,52},{4,6,8},{6,5,52},{7,3,20},{5,6,20},{6,5,136},{6,5,104},{7,3,20},{6,5,20},{6,5,104},{0,5,116},{6,5,20},{0,5,116},{5,0,410},{5,0,410},{5,0,410},{5,0,410},{5,6,125},{5,6,125},{5,6,125},{5,5,170},{5,5,74}, +{5,5,74},{6,5,350},{6,6,424},{6,6,585},{6,5,670},{6,5,287},{6,5,415},{6,5,607},{6,5,735},{5,6,52},{6,5,588},{6,5,154},{6,6,228},{6,6,389},{6,5,474},{5,7,51},{5,6,75},{5,6,275},{6,5,539},{5,6,51},{6,5,539},{6,5,269},{6,5,269},{6,5,269},{6,5,309},{6,5,206},{6,5,246},{6,5,246},{6,5,374},{5,6,27},{6,5,227},{6,5,73}, +{6,5,73},{6,5,73},{6,5,113},{6,5,10},{6,5,50},{6,5,50},{6,5,178},{3,7,10},{6,5,178},{5,7,26},{5,6,146},{6,6,325},{5,6,226},{5,7,26},{5,6,26},{5,6,226},{0,5,530},{5,6,26},{0,5,530},{6,0,260},{6,0,260},{6,0,260},{6,0,260},{6,5,197},{6,5,197},{6,5,197},{6,5,205},{5,6,26},{5,6,26},{6,6,56},{6,6,40},{6,6,89}, 
+{6,6,121},{6,6,92},{6,6,108},{6,6,157},{6,6,354},{6,6,280},{5,6,308},{6,6,20},{6,6,4},{6,6,53},{6,6,85},{6,6,56},{6,6,72},{6,6,121},{5,6,227},{4,7,56},{5,6,227},{6,6,40},{6,6,40},{6,6,40},{6,6,72},{6,6,76},{6,6,108},{6,6,108},{6,6,305},{5,6,139},{5,6,259},{6,6,4},{6,6,4},{6,6,4},{6,6,36},{7,4,8}, +{6,6,72},{6,6,72},{5,6,178},{7,5,8},{5,6,178},{6,6,20},{6,6,4},{6,6,53},{6,6,85},{6,6,20},{4,7,20},{6,6,85},{0,6,218},{4,7,20},{0,6,218},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,6,72},{6,6,72},{6,6,72},{6,6,136},{5,6,90},{5,6,90},{6,7,118},{6,6,168},{6,6,105},{6,6,73},{6,7,175},{6,6,76},{6,6,13}, +{6,6,34},{6,6,88},{6,6,46},{6,7,114},{6,6,164},{6,6,101},{6,6,69},{7,5,56},{6,6,72},{6,6,9},{6,6,30},{6,6,72},{6,6,30},{6,6,104},{6,6,104},{6,6,104},{6,6,72},{6,6,44},{6,6,12},{6,6,12},{6,6,33},{6,6,24},{6,6,45},{6,6,100},{6,6,100},{6,6,100},{6,6,68},{7,5,40},{6,6,8},{6,6,8},{6,6,29},{6,6,8}, +{6,6,29},{7,5,20},{6,6,100},{6,6,37},{6,6,5},{7,5,20},{5,7,50},{6,6,5},{0,6,26},{5,7,50},{0,6,26},{6,0,68},{6,0,68},{6,0,68},{6,0,68},{6,6,8},{6,6,8},{6,6,8},{6,6,8},{6,6,20},{6,6,20},{6,7,406},{6,7,382},{6,6,633},{6,6,537},{6,7,159},{6,7,255},{6,6,381},{6,6,226},{5,7,228},{6,6,142},{6,7,306}, +{6,7,282},{7,6,454},{7,6,414},{6,7,59},{7,6,139},{7,6,211},{6,6,126},{5,7,59},{6,6,126},{6,7,357},{6,7,357},{6,7,357},{6,6,456},{6,7,110},{6,7,230},{6,7,230},{6,6,145},{6,6,152},{6,6,61},{6,7,257},{6,7,257},{6,7,257},{7,6,333},{6,7,10},{7,6,130},{7,6,130},{6,6,45},{5,7,10},{6,6,45},{7,6,50},{6,7,26},{7,6,130}, +{7,6,90},{7,6,50},{7,6,18},{7,6,90},{0,6,90},{7,6,18},{0,6,90},{6,0,356},{6,0,356},{6,0,356},{6,0,356},{6,7,109},{6,7,109},{6,7,109},{6,6,136},{6,6,52},{6,6,52},{7,6,374},{7,6,454},{7,6,634},{7,6,666},{7,6,297},{7,6,409},{7,6,589},{7,6,697},{6,7,56},{7,6,536},{7,6,149},{7,6,229},{7,6,409},{7,6,441},{7,6,72}, 
+{6,7,56},{6,7,296},{7,6,472},{6,7,56},{7,6,472},{7,6,310},{7,6,310},{7,6,310},{7,6,342},{7,6,233},{7,6,265},{7,6,265},{7,6,373},{6,7,40},{7,6,212},{7,6,85},{7,6,85},{7,6,85},{7,6,117},{7,6,8},{7,6,40},{7,6,40},{7,6,148},{7,6,40},{7,6,148},{7,6,68},{6,7,116},{7,6,328},{6,7,260},{7,6,68},{6,7,20},{6,7,260}, +{0,6,468},{6,7,20},{0,6,468},{7,0,306},{7,0,306},{7,0,306},{7,0,306},{7,6,229},{7,6,229},{7,6,229},{7,6,229},{6,7,36},{6,7,36},{7,7,68},{7,7,60},{7,7,123},{7,7,163},{7,7,116},{7,7,148},{7,7,211},{7,7,430},{6,7,248},{6,7,328},{7,7,19},{7,7,11},{7,7,74},{7,7,114},{7,7,67},{7,7,99},{7,7,162},{6,7,264},{7,7,171}, +{6,7,264},{7,7,59},{7,7,59},{7,7,59},{7,7,99},{7,7,107},{7,7,147},{7,7,147},{7,7,366},{6,7,104},{6,7,264},{7,7,10},{7,7,10},{7,7,10},{7,7,50},{7,7,58},{7,7,98},{7,7,98},{6,7,200},{6,7,40},{6,7,200},{7,7,18},{7,7,10},{7,7,73},{6,7,100},{7,7,18},{7,7,50},{6,7,100},{0,7,260},{7,7,50},{0,7,260},{7,0,50}, +{7,0,50},{7,0,50},{7,0,50},{7,6,85},{7,6,85},{7,6,85},{7,6,149},{6,7,68},{6,7,68},{7,7,196},{7,7,124},{7,7,75},{7,7,51},{7,7,148},{7,7,52},{7,7,3},{7,7,46},{7,7,84},{7,7,70},{7,7,195},{7,7,123},{7,7,74},{7,7,50},{7,7,147},{7,7,51},{7,7,2},{7,7,45},{7,7,59},{7,7,45},{7,7,75},{7,7,75},{7,7,75}, +{7,7,51},{7,7,27},{7,7,3},{7,7,3},{7,7,46},{7,7,35},{7,7,70},{7,7,74},{7,7,74},{7,7,74},{7,7,50},{7,7,26},{7,7,2},{7,7,2},{7,7,45},{7,7,10},{7,7,45},{7,7,146},{7,7,74},{7,7,25},{7,7,1},{7,7,146},{7,7,50},{7,7,1},{0,7,36},{7,7,50},{0,7,36},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,7,2}, +{7,7,2},{7,7,2},{7,7,10},{7,7,34},{7,7,34},{7,7,548},{7,7,476},{7,7,427},{7,7,355},{7,7,404},{7,7,260},{7,7,211},{7,7,106},{7,7,132},{7,7,34},{7,7,467},{7,7,395},{7,7,346},{7,7,274},{7,7,323},{7,7,179},{7,7,130},{7,7,25},{7,7,123},{7,7,25},{7,7,427},{7,7,427},{7,7,427},{7,7,355},{7,7,283},{7,7,211},{7,7,211}, 
+{7,7,106},{7,7,83},{7,7,34},{7,7,346},{7,7,346},{7,7,346},{7,7,274},{7,7,202},{7,7,130},{7,7,130},{7,7,25},{7,7,74},{7,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,306},{7,0,306},{7,0,306},{7,0,306},{7,7,162},{7,7,162},{7,7,162},{7,7,106},{7,7,34}, +{7,7,34},{0,0,122},{0,0,50},{0,0,1},{0,0,25},{0,0,158},{0,0,110},{0,0,61},{0,0,244},{0,0,210},{0,0,280},{0,0,122},{0,0,50},{0,0,1},{0,0,25},{0,0,158},{0,0,110},{0,0,61},{0,0,244},{0,0,174},{0,0,244},{0,0,1},{0,0,1},{0,0,1},{0,0,25},{0,0,37},{0,0,61},{0,0,61},{0,0,244},{0,0,161},{0,0,280},{0,0,1}, +{0,0,1},{0,0,1},{0,0,25},{0,0,37},{0,0,61},{0,0,61},{0,0,244},{0,0,125},{0,0,244},{0,0,122},{0,0,50},{0,0,1},{0,0,25},{0,0,122},{0,0,74},{0,0,25},{0,0,144},{0,0,74},{0,0,144},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,1,81},{0,1,81},{0,0,122}, +{0,0,82},{0,1,328},{0,0,243},{0,0,82},{0,0,129},{0,0,319},{0,0,165},{0,1,81},{0,1,81},{0,0,122},{0,0,82},{0,1,328},{0,0,243},{0,0,82},{0,0,129},{0,0,283},{0,0,129},{0,1,45},{0,1,45},{0,1,45},{0,0,18},{0,0,58},{0,0,18},{0,0,18},{0,0,65},{0,0,94},{0,0,101},{0,1,45},{0,1,45},{0,1,45},{0,0,18},{0,0,58}, +{0,0,18},{0,0,18},{0,0,65},{0,0,58},{0,0,65},{0,1,72},{0,1,72},{0,0,113},{0,0,73},{0,1,72},{1,0,104},{0,0,73},{0,0,80},{1,0,104},{0,0,80},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,49},{0,0,85},{0,0,85},{0,2,225},{0,1,129},{0,1,449},{0,0,498},{0,1,264},{0,1,168},{0,0,402}, +{0,0,273},{0,0,687},{0,0,309},{0,2,225},{0,1,129},{0,1,449},{0,0,498},{0,1,264},{0,1,168},{0,0,402},{0,0,273},{1,0,248},{0,0,273},{0,1,125},{0,1,125},{0,1,125},{0,1,221},{0,1,68},{0,0,146},{0,0,146},{0,0,17},{0,0,158},{0,0,53},{0,1,125},{0,1,125},{0,1,125},{0,1,221},{0,1,68},{0,0,146},{0,0,146},{0,0,17},{0,0,122}, 
+{0,0,17},{1,0,72},{0,1,8},{1,0,292},{1,0,260},{1,0,72},{1,0,104},{1,0,260},{0,0,272},{1,0,104},{0,0,272},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,0,25},{0,0,25},{0,0,25},{0,0,1},{0,0,37},{0,0,37},{0,2,514},{0,2,558},{0,1,610},{0,1,514},{0,2,297},{0,1,153},{0,1,153},{0,1,777},{0,1,392},{0,0,802},{1,1,347}, +{1,1,355},{1,1,446},{0,1,465},{0,2,248},{0,1,104},{0,1,104},{0,1,728},{0,1,248},{0,1,728},{0,2,414},{0,2,414},{0,2,414},{0,1,414},{0,1,149},{0,1,53},{0,1,53},{0,0,274},{0,1,292},{0,0,226},{1,0,229},{1,0,229},{1,0,229},{1,0,261},{1,0,68},{0,1,4},{0,1,4},{1,0,160},{1,0,52},{1,0,160},{0,2,104},{1,1,130},{0,1,200}, +{0,1,104},{0,2,104},{2,0,74},{0,1,104},{0,1,584},{2,0,74},{0,1,584},{0,0,410},{0,0,410},{0,0,410},{0,0,410},{0,1,49},{0,1,49},{0,1,49},{0,0,130},{0,0,82},{0,0,82},{1,1,464},{1,1,400},{1,1,365},{1,1,397},{1,1,572},{0,2,499},{0,1,387},{0,1,435},{0,1,428},{0,1,188},{1,1,140},{1,1,76},{1,1,41},{1,1,73},{1,1,248}, +{1,1,216},{0,1,131},{0,1,179},{2,0,264},{0,1,179},{1,1,364},{1,1,364},{1,1,364},{1,1,396},{0,2,314},{0,1,386},{0,1,386},{0,1,434},{0,1,67},{0,1,187},{1,1,40},{1,1,40},{1,1,40},{1,1,72},{0,2,58},{0,1,130},{0,1,130},{0,1,178},{0,1,58},{0,1,178},{0,3,72},{1,1,40},{1,1,5},{1,1,37},{0,3,72},{1,1,72},{1,1,37}, +{0,1,170},{1,1,72},{0,1,170},{1,0,360},{1,0,360},{1,0,360},{1,0,360},{0,2,265},{0,2,265},{0,2,265},{0,1,265},{0,1,18},{0,1,18},{1,2,178},{1,2,202},{1,1,189},{1,1,157},{1,2,463},{1,1,316},{1,1,169},{1,1,238},{0,2,412},{0,1,124},{1,2,78},{1,2,102},{1,1,89},{1,1,57},{2,0,248},{0,2,99},{1,1,69},{0,1,99},{3,0,248}, +{0,1,99},{1,1,140},{1,1,140},{1,1,140},{1,1,108},{1,1,152},{1,1,120},{1,1,120},{1,1,189},{0,1,275},{0,1,75},{1,1,40},{1,1,40},{1,1,40},{1,1,8},{1,1,52},{1,1,20},{1,1,20},{0,1,50},{1,1,68},{0,1,50},{1,2,74},{1,2,98},{1,1,85},{1,1,53},{1,2,74},{0,2,74},{1,1,53},{0,1,74},{0,2,74},{0,1,74},{1,0,104}, 
+{1,0,104},{1,0,104},{1,0,104},{1,1,116},{1,1,116},{1,1,116},{1,1,164},{0,1,50},{0,1,50},{1,3,226},{1,2,106},{1,2,466},{1,1,429},{1,2,255},{1,2,207},{1,1,345},{1,1,238},{0,2,252},{1,1,298},{1,3,222},{1,2,102},{1,2,462},{1,1,425},{1,2,251},{1,2,203},{1,1,341},{1,1,234},{0,2,251},{1,1,234},{1,2,105},{1,2,105},{1,2,105}, +{1,1,204},{1,2,86},{1,1,120},{1,1,120},{1,1,13},{0,2,83},{1,1,73},{1,2,101},{1,2,101},{1,2,101},{1,1,200},{2,0,52},{1,1,116},{1,1,116},{1,1,9},{3,0,52},{1,1,9},{2,1,74},{1,2,2},{2,1,274},{2,1,234},{2,1,74},{4,0,74},{2,1,234},{0,1,234},{4,0,74},{0,1,234},{1,0,104},{1,0,104},{1,0,104},{1,0,104},{1,1,20}, +{1,1,20},{1,1,20},{1,1,4},{1,1,64},{1,1,64},{1,3,450},{1,2,522},{1,2,562},{1,2,490},{1,3,295},{1,2,127},{1,2,167},{1,1,750},{1,2,428},{1,1,714},{2,2,400},{2,2,416},{2,2,521},{1,2,454},{2,1,251},{1,2,91},{1,2,131},{0,2,651},{4,0,251},{0,2,651},{1,3,369},{1,3,369},{1,3,369},{1,2,369},{1,2,118},{1,2,46},{1,2,46}, +{1,1,221},{0,2,163},{1,1,185},{2,1,257},{2,1,257},{2,1,257},{2,1,281},{0,4,50},{1,2,10},{1,2,10},{2,1,146},{2,1,50},{2,1,146},{3,0,80},{2,2,160},{1,2,202},{1,2,130},{3,0,80},{3,1,80},{1,2,130},{0,1,650},{3,1,80},{0,1,650},{1,0,360},{1,0,360},{1,0,360},{1,0,360},{1,2,37},{1,2,37},{1,2,37},{1,1,100},{1,1,64}, +{1,1,64},{2,2,500},{2,2,444},{2,2,423},{2,2,463},{1,4,588},{1,3,513},{1,2,329},{1,2,441},{0,3,284},{1,2,232},{2,2,139},{2,2,83},{2,2,62},{2,2,102},{3,0,251},{2,2,243},{1,2,104},{1,2,216},{3,1,251},{1,2,216},{2,2,419},{2,2,419},{2,2,419},{2,2,459},{1,3,277},{1,2,325},{1,2,325},{1,2,437},{1,2,68},{1,2,228},{2,2,58}, +{2,2,58},{2,2,58},{2,2,98},{1,3,52},{1,2,100},{1,2,100},{1,2,212},{1,2,52},{1,2,212},{1,4,74},{2,2,34},{2,2,13},{1,2,40},{1,4,74},{2,2,74},{1,2,40},{0,2,200},{2,2,74},{0,2,200},{2,0,410},{2,0,410},{2,0,410},{2,0,410},{1,3,241},{1,3,241},{1,3,241},{1,2,241},{1,2,32},{1,2,32},{2,3,202},{2,3,250},{2,2,183}, 
+{2,2,159},{2,2,460},{2,2,316},{2,2,183},{2,2,274},{1,3,468},{1,2,104},{2,3,81},{2,3,129},{2,2,62},{2,2,38},{1,4,251},{1,3,96},{2,2,62},{1,2,88},{2,2,251},{1,2,88},{2,2,147},{2,2,147},{2,2,147},{2,2,123},{2,2,171},{2,2,147},{2,2,147},{2,2,238},{0,3,219},{1,2,68},{2,2,26},{2,2,26},{2,2,26},{2,2,2},{2,2,50}, +{2,2,26},{2,2,26},{1,2,52},{5,0,50},{1,2,52},{2,3,80},{2,3,128},{2,2,61},{2,2,37},{2,3,80},{6,0,80},{2,2,37},{0,2,72},{6,0,80},{0,2,72},{2,0,122},{2,0,122},{2,0,122},{2,0,122},{2,2,146},{2,2,146},{2,2,146},{2,2,202},{1,2,32},{1,2,32},{2,3,234},{2,3,90},{2,2,455},{2,2,367},{2,3,253},{2,3,253},{2,2,295}, +{2,2,210},{1,3,244},{2,2,282},{2,3,225},{2,3,81},{2,2,446},{2,2,358},{2,3,244},{2,3,244},{2,2,286},{2,2,201},{6,0,244},{2,2,201},{2,3,90},{2,3,90},{2,3,90},{2,2,171},{2,3,109},{2,2,99},{2,2,99},{2,2,14},{1,3,100},{2,2,86},{2,3,81},{2,3,81},{2,3,81},{2,2,162},{3,1,50},{2,2,90},{2,2,90},{2,2,5},{4,1,50}, +{2,2,5},{4,0,80},{2,3,0},{3,2,260},{3,2,212},{4,0,80},{5,1,80},{3,2,212},{0,2,200},{5,1,80},{0,2,200},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{2,2,18},{2,2,18},{2,2,18},{2,2,10},{2,2,82},{2,2,82},{2,4,394},{2,3,442},{2,3,522},{2,3,474},{2,4,301},{2,3,109},{2,3,189},{2,2,658},{2,3,472},{2,2,634},{2,4,369}, +{2,3,417},{2,3,497},{2,3,449},{3,2,244},{2,3,84},{2,3,164},{3,2,620},{5,1,244},{3,2,620},{2,4,330},{2,4,330},{2,4,330},{2,3,330},{2,3,93},{2,3,45},{2,3,45},{2,2,174},{1,3,132},{2,2,150},{3,2,289},{3,2,289},{3,2,289},{3,2,305},{4,0,52},{2,3,20},{2,3,20},{3,2,136},{3,2,52},{3,2,136},{2,4,80},{2,3,128},{2,3,208}, +{2,3,160},{2,4,80},{7,0,80},{2,3,160},{0,2,584},{7,0,80},{0,2,584},{2,0,314},{2,0,314},{2,0,314},{2,0,314},{2,3,29},{2,3,29},{2,3,29},{2,2,74},{2,2,50},{2,2,50},{3,3,544},{3,3,496},{3,3,489},{3,3,537},{2,4,535},{2,4,535},{2,3,279},{2,3,455},{1,4,292},{2,3,284},{3,3,144},{3,3,96},{3,3,89},{3,3,137},{4,1,244}, 
+{3,3,276},{2,3,83},{2,3,259},{4,2,244},{2,3,259},{3,3,480},{3,3,480},{3,3,480},{3,3,528},{2,4,246},{2,3,270},{2,3,270},{2,3,446},{2,3,75},{2,3,275},{3,3,80},{3,3,80},{3,3,80},{3,3,128},{2,4,50},{2,3,74},{2,3,74},{2,3,250},{7,0,50},{2,3,250},{5,0,80},{3,3,32},{3,3,25},{2,3,34},{5,0,80},{6,1,80},{2,3,34}, +{0,3,234},{6,1,80},{0,3,234},{3,0,464},{3,0,464},{3,0,464},{3,0,464},{2,4,221},{2,4,221},{2,4,221},{2,3,221},{2,3,50},{2,3,50},{3,4,234},{3,3,304},{3,3,185},{3,3,169},{3,3,452},{3,3,324},{3,3,205},{3,3,318},{1,4,452},{2,3,92},{3,4,90},{3,3,160},{3,3,41},{3,3,25},{5,0,244},{2,4,99},{3,3,61},{2,3,83},{3,3,244}, +{2,3,83},{3,3,160},{3,3,160},{3,3,160},{3,3,144},{3,3,196},{3,3,180},{3,3,180},{3,3,293},{2,3,187},{2,3,67},{3,3,16},{3,3,16},{3,3,16},{3,3,0},{3,3,52},{3,3,36},{3,3,36},{2,3,58},{6,1,52},{2,3,58},{4,2,80},{3,3,160},{3,3,41},{3,3,25},{4,2,80},{5,2,80},{3,3,25},{0,3,74},{5,2,80},{0,3,74},{3,0,144}, +{3,0,144},{3,0,144},{3,0,144},{3,3,180},{3,3,180},{3,3,180},{3,3,244},{2,3,18},{2,3,18},{3,4,202},{3,4,82},{3,3,393},{3,3,313},{3,4,259},{3,4,307},{3,3,253},{3,3,190},{2,4,244},{3,3,274},{3,4,186},{3,4,66},{3,3,377},{3,3,297},{3,4,243},{2,4,243},{3,3,237},{3,3,174},{7,1,243},{3,3,174},{3,4,81},{3,4,81},{3,4,81}, +{3,3,144},{3,4,138},{3,3,84},{3,3,84},{3,3,21},{2,4,123},{3,3,105},{3,4,65},{3,4,65},{3,4,65},{3,3,128},{4,2,52},{3,3,68},{3,3,68},{3,3,5},{5,2,52},{3,3,5},{5,1,74},{3,4,2},{4,3,250},{4,3,194},{5,1,74},{4,3,74},{4,3,194},{0,3,170},{4,3,74},{0,3,170},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{3,3,20}, +{3,3,20},{3,3,20},{3,3,20},{3,3,104},{3,3,104},{3,5,346},{3,4,370},{3,4,490},{3,4,466},{3,5,315},{3,4,99},{3,4,219},{3,3,574},{2,4,468},{3,3,562},{3,5,330},{3,4,354},{3,4,474},{3,4,450},{4,3,243},{3,4,83},{3,4,203},{3,3,558},{6,2,243},{3,3,558},{3,5,297},{3,5,297},{3,5,297},{3,4,297},{3,4,74},{3,4,50},{3,4,50}, 
+{3,3,133},{2,4,107},{3,3,121},{3,5,281},{3,5,281},{3,5,281},{3,4,281},{5,1,58},{3,4,34},{3,4,34},{3,3,117},{7,1,58},{3,3,117},{6,0,74},{3,4,98},{3,4,218},{3,4,194},{6,0,74},{3,4,74},{3,4,194},{0,3,522},{3,4,74},{0,3,522},{3,0,272},{3,0,272},{3,0,272},{3,0,272},{3,4,25},{3,4,25},{3,4,25},{3,3,52},{3,3,40}, +{3,3,40},{4,4,596},{4,4,556},{4,4,563},{4,4,619},{3,5,477},{3,4,477},{3,4,237},{3,4,477},{2,5,308},{3,4,344},{4,4,155},{4,4,115},{4,4,122},{4,4,178},{5,2,243},{2,5,299},{3,4,68},{2,4,308},{5,3,243},{2,4,308},{4,4,547},{4,4,547},{4,4,547},{4,4,603},{3,5,221},{3,4,221},{3,4,221},{3,4,461},{3,4,88},{3,4,328},{4,4,106}, +{4,4,106},{4,4,106},{4,4,162},{6,0,52},{3,4,52},{3,4,52},{2,4,292},{3,4,52},{2,4,292},{4,4,74},{4,4,34},{4,4,41},{3,4,32},{4,4,74},{7,2,74},{3,4,32},{0,4,272},{7,2,74},{0,4,272},{4,0,522},{4,0,522},{4,0,522},{4,0,522},{3,4,205},{3,4,205},{3,4,205},{3,4,205},{3,4,72},{3,4,72},{4,5,274},{4,4,300},{4,4,195}, +{4,4,187},{4,4,452},{4,4,340},{4,4,235},{4,4,370},{2,5,404},{3,4,88},{4,5,105},{4,4,131},{4,4,26},{4,4,18},{6,1,243},{3,5,108},{4,4,66},{3,4,84},{4,4,243},{3,4,84},{4,4,179},{4,4,179},{4,4,179},{4,4,171},{4,4,227},{4,4,219},{4,4,219},{4,4,354},{3,4,152},{3,4,72},{4,4,10},{4,4,10},{4,4,10},{4,4,2},{5,2,58}, +{4,4,50},{4,4,50},{3,4,68},{7,2,58},{3,4,68},{5,3,74},{4,4,130},{4,4,25},{4,4,17},{5,3,74},{6,3,74},{4,4,17},{0,4,80},{6,3,74},{0,4,80},{4,0,170},{4,0,170},{4,0,170},{4,0,170},{4,3,205},{4,3,205},{4,3,205},{4,3,269},{3,4,8},{3,4,8},{4,5,178},{4,5,82},{4,4,339},{4,4,267},{4,5,273},{4,5,369},{4,4,219}, +{4,4,178},{3,5,252},{4,4,274},{4,5,153},{4,5,57},{4,4,314},{4,4,242},{7,0,248},{3,5,204},{4,4,194},{4,4,153},{3,5,248},{4,4,153},{4,5,78},{4,5,78},{4,5,78},{4,4,123},{4,4,147},{4,4,75},{4,4,75},{4,4,34},{3,5,152},{4,4,130},{4,5,53},{4,5,53},{4,5,53},{4,4,98},{6,1,58},{4,4,50},{4,4,50},{4,4,9},{6,3,58}, 
+{4,4,9},{6,2,72},{4,5,8},{5,4,244},{5,4,180},{6,2,72},{5,4,72},{5,4,180},{0,4,144},{5,4,72},{0,4,144},{4,0,74},{4,0,74},{4,0,74},{4,0,74},{4,4,26},{4,4,26},{4,4,26},{4,4,34},{4,4,130},{4,4,130},{4,6,306},{4,5,306},{4,5,466},{4,5,466},{4,6,337},{4,5,97},{4,5,257},{4,4,498},{3,5,412},{4,4,498},{4,6,297}, +{4,5,297},{4,5,457},{4,5,457},{5,4,248},{4,5,88},{4,5,248},{4,4,489},{7,3,248},{4,4,489},{4,5,270},{4,5,270},{4,5,270},{4,5,270},{4,5,61},{4,5,61},{4,5,61},{4,4,98},{3,5,88},{4,4,98},{4,5,261},{4,5,261},{4,5,261},{4,5,261},{7,0,52},{4,5,52},{4,5,52},{4,4,89},{3,5,52},{4,4,89},{7,1,72},{4,5,72},{4,5,232}, +{3,5,232},{7,1,72},{4,5,72},{3,5,232},{0,4,464},{4,5,72},{0,4,464},{4,0,234},{4,0,234},{4,0,234},{4,0,234},{4,5,25},{4,5,25},{4,5,25},{4,4,34},{4,4,34},{4,4,34},{5,5,656},{5,5,624},{5,5,645},{5,5,709},{4,6,427},{4,5,403},{4,5,203},{4,5,507},{4,5,332},{4,5,412},{5,5,172},{5,5,140},{5,5,161},{5,5,225},{6,3,248}, +{4,5,259},{4,5,59},{3,5,339},{6,4,248},{3,5,339},{5,5,620},{5,5,620},{5,5,620},{5,5,684},{4,6,202},{4,5,178},{4,5,178},{4,5,482},{4,5,107},{4,5,387},{5,5,136},{5,5,136},{5,5,136},{5,5,200},{7,1,58},{4,5,34},{4,5,34},{5,4,290},{7,3,58},{5,4,290},{5,5,72},{5,5,40},{5,5,61},{4,5,34},{5,5,72},{3,6,72},{4,5,34}, +{0,5,314},{3,6,72},{0,5,314},{5,0,584},{5,0,584},{5,0,584},{5,0,584},{4,5,169},{4,5,169},{4,5,169},{4,5,193},{4,5,98},{4,5,98},{5,6,322},{5,5,304},{5,5,213},{5,5,213},{5,5,460},{5,5,364},{5,5,273},{5,5,430},{3,6,364},{4,5,92},{5,6,126},{5,5,108},{5,5,17},{5,5,17},{7,2,248},{4,6,123},{5,5,77},{4,5,91},{5,5,248}, +{4,5,91},{5,5,204},{5,5,204},{5,5,204},{5,5,204},{5,5,264},{5,5,264},{5,5,264},{5,5,421},{4,5,123},{4,5,83},{5,5,8},{5,5,8},{5,5,8},{5,5,8},{6,3,52},{5,5,68},{5,5,68},{4,5,82},{6,4,52},{4,5,82},{6,4,72},{5,5,104},{5,5,13},{5,5,13},{6,4,72},{7,4,72},{5,5,13},{0,5,90},{7,4,72},{0,5,90},{5,0,200}, 
+{5,0,200},{5,0,200},{5,0,200},{5,4,221},{5,4,221},{5,4,221},{5,4,277},{4,5,2},{4,5,2},{5,6,162},{5,6,90},{5,5,293},{5,5,229},{5,6,295},{5,5,396},{5,5,193},{5,5,174},{4,6,268},{5,5,282},{5,6,126},{5,6,54},{5,5,257},{5,5,193},{5,6,259},{4,6,171},{5,5,157},{5,5,138},{4,6,259},{5,5,138},{5,6,81},{5,6,81},{5,6,81}, +{5,5,108},{5,5,136},{5,5,72},{5,5,72},{5,5,53},{4,6,187},{5,5,161},{5,6,45},{5,6,45},{5,6,45},{5,5,72},{7,2,52},{5,5,36},{5,5,36},{5,5,17},{5,5,52},{5,5,17},{7,3,74},{5,6,18},{5,5,221},{5,5,157},{7,3,74},{6,5,74},{5,5,157},{0,5,122},{6,5,74},{0,5,122},{5,0,72},{5,0,72},{5,0,72},{5,0,72},{5,5,36}, +{5,5,36},{5,5,36},{5,5,52},{5,5,160},{5,5,160},{5,7,274},{5,6,250},{5,6,450},{5,6,474},{5,6,343},{5,6,103},{5,6,303},{5,5,430},{4,6,364},{5,5,442},{5,7,270},{5,6,246},{5,6,446},{5,6,470},{7,3,251},{5,6,99},{5,6,299},{5,5,426},{6,5,251},{5,5,426},{5,6,225},{5,6,225},{5,6,225},{5,6,249},{5,6,54},{5,6,78},{5,6,78}, +{5,5,69},{4,6,75},{5,5,81},{5,6,221},{5,6,221},{5,6,221},{5,6,245},{5,6,50},{5,6,74},{5,6,74},{5,5,65},{4,6,50},{5,5,65},{5,7,74},{5,6,50},{5,6,250},{4,6,250},{5,7,74},{5,6,74},{4,6,250},{0,5,410},{5,6,74},{0,5,410},{5,0,200},{5,0,200},{5,0,200},{5,0,200},{5,6,29},{5,6,29},{5,6,29},{5,5,20},{5,5,32}, +{5,5,32},{6,6,724},{6,6,700},{6,6,735},{5,6,690},{5,7,385},{5,6,337},{5,6,177},{5,6,545},{5,6,328},{5,6,488},{6,6,195},{6,6,171},{6,6,206},{6,6,278},{7,4,259},{5,6,216},{5,6,56},{4,6,376},{7,5,259},{4,6,376},{5,7,654},{5,7,654},{5,7,654},{5,6,654},{5,7,189},{5,6,141},{5,6,141},{5,6,509},{5,6,132},{5,5,450},{6,5,157}, +{6,5,157},{6,5,157},{6,5,221},{6,5,52},{5,6,20},{5,6,20},{6,5,256},{3,7,52},{6,5,256},{6,6,74},{6,6,50},{6,6,85},{5,6,40},{6,6,74},{4,7,74},{5,6,40},{0,6,360},{4,7,74},{0,6,360},{5,0,650},{5,0,650},{5,0,650},{5,0,650},{5,6,137},{5,6,137},{5,6,137},{5,6,185},{5,6,128},{5,6,128},{6,7,378},{6,6,316},{6,6,239}, 
+{6,6,247},{6,6,476},{6,6,396},{6,6,319},{5,6,465},{4,7,332},{5,6,104},{6,7,153},{6,6,91},{6,6,14},{6,6,22},{6,6,251},{5,7,144},{6,6,94},{5,6,104},{4,7,251},{5,6,104},{6,6,235},{6,6,235},{6,6,235},{6,6,243},{6,6,307},{6,6,315},{6,6,315},{5,6,461},{5,6,100},{5,6,100},{6,6,10},{6,6,10},{6,6,10},{6,6,18},{7,4,50}, +{6,6,90},{6,6,90},{5,6,100},{7,5,50},{5,6,100},{7,5,74},{6,6,82},{6,6,5},{6,6,13},{7,5,74},{6,6,90},{6,6,13},{0,6,104},{6,6,90},{0,6,104},{6,0,234},{6,0,234},{6,0,234},{6,0,234},{6,5,241},{6,5,241},{6,5,241},{6,5,289},{5,6,0},{5,6,0},{6,7,154},{6,7,106},{6,6,255},{6,6,199},{6,7,325},{6,6,364},{6,6,175}, +{6,6,178},{5,7,292},{5,6,232},{6,7,105},{6,7,57},{6,6,206},{6,6,150},{7,5,251},{5,7,144},{6,6,126},{6,6,129},{5,7,276},{6,6,129},{6,7,90},{6,7,90},{6,7,90},{6,6,99},{6,6,131},{6,6,75},{6,6,75},{6,6,78},{6,6,219},{5,6,132},{6,7,41},{6,7,41},{6,7,41},{6,6,50},{7,5,82},{6,6,26},{6,6,26},{6,6,29},{6,6,50}, +{6,6,29},{6,7,80},{6,7,32},{6,6,181},{6,6,125},{6,7,80},{7,6,80},{6,6,125},{0,6,104},{7,6,80},{0,6,104},{6,0,74},{6,0,74},{6,0,74},{6,0,74},{6,6,50},{6,6,50},{6,6,50},{6,6,74},{5,6,128},{5,6,128},{6,7,442},{6,7,202},{6,7,442},{6,7,490},{6,7,309},{6,7,117},{6,7,357},{6,6,370},{5,7,324},{6,6,394},{6,7,441}, +{6,7,201},{6,7,441},{6,7,489},{7,6,276},{6,7,116},{6,7,356},{6,6,369},{7,6,244},{6,6,369},{6,7,186},{6,7,186},{6,7,186},{6,7,234},{6,7,53},{6,7,101},{6,7,101},{6,6,46},{5,7,68},{6,6,70},{6,7,185},{6,7,185},{6,7,185},{6,7,233},{6,7,52},{6,7,100},{6,7,100},{6,6,45},{5,7,52},{6,6,45},{7,6,80},{6,7,32},{6,7,272}, +{5,7,272},{7,6,80},{6,7,80},{5,7,272},{0,6,360},{6,7,80},{0,6,360},{6,0,170},{6,0,170},{6,0,170},{6,0,170},{6,7,37},{6,7,37},{6,7,37},{6,6,10},{6,6,34},{6,6,34},{7,7,800},{7,7,784},{6,7,802},{6,7,634},{6,7,903},{6,7,279},{6,7,159},{6,7,591},{6,7,332},{6,7,572},{7,7,224},{7,7,208},{7,7,257},{7,7,337},{7,6,339}, 
+{6,7,179},{6,7,59},{5,7,419},{6,7,251},{5,7,419},{7,6,745},{7,6,745},{7,6,745},{6,7,585},{6,7,278},{6,7,110},{6,7,110},{6,6,469},{6,7,163},{6,6,385},{7,6,169},{7,6,169},{7,6,169},{7,6,225},{7,6,50},{6,7,10},{6,7,10},{7,6,226},{7,6,82},{7,6,226},{7,7,80},{7,7,64},{7,7,113},{6,7,50},{7,7,80},{7,7,144},{6,7,50}, +{0,7,410},{7,7,144},{0,7,410},{6,0,584},{6,0,584},{6,0,584},{6,0,584},{6,7,109},{6,7,109},{6,7,109},{6,7,181},{6,6,160},{6,6,160},{7,7,393},{7,7,321},{7,7,272},{7,7,288},{7,7,477},{7,7,421},{7,7,372},{6,7,446},{6,7,483},{6,7,123},{7,7,137},{7,7,65},{7,7,16},{7,7,32},{7,7,221},{7,7,165},{7,7,116},{6,7,122},{7,7,261}, +{6,7,122},{7,7,272},{7,7,272},{7,7,272},{7,7,288},{7,7,356},{7,7,372},{7,7,372},{6,7,446},{6,7,83},{6,7,123},{7,7,16},{7,7,16},{7,7,16},{7,7,32},{7,7,100},{7,7,116},{7,7,116},{6,7,122},{6,7,82},{6,7,122},{7,7,121},{7,7,49},{7,7,0},{7,7,16},{7,7,121},{7,7,65},{7,7,16},{0,7,121},{7,7,65},{0,7,121},{7,0,272}, +{7,0,272},{7,0,272},{7,0,272},{7,6,265},{7,6,265},{7,6,265},{7,6,305},{6,7,2},{6,7,2},{7,7,265},{7,7,193},{7,7,144},{7,7,96},{7,7,253},{7,7,133},{7,7,84},{7,7,109},{7,7,297},{6,7,107},{7,7,201},{7,7,129},{7,7,80},{7,7,32},{7,7,189},{7,7,69},{7,7,20},{7,7,45},{7,7,101},{7,7,45},{7,7,144},{7,7,144},{7,7,144}, +{7,7,96},{7,7,132},{7,7,84},{7,7,84},{7,7,109},{7,7,248},{6,7,107},{7,7,80},{7,7,80},{7,7,80},{7,7,32},{7,7,68},{7,7,20},{7,7,20},{7,7,45},{7,7,52},{7,7,45},{7,7,185},{7,7,113},{7,7,64},{7,7,16},{7,7,185},{7,7,65},{7,7,16},{0,7,9},{7,7,65},{0,7,9},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{7,7,68}, +{7,7,68},{7,7,68},{7,7,100},{6,7,98},{6,7,98},{7,7,386},{7,7,314},{7,7,265},{7,7,193},{7,7,278},{7,7,134},{7,7,85},{7,7,4},{7,7,138},{7,7,40},{7,7,386},{7,7,314},{7,7,265},{7,7,193},{7,7,278},{7,7,134},{7,7,85},{7,7,4},{7,7,102},{7,7,4},{7,7,265},{7,7,265},{7,7,265},{7,7,193},{7,7,157},{7,7,85},{7,7,85}, 
+{7,7,4},{7,7,89},{7,7,40},{7,7,265},{7,7,265},{7,7,265},{7,7,193},{7,7,157},{7,7,85},{7,7,85},{7,7,4},{7,7,53},{7,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,144},{7,0,144},{7,0,144},{7,0,144},{7,7,36},{7,7,36},{7,7,36},{7,7,4},{7,7,40}, +{7,7,40},{0,1,200},{0,1,104},{0,0,153},{0,0,145},{0,1,561},{0,0,398},{0,0,181},{0,0,308},{0,0,498},{0,0,344},{0,1,200},{0,1,104},{0,0,153},{0,0,145},{0,1,561},{0,0,398},{0,0,181},{0,0,308},{0,0,462},{0,0,308},{0,0,9},{0,0,9},{0,0,9},{0,0,1},{0,0,45},{0,0,37},{0,0,37},{0,0,164},{0,0,137},{0,0,200},{0,0,9}, +{0,0,9},{0,0,9},{0,0,1},{0,0,45},{0,0,37},{0,0,37},{0,0,164},{0,0,101},{0,0,164},{0,1,200},{0,1,104},{0,0,153},{0,0,145},{0,1,200},{1,0,232},{0,0,145},{0,0,208},{1,0,232},{0,0,208},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,2,232},{0,1,40},{0,1,200}, +{0,1,392},{0,1,689},{0,1,593},{0,0,485},{0,0,500},{0,0,914},{0,0,536},{0,2,232},{0,1,40},{0,1,200},{0,1,392},{0,1,689},{0,1,593},{0,0,485},{0,0,500},{1,0,761},{0,0,500},{0,1,4},{0,1,4},{0,1,4},{0,0,49},{0,0,157},{0,0,85},{0,0,85},{0,0,100},{0,0,185},{0,0,136},{0,1,4},{0,1,4},{0,1,4},{0,0,49},{0,0,157}, +{0,0,85},{0,0,85},{0,0,100},{0,0,149},{0,0,100},{1,0,200},{0,1,40},{0,1,200},{0,1,392},{1,0,200},{1,0,232},{0,1,392},{0,0,400},{1,0,232},{0,0,400},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,2,281},{0,2,149},{0,1,121},{0,1,121},{0,2,808},{0,1,376},{0,1,216}, +{0,0,857},{0,1,1169},{0,0,893},{0,2,281},{0,2,149},{0,1,121},{0,1,121},{1,0,728},{0,1,376},{0,1,216},{0,0,857},{1,0,744},{0,0,857},{0,1,85},{0,1,85},{0,1,85},{0,1,85},{0,1,180},{0,1,180},{0,1,180},{0,0,73},{0,0,270},{0,0,109},{0,1,85},{0,1,85},{0,1,85},{0,1,85},{0,1,180},{0,1,180},{0,1,180},{0,0,73},{0,0,234}, 
+{0,0,73},{0,2,232},{0,2,100},{0,1,72},{0,1,72},{0,2,232},{2,0,202},{0,1,72},{0,1,712},{2,0,202},{0,1,712},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,0,1},{0,0,1},{0,0,1},{0,0,9},{0,0,45},{0,0,45},{0,3,427},{0,2,229},{0,1,425},{0,1,233},{0,2,744},{0,1,504},{0,1,24},{0,1,584},{0,1,1105},{0,1,945},{0,3,427}, +{0,2,229},{1,1,414},{0,1,233},{0,2,744},{0,1,504},{0,1,24},{0,1,584},{2,0,739},{0,1,584},{0,2,229},{0,2,229},{0,2,229},{0,1,229},{0,1,212},{0,1,20},{0,1,20},{0,0,281},{0,0,590},{0,0,317},{0,2,229},{0,2,229},{0,2,229},{0,1,229},{1,0,180},{0,1,20},{0,1,20},{0,0,281},{1,0,164},{0,0,281},{0,3,202},{0,2,4},{1,1,53}, +{0,1,8},{0,3,202},{1,1,202},{0,1,8},{0,1,328},{1,1,202},{0,1,328},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,1,16},{0,1,16},{0,1,16},{0,0,25},{0,0,61},{0,0,61},{0,4,842},{0,3,740},{1,1,1125},{0,1,834},{0,3,744},{0,2,267},{0,1,283},{0,1,267},{0,1,1356},{0,1,476},{1,2,302},{1,2,230},{1,1,225},{1,1,225},{0,3,728}, +{0,2,251},{0,1,267},{0,1,251},{1,1,728},{0,1,251},{0,3,596},{0,3,596},{0,3,596},{0,1,713},{0,2,186},{0,1,162},{0,1,162},{0,1,146},{0,1,395},{0,1,355},{1,1,104},{1,1,104},{1,1,104},{1,1,104},{0,2,170},{0,1,146},{0,1,146},{0,1,130},{0,1,170},{0,1,130},{1,2,202},{1,2,130},{1,1,125},{1,1,125},{1,2,202},{0,2,202},{1,1,125}, +{0,1,202},{0,2,202},{0,1,202},{0,0,592},{0,0,592},{0,0,592},{0,0,592},{0,1,41},{0,1,41},{0,1,41},{0,1,65},{0,0,200},{0,0,200},{1,3,738},{1,2,522},{1,2,722},{1,1,885},{0,4,987},{0,2,443},{0,2,379},{0,1,507},{0,2,1100},{0,1,412},{1,3,254},{1,2,38},{1,2,238},{1,1,401},{1,2,739},{0,2,299},{0,2,235},{0,1,363},{0,2,739}, +{0,1,363},{1,2,497},{1,2,497},{1,2,497},{1,1,524},{0,3,324},{0,2,154},{0,2,154},{0,1,146},{0,1,411},{0,1,51},{1,2,13},{1,2,13},{1,2,13},{1,1,40},{1,1,164},{0,2,10},{0,2,10},{0,1,2},{1,1,180},{0,1,2},{2,1,202},{1,2,34},{1,2,234},{0,2,234},{2,1,202},{4,0,202},{0,2,234},{0,1,362},{4,0,202},{0,1,362},{1,0,488}, 
+{1,0,488},{1,0,488},{1,0,488},{0,2,145},{0,2,145},{0,2,145},{0,1,145},{0,1,50},{0,1,50},{1,3,450},{1,3,362},{1,2,306},{1,2,330},{1,2,1015},{1,2,583},{1,2,463},{1,1,990},{0,2,940},{0,1,860},{1,3,254},{1,3,166},{1,2,110},{1,2,134},{0,4,731},{0,3,288},{0,2,155},{1,1,794},{2,1,731},{1,1,794},{1,2,257},{1,2,257},{1,2,257}, +{1,2,281},{1,2,390},{1,1,392},{1,1,392},{1,1,261},{0,2,315},{0,1,131},{1,2,61},{1,2,61},{1,2,61},{1,2,85},{2,0,164},{0,2,106},{0,2,106},{1,1,65},{3,0,164},{1,1,65},{3,0,208},{1,3,130},{1,2,74},{0,2,74},{3,0,208},{3,1,208},{0,2,74},{0,1,778},{3,1,208},{0,1,778},{1,0,232},{1,0,232},{1,0,232},{1,0,232},{1,1,196}, +{1,1,196},{1,1,196},{1,1,212},{0,1,82},{0,1,82},{1,4,440},{1,3,234},{1,2,402},{1,2,234},{1,3,767},{1,2,503},{1,2,63},{1,2,687},{0,3,1140},{0,2,396},{1,4,404},{1,3,198},{1,2,366},{1,2,198},{1,3,731},{0,3,224},{1,2,27},{0,2,387},{1,2,731},{0,2,387},{1,3,233},{1,3,233},{1,3,233},{1,2,233},{1,2,230},{1,2,62},{1,2,62}, +{1,1,277},{0,2,203},{1,1,385},{1,3,197},{1,3,197},{1,3,197},{1,2,197},{0,4,162},{1,2,26},{1,2,26},{1,1,241},{2,1,162},{1,1,241},{2,2,208},{1,3,2},{2,2,65},{1,2,2},{2,2,208},{5,0,208},{1,2,2},{0,2,362},{5,0,208},{0,2,362},{1,0,232},{1,0,232},{1,0,232},{1,0,232},{1,2,61},{1,2,61},{1,2,61},{1,1,52},{0,2,34}, +{0,2,34},{1,5,810},{1,3,702},{1,2,1122},{1,2,738},{1,4,748},{1,3,281},{1,2,225},{1,2,273},{0,3,780},{0,2,252},{2,3,329},{2,3,281},{2,2,222},{2,2,230},{2,2,731},{1,3,272},{1,2,216},{0,2,216},{5,0,731},{0,2,216},{1,4,547},{1,4,547},{1,4,547},{1,2,638},{1,3,173},{1,2,125},{1,2,125},{1,2,173},{0,3,339},{0,2,152},{2,2,122}, +{2,2,122},{2,2,122},{2,2,130},{1,3,164},{1,2,116},{1,2,116},{0,2,116},{1,2,164},{0,2,116},{2,3,208},{2,3,160},{2,2,101},{2,2,109},{2,3,208},{6,0,208},{2,2,109},{0,2,200},{6,0,208},{0,2,200},{1,0,538},{1,0,538},{1,0,538},{1,0,538},{1,2,25},{1,2,25},{1,2,25},{1,2,73},{0,2,52},{0,2,52},{2,3,810},{2,3,570},{2,3,810}, 
+{2,2,887},{1,4,940},{1,3,393},{1,3,393},{1,2,449},{0,3,1004},{1,2,392},{2,3,281},{2,3,41},{2,3,281},{2,2,358},{3,1,731},{1,3,272},{1,3,272},{1,2,328},{4,1,731},{1,2,328},{2,3,554},{2,3,554},{2,3,554},{2,2,563},{1,4,315},{1,3,137},{1,3,137},{1,2,125},{0,3,163},{1,2,68},{2,3,25},{2,3,25},{2,3,25},{2,2,34},{2,2,162}, +{1,3,16},{1,3,16},{1,2,4},{5,0,162},{1,2,4},{4,0,208},{2,3,32},{2,3,272},{1,3,272},{4,0,208},{5,1,208},{1,3,272},{0,2,328},{5,1,208},{0,2,328},{2,0,538},{2,0,538},{2,0,538},{2,0,538},{1,3,121},{1,3,121},{1,3,121},{1,2,121},{1,2,64},{1,2,64},{2,4,458},{2,3,410},{2,3,330},{2,3,378},{2,3,1013},{2,3,629},{1,3,505}, +{2,2,962},{0,4,772},{1,2,776},{2,4,233},{2,3,185},{2,3,105},{2,3,153},{4,0,724},{1,4,323},{1,3,144},{2,2,737},{3,2,724},{2,2,737},{2,3,266},{2,3,266},{2,3,266},{2,3,314},{2,3,437},{2,2,395},{2,2,395},{2,2,286},{1,3,356},{1,2,100},{2,3,41},{2,3,41},{2,3,41},{2,3,89},{3,1,162},{1,3,80},{1,3,80},{2,2,61},{4,1,162}, +{2,2,61},{2,4,208},{2,3,160},{2,3,80},{1,3,80},{2,4,208},{7,0,208},{1,3,80},{0,2,712},{7,0,208},{0,2,712},{2,0,250},{2,0,250},{2,0,250},{2,0,250},{2,2,226},{2,2,226},{2,2,226},{2,2,250},{1,2,64},{1,2,64},{2,5,436},{2,4,222},{2,3,362},{2,3,218},{2,4,773},{2,3,485},{2,3,85},{2,3,773},{0,4,804},{1,3,452},{2,5,387}, +{2,4,173},{2,3,313},{2,3,169},{2,4,724},{1,4,211},{2,3,36},{1,3,436},{7,0,724},{1,3,436},{2,4,218},{2,4,218},{2,4,218},{2,3,218},{2,3,229},{2,3,85},{2,3,85},{2,2,254},{1,3,196},{2,2,374},{2,4,169},{2,4,169},{2,4,169},{2,3,169},{4,0,164},{2,3,36},{2,3,36},{2,2,205},{3,2,164},{2,2,205},{3,3,202},{2,4,4},{3,3,81}, +{2,3,0},{3,3,202},{6,1,202},{2,3,0},{0,3,400},{6,1,202},{0,3,400},{2,0,218},{2,0,218},{2,0,218},{2,0,218},{2,3,85},{2,3,85},{2,3,85},{2,2,58},{1,3,52},{1,3,52},{2,5,760},{2,4,618},{2,3,1010},{2,3,650},{2,5,760},{2,4,303},{2,3,175},{2,3,287},{1,4,788},{1,3,236},{3,4,362},{3,4,338},{3,3,225},{3,3,241},{3,3,724}, 
+{2,4,299},{2,3,171},{1,3,211},{6,1,724},{1,3,211},{2,5,504},{2,5,504},{2,5,504},{2,3,569},{2,4,166},{2,3,94},{2,3,94},{2,3,206},{0,4,363},{1,3,155},{3,3,144},{3,3,144},{3,3,144},{3,3,160},{2,4,162},{2,3,90},{2,3,90},{1,3,130},{7,0,162},{1,3,130},{4,2,208},{2,4,130},{3,3,81},{3,3,97},{4,2,208},{5,2,208},{3,3,97}, +{0,3,202},{5,2,208},{0,3,202},{2,0,488},{2,0,488},{2,0,488},{2,0,488},{2,3,13},{2,3,13},{2,3,13},{2,3,85},{1,3,34},{1,3,34},{3,4,842},{3,4,626},{3,4,906},{3,3,897},{2,5,888},{2,4,351},{2,4,415},{2,3,399},{0,5,788},{2,3,380},{3,4,266},{3,4,50},{3,4,330},{3,3,321},{4,2,724},{2,4,251},{2,4,315},{2,3,299},{5,2,724}, +{2,3,299},{3,4,617},{3,4,617},{3,4,617},{3,3,608},{2,5,312},{2,4,126},{2,4,126},{2,3,110},{1,4,164},{2,3,91},{3,4,41},{3,4,41},{3,4,41},{3,3,32},{3,3,164},{2,4,26},{2,4,26},{2,3,10},{6,1,164},{2,3,10},{5,1,202},{3,4,34},{3,4,314},{3,3,305},{5,1,202},{4,3,202},{3,3,305},{0,3,298},{4,3,202},{0,3,298},{3,0,592}, +{3,0,592},{3,0,592},{3,0,592},{2,4,101},{2,4,101},{2,4,101},{2,3,101},{2,3,82},{2,3,82},{3,5,474},{3,4,402},{3,4,362},{3,4,434},{3,4,1019},{3,4,683},{2,4,463},{3,3,942},{1,5,804},{2,3,700},{3,5,218},{3,4,146},{3,4,106},{3,4,178},{5,1,723},{2,5,364},{2,4,139},{3,3,686},{4,3,723},{3,3,686},{3,4,281},{3,4,281},{3,4,281}, +{3,4,353},{3,4,490},{2,4,382},{2,4,382},{3,3,317},{1,4,324},{2,3,75},{3,4,25},{3,4,25},{3,4,25},{3,4,97},{4,2,164},{2,4,58},{2,4,58},{3,3,61},{5,2,164},{3,3,61},{6,0,202},{3,4,130},{3,4,90},{2,4,90},{6,0,202},{3,4,202},{2,4,90},{0,3,650},{3,4,202},{0,3,650},{3,0,272},{3,0,272},{3,0,272},{3,0,272},{3,3,260}, +{3,3,260},{3,3,260},{3,3,292},{2,3,50},{2,3,50},{3,6,440},{3,5,218},{3,4,330},{3,4,210},{3,5,787},{3,4,475},{3,4,115},{3,4,867},{1,5,772},{2,4,516},{3,6,376},{3,5,154},{3,4,266},{3,4,146},{6,0,723},{2,5,204},{3,4,51},{1,4,478},{3,4,723},{1,4,478},{3,5,209},{3,5,209},{3,5,209},{3,4,209},{3,4,234},{3,4,114},{3,4,114}, 
+{3,3,237},{2,4,195},{3,3,369},{3,5,145},{3,5,145},{3,5,145},{3,4,145},{5,1,170},{3,4,50},{3,4,50},{3,3,173},{7,1,170},{3,3,173},{4,4,200},{3,5,10},{4,4,101},{3,4,2},{4,4,200},{7,2,200},{3,4,2},{0,4,442},{7,2,200},{0,4,442},{3,0,208},{3,0,208},{3,0,208},{3,0,208},{3,3,100},{3,3,100},{3,3,100},{3,3,68},{2,4,74}, +{2,4,74},{3,6,692},{3,5,542},{3,4,906},{3,4,570},{3,6,780},{3,5,333},{3,4,133},{3,4,309},{2,5,804},{2,4,228},{4,5,401},{4,4,395},{4,4,234},{4,4,258},{4,4,723},{3,5,332},{3,4,132},{2,4,212},{7,2,723},{2,4,212},{3,6,467},{3,6,467},{3,6,467},{3,4,506},{3,5,165},{3,4,69},{3,4,69},{3,4,245},{1,5,324},{2,4,164},{4,4,170}, +{4,4,170},{4,4,170},{4,4,194},{6,0,164},{3,4,68},{3,4,68},{2,4,148},{3,4,164},{2,4,148},{5,3,202},{3,5,100},{4,4,65},{4,4,89},{5,3,202},{6,3,202},{4,4,89},{0,4,208},{6,3,202},{0,4,208},{3,0,442},{3,0,442},{3,0,442},{3,0,442},{3,4,5},{3,4,5},{3,4,5},{3,4,101},{2,4,20},{2,4,20},{4,5,882},{4,5,690},{4,4,955}, +{4,4,915},{3,6,844},{3,5,317},{3,5,445},{3,4,357},{1,6,772},{3,4,376},{4,5,257},{4,5,65},{4,4,330},{4,4,290},{5,3,723},{3,5,236},{3,5,364},{3,4,276},{6,3,723},{3,4,276},{4,5,686},{4,5,686},{4,5,686},{4,4,659},{3,6,315},{3,5,121},{3,5,121},{3,4,101},{2,5,171},{3,4,120},{4,5,61},{4,5,61},{4,5,61},{4,4,34},{5,2,170}, +{3,5,40},{3,5,40},{3,4,20},{7,2,170},{3,4,20},{6,2,200},{4,5,40},{4,4,305},{4,4,265},{6,2,200},{5,4,200},{4,4,265},{0,4,272},{5,4,200},{0,4,272},{4,0,650},{4,0,650},{4,0,650},{4,0,650},{3,5,85},{3,5,85},{3,5,85},{3,4,85},{3,4,104},{3,4,104},{4,6,498},{4,5,402},{4,5,402},{4,5,498},{3,7,1017},{3,6,700},{3,5,429}, +{3,4,917},{2,6,844},{3,4,632},{4,6,209},{4,5,113},{4,5,113},{4,5,209},{6,2,728},{3,6,411},{3,5,140},{3,4,628},{5,4,728},{3,4,628},{4,5,302},{4,5,302},{4,5,302},{4,5,398},{3,6,459},{3,5,329},{3,5,329},{3,4,341},{2,5,283},{3,4,56},{4,5,13},{4,5,13},{4,5,13},{4,5,109},{6,1,170},{3,5,40},{3,5,40},{3,4,52},{6,3,170}, 
+{3,4,52},{7,1,200},{4,5,104},{4,5,104},{3,5,104},{7,1,200},{4,5,200},{3,5,104},{0,4,592},{4,5,200},{0,4,592},{4,0,298},{4,0,298},{4,0,298},{4,0,298},{4,4,298},{4,4,298},{4,4,298},{3,4,325},{3,4,40},{3,4,40},{4,7,452},{4,6,222},{4,5,306},{4,5,210},{4,6,809},{4,5,473},{4,5,153},{4,5,969},{2,6,748},{3,5,588},{4,7,371}, +{4,6,141},{4,5,225},{4,5,129},{7,1,728},{3,6,203},{4,5,72},{2,5,513},{4,5,728},{2,5,513},{4,6,206},{4,6,206},{4,6,206},{4,5,206},{4,5,245},{4,5,149},{4,5,149},{4,4,226},{3,5,200},{4,4,370},{4,6,125},{4,6,125},{4,6,125},{4,5,125},{7,0,164},{4,5,68},{4,5,68},{4,4,145},{3,5,164},{4,4,145},{5,5,202},{4,6,20},{4,5,104}, +{4,5,8},{5,5,202},{3,6,202},{4,5,8},{0,5,488},{3,6,202},{0,5,488},{4,0,202},{4,0,202},{4,0,202},{4,0,202},{4,4,106},{4,4,106},{4,4,106},{4,4,82},{3,5,100},{3,5,100},{4,7,632},{4,6,474},{4,5,810},{4,5,498},{4,7,808},{4,6,371},{4,5,99},{4,5,339},{3,6,828},{3,5,228},{5,6,446},{5,5,396},{5,5,249},{5,5,281},{5,5,728}, +{4,6,371},{4,5,99},{3,5,219},{3,6,728},{3,5,219},{4,7,436},{4,7,436},{4,7,436},{4,5,449},{4,6,170},{4,5,50},{4,5,50},{4,5,290},{2,6,291},{3,5,179},{5,5,200},{5,5,200},{5,5,200},{5,5,232},{7,1,170},{4,5,50},{4,5,50},{3,5,170},{7,3,170},{3,5,170},{6,4,200},{4,6,74},{5,5,53},{5,5,85},{6,4,200},{7,4,200},{5,5,85}, +{0,5,218},{7,4,200},{0,5,218},{4,0,400},{4,0,400},{4,0,400},{4,0,400},{4,5,1},{4,5,1},{4,5,1},{4,4,100},{3,5,10},{3,5,10},{5,6,930},{5,6,762},{5,5,973},{5,5,941},{4,7,808},{4,6,291},{4,6,483},{4,5,323},{2,7,764},{4,5,380},{5,6,254},{5,6,86},{5,5,297},{5,5,265},{6,4,728},{4,6,227},{5,5,373},{4,5,259},{7,4,728}, +{4,5,259},{5,5,748},{5,5,748},{5,5,748},{5,5,716},{4,6,298},{4,6,122},{4,6,122},{4,5,98},{3,6,184},{4,5,155},{5,5,72},{5,5,72},{5,5,72},{5,5,40},{6,3,164},{4,6,58},{4,6,58},{4,5,34},{6,4,164},{4,5,34},{7,3,202},{5,6,50},{5,5,261},{5,5,229},{7,3,202},{6,5,202},{5,5,229},{0,5,250},{6,5,202},{0,5,250},{5,0,712}, 
+{5,0,712},{5,0,712},{5,0,712},{4,6,73},{4,6,73},{4,6,73},{4,5,73},{4,5,130},{4,5,130},{5,7,530},{5,6,410},{5,6,450},{5,6,570},{5,6,1055},{4,7,720},{4,6,403},{4,5,819},{3,7,892},{4,5,572},{5,7,206},{5,6,86},{5,6,126},{5,6,246},{5,6,731},{3,7,387},{4,6,147},{4,5,563},{4,6,731},{4,5,563},{5,6,329},{5,6,329},{5,6,329}, +{5,5,428},{4,7,420},{4,6,282},{4,6,282},{4,5,290},{3,6,248},{4,5,43},{5,6,5},{5,6,5},{5,6,5},{5,5,104},{7,2,164},{4,6,26},{4,6,26},{4,5,34},{5,5,164},{4,5,34},{5,7,202},{5,6,82},{5,6,122},{4,6,122},{5,7,202},{5,6,202},{4,6,122},{0,5,538},{5,6,202},{0,5,538},{5,0,328},{5,0,328},{5,0,328},{5,0,328},{4,6,281}, +{4,6,281},{4,6,281},{4,5,281},{4,5,34},{4,5,34},{5,7,498},{5,7,234},{5,6,290},{5,6,218},{5,7,839},{5,6,479},{5,6,199},{5,6,1079},{3,7,732},{4,6,668},{5,7,398},{5,7,134},{5,6,190},{5,6,118},{6,5,731},{4,7,208},{5,6,99},{3,6,554},{3,7,731},{3,6,554},{5,7,209},{5,7,209},{5,7,209},{5,6,209},{5,6,262},{5,6,190},{5,6,190}, +{5,5,221},{4,6,211},{4,5,315},{5,7,109},{5,7,109},{5,7,109},{5,6,109},{5,6,162},{5,6,90},{5,6,90},{5,5,121},{4,6,162},{5,5,121},{7,4,208},{5,7,34},{5,6,90},{5,6,18},{7,4,208},{7,5,208},{5,6,18},{0,6,538},{7,5,208},{0,6,538},{5,0,200},{5,0,200},{5,0,200},{5,0,200},{5,5,116},{5,5,116},{5,5,116},{5,5,100},{4,6,130}, +{4,6,130},{5,7,1074},{5,7,414},{5,6,722},{5,6,434},{5,7,857},{5,7,417},{5,6,73},{5,6,377},{4,7,860},{4,6,236},{6,7,497},{6,6,403},{6,6,270},{6,6,310},{7,4,731},{4,7,379},{5,6,72},{4,6,232},{7,5,731},{4,6,232},{5,7,398},{5,7,398},{5,7,398},{5,6,398},{5,7,181},{5,6,37},{5,6,37},{5,6,341},{3,7,264},{4,6,200},{6,6,234}, +{6,6,234},{6,6,234},{6,6,274},{6,5,164},{5,6,36},{5,6,36},{4,6,196},{3,7,164},{4,6,196},{7,5,202},{5,7,52},{6,6,45},{5,6,72},{7,5,202},{6,6,218},{5,6,72},{0,6,232},{6,6,218},{0,6,232},{5,0,362},{5,0,362},{5,0,362},{5,0,362},{5,6,1},{5,6,1},{5,6,1},{5,5,82},{4,6,4},{4,6,4},{6,7,986},{6,7,842},{6,6,999}, 
+{6,6,975},{5,7,1417},{5,7,273},{5,6,505},{5,6,297},{4,7,828},{5,6,392},{6,7,257},{6,7,113},{6,6,270},{6,6,246},{7,5,739},{5,7,224},{6,6,366},{5,6,248},{6,6,731},{5,6,248},{6,6,803},{6,6,803},{6,6,803},{6,6,779},{5,7,261},{5,7,129},{5,7,129},{5,6,101},{4,7,203},{5,6,196},{6,6,74},{6,6,74},{6,6,74},{6,6,50},{7,4,162}, +{5,7,80},{5,7,80},{5,6,52},{7,5,162},{5,6,52},{6,7,208},{6,7,64},{6,6,221},{6,6,197},{6,7,208},{7,6,208},{6,6,197},{0,6,232},{7,6,208},{0,6,232},{5,0,778},{5,0,778},{5,0,778},{5,0,778},{5,7,65},{5,7,65},{5,7,65},{5,6,65},{5,6,160},{5,6,160},{6,7,762},{6,7,426},{6,7,506},{6,7,650},{6,7,1085},{5,7,641},{5,7,385}, +{5,6,729},{5,7,980},{5,6,520},{6,7,401},{6,7,65},{6,7,145},{6,7,289},{6,7,724},{5,7,416},{5,7,160},{5,6,504},{5,7,724},{5,6,504},{6,7,362},{6,7,362},{6,7,362},{6,6,443},{6,6,555},{5,7,241},{5,7,241},{5,6,245},{4,7,219},{5,6,36},{6,7,1},{6,7,1},{6,7,1},{6,6,82},{7,5,194},{5,7,16},{5,7,16},{5,6,20},{6,6,162}, +{5,6,20},{7,6,208},{6,7,64},{6,7,144},{5,7,144},{7,6,208},{6,7,208},{5,7,144},{0,6,488},{6,7,208},{0,6,488},{6,0,362},{6,0,362},{6,0,362},{6,0,362},{5,7,241},{5,7,241},{5,7,241},{5,6,241},{5,6,32},{5,6,32},{6,7,1050},{6,7,522},{6,7,282},{6,7,234},{6,7,1069},{6,7,493},{6,7,253},{6,6,1122},{5,7,1012},{5,7,756},{7,7,843}, +{6,7,401},{6,7,161},{6,7,113},{7,6,724},{6,7,372},{6,7,132},{4,7,601},{6,7,756},{4,7,601},{6,7,266},{6,7,266},{6,7,266},{6,7,218},{6,7,285},{6,7,237},{6,7,237},{6,6,222},{5,7,228},{5,6,260},{6,7,145},{6,7,145},{6,7,145},{6,7,97},{6,7,164},{6,7,116},{6,7,116},{6,6,101},{5,7,164},{6,6,101},{7,7,218},{7,7,178},{6,7,80}, +{6,7,32},{7,7,218},{6,7,272},{6,7,32},{0,7,592},{6,7,272},{0,7,592},{6,0,202},{6,0,202},{6,0,202},{6,0,202},{6,6,130},{6,6,130},{6,6,130},{6,6,122},{5,6,160},{5,6,160},{6,7,1641},{6,7,1017},{6,7,617},{6,7,353},{6,7,1318},{6,7,430},{6,7,30},{6,7,398},{6,7,1035},{5,7,227},{7,7,393},{7,7,321},{7,7,272},{7,7,320},{7,7,621}, 
+{6,7,426},{6,7,26},{5,7,226},{6,7,594},{5,7,226},{6,7,617},{6,7,617},{6,7,617},{6,7,353},{6,7,294},{6,7,30},{6,7,30},{6,7,398},{5,7,291},{5,7,227},{7,7,272},{7,7,272},{7,7,272},{7,7,320},{7,6,162},{6,7,26},{6,7,26},{5,7,226},{7,6,194},{5,7,226},{7,7,137},{7,7,65},{7,7,16},{6,7,25},{7,7,137},{7,7,113},{6,7,25}, +{0,7,225},{7,7,113},{0,7,225},{6,0,328},{6,0,328},{6,0,328},{6,0,328},{6,7,5},{6,7,5},{6,7,5},{6,6,68},{5,7,2},{5,7,2},{7,7,985},{7,7,913},{7,7,864},{7,7,848},{7,7,1117},{6,7,654},{6,7,254},{6,7,110},{6,7,763},{5,7,179},{7,7,201},{7,7,129},{7,7,80},{7,7,64},{7,7,333},{7,7,245},{7,7,196},{6,7,74},{7,7,373}, +{6,7,74},{7,7,864},{7,7,864},{7,7,864},{7,7,848},{6,7,710},{6,7,254},{6,7,254},{6,7,110},{6,7,363},{5,7,179},{7,7,80},{7,7,80},{7,7,80},{7,7,64},{7,7,212},{7,7,196},{7,7,196},{6,7,74},{6,7,194},{6,7,74},{7,7,137},{7,7,65},{7,7,16},{7,7,0},{7,7,137},{7,7,49},{7,7,0},{0,7,49},{7,7,49},{0,7,49},{6,0,712}, +{6,0,712},{6,0,712},{6,0,712},{6,7,85},{6,7,85},{6,7,85},{6,7,61},{5,7,130},{5,7,130},{7,7,642},{7,7,570},{7,7,521},{7,7,449},{7,7,678},{7,7,534},{7,7,485},{6,7,205},{6,7,834},{6,7,34},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,278},{7,7,134},{7,7,85},{6,7,9},{7,7,198},{6,7,9},{7,7,521},{7,7,521},{7,7,521}, +{7,7,449},{7,7,557},{7,7,485},{7,7,485},{6,7,205},{6,7,434},{6,7,34},{7,7,121},{7,7,121},{7,7,121},{7,7,49},{7,7,157},{7,7,85},{7,7,85},{6,7,9},{7,7,149},{6,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,400},{7,0,400},{7,0,400},{7,0,400},{6,7,421}, +{6,7,421},{6,7,421},{6,7,205},{6,7,34},{6,7,34},{7,7,450},{7,7,378},{7,7,329},{7,7,257},{7,7,390},{7,7,246},{7,7,197},{7,7,148},{7,7,426},{6,7,130},{7,7,306},{7,7,234},{7,7,185},{7,7,113},{7,7,246},{7,7,102},{7,7,53},{7,7,4},{7,7,102},{7,7,4},{7,7,329},{7,7,329},{7,7,329},{7,7,257},{7,7,269},{7,7,197},{7,7,197}, 
+{7,7,148},{7,7,377},{6,7,130},{7,7,185},{7,7,185},{7,7,185},{7,7,113},{7,7,125},{7,7,53},{7,7,53},{7,7,4},{7,7,53},{7,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{7,7,148},{7,7,148},{7,7,148},{7,7,148},{6,7,130}, +{6,7,130},{0,2,445},{0,1,157},{0,1,117},{0,1,405},{0,1,926},{0,1,806},{0,0,670},{0,0,741},{0,0,1169},{0,0,777},{0,2,445},{0,1,157},{0,1,117},{0,1,405},{0,1,926},{0,1,806},{0,0,670},{0,0,741},{1,0,990},{0,0,741},{0,1,36},{0,1,36},{0,1,36},{0,0,9},{0,0,85},{0,0,45},{0,0,45},{0,0,116},{0,0,145},{0,0,152},{0,1,36}, +{0,1,36},{0,1,36},{0,0,9},{0,0,85},{0,0,45},{0,0,45},{0,0,116},{0,0,109},{0,0,116},{1,0,421},{0,1,157},{0,1,117},{0,1,405},{1,0,421},{0,1,445},{0,1,405},{0,0,641},{0,1,445},{0,0,641},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,2,461},{0,2,109},{0,1,5}, +{0,1,101},{0,1,1326},{0,1,822},{0,1,462},{0,0,1205},{0,1,1783},{0,0,1241},{0,2,461},{0,2,109},{0,1,5},{0,1,101},{1,0,1294},{0,1,822},{0,1,462},{0,0,1205},{1,0,1262},{0,0,1205},{0,1,4},{0,1,4},{0,1,4},{0,1,100},{0,0,261},{0,0,157},{0,0,157},{0,0,116},{0,0,257},{0,0,152},{0,1,4},{0,1,4},{0,1,4},{0,1,100},{0,0,261}, +{0,0,157},{0,0,157},{0,0,116},{0,0,221},{0,0,116},{1,1,461},{0,2,109},{0,1,5},{0,1,101},{1,1,461},{2,0,421},{0,1,101},{0,1,901},{2,0,421},{0,1,901},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,3,430},{0,2,38},{0,1,158},{0,1,62},{0,2,1517},{0,1,989},{0,1,309}, +{0,1,1317},{0,1,1878},{0,1,1678},{0,3,430},{0,2,38},{0,1,158},{0,1,62},{0,2,1517},{0,1,989},{0,1,309},{0,1,1317},{0,1,1517},{0,1,1317},{0,2,13},{0,2,13},{0,2,13},{0,1,13},{0,1,356},{0,1,260},{0,1,260},{0,0,193},{0,0,446},{0,0,229},{0,2,13},{0,2,13},{0,2,13},{0,1,13},{0,1,356},{0,1,260},{0,1,260},{0,0,193},{0,0,410}, 
+{0,0,193},{0,3,421},{0,2,29},{0,1,149},{0,1,53},{0,3,421},{1,1,421},{0,1,53},{0,1,533},{1,1,421},{0,1,533},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,49},{0,0,85},{0,0,85},{0,4,602},{0,3,234},{0,2,518},{0,1,382},{0,3,1622},{0,2,825},{0,1,325},{0,1,821},{0,1,2022},{0,1,1182},{0,4,602}, +{0,3,234},{0,2,518},{0,1,382},{1,1,1526},{0,2,825},{0,1,325},{0,1,821},{2,0,1526},{0,1,821},{0,2,157},{0,2,157},{0,2,157},{0,1,157},{0,1,388},{0,1,100},{0,1,100},{0,0,401},{0,0,766},{0,0,437},{0,2,157},{0,2,157},{0,2,157},{0,1,157},{1,0,356},{0,1,100},{0,1,100},{0,0,401},{1,0,340},{0,0,401},{1,2,425},{0,3,113},{1,1,234}, +{0,1,261},{1,2,425},{0,2,425},{0,1,261},{0,1,421},{0,2,425},{0,1,421},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,0,25},{0,0,25},{0,0,25},{0,0,1},{0,0,37},{0,0,37},{0,4,845},{0,3,405},{0,2,725},{0,2,549},{0,3,1541},{0,2,654},{0,2,270},{0,1,722},{0,2,2583},{0,1,1083},{1,3,657},{1,2,345},{1,2,345},{0,2,549},{2,0,1517}, +{0,2,654},{0,2,270},{0,1,722},{3,0,1517},{0,1,722},{0,3,404},{0,3,404},{0,3,404},{0,2,449},{0,2,346},{0,2,170},{0,2,170},{0,1,146},{0,1,707},{0,1,507},{1,1,232},{1,1,232},{1,1,232},{1,1,200},{0,2,346},{0,2,170},{0,2,170},{0,1,146},{0,1,346},{0,1,146},{2,1,421},{0,3,5},{1,2,149},{0,2,149},{2,1,421},{4,0,421},{0,2,149}, +{0,1,601},{4,0,421},{0,1,601},{0,0,400},{0,0,400},{0,0,400},{0,0,400},{0,1,1},{0,1,1},{0,1,1},{0,0,100},{0,0,136},{0,0,136},{0,5,1209},{0,4,937},{1,2,1197},{0,2,789},{0,4,1526},{0,3,737},{0,2,14},{0,1,1042},{0,2,2487},{0,1,1403},{1,3,481},{1,3,173},{1,2,41},{1,2,161},{1,2,1526},{0,3,737},{0,2,14},{0,1,1042},{2,1,1526}, +{0,1,1042},{0,4,793},{0,4,793},{0,4,793},{0,2,785},{0,3,356},{0,2,10},{0,2,10},{0,1,18},{0,1,899},{0,1,379},{1,2,37},{1,2,37},{1,2,37},{1,1,136},{1,1,340},{0,2,10},{0,2,10},{0,1,18},{1,1,356},{0,1,18},{3,0,425},{1,3,137},{1,2,5},{0,2,5},{3,0,425},{3,1,425},{0,2,5},{0,2,965},{3,1,425},{0,2,965},{0,0,784}, 
+{0,0,784},{0,0,784},{0,0,784},{0,2,9},{0,2,9},{0,2,9},{0,1,9},{0,0,360},{0,0,360},{1,4,1158},{1,3,758},{1,2,850},{1,2,778},{0,4,1671},{0,3,546},{0,2,191},{0,2,903},{0,2,2390},{0,2,1430},{1,4,429},{1,3,29},{1,2,121},{1,2,49},{2,1,1526},{0,3,497},{0,2,142},{0,2,854},{4,0,1526},{0,2,854},{1,3,742},{1,3,742},{1,3,742}, +{1,2,742},{0,3,437},{0,2,155},{0,2,155},{0,1,195},{0,2,946},{0,1,290},{1,3,13},{1,3,13},{1,3,13},{1,2,13},{2,0,340},{0,2,106},{0,2,106},{0,1,146},{3,0,340},{0,1,146},{1,4,425},{1,3,25},{1,2,117},{1,2,45},{1,4,425},{2,2,425},{1,2,45},{0,2,565},{2,2,425},{0,2,565},{1,0,733},{1,0,733},{1,0,733},{1,0,733},{0,2,74}, +{0,2,74},{0,2,74},{0,1,74},{0,1,169},{0,1,169},{1,4,966},{1,4,606},{1,3,886},{1,2,682},{0,5,1742},{0,3,866},{0,3,641},{0,2,727},{0,3,2382},{0,2,758},{1,4,605},{1,4,245},{1,3,525},{1,2,321},{3,0,1517},{0,3,641},{1,2,302},{0,2,502},{3,1,1517},{0,2,502},{1,3,486},{1,3,486},{1,3,486},{1,2,486},{0,4,563},{0,3,241},{0,3,241}, +{0,2,531},{0,2,626},{0,1,546},{1,3,125},{1,3,125},{1,3,125},{1,2,125},{0,4,338},{0,3,16},{0,3,16},{0,2,306},{2,1,338},{0,2,306},{3,1,433},{1,4,145},{2,2,212},{1,2,221},{3,1,433},{6,0,433},{1,2,221},{0,2,421},{6,0,433},{0,2,421},{1,0,461},{1,0,461},{1,0,461},{1,0,461},{0,3,225},{0,3,225},{0,3,225},{0,2,306},{0,1,185}, +{0,1,185},{1,5,894},{1,4,462},{1,3,778},{1,3,646},{1,4,1626},{1,3,749},{1,3,429},{1,2,809},{0,3,2022},{0,2,614},{2,4,706},{1,4,362},{2,3,410},{1,3,546},{3,1,1526},{0,4,441},{0,3,227},{0,2,565},{4,1,1526},{0,2,565},{1,4,462},{1,4,462},{1,4,462},{1,3,525},{1,3,440},{1,2,296},{1,2,296},{1,2,280},{0,2,725},{0,2,85},{2,2,250}, +{2,2,250},{2,2,250},{2,2,226},{1,3,340},{0,3,106},{0,3,106},{0,2,36},{1,2,340},{0,2,36},{3,2,425},{1,4,1},{2,3,185},{0,3,146},{3,2,425},{5,1,425},{0,3,146},{0,2,565},{5,1,425},{0,2,565},{1,0,461},{1,0,461},{1,0,461},{1,0,461},{1,2,100},{1,2,100},{1,2,100},{1,1,181},{0,2,49},{0,2,49},{1,6,1166},{1,4,878},{1,3,1226}, 
+{1,3,742},{1,5,1545},{1,4,798},{1,3,29},{1,2,985},{0,3,2246},{0,2,1030},{2,4,482},{2,4,218},{2,3,58},{2,3,202},{2,3,1517},{0,4,521},{1,3,25},{1,2,981},{6,0,1517},{1,2,981},{1,5,749},{1,5,749},{1,5,749},{1,3,733},{1,4,374},{1,3,20},{1,3,20},{1,2,24},{0,3,482},{0,2,69},{2,3,49},{2,3,49},{2,3,49},{2,2,130},{2,2,338}, +{1,3,16},{1,3,16},{1,2,20},{5,0,338},{1,2,20},{4,1,433},{1,4,145},{2,3,9},{1,3,9},{4,1,433},{7,0,433},{1,3,9},{0,2,965},{7,0,433},{0,2,965},{1,0,733},{1,0,733},{1,0,733},{1,0,733},{1,3,20},{1,3,20},{1,3,20},{1,2,20},{0,2,65},{0,2,65},{2,5,1218},{2,4,810},{2,3,874},{2,3,826},{1,5,1625},{1,4,542},{1,3,141}, +{1,3,961},{0,4,1806},{0,3,642},{2,5,434},{2,4,26},{2,3,90},{2,3,42},{3,2,1517},{1,4,506},{1,3,105},{0,3,626},{5,1,1517},{0,3,626},{2,4,801},{2,4,801},{2,4,801},{2,3,801},{1,4,406},{1,3,116},{1,3,116},{1,2,152},{0,3,482},{1,2,285},{2,4,17},{2,4,17},{2,4,17},{2,3,17},{3,1,338},{1,3,80},{1,3,80},{1,2,116},{4,1,338}, +{1,2,116},{3,3,425},{2,4,25},{2,3,89},{2,3,41},{3,3,425},{6,1,425},{2,3,41},{0,3,601},{6,1,425},{0,3,601},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{1,3,52},{1,3,52},{1,3,52},{1,2,52},{0,3,41},{0,3,41},{2,5,962},{2,4,618},{2,3,906},{2,3,666},{1,6,1710},{1,4,798},{1,4,663},{1,3,721},{0,4,1838},{0,3,450},{2,5,562}, +{2,4,218},{2,3,506},{2,3,266},{4,1,1514},{0,5,474},{2,3,285},{0,3,434},{4,2,1514},{0,3,434},{2,4,497},{2,4,497},{2,4,497},{2,3,497},{1,5,536},{1,4,222},{1,4,222},{1,3,552},{0,4,469},{0,3,281},{2,4,97},{2,4,97},{2,4,97},{2,3,97},{4,0,340},{1,4,26},{1,4,26},{0,3,265},{3,2,340},{0,3,265},{4,2,425},{2,4,137},{3,3,194}, +{2,3,185},{4,2,425},{5,2,425},{2,3,185},{0,3,425},{5,2,425},{0,3,425},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{1,4,197},{1,4,197},{1,4,197},{1,3,296},{0,3,25},{0,3,25},{2,6,870},{2,5,446},{2,4,758},{2,4,670},{2,5,1638},{2,4,771},{2,4,515},{2,3,823},{0,5,1710},{1,3,598},{3,4,737},{2,5,325},{3,4,481},{2,4,549},{5,0,1517}, 
+{1,5,458},{1,4,224},{1,3,534},{3,3,1517},{1,3,534},{2,5,445},{2,5,445},{2,5,445},{2,4,526},{2,4,459},{2,3,291},{2,3,291},{2,3,339},{0,4,370},{1,3,114},{3,3,272},{3,3,272},{3,3,272},{3,3,256},{2,4,338},{1,4,80},{1,4,80},{1,3,50},{7,0,338},{1,3,50},{5,1,425},{2,5,1},{3,4,225},{1,4,160},{5,1,425},{4,3,425},{1,4,160}, +{0,3,533},{4,3,425},{0,3,533},{2,0,445},{2,0,445},{2,0,445},{2,0,445},{2,3,122},{2,3,122},{2,3,122},{2,2,185},{1,3,65},{1,3,65},{2,7,1130},{2,5,798},{2,4,1142},{2,4,702},{2,6,1571},{2,4,819},{2,4,51},{2,3,935},{0,5,1614},{1,3,950},{3,5,489},{3,5,269},{3,4,81},{3,4,249},{3,4,1514},{1,5,490},{2,4,42},{2,3,926},{7,1,1514}, +{2,3,926},{2,6,710},{2,6,710},{2,6,710},{2,4,686},{2,5,397},{2,4,35},{2,4,35},{2,3,35},{1,4,509},{1,3,50},{3,4,65},{3,4,65},{3,4,65},{3,3,128},{3,3,340},{2,4,26},{2,4,26},{2,3,26},{6,1,340},{2,3,26},{6,0,425},{2,5,113},{3,4,17},{2,4,17},{6,0,425},{3,4,425},{2,4,17},{0,3,901},{3,4,425},{0,3,901},{2,0,685}, +{2,0,685},{2,0,685},{2,0,685},{2,4,34},{2,4,34},{2,4,34},{2,3,34},{1,3,49},{1,3,49},{3,6,1286},{3,5,870},{3,4,906},{3,4,882},{2,6,1587},{2,5,546},{2,4,99},{2,4,1027},{1,5,1838},{1,4,702},{3,6,445},{3,5,29},{3,4,65},{3,4,41},{4,3,1514},{2,5,521},{2,4,74},{1,4,677},{6,2,1514},{1,4,677},{3,5,866},{3,5,866},{3,5,866}, +{3,4,866},{2,5,381},{2,4,83},{2,4,83},{2,3,115},{0,5,349},{2,3,286},{3,5,25},{3,5,25},{3,5,25},{3,4,25},{4,2,340},{2,4,58},{2,4,58},{2,3,90},{5,2,340},{2,3,90},{4,4,421},{3,5,29},{3,4,65},{3,4,41},{4,4,421},{7,2,421},{3,4,41},{0,4,641},{7,2,421},{0,4,641},{3,0,841},{3,0,841},{3,0,841},{3,0,841},{2,4,34}, +{2,4,34},{2,4,34},{2,3,34},{1,4,61},{1,4,61},{3,6,966},{3,5,614},{3,4,874},{3,4,658},{2,7,1686},{2,5,738},{2,4,659},{2,4,723},{0,6,1518},{1,4,446},{3,6,525},{3,5,173},{3,4,433},{3,4,217},{5,2,1517},{1,6,457},{3,4,274},{1,4,437},{5,3,1517},{1,4,437},{3,5,514},{3,5,514},{3,5,514},{3,4,514},{2,6,515},{2,5,209},{2,5,209}, 
+{2,3,579},{1,5,510},{1,4,302},{3,5,73},{3,5,73},{3,5,73},{3,4,73},{5,1,346},{2,5,40},{2,5,40},{3,3,293},{7,1,346},{3,3,293},{5,3,421},{3,5,109},{4,4,180},{3,4,153},{5,3,421},{6,3,421},{3,4,153},{0,4,433},{6,3,421},{0,4,433},{3,0,505},{3,0,505},{3,0,505},{3,0,505},{2,5,173},{2,5,173},{2,5,173},{2,3,290},{1,4,13}, +{1,4,13},{3,7,854},{3,6,438},{3,5,746},{3,5,702},{3,6,1658},{3,5,801},{3,5,609},{3,4,845},{1,6,1758},{2,4,590},{3,7,710},{3,6,294},{4,5,558},{3,5,558},{6,1,1514},{2,6,481},{2,5,227},{2,4,509},{4,4,1514},{2,4,509},{3,6,434},{3,6,434},{3,6,434},{3,5,533},{3,5,484},{3,4,292},{3,4,292},{3,4,404},{1,5,357},{2,4,149},{3,6,290}, +{3,6,290},{3,6,290},{4,4,290},{6,0,340},{2,5,58},{2,5,58},{2,4,68},{3,4,340},{2,4,68},{6,2,421},{3,6,5},{4,5,269},{2,5,178},{6,2,421},{5,4,421},{2,5,178},{0,4,505},{5,4,421},{0,4,505},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{3,4,148},{3,4,148},{3,4,148},{3,3,193},{2,4,85},{2,4,85},{3,7,1206},{3,6,726},{3,5,1066}, +{3,5,670},{3,7,1605},{3,5,785},{3,5,81},{3,4,893},{1,6,1598},{2,4,878},{4,6,502},{4,5,310},{4,5,110},{4,5,302},{7,0,1517},{2,6,465},{3,5,65},{2,4,877},{3,5,1517},{2,4,877},{3,7,677},{3,7,677},{3,7,677},{3,5,645},{3,6,426},{3,5,56},{3,5,56},{3,4,52},{2,5,542},{2,4,37},{4,5,85},{4,5,85},{4,5,85},{4,4,130},{5,2,346}, +{3,5,40},{3,5,40},{2,4,36},{7,2,346},{2,4,36},{7,1,421},{3,6,85},{4,5,29},{3,5,29},{7,1,421},{4,5,421},{3,5,29},{0,4,841},{4,5,421},{0,4,841},{3,0,641},{3,0,641},{3,0,641},{3,0,641},{3,4,52},{3,4,52},{3,4,52},{3,4,52},{2,4,37},{2,4,37},{4,7,1362},{4,6,938},{4,5,946},{4,5,946},{3,7,1557},{3,6,558},{3,5,65}, +{3,5,1101},{0,7,1662},{2,5,770},{4,7,462},{4,6,38},{4,5,46},{4,5,46},{5,4,1517},{3,6,542},{3,5,49},{1,5,721},{7,3,1517},{1,5,721},{4,5,937},{4,5,937},{4,5,937},{4,5,937},{3,6,362},{3,5,56},{3,5,56},{3,4,84},{1,6,350},{3,4,293},{4,5,37},{4,5,37},{4,5,37},{4,5,37},{6,1,346},{3,5,40},{3,5,40},{3,4,68},{6,3,346}, 
+{3,4,68},{5,5,421},{4,6,37},{4,5,45},{3,5,45},{5,5,421},{3,6,421},{3,5,45},{0,5,685},{3,6,421},{0,5,685},{4,0,901},{4,0,901},{4,0,901},{4,0,901},{3,5,20},{3,5,20},{3,5,20},{3,4,20},{2,5,85},{2,5,85},{4,7,978},{4,6,618},{4,5,850},{4,5,658},{3,7,2021},{3,6,686},{3,5,561},{3,5,733},{1,7,1530},{2,5,450},{4,7,494}, +{4,6,134},{4,5,366},{4,5,174},{6,3,1526},{2,7,446},{4,5,269},{2,5,446},{6,4,1526},{2,5,446},{4,6,537},{4,6,537},{4,6,537},{4,5,537},{3,7,500},{3,6,202},{3,6,202},{3,4,500},{1,6,510},{2,5,329},{4,6,53},{4,6,53},{4,6,53},{4,5,53},{7,0,340},{3,6,58},{3,6,58},{4,4,265},{3,5,340},{4,4,265},{6,4,421},{4,6,85},{5,5,170}, +{4,5,125},{6,4,421},{7,4,421},{4,5,125},{0,5,445},{7,4,421},{0,5,445},{4,0,533},{4,0,533},{4,0,533},{4,0,533},{3,6,153},{3,6,153},{3,6,153},{3,4,244},{2,5,5},{2,5,5},{4,7,1158},{4,7,438},{4,6,742},{4,6,742},{4,7,1686},{4,6,839},{3,6,677},{4,5,875},{1,7,1710},{3,5,590},{5,6,769},{4,7,269},{4,6,573},{4,6,573},{7,2,1517}, +{3,7,510},{3,6,236},{3,5,490},{5,5,1517},{3,5,490},{4,7,429},{4,7,429},{4,7,429},{4,5,546},{4,6,515},{4,5,299},{4,5,299},{4,5,475},{2,6,350},{3,5,190},{4,7,260},{4,7,260},{4,7,260},{5,5,328},{7,1,346},{3,6,40},{3,6,40},{3,5,90},{7,3,346},{3,5,90},{7,3,421},{4,7,13},{4,6,317},{3,6,200},{7,3,421},{6,5,421},{3,6,200}, +{0,5,481},{6,5,421},{0,5,481},{4,0,425},{4,0,425},{4,0,425},{4,0,425},{4,5,178},{4,5,178},{4,5,178},{4,4,205},{3,5,109},{3,5,109},{4,7,1862},{4,7,662},{4,6,998},{4,6,646},{4,7,1686},{4,6,759},{4,6,119},{4,5,859},{2,7,1590},{3,5,814},{5,7,521},{5,6,305},{5,6,145},{5,6,361},{5,6,1526},{3,7,446},{4,6,94},{3,5,810},{4,6,1526}, +{3,5,810},{4,7,637},{4,7,637},{4,7,637},{4,6,610},{4,6,435},{4,6,83},{4,6,83},{4,5,75},{1,7,565},{3,5,30},{5,6,109},{5,6,109},{5,6,109},{5,5,136},{6,3,340},{4,6,58},{4,6,58},{3,5,26},{6,4,340},{3,5,26},{5,7,421},{4,7,61},{5,6,45},{4,6,45},{5,7,421},{5,6,421},{4,6,45},{0,5,785},{5,6,421},{0,5,785},{4,0,601}, 
+{4,0,601},{4,0,601},{4,0,601},{4,5,50},{4,5,50},{4,5,50},{4,5,74},{3,5,29},{3,5,29},{5,7,1498},{5,7,1014},{5,6,994},{5,6,1018},{4,7,2198},{4,7,578},{4,6,39},{4,6,1183},{2,7,1878},{3,6,846},{5,7,537},{5,7,53},{5,6,33},{5,6,57},{7,3,1526},{4,7,569},{4,6,30},{2,6,758},{6,5,1526},{2,6,758},{5,6,990},{5,6,990},{5,6,990}, +{5,6,1014},{4,7,349},{4,6,35},{4,6,35},{4,5,59},{2,7,357},{3,5,254},{5,6,29},{5,6,29},{5,6,29},{5,6,53},{7,2,340},{4,6,26},{4,6,26},{4,5,50},{5,5,340},{4,5,50},{6,6,425},{5,7,49},{5,6,29},{4,6,29},{6,6,425},{4,7,425},{4,6,29},{0,6,733},{4,7,425},{0,6,733},{5,0,965},{5,0,965},{5,0,965},{5,0,965},{4,6,10}, +{4,6,10},{4,6,10},{4,5,10},{3,6,113},{3,6,113},{5,7,1466},{5,7,630},{5,6,834},{5,6,666},{5,7,2055},{4,7,642},{4,6,471},{4,6,751},{3,7,1766},{3,6,462},{5,7,937},{5,7,101},{5,6,305},{5,6,137},{5,7,1526},{4,7,521},{5,6,270},{3,6,461},{5,6,1526},{3,6,461},{5,7,566},{5,7,566},{5,7,566},{5,6,566},{4,7,621},{4,7,201},{4,7,201}, +{4,5,427},{2,7,469},{3,6,362},{5,7,37},{5,7,37},{5,7,37},{5,6,37},{5,6,338},{4,7,80},{4,7,80},{5,5,241},{4,6,338},{5,5,241},{7,5,425},{5,7,65},{6,6,164},{5,6,101},{7,5,425},{6,6,433},{5,6,101},{0,6,461},{6,6,433},{0,6,461},{5,0,565},{5,0,565},{5,0,565},{5,0,565},{4,7,137},{4,7,137},{4,7,137},{4,5,202},{3,6,1}, +{3,6,1},{5,7,2042},{5,7,810},{5,7,746},{5,7,790},{5,7,2073},{5,7,885},{4,7,651},{4,6,877},{4,7,2102},{4,6,598},{6,7,794},{6,7,530},{5,7,550},{5,7,594},{6,6,1526},{5,7,689},{4,7,251},{4,6,477},{6,6,1526},{4,6,477},{5,7,521},{5,7,521},{5,7,521},{5,6,521},{5,7,552},{5,6,312},{5,6,312},{4,6,516},{3,7,349},{4,6,237},{5,7,325}, +{5,7,325},{5,7,325},{5,6,325},{6,5,340},{4,7,26},{4,7,26},{4,6,116},{3,7,340},{4,6,116},{6,7,433},{6,7,169},{5,7,325},{4,7,226},{6,7,433},{7,6,425},{4,7,226},{0,6,461},{7,6,425},{0,6,461},{5,0,421},{5,0,421},{5,0,421},{5,0,421},{5,6,212},{5,6,212},{5,6,212},{5,5,221},{4,6,137},{4,6,137},{6,7,2362},{5,7,1514},{5,7,938}, 
+{5,7,630},{5,7,2633},{5,7,741},{5,7,165},{5,6,833},{4,7,2070},{4,6,758},{6,7,762},{6,7,306},{6,7,186},{6,7,426},{7,5,1526},{5,7,705},{5,7,129},{4,6,749},{5,7,1541},{4,6,749},{5,7,889},{5,7,889},{5,7,889},{5,7,581},{5,7,424},{5,7,116},{5,7,116},{5,6,104},{3,7,525},{4,6,29},{6,7,137},{6,7,137},{6,7,137},{6,6,146},{7,4,338}, +{5,7,80},{5,7,80},{4,6,20},{7,5,338},{4,6,20},{7,6,433},{6,7,185},{6,7,65},{5,7,65},{7,6,433},{6,7,425},{5,7,65},{0,6,733},{6,7,425},{0,6,733},{5,0,565},{5,0,565},{5,0,565},{5,0,565},{5,6,52},{5,6,52},{5,6,52},{5,6,100},{4,6,25},{4,6,25},{6,7,2073},{6,7,1449},{6,7,1049},{5,7,981},{6,7,2548},{5,7,1044},{5,7,20}, +{5,6,1196},{5,7,2365},{4,7,929},{6,7,1049},{6,7,425},{6,7,25},{6,7,73},{7,6,1492},{6,7,948},{5,7,16},{3,7,800},{7,6,1460},{3,7,800},{6,7,1049},{6,7,1049},{6,7,1049},{5,7,981},{5,7,680},{5,7,20},{5,7,20},{5,6,40},{4,7,434},{4,6,205},{6,7,25},{6,7,25},{6,7,25},{6,7,73},{7,5,370},{5,7,16},{5,7,16},{5,6,36},{6,6,338}, +{5,6,36},{7,7,410},{7,7,338},{6,7,16},{5,7,16},{7,7,410},{6,7,464},{5,7,16},{0,7,784},{6,7,464},{0,7,784},{5,0,965},{5,0,965},{5,0,965},{5,0,965},{5,7,4},{5,7,4},{5,7,4},{5,6,4},{4,7,145},{4,7,145},{6,7,1769},{6,7,1145},{6,7,745},{6,7,601},{6,7,1940},{6,7,1172},{5,7,308},{5,7,696},{5,7,1805},{4,7,401},{7,7,1043}, +{6,7,569},{6,7,169},{6,7,25},{7,6,1076},{6,7,596},{6,7,196},{4,7,401},{6,7,1076},{4,7,401},{6,7,745},{6,7,745},{6,7,745},{6,7,601},{6,7,916},{5,7,308},{5,7,308},{5,6,360},{4,7,626},{5,6,341},{6,7,169},{6,7,169},{6,7,169},{6,7,25},{6,7,340},{6,7,196},{6,7,196},{6,6,221},{5,7,340},{6,6,221},{7,7,202},{7,7,130},{7,7,81}, +{6,7,0},{7,7,202},{7,7,218},{6,7,0},{0,7,400},{7,7,218},{0,7,400},{6,0,601},{6,0,601},{6,0,601},{6,0,601},{5,7,164},{5,7,164},{5,7,164},{5,6,164},{4,7,1},{4,7,1},{6,7,1886},{6,7,1262},{6,7,862},{6,7,502},{6,7,1715},{6,7,731},{6,7,331},{5,7,507},{5,7,1634},{4,7,266},{7,7,521},{7,7,449},{7,7,400},{6,7,277},{7,7,797}, 
+{6,7,506},{6,7,106},{5,7,146},{6,7,770},{5,7,146},{6,7,862},{6,7,862},{6,7,862},{6,7,502},{6,7,691},{6,7,331},{6,7,331},{5,7,507},{5,7,610},{4,7,266},{7,7,400},{7,7,400},{7,7,400},{6,7,277},{7,6,338},{6,7,106},{6,7,106},{5,7,146},{7,6,370},{5,7,146},{7,7,121},{7,7,49},{7,7,0},{7,7,16},{7,7,121},{7,7,65},{7,7,16}, +{0,7,121},{7,7,65},{0,7,121},{6,0,421},{6,0,421},{6,0,421},{6,0,421},{6,7,250},{6,7,250},{6,7,250},{6,6,241},{4,7,145},{4,7,145},{7,7,2010},{6,7,1774},{6,7,1374},{6,7,822},{6,7,1923},{6,7,747},{6,7,347},{6,7,139},{6,7,1446},{5,7,34},{7,7,329},{7,7,257},{7,7,208},{7,7,160},{7,7,509},{7,7,389},{6,7,298},{5,7,18},{7,7,549}, +{5,7,18},{6,7,1374},{6,7,1374},{6,7,1374},{6,7,822},{6,7,899},{6,7,347},{6,7,347},{6,7,139},{5,7,866},{5,7,34},{7,7,208},{7,7,208},{7,7,208},{7,7,160},{7,7,388},{6,7,298},{6,7,298},{5,7,18},{6,7,370},{5,7,18},{7,7,185},{7,7,113},{7,7,64},{7,7,16},{7,7,185},{7,7,65},{7,7,16},{0,7,9},{7,7,65},{0,7,9},{6,0,533}, +{6,0,533},{6,0,533},{6,0,533},{6,7,58},{6,7,58},{6,7,58},{6,7,130},{5,7,25},{5,7,25},{7,7,1347},{7,7,1275},{7,7,1226},{7,7,1154},{7,7,1431},{6,7,922},{6,7,522},{6,7,2},{6,7,1125},{5,7,137},{7,7,258},{7,7,186},{7,7,137},{7,7,65},{7,7,342},{7,7,198},{7,7,149},{6,7,1},{7,7,294},{6,7,1},{7,7,1226},{7,7,1226},{7,7,1226}, +{7,7,1154},{6,7,1146},{6,7,522},{6,7,522},{6,7,2},{6,7,725},{5,7,137},{7,7,137},{7,7,137},{7,7,137},{7,7,65},{7,7,221},{7,7,149},{7,7,149},{6,7,1},{7,7,245},{6,7,1},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,901},{6,0,901},{6,0,901},{6,0,901},{6,7,122}, +{6,7,122},{6,7,122},{6,7,2},{5,7,137},{5,7,137},{7,7,883},{7,7,811},{7,7,762},{7,7,690},{7,7,871},{7,7,727},{7,7,678},{6,7,130},{6,7,949},{6,7,149},{7,7,258},{7,7,186},{7,7,137},{7,7,65},{7,7,246},{7,7,102},{7,7,53},{7,7,36},{7,7,134},{7,7,36},{7,7,762},{7,7,762},{7,7,762},{7,7,690},{7,7,750},{7,7,678},{7,7,678}, 
+{6,7,130},{6,7,549},{6,7,149},{7,7,137},{7,7,137},{7,7,137},{7,7,65},{7,7,125},{7,7,53},{7,7,53},{7,7,36},{7,7,85},{7,7,36},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,641},{7,0,641},{7,0,641},{7,0,641},{6,7,442},{6,7,442},{6,7,442},{6,7,130},{6,7,149}, +{6,7,149},{0,3,932},{0,2,218},{0,1,82},{0,1,250},{0,2,1971},{0,1,1371},{0,1,611},{0,0,1950},{0,1,2332},{0,0,1986},{0,3,932},{0,2,218},{0,1,82},{0,1,250},{1,0,1899},{0,1,1371},{0,1,611},{0,0,1950},{1,0,1923},{0,0,1950},{0,1,1},{0,1,1},{0,1,1},{0,0,64},{0,0,180},{0,0,100},{0,0,100},{0,0,101},{0,0,200},{0,0,137},{0,1,1}, +{0,1,1},{0,1,1},{0,0,64},{0,0,180},{0,0,100},{0,0,100},{0,0,101},{0,0,164},{0,0,101},{1,1,884},{0,2,218},{0,1,82},{0,1,250},{1,1,884},{2,0,900},{0,1,250},{0,1,1170},{2,0,900},{0,1,1170},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,3,900},{0,2,250},{0,2,314}, +{0,1,314},{0,2,2355},{0,1,1755},{0,1,675},{0,1,1875},{0,1,2716},{0,1,2236},{0,3,900},{0,2,250},{0,2,314},{0,1,314},{0,2,2355},{0,1,1755},{0,1,675},{0,1,1875},{0,1,2355},{0,1,1875},{0,2,25},{0,2,25},{0,2,25},{0,1,25},{0,1,410},{0,0,292},{0,0,292},{0,0,181},{0,0,392},{0,0,217},{0,2,25},{0,2,25},{0,2,25},{0,1,25},{0,1,410}, +{0,0,292},{0,0,292},{0,0,181},{0,0,356},{0,0,181},{2,0,884},{0,2,250},{0,2,314},{0,1,314},{2,0,884},{3,0,884},{0,1,314},{0,1,914},{3,0,884},{0,1,914},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,4,890},{0,3,104},{0,2,90},{0,2,442},{0,2,2995},{0,2,1851},{0,1,995}, +{0,1,1875},{0,1,3356},{0,1,2236},{0,4,890},{0,3,104},{0,2,90},{0,2,442},{1,1,2932},{0,2,1851},{0,1,995},{0,1,1875},{2,0,2900},{0,1,1875},{0,2,9},{0,2,9},{0,2,9},{0,1,9},{0,1,586},{0,1,370},{0,1,370},{0,0,389},{0,0,712},{0,0,425},{0,2,9},{0,2,9},{0,2,9},{0,1,9},{0,1,586},{0,1,370},{0,1,370},{0,0,389},{1,0,650}, 
+{0,0,389},{1,2,890},{0,3,104},{0,2,90},{0,2,442},{1,2,890},{2,1,890},{0,2,442},{0,1,914},{2,1,890},{0,1,914},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,4,990},{0,3,140},{0,2,158},{0,2,158},{0,3,3048},{0,2,1707},{0,2,747},{0,1,1795},{0,1,3916},{0,1,2156},{0,4,990}, +{0,3,140},{0,2,158},{0,2,158},{0,3,3048},{0,2,1707},{0,2,747},{0,1,1795},{1,1,3048},{0,1,1795},{0,3,40},{0,3,40},{0,3,40},{0,1,157},{0,1,698},{0,1,290},{0,1,290},{0,0,641},{0,0,1076},{0,0,677},{0,3,40},{0,3,40},{0,3,40},{0,1,157},{1,0,666},{0,1,290},{0,1,290},{0,0,641},{1,0,650},{0,0,641},{2,1,890},{0,3,104},{0,2,122}, +{0,2,122},{2,1,890},{4,0,890},{0,2,122},{0,1,1170},{4,0,890},{0,1,1170},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,0,0},{0,0,0},{0,0,0},{0,0,16},{0,0,52},{0,0,52},{0,5,1115},{0,4,265},{0,2,689},{0,2,293},{0,4,3096},{0,3,1731},{0,2,324},{0,1,2020},{0,2,4009},{0,1,2381},{0,5,1115},{0,4,265},{1,2,429},{0,2,293},{1,2,3048}, +{0,3,1731},{0,2,324},{0,1,2020},{0,2,3048},{0,1,2020},{0,4,261},{0,4,261},{0,4,261},{0,2,229},{0,2,656},{0,2,260},{0,2,260},{0,1,256},{0,1,1017},{0,1,617},{0,4,261},{0,4,261},{0,4,261},{0,2,229},{0,2,656},{0,2,260},{0,2,260},{0,1,256},{0,1,656},{0,1,256},{3,0,890},{0,4,40},{1,2,68},{0,2,68},{3,0,890},{5,0,890},{0,2,68}, +{0,2,1220},{5,0,890},{0,2,1220},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,1,16},{0,1,16},{0,1,16},{0,0,25},{0,0,61},{0,0,61},{0,6,1419},{0,4,569},{0,3,1078},{0,2,821},{0,4,3096},{0,3,1395},{0,2,356},{0,2,1832},{0,2,4201},{0,2,2793},{1,4,1011},{1,3,353},{1,2,413},{1,2,413},{2,1,3048},{0,3,1395},{0,2,356},{0,2,1832},{4,0,3048}, +{0,2,1832},{0,4,533},{0,4,533},{0,4,533},{0,2,565},{0,3,666},{0,2,100},{0,2,100},{0,1,128},{0,1,1209},{0,1,489},{1,2,157},{1,2,157},{1,2,157},{1,2,157},{1,1,650},{0,2,100},{0,2,100},{0,1,128},{1,1,666},{0,1,128},{3,1,890},{0,4,40},{1,2,292},{0,2,292},{3,1,890},{4,1,890},{0,2,292},{0,2,932},{4,1,890},{0,2,932},{0,0,529}, 
+{0,0,529},{0,0,529},{0,0,529},{0,1,16},{0,1,16},{0,1,16},{0,1,64},{0,0,205},{0,0,205},{0,6,1915},{0,5,1019},{1,3,1269},{0,3,1110},{0,5,3051},{0,3,1443},{0,3,318},{0,2,1384},{0,2,4777},{0,2,2345},{1,5,909},{1,4,131},{1,3,113},{1,3,509},{3,0,3051},{0,3,1443},{0,3,318},{0,2,1384},{3,1,3051},{0,2,1384},{0,5,970},{0,5,970},{0,5,970}, +{0,3,1010},{0,3,698},{0,2,196},{0,2,196},{0,1,256},{0,2,1641},{0,1,617},{1,3,13},{1,3,13},{1,3,13},{1,2,13},{2,0,650},{0,2,196},{0,2,196},{0,1,256},{3,0,650},{0,1,256},{2,3,884},{0,5,58},{1,3,104},{0,3,149},{2,3,884},{6,0,884},{0,3,149},{0,2,900},{6,0,884},{0,2,900},{0,0,961},{0,0,961},{0,0,961},{0,0,961},{0,2,0}, +{0,2,0},{0,2,0},{0,1,0},{0,1,361},{0,1,361},{1,5,2113},{1,4,1271},{1,3,1285},{1,3,1329},{0,6,3123},{0,4,1208},{0,3,30},{0,2,1320},{0,3,5011},{0,2,2281},{1,5,957},{1,4,115},{1,3,129},{1,3,173},{1,4,3051},{0,4,1208},{0,3,30},{0,2,1320},{2,2,3051},{0,2,1320},{1,4,1190},{1,4,1190},{1,4,1190},{1,2,1281},{0,4,648},{0,3,26},{0,3,26}, +{0,2,296},{0,2,1641},{0,1,1001},{1,4,34},{1,4,34},{1,4,34},{1,2,125},{0,4,648},{0,3,26},{0,3,26},{0,2,296},{2,1,648},{0,2,296},{3,2,884},{0,5,26},{1,3,104},{0,3,5},{3,2,884},{5,1,884},{0,3,5},{0,2,1124},{5,1,884},{0,2,1124},{1,0,1181},{1,0,1181},{1,0,1181},{1,0,1181},{0,3,25},{0,3,25},{0,3,25},{0,1,64},{0,1,425}, +{0,1,425},{1,6,1864},{1,5,1038},{1,3,1390},{1,3,1038},{0,6,3132},{0,4,1199},{0,3,201},{0,2,1743},{0,3,4924},{0,2,2332},{1,6,1080},{1,5,254},{2,3,458},{1,3,254},{2,3,3051},{0,4,1163},{0,3,165},{0,2,1707},{6,0,3051},{0,2,1707},{1,4,1016},{1,4,1016},{1,4,1016},{1,3,989},{0,5,716},{0,3,152},{0,3,152},{0,2,62},{0,2,1611},{0,2,651},{1,4,232}, +{1,4,232},{1,4,232},{1,3,205},{1,3,650},{0,3,116},{0,3,116},{0,2,26},{1,2,650},{0,2,26},{4,1,884},{1,5,58},{2,3,58},{1,3,58},{4,1,884},{4,2,884},{1,3,58},{0,3,1274},{4,2,884},{0,3,1274},{1,0,980},{1,0,980},{1,0,980},{1,0,980},{0,3,52},{0,3,52},{0,3,52},{0,2,61},{0,1,458},{0,1,458},{1,7,1784},{1,5,910},{1,4,1441}, 
+{1,3,1134},{0,7,3247},{0,5,1292},{0,4,567},{0,3,1474},{0,4,4900},{0,3,2178},{2,5,1028},{2,4,362},{2,3,394},{2,3,418},{3,2,3051},{0,5,1096},{1,3,331},{0,3,1278},{5,1,3051},{0,3,1278},{1,5,885},{1,5,885},{1,5,885},{1,3,909},{0,5,876},{0,4,206},{0,4,206},{0,2,254},{0,3,1548},{0,2,347},{2,3,169},{2,3,169},{2,3,169},{2,3,193},{2,2,648}, +{0,4,10},{0,4,10},{0,2,58},{5,0,648},{0,2,58},{5,0,884},{1,5,26},{2,3,250},{1,3,250},{5,0,884},{3,3,884},{1,3,250},{0,3,954},{3,3,884},{0,3,954},{1,0,884},{1,0,884},{1,0,884},{1,0,884},{0,4,197},{0,4,197},{0,4,197},{0,2,205},{0,2,298},{0,2,298},{1,7,1976},{1,6,1124},{1,4,1649},{1,4,1229},{1,6,3204},{0,5,1452},{1,4,525}, +{0,3,1474},{0,4,4420},{0,3,1474},{2,6,930},{2,5,160},{2,4,138},{2,3,546},{4,1,3060},{0,5,968},{0,4,195},{0,3,990},{4,2,3060},{0,3,990},{1,6,1060},{1,6,1060},{1,6,1060},{1,4,1108},{1,4,824},{1,3,314},{1,3,314},{1,2,370},{0,3,1260},{0,2,427},{2,4,17},{2,4,17},{2,4,17},{2,3,17},{3,1,648},{0,4,74},{0,4,74},{1,2,226},{4,1,648}, +{1,2,226},{3,4,882},{1,6,80},{2,4,122},{0,4,146},{3,4,882},{7,1,882},{0,4,146},{0,3,890},{7,1,882},{0,3,890},{1,0,1044},{1,0,1044},{1,0,1044},{1,0,1044},{1,3,145},{1,3,145},{1,3,145},{1,2,145},{0,2,202},{0,2,202},{2,6,2374},{1,6,1476},{2,4,1550},{1,4,1469},{1,6,3172},{1,5,1259},{1,4,61},{1,3,1323},{0,4,4452},{0,3,1282},{2,6,930}, +{2,5,96},{2,4,106},{2,4,194},{5,0,3060},{0,6,1144},{1,4,45},{0,3,1086},{6,1,3060},{0,3,1086},{2,5,1476},{2,5,1476},{2,5,1476},{1,4,1460},{1,5,666},{1,4,52},{1,4,52},{1,3,362},{0,3,1356},{0,3,321},{2,5,32},{2,5,32},{2,5,32},{2,3,97},{4,0,650},{1,4,36},{1,4,36},{0,3,125},{3,2,650},{0,3,125},{4,3,882},{1,6,16},{2,4,90}, +{1,4,9},{4,3,882},{6,2,882},{1,4,9},{0,3,1082},{6,2,882},{0,3,1082},{1,0,1460},{1,0,1460},{1,0,1460},{1,0,1460},{1,4,52},{1,4,52},{1,4,52},{1,2,65},{0,3,200},{0,3,200},{2,7,1892},{2,6,1090},{2,4,1370},{2,4,1062},{1,7,3100},{1,5,1169},{1,4,151},{1,3,1665},{0,5,4036},{0,3,1678},{2,7,1051},{2,6,249},{3,4,493},{2,4,221},{3,4,3060}, 
+{0,6,883},{1,4,126},{0,4,1528},{7,1,3060},{0,4,1528},{2,5,1035},{2,5,1035},{2,5,1035},{2,4,1026},{1,6,723},{1,4,115},{1,4,115},{1,3,65},{0,4,1004},{0,3,78},{2,5,194},{2,5,194},{2,5,194},{2,4,185},{2,4,648},{1,4,90},{1,4,90},{1,3,40},{7,0,648},{1,3,40},{5,2,882},{2,6,80},{3,4,52},{2,4,52},{5,2,882},{5,3,882},{2,4,52}, +{0,4,1332},{5,3,882},{0,4,1332},{2,0,1010},{2,0,1010},{2,0,1010},{2,0,1010},{1,4,34},{1,4,34},{1,4,34},{1,3,61},{0,3,74},{0,3,74},{2,7,1892},{2,6,898},{2,5,1451},{2,4,1094},{2,6,3501},{1,6,1308},{1,5,589},{1,4,1510},{0,5,3940},{0,4,1116},{3,6,1051},{3,5,377},{3,4,381},{3,4,429},{5,1,3060},{0,6,1059},{2,4,312},{0,4,1016},{6,2,3060}, +{0,4,1016},{2,6,882},{2,6,882},{2,6,882},{2,4,898},{1,6,835},{1,5,189},{1,5,189},{1,3,209},{0,4,1036},{0,3,270},{3,4,185},{3,4,185},{3,4,185},{3,4,233},{3,3,650},{1,5,20},{1,5,20},{1,3,40},{6,1,650},{1,3,40},{6,1,882},{2,6,16},{3,4,212},{2,4,212},{6,1,882},{4,4,882},{2,4,212},{0,4,980},{4,4,882},{0,4,980},{2,0,882}, +{2,0,882},{2,0,882},{2,0,882},{1,5,173},{1,5,173},{1,5,173},{1,3,173},{0,4,136},{0,4,136},{2,7,2404},{2,7,1116},{2,5,1595},{2,5,1235},{2,7,3244},{1,6,1404},{2,5,619},{1,4,1446},{0,6,3804},{0,4,892},{3,7,957},{3,6,195},{3,5,169},{3,4,509},{6,0,3060},{0,7,936},{1,5,196},{0,4,888},{3,4,3060},{0,4,888},{2,7,1035},{2,7,1035},{2,7,1035}, +{2,5,1091},{2,5,835},{2,4,317},{2,4,317},{2,3,369},{0,5,875},{1,3,396},{3,5,25},{3,5,25},{3,5,25},{3,4,25},{4,2,650},{1,5,52},{1,5,52},{2,3,200},{5,2,650},{2,3,200},{7,0,884},{2,7,106},{3,5,144},{1,5,160},{7,0,884},{3,5,884},{1,5,160},{0,4,884},{3,5,884},{0,4,884},{2,0,1010},{2,0,1010},{2,0,1010},{2,0,1010},{2,4,173}, +{2,4,173},{2,4,173},{2,3,173},{0,4,8},{0,4,8},{3,7,2430},{2,7,1404},{3,5,1610},{2,5,1411},{2,7,3148},{2,6,1309},{2,5,91},{2,4,1325},{0,6,3484},{0,4,1180},{3,7,909},{3,6,83},{3,5,89},{3,5,221},{4,4,3051},{0,7,1000},{2,5,66},{1,4,1053},{7,2,3051},{1,4,1053},{2,7,1403},{2,7,1403},{2,7,1403},{2,5,1395},{2,6,681},{2,5,75},{2,5,75}, 
+{2,4,425},{0,5,795},{0,4,280},{3,6,34},{3,6,34},{3,6,34},{3,4,73},{5,1,656},{2,5,50},{2,5,50},{1,4,153},{7,1,656},{1,4,153},{5,4,884},{2,7,10},{3,5,80},{2,5,17},{5,4,884},{7,3,884},{2,5,17},{0,4,1044},{7,3,884},{0,4,1044},{2,0,1394},{2,0,1394},{2,0,1394},{2,0,1394},{2,4,61},{2,4,61},{2,4,61},{2,3,61},{0,4,136}, +{0,4,136},{3,7,2214},{3,7,1150},{3,5,1358},{3,5,1094},{2,7,3652},{2,6,1147},{2,5,109},{2,4,1595},{0,7,3724},{0,5,1402},{3,7,1314},{3,7,250},{3,5,458},{3,5,194},{5,3,3060},{1,7,888},{2,5,93},{0,5,1398},{6,3,3060},{0,5,1398},{3,6,1060},{3,6,1060},{3,6,1060},{3,5,1069},{2,7,736},{2,5,84},{2,5,84},{2,4,74},{0,6,820},{1,4,81},{3,6,160}, +{3,6,160},{3,6,160},{3,5,169},{6,0,650},{2,5,68},{2,5,68},{1,4,45},{3,4,650},{1,4,45},{6,3,884},{3,7,106},{4,5,50},{3,5,50},{6,3,884},{6,4,884},{3,5,50},{0,5,1394},{6,4,884},{0,5,1394},{3,0,1044},{3,0,1044},{3,0,1044},{3,0,1044},{2,5,20},{2,5,20},{2,5,20},{2,4,65},{0,5,8},{0,5,8},{3,7,2566},{3,7,894},{3,6,1469}, +{3,5,1062},{3,7,3535},{2,7,1332},{2,6,619},{2,5,1554},{0,7,3276},{0,5,1146},{4,7,1080},{4,6,398},{4,5,374},{4,5,446},{6,2,3051},{1,7,1016},{3,5,299},{0,5,1046},{5,4,3051},{0,5,1046},{3,7,885},{3,7,885},{3,7,885},{3,5,893},{2,7,800},{2,6,178},{2,6,178},{2,4,170},{0,6,660},{1,4,225},{4,5,205},{4,5,205},{4,5,205},{4,5,277},{5,2,656}, +{2,6,34},{2,6,34},{2,4,26},{7,2,656},{2,4,26},{7,2,884},{3,7,10},{4,5,178},{3,5,178},{7,2,884},{5,5,884},{3,5,178},{0,5,1010},{5,5,884},{0,5,1010},{3,0,884},{3,0,884},{3,0,884},{3,0,884},{2,6,153},{2,6,153},{2,6,153},{2,4,145},{0,5,136},{0,5,136},{4,7,3320},{3,7,1150},{3,6,1549},{3,6,1249},{3,7,3487},{2,7,1364},{2,6,603}, +{2,5,1426},{0,7,3340},{1,5,892},{4,7,1016},{4,7,236},{4,6,206},{4,5,478},{7,1,3051},{2,7,964},{2,6,203},{1,5,883},{4,5,3051},{1,5,883},{3,7,1029},{3,7,1029},{3,7,1029},{3,6,1080},{3,6,852},{3,5,326},{3,5,326},{3,4,374},{0,6,884},{2,4,371},{4,6,37},{4,6,37},{4,6,37},{4,5,37},{6,1,656},{2,6,34},{2,6,34},{3,4,178},{6,3,656}, 
+{3,4,178},{5,6,890},{3,7,170},{4,6,170},{2,6,178},{5,6,890},{4,6,890},{2,6,178},{0,5,882},{4,6,890},{0,5,882},{3,0,980},{3,0,980},{3,0,980},{3,0,980},{3,5,205},{3,5,205},{3,5,205},{3,4,205},{1,5,10},{1,5,10},{4,7,2936},{4,7,1676},{4,6,1678},{3,6,1361},{3,7,3951},{3,7,1367},{3,6,129},{3,5,1335},{1,7,3496},{1,5,1116},{4,7,1336}, +{4,7,76},{4,6,78},{4,6,254},{5,5,3048},{2,7,1124},{3,6,93},{2,5,1026},{3,6,3048},{2,5,1026},{3,7,1557},{3,7,1557},{3,7,1557},{3,6,1336},{3,7,702},{3,6,104},{3,6,104},{3,5,494},{0,7,667},{1,5,275},{4,7,40},{4,7,40},{4,7,40},{4,5,53},{7,0,650},{3,6,68},{3,6,68},{2,5,185},{3,5,650},{2,5,185},{7,3,890},{4,7,72},{4,6,74}, +{3,6,29},{7,3,890},{6,5,890},{3,6,29},{0,5,1010},{6,5,890},{0,5,1010},{3,0,1332},{3,0,1332},{3,0,1332},{3,0,1332},{3,5,61},{3,5,61},{3,5,61},{3,4,61},{1,5,106},{1,5,106},{4,7,3116},{4,7,1316},{4,6,1354},{4,6,1134},{4,7,4084},{3,7,1133},{3,6,75},{3,5,1533},{1,7,3676},{1,6,1470},{5,7,1429},{4,7,355},{4,6,393},{4,6,173},{6,4,3051}, +{3,7,1124},{3,6,66},{1,6,1469},{7,4,3051},{1,6,1469},{4,7,1091},{4,7,1091},{4,7,1091},{4,6,1118},{3,7,729},{3,6,59},{3,6,59},{3,5,89},{0,7,820},{2,5,90},{4,7,130},{4,7,130},{4,7,130},{4,6,157},{7,1,656},{3,6,50},{3,6,50},{2,5,41},{7,3,656},{2,5,41},{7,4,890},{4,7,234},{5,6,52},{4,6,52},{7,4,890},{7,5,890},{4,6,52}, +{0,5,1460},{7,5,890},{0,5,1460},{4,0,1082},{4,0,1082},{4,0,1082},{4,0,1082},{3,6,10},{3,6,10},{3,6,10},{3,5,73},{1,6,10},{1,6,10},{4,7,3820},{4,7,1540},{4,7,1495},{4,6,1038},{4,7,4084},{3,7,1469},{3,6,571},{3,6,1606},{2,7,3916},{1,6,1150},{5,7,1349},{5,7,425},{5,6,373},{5,6,469},{7,3,3048},{3,7,1348},{4,6,292},{1,6,1069},{6,5,3048}, +{1,6,1069},{4,7,1011},{4,7,1011},{4,7,1011},{4,6,894},{3,7,1161},{3,7,173},{3,7,173},{3,5,137},{1,7,659},{2,5,186},{5,6,229},{5,6,229},{5,6,229},{5,6,325},{6,3,650},{3,7,52},{3,7,52},{3,5,16},{6,4,650},{3,5,16},{6,6,890},{5,7,200},{5,6,148},{4,6,148},{6,6,890},{6,6,890},{4,6,148},{0,6,1044},{6,6,890},{0,6,1044},{4,0,890}, 
+{4,0,890},{4,0,890},{4,0,890},{3,7,137},{3,7,137},{3,7,137},{3,5,121},{1,6,106},{1,6,106},{5,7,4054},{4,7,2276},{4,7,1511},{4,7,1271},{4,7,4596},{4,7,1596},{3,7,577},{3,6,1414},{2,7,4204},{2,6,900},{5,7,1653},{5,7,377},{5,7,249},{5,6,453},{5,7,3048},{4,7,1371},{3,7,216},{2,6,884},{5,6,3048},{2,6,884},{4,7,1315},{4,7,1315},{4,7,1315}, +{4,6,1054},{4,7,875},{4,6,341},{4,6,341},{4,5,385},{1,7,835},{3,5,352},{5,7,53},{5,7,53},{5,7,53},{5,6,53},{7,2,650},{3,7,20},{3,7,20},{4,5,160},{5,5,650},{4,5,160},{7,5,890},{5,7,328},{5,7,200},{3,7,200},{7,5,890},{5,7,900},{3,7,200},{0,6,884},{5,7,900},{0,6,884},{4,0,954},{4,0,954},{4,0,954},{4,0,954},{4,6,241}, +{4,6,241},{4,6,241},{4,5,241},{2,6,16},{2,6,16},{5,7,4022},{5,7,2394},{5,7,1754},{4,7,1319},{5,7,4921},{4,7,1660},{4,7,175},{4,6,1353},{3,7,4380},{2,6,1060},{6,7,2021},{5,7,713},{5,7,73},{5,7,293},{6,6,3051},{4,7,1611},{4,7,126},{3,6,1005},{4,7,3051},{3,6,1005},{5,7,1718},{5,7,1718},{5,7,1718},{4,7,1283},{4,7,859},{4,7,139},{4,7,139}, +{4,5,465},{2,7,779},{2,6,276},{5,7,37},{5,7,37},{5,7,37},{5,6,37},{5,6,648},{4,7,90},{4,7,90},{3,6,221},{4,6,648},{3,6,221},{7,6,900},{6,7,452},{5,7,72},{4,7,45},{7,6,900},{7,6,884},{4,7,45},{0,6,980},{7,6,884},{0,6,980},{4,0,1274},{4,0,1274},{4,0,1274},{4,0,1274},{4,6,65},{4,6,65},{4,6,65},{4,5,65},{2,6,80}, +{2,6,80},{5,7,4265},{5,7,2373},{5,7,1349},{5,7,1173},{5,7,4606},{4,7,2065},{4,7,40},{4,6,1266},{3,7,4455},{3,6,1261},{6,7,1649},{6,7,1025},{5,7,325},{5,7,149},{7,5,2817},{5,7,1514},{4,7,36},{3,6,1197},{6,6,2841},{3,6,1197},{5,7,1349},{5,7,1349},{5,7,1349},{5,7,1173},{4,7,1300},{4,7,40},{4,7,40},{4,6,110},{2,7,1040},{3,6,105},{5,7,325}, +{5,7,325},{5,7,325},{5,7,149},{6,5,650},{4,7,36},{4,7,36},{3,6,41},{3,7,650},{3,6,41},{7,6,801},{6,7,449},{6,7,49},{4,7,36},{7,6,801},{6,7,761},{4,7,36},{0,6,1181},{6,7,761},{0,6,1181},{5,0,1124},{5,0,1124},{5,0,1124},{5,0,1124},{4,7,4},{4,7,4},{4,7,4},{4,6,85},{2,7,16},{2,7,16},{5,7,4345},{5,7,2453},{5,7,1429}, 
+{5,7,901},{5,7,4190},{5,7,1770},{4,7,360},{4,6,1266},{4,7,3861},{2,7,1041},{6,7,1281},{6,7,657},{6,7,257},{5,7,325},{6,7,2250},{5,7,1194},{5,7,170},{2,7,977},{5,7,2250},{2,7,977},{5,7,1429},{5,7,1429},{5,7,1429},{5,7,901},{5,7,1274},{4,7,360},{4,7,360},{4,6,110},{3,7,979},{3,6,153},{6,7,257},{6,7,257},{6,7,257},{5,7,325},{7,4,648}, +{5,7,170},{5,7,170},{4,6,10},{7,5,648},{4,6,10},{7,7,521},{6,7,401},{6,7,1},{5,7,1},{7,7,521},{6,7,521},{5,7,1},{0,7,961},{6,7,521},{0,7,961},{5,0,900},{5,0,900},{5,0,900},{5,0,900},{4,7,164},{4,7,164},{4,7,164},{4,6,101},{2,7,80},{2,7,80},{6,7,3669},{5,7,2917},{5,7,1893},{5,7,1013},{5,7,4158},{5,7,1386},{5,7,362}, +{4,7,1049},{4,7,3381},{3,7,555},{6,7,1169},{6,7,545},{6,7,145},{6,7,73},{7,6,1802},{5,7,1130},{5,7,106},{3,7,530},{7,6,1770},{3,7,530},{5,7,1893},{5,7,1893},{5,7,1893},{5,7,1013},{5,7,1242},{5,7,362},{5,7,362},{5,6,402},{3,7,1251},{4,6,339},{6,7,145},{6,7,145},{6,7,145},{6,7,73},{7,5,680},{5,7,106},{5,7,106},{5,6,146},{6,6,648}, +{5,6,146},{7,7,265},{7,7,193},{6,7,81},{6,7,9},{7,7,265},{7,7,305},{6,7,9},{0,7,529},{7,7,305},{0,7,529},{5,0,932},{5,0,932},{5,0,932},{5,0,932},{5,7,281},{5,7,281},{5,7,281},{5,6,281},{3,7,26},{3,7,26},{6,7,3077},{6,7,2453},{6,7,2053},{5,7,1509},{6,7,3438},{5,7,1386},{5,7,362},{5,7,650},{5,7,3195},{3,7,283},{7,7,1293}, +{6,7,689},{6,7,289},{6,7,25},{7,6,1386},{6,7,786},{5,7,298},{4,7,261},{6,7,1386},{4,7,261},{6,7,2053},{6,7,2053},{6,7,2053},{5,7,1509},{5,7,1594},{5,7,362},{5,7,362},{5,6,434},{4,7,1260},{3,7,283},{6,7,289},{6,7,289},{6,7,289},{6,7,25},{6,7,650},{5,7,298},{5,7,298},{4,7,261},{5,7,650},{4,7,261},{7,7,137},{7,7,65},{7,7,16}, +{6,7,25},{7,7,137},{7,7,113},{6,7,25},{0,7,225},{7,7,113},{0,7,225},{5,0,1220},{5,0,1220},{5,0,1220},{5,0,1220},{5,7,73},{5,7,73},{5,7,73},{5,6,73},{3,7,58},{3,7,58},{6,7,2870},{6,7,2246},{6,7,1846},{6,7,1366},{6,7,2889},{6,7,1785},{5,7,821},{5,7,137},{5,7,2700},{4,7,126},{7,7,771},{7,7,699},{7,7,650},{6,7,277},{7,7,1107}, 
+{6,7,696},{6,7,296},{4,7,45},{6,7,1080},{4,7,45},{6,7,1846},{6,7,1846},{6,7,1846},{6,7,1366},{6,7,1865},{5,7,821},{5,7,821},{5,7,137},{4,7,1611},{4,7,126},{7,7,650},{7,7,650},{7,7,650},{6,7,277},{7,6,648},{6,7,296},{6,7,296},{4,7,45},{7,6,680},{4,7,45},{7,7,146},{7,7,74},{7,7,25},{7,7,1},{7,7,146},{7,7,50},{7,7,1}, +{0,7,36},{7,7,50},{0,7,36},{6,0,1170},{6,0,1170},{6,0,1170},{6,0,1170},{5,7,145},{5,7,145},{5,7,145},{5,7,101},{4,7,90},{4,7,90},{6,7,2962},{6,7,2338},{6,7,1938},{6,7,1314},{6,7,2677},{6,7,1429},{6,7,1029},{5,7,85},{5,7,2536},{4,7,122},{7,7,531},{7,7,459},{7,7,410},{7,7,338},{7,7,771},{7,7,627},{6,7,404},{5,7,4},{7,7,827}, +{5,7,4},{6,7,1938},{6,7,1938},{6,7,1938},{6,7,1314},{6,7,1653},{6,7,1029},{6,7,1029},{5,7,85},{5,7,1512},{4,7,122},{7,7,410},{7,7,410},{7,7,410},{7,7,338},{7,7,650},{6,7,404},{6,7,404},{5,7,4},{6,7,596},{5,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,914}, +{6,0,914},{6,0,914},{6,0,914},{5,7,481},{5,7,481},{5,7,481},{5,7,85},{4,7,122},{4,7,122},{7,7,2924},{6,7,2338},{6,7,1938},{6,7,1314},{6,7,2373},{6,7,1125},{6,7,725},{5,7,325},{6,7,2132},{5,7,232},{7,7,323},{7,7,251},{7,7,202},{7,7,130},{7,7,467},{7,7,323},{7,7,274},{5,7,36},{7,7,459},{5,7,36},{6,7,1938},{6,7,1938},{6,7,1938}, +{6,7,1314},{6,7,1349},{6,7,725},{6,7,725},{5,7,325},{5,7,1256},{5,7,232},{7,7,202},{7,7,202},{7,7,202},{7,7,130},{7,7,346},{7,7,274},{7,7,274},{5,7,36},{7,7,410},{5,7,36},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,914},{6,0,914},{6,0,914},{6,0,914},{6,7,325}, +{6,7,325},{6,7,325},{5,7,325},{5,7,232},{5,7,232},{7,7,2092},{7,7,2020},{7,7,1971},{6,7,1570},{7,7,2140},{6,7,1077},{6,7,677},{6,7,85},{6,7,1588},{5,7,232},{7,7,243},{7,7,171},{7,7,122},{7,7,50},{7,7,291},{7,7,147},{7,7,98},{6,7,4},{7,7,219},{6,7,4},{7,7,1971},{7,7,1971},{7,7,1971},{6,7,1570},{6,7,1301},{6,7,677},{6,7,677}, 
+{6,7,85},{6,7,1188},{5,7,232},{7,7,122},{7,7,122},{7,7,122},{7,7,50},{7,7,170},{7,7,98},{7,7,98},{6,7,4},{7,7,170},{6,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,1170},{6,0,1170},{6,0,1170},{6,0,1170},{6,7,277},{6,7,277},{6,7,277},{6,7,85},{5,7,232}, +{5,7,232},{0,4,1618},{0,3,436},{0,2,74},{0,2,866},{0,2,3411},{0,2,2531},{0,1,1251},{0,1,2531},{0,1,3772},{0,1,2892},{0,4,1618},{0,3,436},{0,2,74},{0,2,866},{0,2,3411},{0,2,2531},{0,1,1251},{0,1,2531},{2,0,3376},{0,1,2531},{0,1,25},{0,1,25},{0,1,25},{0,1,49},{0,0,360},{0,0,232},{0,0,232},{0,0,149},{0,0,332},{0,0,185},{0,1,25}, +{0,1,25},{0,1,25},{0,1,49},{0,0,360},{0,0,232},{0,0,232},{0,0,149},{0,0,296},{0,0,149},{1,2,1570},{0,3,436},{0,2,74},{0,2,866},{1,2,1570},{0,2,1570},{0,2,866},{0,1,1570},{0,2,1570},{0,1,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,4,1586},{0,3,340},{0,2,10}, +{0,2,450},{0,3,4016},{0,2,2627},{0,2,1411},{0,1,2691},{0,1,4572},{0,1,3052},{0,4,1586},{0,3,340},{0,2,10},{0,2,450},{1,1,3968},{0,2,2627},{0,2,1411},{0,1,2691},{2,0,4016},{0,1,2691},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,530},{0,1,362},{0,1,362},{0,0,325},{0,0,620},{0,0,361},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,530}, +{0,1,362},{0,1,362},{0,0,325},{0,0,584},{0,0,325},{2,1,1570},{0,3,340},{0,2,10},{0,2,450},{2,1,1570},{4,0,1570},{0,2,450},{0,1,1730},{4,0,1570},{0,1,1730},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,5,1576},{0,4,290},{0,2,202},{0,2,290},{0,3,4656},{0,2,2979},{0,2,1251}, +{0,1,3107},{0,1,5628},{0,1,3468},{0,5,1576},{0,4,290},{0,2,202},{0,2,290},{0,3,4656},{0,2,2979},{0,2,1251},{0,1,3107},{1,1,4656},{0,1,3107},{0,3,16},{0,3,16},{0,3,16},{0,1,81},{0,1,802},{0,1,442},{0,1,442},{0,0,629},{0,0,1036},{0,0,665},{0,3,16},{0,3,16},{0,3,16},{0,1,81},{0,1,802},{0,1,442},{0,1,442},{0,0,629},{1,0,818}, 
+{0,0,629},{3,0,1576},{0,4,290},{0,2,202},{0,2,290},{3,0,1576},{3,1,1576},{0,2,290},{0,1,2146},{3,1,1576},{0,1,2146},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,5,1640},{0,4,162},{0,3,241},{0,2,386},{0,4,5539},{0,3,3512},{0,2,1347},{0,1,3779},{0,2,6396},{0,1,4140},{0,5,1640}, +{0,4,162},{0,3,241},{0,2,386},{1,2,5435},{0,3,3512},{0,2,1347},{0,1,3779},{0,2,5435},{0,1,3779},{0,3,16},{0,3,16},{0,3,16},{0,2,25},{0,1,1202},{0,1,650},{0,1,650},{0,1,970},{0,0,1580},{0,0,1097},{0,3,16},{0,3,16},{0,3,16},{0,2,25},{1,0,1170},{0,1,650},{0,1,650},{0,1,970},{1,0,1154},{0,1,970},{2,2,1576},{0,4,162},{0,3,241}, +{0,2,386},{2,2,1576},{5,0,1576},{0,2,386},{0,2,1730},{5,0,1576},{0,2,1730},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,6,1667},{0,5,259},{0,3,286},{0,3,406},{0,4,5440},{0,3,3035},{0,2,1284},{0,2,3504},{0,2,6513},{0,2,4465},{0,6,1667},{0,5,259},{0,3,286},{0,3,406},{2,1,5424}, +{0,3,3035},{0,2,1284},{0,2,3504},{4,0,5424},{0,2,3504},{0,4,81},{0,4,81},{0,4,81},{0,2,97},{0,2,1160},{0,2,500},{0,2,500},{0,1,520},{0,1,1521},{0,1,881},{0,4,81},{0,4,81},{0,4,81},{0,2,97},{0,2,1160},{0,2,500},{0,2,500},{0,1,520},{0,1,1160},{0,1,520},{2,3,1576},{0,5,178},{1,3,100},{0,3,325},{2,3,1576},{6,0,1576},{0,3,325}, +{0,2,1568},{6,0,1576},{0,2,1568},{0,0,81},{0,0,81},{0,0,81},{0,0,81},{0,0,9},{0,0,9},{0,0,9},{0,0,1},{0,0,37},{0,0,37},{0,7,1865},{0,5,339},{0,3,734},{0,3,374},{0,5,5435},{0,3,3019},{0,3,814},{0,2,2992},{0,2,7025},{0,2,3953},{1,5,1865},{0,5,339},{1,3,293},{0,3,374},{3,0,5435},{0,3,3019},{0,3,814},{0,2,2992},{3,1,5435}, +{0,2,2992},{0,5,314},{0,5,314},{0,5,314},{0,3,370},{0,3,1170},{0,2,340},{0,2,340},{0,1,392},{0,1,1713},{0,1,753},{1,3,289},{1,3,289},{1,3,289},{1,2,289},{1,1,1154},{0,2,340},{0,2,340},{0,1,392},{1,1,1170},{0,1,392},{4,0,1576},{0,5,50},{1,3,4},{0,3,85},{4,0,1576},{5,1,1576},{0,3,85},{0,2,1696},{5,1,1576},{0,2,1696},{0,0,289}, 
+{0,0,289},{0,0,289},{0,0,289},{0,1,4},{0,1,4},{0,1,4},{0,0,49},{0,0,85},{0,0,85},{0,7,2265},{0,6,787},{1,3,1401},{0,3,726},{0,5,5515},{0,4,2664},{0,3,462},{0,2,2864},{0,3,7363},{0,2,3825},{1,6,1667},{1,5,405},{1,3,245},{1,3,377},{2,2,5427},{0,4,2664},{0,3,462},{0,2,2864},{5,0,5427},{0,2,2864},{0,5,634},{0,5,634},{0,5,634}, +{0,3,626},{0,3,1202},{0,3,362},{0,3,362},{0,1,520},{0,2,2145},{0,1,881},{1,4,106},{1,4,106},{1,4,106},{1,2,145},{2,0,1154},{0,3,362},{0,3,362},{0,1,520},{3,0,1154},{0,1,520},{2,4,1576},{0,6,162},{1,3,164},{0,3,101},{2,4,1576},{7,0,1576},{0,3,101},{0,2,2080},{7,0,1576},{0,2,2080},{0,0,625},{0,0,625},{0,0,625},{0,0,625},{0,1,36}, +{0,1,36},{0,1,36},{0,1,36},{0,0,261},{0,0,261},{1,6,2775},{0,6,1091},{1,4,1422},{0,3,1462},{0,6,5427},{0,4,2536},{0,3,494},{0,2,3120},{0,3,7635},{0,2,4081},{1,6,1619},{1,5,165},{1,4,266},{1,3,361},{3,1,5427},{0,4,2536},{0,3,494},{0,2,3120},{4,1,5427},{0,2,3120},{0,6,1090},{0,6,1090},{0,6,1090},{0,3,1138},{0,4,1152},{0,3,170},{0,3,170}, +{0,2,416},{0,2,2145},{0,1,1265},{1,4,10},{1,4,10},{1,4,10},{1,3,37},{0,4,1152},{0,3,170},{0,3,170},{0,2,416},{2,1,1152},{0,2,416},{3,3,1570},{0,6,2},{1,4,265},{1,3,360},{3,3,1570},{6,1,1570},{1,3,360},{0,3,1768},{6,1,1570},{0,3,1768},{0,0,1089},{0,0,1089},{0,0,1089},{0,0,1089},{0,2,4},{0,2,4},{0,2,4},{0,1,4},{0,1,365}, +{0,1,365},{1,7,2796},{1,6,1432},{1,4,1413},{1,4,1593},{0,7,5435},{0,5,2360},{0,4,299},{0,3,2594},{0,3,8400},{0,3,4530},{1,7,1640},{1,6,276},{1,4,257},{1,4,437},{4,0,5427},{0,5,2360},{0,4,299},{0,3,2594},{3,2,5427},{0,3,2594},{1,5,1221},{1,5,1221},{1,5,1221},{1,3,1229},{0,5,1184},{0,4,250},{0,4,250},{0,2,146},{0,2,2451},{0,2,1107},{1,5,65}, +{1,5,65},{1,5,65},{1,3,73},{1,3,1154},{0,4,250},{0,4,250},{0,2,146},{1,2,1154},{0,2,146},{4,2,1576},{0,6,128},{2,4,130},{0,4,74},{4,2,1576},{5,2,1576},{0,4,74},{0,3,1570},{5,2,1576},{0,3,1570},{1,0,1220},{1,0,1220},{1,0,1220},{1,0,1220},{0,3,4},{0,3,4},{0,3,4},{0,1,121},{0,1,482},{0,1,482},{1,7,3180},{1,6,1464},{1,4,1813}, 
+{1,4,1513},{0,7,5515},{0,5,2168},{0,4,59},{0,3,2242},{0,4,8764},{0,3,4178},{2,6,1894},{1,6,308},{2,4,326},{1,4,357},{2,4,5427},{0,5,2168},{0,4,59},{0,3,2242},{7,0,5427},{0,3,2242},{1,6,1448},{1,6,1448},{1,6,1448},{1,4,1512},{0,5,1184},{0,4,58},{0,4,58},{0,2,178},{0,2,2995},{0,2,1139},{1,6,292},{1,6,292},{1,6,292},{2,3,325},{2,2,1152}, +{0,4,58},{0,4,58},{0,2,178},{5,0,1152},{0,2,178},{5,1,1570},{0,7,34},{2,4,2},{0,4,10},{5,1,1570},{4,3,1570},{0,4,10},{0,3,1666},{4,3,1570},{0,3,1666},{1,0,1412},{1,0,1412},{1,0,1412},{1,0,1412},{0,3,36},{0,3,36},{0,3,36},{0,2,9},{0,1,722},{0,1,722},{1,7,3816},{1,6,1748},{1,5,2450},{1,4,1685},{0,7,5983},{0,6,2180},{0,4,207}, +{0,3,2278},{0,4,9004},{0,3,4038},{2,7,1700},{2,6,462},{2,4,230},{2,4,406},{3,3,5420},{0,6,2176},{0,4,203},{0,3,2274},{6,1,5420},{0,3,2274},{1,6,1604},{1,6,1604},{1,6,1604},{1,4,1604},{0,6,1156},{0,4,126},{0,4,126},{0,3,429},{0,3,3044},{0,2,1307},{2,5,136},{2,5,136},{2,5,136},{2,3,149},{3,1,1152},{0,4,122},{0,4,122},{0,3,425},{4,1,1152}, +{0,3,425},{6,0,1570},{0,7,130},{2,4,130},{1,4,85},{6,0,1570},{3,4,1570},{1,4,85},{0,3,2018},{3,4,1570},{0,3,2018},{1,0,1600},{1,0,1600},{1,0,1600},{1,0,1600},{0,4,5},{0,4,5},{0,4,5},{0,2,29},{0,2,866},{0,2,866},{1,7,4520},{1,7,1608},{1,5,2418},{1,4,1925},{1,7,5996},{0,6,1956},{0,5,409},{0,4,2751},{0,5,9020},{0,3,3846},{2,7,1604}, +{2,6,174},{2,5,297},{2,4,342},{4,2,5420},{0,6,1856},{0,5,309},{0,4,2651},{5,2,5420},{0,4,2651},{1,7,1604},{1,7,1604},{1,7,1604},{1,4,1636},{0,7,1302},{0,5,120},{0,5,120},{0,3,189},{0,3,2820},{0,3,1245},{2,5,8},{2,5,8},{2,5,8},{2,4,53},{4,0,1154},{0,5,20},{0,5,20},{0,3,89},{3,2,1154},{0,3,89},{4,4,1568},{1,7,8},{2,5,293}, 
+{0,5,293},{4,4,1568},{7,2,1568},{0,5,293},{0,4,1810},{7,2,1568},{0,4,1810},{1,0,1600},{1,0,1600},{1,0,1600},{1,0,1600},{0,5,116},{0,5,116},{0,5,116},{0,3,164},{0,2,610},{0,2,610},{2,7,4356},{1,7,2004},{2,5,2635},{1,5,2006},{1,7,5924},{0,6,2316},{0,5,499},{0,4,2337},{0,5,8300},{0,4,3420},{2,7,1955},{2,7,299},{2,5,234},{2,5,474},{5,1,5420}, +{0,6,1955},{0,5,138},{0,4,1976},{4,3,5420},{0,4,1976},{1,7,1955},{1,7,1955},{1,7,1955},{1,5,1942},{1,6,1427},{0,5,435},{0,5,435},{0,3,378},{0,4,2628},{0,3,642},{2,6,53},{2,6,53},{2,6,53},{2,4,53},{2,4,1152},{0,5,74},{0,5,74},{0,3,17},{7,0,1152},{0,3,17},{5,3,1570},{1,7,98},{3,5,164},{1,5,100},{5,3,1570},{6,3,1570},{1,5,100}, +{0,4,1576},{6,3,1570},{0,4,1576},{1,0,1906},{1,0,1906},{1,0,1906},{1,0,1906},{1,4,234},{1,4,234},{1,4,234},{1,2,325},{0,3,626},{0,3,626},{2,7,4356},{2,7,1964},{2,5,2267},{2,5,2027},{1,7,6404},{1,6,2220},{1,5,117},{1,4,2314},{0,5,8204},{0,4,2684},{3,7,1929},{2,7,283},{3,5,365},{2,5,346},{6,0,5420},{0,7,1712},{1,5,68},{0,4,1784},{3,4,5420}, +{0,4,1784},{2,6,1942},{2,6,1942},{2,6,1942},{2,4,2006},{1,6,1219},{1,5,117},{1,5,117},{1,3,209},{0,4,2340},{0,3,514},{2,6,261},{2,6,261},{2,6,261},{2,4,325},{3,3,1154},{0,6,50},{0,6,50},{1,3,160},{6,1,1154},{1,3,160},{6,2,1568},{2,7,58},{3,5,4},{1,5,4},{6,2,1568},{5,4,1568},{1,5,4},{0,4,1640},{5,4,1568},{0,4,1640},{2,0,1906}, +{2,0,1906},{2,0,1906},{2,0,1906},{1,4,74},{1,4,74},{1,4,74},{1,3,65},{0,3,370},{0,3,370},{2,7,4868},{2,7,1740},{2,5,2411},{2,5,1691},{2,7,6548},{1,7,2244},{1,5,165},{1,4,2250},{0,6,7668},{0,4,2460},{3,7,1817},{3,7,525},{3,5,221},{3,5,441},{4,4,5419},{0,7,1712},{1,5,164},{0,4,1976},{7,2,5419},{0,4,1976},{2,7,1619},{2,7,1619},{2,7,1619}, 
+{2,5,1627},{1,7,1155},{1,5,101},{1,5,101},{1,3,417},{0,5,2379},{0,4,696},{3,5,157},{3,5,157},{3,5,157},{3,4,157},{4,2,1154},{0,6,82},{0,6,82},{0,4,212},{5,2,1154},{0,4,212},{7,1,1568},{2,7,122},{3,5,100},{2,5,73},{7,1,1568},{4,5,1568},{2,5,73},{0,4,1960},{4,5,1568},{0,4,1960},{2,0,1618},{2,0,1618},{2,0,1618},{2,0,1618},{1,5,1}, +{1,5,1},{1,5,1},{1,3,17},{0,3,370},{0,3,370},{3,7,5570},{2,7,2028},{2,6,2394},{2,5,1867},{2,7,6452},{1,7,1956},{1,6,439},{1,4,2698},{0,6,7348},{0,4,2748},{3,7,2089},{3,7,189},{3,6,334},{3,5,329},{5,3,5419},{1,7,1875},{1,6,358},{0,5,2145},{6,3,5419},{0,5,2145},{2,7,1667},{2,7,1667},{2,7,1667},{2,5,1611},{1,7,1331},{1,6,115},{1,6,115}, +{1,4,198},{0,5,1979},{0,4,248},{3,6,10},{3,6,10},{3,6,10},{3,5,73},{5,1,1160},{1,6,34},{1,6,34},{0,4,52},{7,1,1160},{0,4,52},{5,5,1570},{3,7,180},{4,5,320},{2,5,281},{5,5,1570},{3,6,1570},{2,5,281},{0,5,1856},{3,6,1570},{0,5,1856},{2,0,1586},{2,0,1586},{2,0,1586},{2,0,1586},{1,6,106},{1,6,106},{1,6,106},{1,4,162},{0,4,212}, +{0,4,212},{3,7,5354},{3,7,2770},{3,6,2717},{2,6,1986},{2,7,6956},{1,7,2244},{1,6,457},{1,5,2351},{0,7,7268},{0,5,1974},{4,7,2384},{3,7,270},{3,6,217},{3,6,517},{6,2,5419},{1,7,1920},{1,6,133},{0,5,1650},{5,4,5419},{0,5,1650},{2,7,2180},{2,7,2180},{2,7,2180},{2,6,1905},{2,7,1480},{1,6,376},{1,6,376},{1,4,333},{0,5,1988},{0,4,203},{3,7,45}, +{3,7,45},{3,7,45},{3,5,37},{6,0,1154},{1,6,52},{1,6,52},{1,4,9},{3,4,1154},{1,4,9},{6,4,1568},{3,7,234},{3,6,181},{1,6,117},{6,4,1568},{7,4,1568},{1,6,117},{0,5,1586},{7,4,1568},{0,5,1586},{2,0,1856},{2,0,1856},{2,0,1856},{2,0,1856},{2,5,272},{2,5,272},{2,5,272},{1,4,324},{0,4,194},{0,4,194},{3,7,5706},{3,7,2514},{3,6,2285}, 
+{3,6,2105},{3,7,7359},{2,7,2244},{2,6,147},{2,5,2358},{0,7,6820},{0,5,1718},{4,7,2256},{3,7,750},{4,6,410},{3,6,341},{7,1,5419},{2,7,2180},{2,6,83},{0,5,1618},{4,5,5419},{0,5,1618},{3,7,1985},{3,7,1985},{3,7,1985},{3,5,2041},{2,7,1224},{2,6,146},{2,6,146},{2,4,210},{0,6,1644},{1,4,509},{3,7,221},{3,7,221},{3,7,221},{3,5,277},{5,2,1160}, +{1,7,64},{1,7,64},{2,4,146},{7,2,1160},{2,4,146},{7,3,1570},{4,7,356},{4,6,10},{2,6,2},{7,3,1570},{6,5,1570},{2,6,2},{0,5,1618},{6,5,1570},{0,5,1618},{3,0,1960},{3,0,1960},{3,0,1960},{3,0,1960},{2,5,80},{2,5,80},{2,5,80},{2,4,89},{0,5,100},{0,5,100},{3,7,6570},{3,7,2770},{3,6,2365},{3,6,1705},{3,7,7311},{2,7,2276},{2,6,131}, +{2,5,2230},{0,7,6884},{0,5,1974},{4,7,2512},{4,7,532},{4,6,218},{4,6,482},{5,5,5424},{2,7,2276},{2,6,131},{1,5,1931},{3,6,5424},{1,5,1931},{3,7,1809},{3,7,1809},{3,7,1809},{3,6,1656},{2,7,1352},{2,6,82},{2,6,82},{2,4,370},{0,6,1548},{0,5,293},{4,6,169},{4,6,169},{4,6,169},{4,5,169},{6,1,1160},{1,7,64},{1,7,64},{1,5,250},{6,3,1160}, +{1,5,250},{5,7,1570},{4,7,388},{4,6,74},{3,6,65},{5,7,1570},{5,6,1570},{3,6,65},{0,5,1906},{5,6,1570},{0,5,1906},{3,0,1640},{3,0,1640},{3,0,1640},{3,0,1640},{2,6,1},{2,6,1},{2,6,1},{2,4,9},{0,5,68},{0,5,68},{4,7,6752},{3,7,3538},{3,7,2378},{3,6,1817},{3,7,7775},{2,7,2820},{2,7,477},{2,5,2614},{1,7,7360},{0,6,1978},{5,7,3110}, +{4,7,692},{4,7,377},{4,6,322},{6,4,5424},{3,7,2539},{2,7,413},{0,6,1942},{7,4,5424},{0,6,1942},{3,7,2017},{3,7,2017},{3,7,2017},{3,6,1592},{3,7,1846},{2,7,116},{2,7,116},{2,5,213},{0,7,1531},{1,5,283},{4,7,16},{4,7,16},{4,7,16},{4,6,97},{7,0,1154},{2,7,52},{2,7,52},{1,5,58},{3,5,1154},{1,5,58},{7,4,1576},{5,7,610},{5,6,306}, 
+{3,6,241},{7,4,1576},{7,5,1576},{3,6,241},{0,6,1906},{7,5,1576},{0,6,1906},{3,0,1576},{3,0,1576},{3,0,1576},{3,0,1576},{2,7,100},{2,7,100},{2,7,100},{2,5,164},{0,6,72},{0,6,72},{4,7,6932},{4,7,3932},{4,7,2807},{3,7,1974},{4,7,8428},{3,7,2981},{2,7,423},{2,6,2373},{1,7,7540},{0,6,1618},{5,7,2921},{4,7,1331},{4,7,206},{4,7,566},{7,3,5424}, +{3,7,2692},{2,7,134},{0,6,1609},{6,5,5424},{0,6,1609},{4,7,2707},{4,7,2707},{4,7,2707},{3,7,1874},{3,7,1513},{2,7,323},{2,7,323},{2,5,294},{0,7,1324},{1,5,184},{4,7,106},{4,7,106},{4,7,106},{4,6,25},{7,1,1160},{2,7,34},{2,7,34},{2,5,5},{7,3,1160},{2,5,5},{7,5,1570},{5,7,628},{4,7,181},{2,7,125},{7,5,1570},{6,6,1586},{2,7,125}, +{0,6,1600},{6,6,1586},{0,6,1600},{3,0,1810},{3,0,1810},{3,0,1810},{3,0,1810},{2,7,298},{2,7,298},{2,7,298},{2,5,290},{0,6,18},{0,6,18},{4,7,7636},{4,7,4156},{4,7,2311},{4,7,2191},{4,7,8428},{3,7,3317},{3,7,185},{3,6,2410},{2,7,8180},{1,6,1722},{5,7,3161},{5,7,1357},{5,7,461},{4,7,342},{5,7,5424},{4,7,2979},{3,7,104},{1,6,1601},{5,6,5424}, +{1,6,1601},{4,7,2307},{4,7,2307},{4,7,2307},{4,6,2082},{3,7,1625},{3,7,181},{3,7,181},{3,5,217},{0,7,1548},{1,5,504},{5,7,457},{5,7,457},{5,7,457},{4,6,233},{6,3,1154},{3,7,100},{3,7,100},{3,5,136},{6,4,1154},{3,5,136},{6,7,1576},{5,7,916},{5,7,20},{3,7,4},{6,7,1576},{7,6,1576},{3,7,4},{0,6,1600},{7,6,1576},{0,6,1600},{4,0,2018}, +{4,0,2018},{4,0,2018},{4,0,2018},{3,6,90},{3,6,90},{3,6,90},{3,5,117},{0,7,104},{0,7,104},{5,7,7862},{4,7,4316},{4,7,2291},{4,7,1691},{4,7,8004},{3,7,3433},{3,7,69},{3,6,1774},{2,7,7580},{1,6,1470},{5,7,3101},{5,7,1209},{5,7,185},{4,7,466},{7,4,4803},{4,7,2579},{3,7,68},{2,6,1448},{7,5,4803},{2,6,1448},{4,7,2291},{4,7,2291},{4,7,2291}, 
+{4,7,1691},{3,7,2121},{3,7,69},{3,7,69},{3,5,329},{1,7,1539},{1,6,314},{5,7,185},{5,7,185},{5,7,185},{5,6,185},{7,2,1154},{3,7,68},{3,7,68},{2,6,292},{5,5,1154},{2,6,292},{7,6,1252},{6,7,724},{5,7,16},{4,7,25},{7,6,1252},{7,6,1268},{4,7,25},{0,6,1412},{7,6,1268},{0,6,1412},{4,0,1666},{4,0,1666},{4,0,1666},{4,0,1666},{3,7,5}, +{3,7,5},{3,7,5},{3,5,5},{0,7,40},{0,7,40},{5,7,6806},{4,7,4684},{4,7,2659},{4,7,1579},{4,7,7668},{4,7,2988},{3,7,341},{3,6,1390},{2,7,7084},{1,6,1470},{6,7,2645},{5,7,1193},{5,7,169},{5,7,125},{6,6,4059},{4,7,2259},{4,7,234},{2,6,1224},{4,7,4059},{2,6,1224},{4,7,2659},{4,7,2659},{4,7,2659},{4,7,1579},{4,7,2043},{3,7,341},{3,7,341}, +{3,6,234},{1,7,1779},{1,6,314},{5,7,169},{5,7,169},{5,7,169},{5,7,125},{5,6,1152},{4,7,234},{4,7,234},{2,6,68},{4,6,1152},{2,6,68},{7,6,900},{6,7,500},{6,7,100},{4,7,9},{7,6,900},{6,7,884},{4,7,9},{0,6,1220},{6,7,884},{0,6,1220},{4,0,1570},{4,0,1570},{4,0,1570},{4,0,1570},{3,7,85},{3,7,85},{3,7,85},{3,5,149},{1,7,98}, +{1,7,98},{5,7,6077},{5,7,4185},{5,7,3161},{4,7,1912},{5,7,6790},{4,7,2529},{4,7,504},{3,6,1417},{3,7,6199},{1,7,1097},{6,7,1925},{6,7,1301},{5,7,457},{5,7,17},{7,5,3321},{5,7,1754},{4,7,180},{1,7,1093},{6,6,3345},{1,7,1093},{5,7,3161},{5,7,3161},{5,7,3161},{4,7,1912},{4,7,2124},{4,7,504},{4,7,504},{3,6,261},{2,7,1944},{2,6,171},{5,7,457}, +{5,7,457},{5,7,457},{5,7,17},{6,5,1154},{4,7,180},{4,7,180},{3,6,5},{3,7,1154},{3,6,5},{7,7,605},{6,7,401},{6,7,1},{5,7,1},{7,7,605},{6,7,569},{5,7,1},{0,7,1089},{6,7,569},{0,7,1089},{4,0,1768},{4,0,1768},{4,0,1768},{4,0,1768},{4,7,360},{4,7,360},{4,7,360},{3,6,260},{1,7,8},{1,7,8},{5,7,5837},{5,7,3945},{5,7,2921}, +{5,7,2129},{5,7,6054},{4,7,2529},{4,7,504},{4,6,1386},{3,7,5767},{2,7,773},{6,7,1557},{6,7,933},{6,7,533},{5,7,193},{6,7,2754},{5,7,1434},{4,7,404},{2,7,629},{5,7,2754},{2,7,629},{5,7,2921},{5,7,2921},{5,7,2921},{5,7,2129},{4,7,2604},{4,7,504},{4,7,504},{4,6,230},{2,7,2264},{2,6,443},{6,7,533},{6,7,533},{6,7,533},{5,7,193},{7,4,1152}, 
+{4,7,404},{4,7,404},{3,6,117},{7,5,1152},{3,6,117},{7,7,317},{7,7,245},{6,7,49},{6,7,25},{7,7,317},{7,7,373},{6,7,25},{0,7,625},{7,7,373},{0,7,625},{5,0,2080},{5,0,2080},{5,0,2080},{5,0,2080},{4,7,104},{4,7,104},{4,7,104},{4,6,149},{2,7,148},{2,7,148},{5,7,5981},{5,7,4089},{5,7,3065},{5,7,1921},{5,7,5702},{5,7,2666},{4,7,888}, +{4,7,693},{4,7,5325},{2,7,341},{6,7,1445},{6,7,821},{6,7,421},{6,7,205},{7,6,2306},{5,7,1370},{5,7,346},{2,7,325},{7,6,2274},{2,7,325},{5,7,3065},{5,7,3065},{5,7,3065},{5,7,1921},{5,7,2786},{4,7,888},{4,7,888},{4,6,294},{3,7,2355},{2,7,341},{6,7,421},{6,7,421},{6,7,421},{6,7,205},{7,5,1184},{5,7,346},{5,7,346},{4,6,290},{6,6,1152}, +{4,6,290},{7,7,157},{7,7,85},{7,7,36},{6,7,9},{7,7,157},{7,7,149},{6,7,9},{0,7,289},{7,7,149},{0,7,289},{5,0,1696},{5,0,1696},{5,0,1696},{5,0,1696},{4,7,104},{4,7,104},{4,7,104},{4,6,5},{2,7,52},{2,7,52},{6,7,5433},{5,7,4617},{5,7,3593},{5,7,2097},{5,7,5734},{5,7,2346},{5,7,1322},{4,7,261},{4,7,4909},{2,7,293},{6,7,1589}, +{6,7,965},{6,7,565},{6,7,157},{7,6,1890},{6,7,1146},{5,7,538},{3,7,82},{6,7,1890},{3,7,82},{5,7,3593},{5,7,3593},{5,7,3593},{5,7,2097},{5,7,2818},{5,7,1322},{5,7,1322},{4,7,261},{3,7,2691},{2,7,293},{6,7,565},{6,7,565},{6,7,565},{6,7,157},{6,7,1154},{5,7,538},{5,7,538},{3,7,82},{5,7,1154},{3,7,82},{7,7,125},{7,7,53},{7,7,4}, +{7,7,4},{7,7,125},{7,7,53},{7,7,4},{0,7,81},{7,7,53},{0,7,81},{5,0,1568},{5,0,1568},{5,0,1568},{5,0,1568},{4,7,360},{4,7,360},{4,7,360},{4,6,117},{2,7,212},{2,7,212},{6,7,4866},{6,7,4242},{6,7,3842},{5,7,2754},{6,7,5113},{5,7,2445},{5,7,1421},{4,7,234},{5,7,4804},{3,7,164},{7,7,1203},{7,7,1131},{6,7,1033},{6,7,409},{7,7,1611}, +{6,7,1056},{6,7,656},{4,7,9},{6,7,1584},{4,7,9},{6,7,3842},{6,7,3842},{6,7,3842},{5,7,2754},{5,7,3313},{5,7,1421},{5,7,1421},{4,7,234},{4,7,2875},{3,7,164},{6,7,1033},{6,7,1033},{6,7,1033},{6,7,409},{7,6,1152},{6,7,656},{6,7,656},{4,7,9},{7,6,1184},{4,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49}, 
+{0,7,0},{7,7,98},{0,7,0},{5,0,1730},{5,0,1730},{5,0,1730},{5,0,1730},{5,7,397},{5,7,397},{5,7,397},{4,7,234},{3,7,164},{3,7,164},{6,7,4194},{6,7,3570},{6,7,3170},{6,7,2546},{6,7,4137},{5,7,2365},{5,7,1341},{5,7,185},{5,7,3876},{3,7,324},{7,7,771},{7,7,699},{7,7,650},{6,7,521},{7,7,1083},{6,7,864},{6,7,464},{4,7,25},{7,7,1187}, +{4,7,25},{6,7,3170},{6,7,3170},{6,7,3170},{6,7,2546},{6,7,3113},{5,7,1341},{5,7,1341},{5,7,185},{4,7,2491},{3,7,324},{7,7,650},{7,7,650},{7,7,650},{6,7,521},{7,6,832},{6,7,464},{6,7,464},{4,7,25},{6,7,800},{4,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,2146}, +{5,0,2146},{5,0,2146},{5,0,2146},{5,7,317},{5,7,317},{5,7,317},{5,7,185},{3,7,324},{3,7,324},{6,7,3778},{6,7,3154},{6,7,2754},{6,7,2130},{6,7,3417},{6,7,2169},{5,7,1517},{5,7,9},{5,7,3204},{4,7,338},{7,7,467},{7,7,395},{7,7,346},{7,7,274},{7,7,683},{7,7,539},{6,7,400},{5,7,0},{7,7,723},{5,7,0},{6,7,2754},{6,7,2754},{6,7,2754}, +{6,7,2130},{6,7,2393},{5,7,1517},{5,7,1517},{5,7,9},{5,7,2180},{4,7,338},{7,7,346},{7,7,346},{7,7,346},{7,7,274},{7,7,562},{6,7,400},{6,7,400},{5,7,0},{6,7,544},{5,7,0},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,1730},{6,0,1730},{6,0,1730},{6,0,1730},{5,7,493}, +{5,7,493},{5,7,493},{5,7,9},{4,7,338},{4,7,338},{6,7,3618},{6,7,2994},{6,7,2594},{6,7,1970},{6,7,2953},{6,7,1705},{6,7,1305},{5,7,89},{5,7,2788},{4,7,466},{7,7,291},{7,7,219},{7,7,170},{7,7,98},{7,7,411},{7,7,267},{7,7,218},{6,7,16},{7,7,387},{6,7,16},{6,7,2594},{6,7,2594},{6,7,2594},{6,7,1970},{6,7,1929},{6,7,1305},{6,7,1305}, +{5,7,89},{5,7,1764},{4,7,466},{7,7,170},{7,7,170},{7,7,170},{7,7,98},{7,7,290},{7,7,218},{7,7,218},{6,7,16},{7,7,338},{6,7,16},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,1570},{6,0,1570},{6,0,1570},{6,0,1570},{6,7,905},{6,7,905},{6,7,905},{5,7,89},{4,7,466}, 
+{4,7,466},{0,5,2665},{0,4,697},{0,3,290},{0,2,841},{0,3,5901},{0,2,4170},{0,2,1802},{0,1,4310},{0,1,6951},{0,1,4671},{0,5,2665},{0,4,697},{0,3,290},{0,2,841},{2,0,5893},{0,2,4170},{0,2,1802},{0,1,4310},{3,0,5893},{0,1,4310},{0,2,4},{0,2,4},{0,2,4},{0,1,4},{0,1,557},{0,1,365},{0,1,365},{0,0,356},{0,0,665},{0,0,392},{0,2,4}, +{0,2,4},{0,2,4},{0,1,4},{0,1,557},{0,1,365},{0,1,365},{0,0,356},{0,0,629},{0,0,356},{3,0,2665},{0,4,697},{0,3,290},{0,2,841},{3,0,2665},{3,1,2665},{0,2,841},{0,2,3145},{3,1,2665},{0,2,3145},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,6,2705},{0,4,617},{0,3,34}, +{0,2,985},{0,4,6754},{0,3,4625},{0,2,1946},{0,1,5030},{0,2,7635},{0,1,5391},{0,6,2705},{0,4,617},{0,3,34},{0,2,985},{1,2,6674},{0,3,4625},{0,2,1946},{0,1,5030},{0,2,6674},{0,1,5030},{0,3,9},{0,3,9},{0,3,9},{0,1,100},{0,1,845},{0,1,461},{0,1,461},{0,0,676},{0,0,1097},{0,0,712},{0,3,9},{0,3,9},{0,3,9},{0,1,100},{0,1,845}, +{0,1,461},{0,1,461},{0,0,676},{1,0,853},{0,0,676},{1,4,2665},{0,4,617},{0,3,34},{0,2,985},{1,4,2665},{2,2,2665},{0,2,985},{0,2,2777},{2,2,2665},{0,2,2777},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,6,2689},{0,5,477},{0,3,34},{0,3,634},{0,4,7538},{0,3,4865},{0,2,2346}, +{0,2,5474},{0,2,8547},{0,1,6367},{0,6,2689},{0,5,477},{0,3,34},{0,3,634},{0,4,7538},{0,3,4865},{0,2,2346},{0,2,5474},{2,1,7538},{0,2,5474},{0,3,25},{0,3,25},{0,3,25},{0,2,16},{0,1,1261},{0,1,685},{0,1,685},{0,1,965},{0,1,1646},{0,0,1160},{0,3,25},{0,3,25},{0,3,25},{0,2,16},{1,0,1213},{0,1,685},{0,1,685},{0,1,965},{1,0,1205}, +{0,1,965},{2,3,2669},{0,5,477},{0,3,34},{0,3,634},{2,3,2669},{6,0,2669},{0,3,634},{0,2,2665},{6,0,2669},{0,2,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,7,2669},{0,5,365},{0,3,290},{0,3,410},{0,4,8578},{0,3,5361},{0,3,2346},{0,2,5618},{0,2,9715},{0,2,6579},{0,7,2669}, 
+{0,5,365},{0,3,290},{0,3,410},{2,1,8498},{0,3,5361},{0,3,2346},{0,2,5618},{4,0,8498},{0,2,5618},{0,4,0},{0,4,0},{0,4,0},{0,2,16},{0,2,1637},{0,2,977},{0,2,977},{0,1,997},{0,1,1998},{0,1,1358},{0,4,0},{0,4,0},{0,4,0},{0,2,16},{1,0,1629},{0,2,977},{0,2,977},{0,1,997},{0,1,1637},{0,1,997},{3,2,2669},{0,5,365},{1,3,185}, +{0,3,410},{3,2,2669},{5,1,2669},{0,3,410},{0,2,2809},{5,1,2669},{0,2,2809},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,7,2777},{0,6,257},{0,4,277},{0,3,464},{0,5,9677},{0,4,6026},{0,3,2400},{0,2,6086},{0,2,11335},{0,2,7047},{0,7,2777},{0,6,257},{0,4,277},{0,3,464},{3,0,9677}, +{0,4,6026},{0,3,2400},{0,2,6086},{3,1,9677},{0,2,6086},{0,5,16},{0,5,16},{0,5,16},{0,3,64},{0,2,2186},{0,2,1130},{0,2,1130},{0,1,1186},{0,1,2547},{0,1,1547},{0,5,16},{0,5,16},{0,5,16},{0,3,64},{0,2,2186},{0,2,1130},{0,2,1130},{0,1,1186},{0,1,2186},{0,1,1186},{4,1,2669},{0,6,257},{0,4,277},{0,3,464},{4,1,2669},{4,2,2669},{0,3,464}, +{0,3,3209},{4,2,2669},{0,3,3209},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,7,3209},{0,6,209},{0,4,389},{0,4,541},{0,6,9765},{0,4,5546},{0,3,2080},{0,2,5990},{0,3,11621},{0,2,6951},{0,7,3209},{0,6,209},{0,4,389},{0,4,541},{1,4,9677},{0,4,5546},{0,3,2080},{0,2,5990},{2,2,9677}, +{0,2,5990},{0,5,80},{0,5,80},{0,5,80},{0,3,64},{0,3,2196},{0,2,970},{0,2,970},{0,1,1058},{0,1,2739},{0,1,1419},{0,5,80},{0,5,80},{0,5,80},{0,3,64},{1,1,2180},{0,2,970},{0,2,970},{0,1,1058},{1,1,2196},{0,1,1058},{5,0,2669},{0,6,145},{1,4,52},{0,4,477},{5,0,2669},{3,3,2669},{0,4,477},{0,3,2809},{3,3,2669},{0,3,2809},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,40},{0,0,40},{0,7,4025},{0,7,441},{0,4,885},{0,4,429},{0,6,9685},{0,4,5450},{0,4,1854},{0,3,6029},{0,3,11925},{0,2,7239},{1,7,3001},{0,7,441},{1,4,344},{0,4,429},{3,1,9685},{0,4,5450},{0,4,1854},{0,3,6029},{4,1,9685},{0,3,6029},{0,6,256},{0,6,256},{0,6,256}, 
+{0,3,320},{0,3,2228},{0,3,848},{0,3,848},{0,1,1186},{0,2,3171},{0,1,1547},{0,6,256},{0,6,256},{0,6,256},{0,3,320},{2,0,2180},{0,3,848},{0,3,848},{0,1,1186},{3,0,2180},{0,1,1186},{4,2,2677},{0,7,185},{1,4,20},{0,4,173},{4,2,2677},{7,1,2677},{0,4,173},{0,3,2665},{7,1,2677},{0,3,2665},{0,0,256},{0,0,256},{0,0,256},{0,0,256},{0,1,9}, +{0,1,9},{0,1,9},{0,0,36},{0,0,72},{0,0,72},{1,7,4141},{0,7,617},{1,4,1500},{0,4,701},{0,7,9690},{0,5,5001},{0,4,1214},{0,3,5277},{0,3,12613},{0,3,7213},{1,7,2985},{1,6,465},{1,4,344},{1,4,524},{4,0,9674},{0,5,5001},{0,4,1214},{0,3,5277},{3,2,9674},{0,3,5277},{0,7,601},{0,7,601},{0,7,601},{0,4,601},{0,4,2178},{0,3,656},{0,3,656}, +{0,2,866},{0,2,3171},{0,2,1827},{1,5,101},{1,5,101},{1,5,101},{1,3,109},{0,4,2178},{0,3,656},{0,3,656},{0,2,866},{2,1,2178},{0,2,866},{5,1,2669},{0,7,41},{2,4,181},{0,4,125},{5,1,2669},{4,3,2669},{0,4,125},{0,3,2777},{4,3,2669},{0,3,2777},{0,0,576},{0,0,576},{0,0,576},{0,0,576},{0,1,25},{0,1,25},{0,1,25},{0,1,49},{0,0,232}, +{0,0,232},{1,7,4582},{0,7,1274},{1,5,1446},{0,4,1466},{0,7,9789},{0,5,4794},{0,4,953},{0,3,4890},{0,4,13038},{0,3,6826},{1,7,3426},{1,7,290},{1,5,290},{1,4,443},{2,4,9685},{0,5,4794},{0,4,953},{0,3,4890},{7,0,9685},{0,3,4890},{0,7,1105},{0,7,1105},{0,7,1105},{0,4,1105},{0,5,2210},{0,4,592},{0,4,592},{0,2,596},{0,2,3477},{0,2,1557},{1,6,26}, +{1,6,26},{1,6,26},{1,4,82},{1,3,2180},{0,4,592},{0,4,592},{0,2,596},{1,2,2180},{0,2,596},{6,0,2677},{0,7,185},{1,5,289},{0,4,377},{6,0,2677},{5,3,2677},{0,4,377},{0,3,3209},{5,3,2677},{0,3,3209},{0,0,1089},{0,0,1089},{0,0,1089},{0,0,1089},{0,2,4},{0,2,4},{0,2,4},{0,1,4},{0,1,365},{0,1,365},{1,7,5382},{1,7,1350},{1,5,1510}, 
+{1,5,1738},{0,7,10285},{0,6,4406},{0,5,971},{0,3,4954},{0,4,13534},{0,3,6890},{2,7,3454},{1,7,194},{1,5,354},{1,5,582},{3,3,9674},{0,6,4406},{0,5,971},{0,3,4954},{6,1,9674},{0,3,4954},{1,6,1214},{1,6,1214},{1,6,1214},{1,4,1206},{0,5,2210},{0,4,400},{0,4,400},{0,2,628},{0,2,4021},{0,2,1589},{1,6,58},{1,6,58},{1,6,58},{1,4,50},{2,2,2178}, +{0,4,400},{0,4,400},{0,2,628},{5,0,2178},{0,2,628},{4,4,2669},{1,7,145},{2,5,74},{0,5,130},{4,4,2669},{7,2,2669},{0,5,130},{0,4,2845},{7,2,2669},{0,4,2845},{1,0,1205},{1,0,1205},{1,0,1205},{1,0,1205},{0,3,9},{0,3,9},{0,3,9},{0,1,100},{0,1,461},{0,1,461},{1,7,6566},{1,7,1638},{1,5,1958},{1,5,1578},{1,7,10830},{0,6,4118},{0,5,443}, +{0,4,4785},{0,4,14414},{0,3,7338},{2,7,3390},{1,7,482},{2,5,371},{1,5,422},{4,2,9674},{0,6,4118},{0,5,443},{0,4,4785},{5,2,9674},{0,4,4785},{1,7,1382},{1,7,1382},{1,7,1382},{1,4,1430},{0,6,2178},{0,5,442},{0,5,442},{0,3,641},{0,3,4242},{0,2,1877},{1,7,226},{1,7,226},{1,7,226},{1,4,274},{3,1,2178},{0,5,442},{0,5,442},{0,3,641},{4,1,2178}, +{0,3,641},{5,3,2669},{1,7,257},{2,5,10},{0,5,2},{5,3,2669},{6,3,2669},{0,5,2},{0,4,2669},{6,3,2669},{0,4,2669},{1,0,1381},{1,0,1381},{1,0,1381},{1,0,1381},{0,3,25},{0,3,25},{0,3,25},{0,2,16},{0,1,685},{0,1,685},{1,7,8134},{1,7,2310},{1,6,2671},{1,5,1802},{1,7,11086},{0,7,4109},{0,5,299},{0,4,4193},{0,5,14830},{0,4,7442},{2,7,3710}, +{2,7,490},{2,5,323},{2,5,563},{5,1,9669},{0,7,4109},{0,5,299},{0,4,4193},{4,3,9669},{0,4,4193},{1,7,1734},{1,7,1734},{1,7,1734},{1,5,1721},{0,7,2228},{0,5,218},{0,5,218},{0,3,305},{0,3,4626},{0,3,2241},{2,6,125},{2,6,125},{2,6,125},{2,4,125},{4,0,2180},{0,5,218},{0,5,218},{0,3,305},{3,2,2180},{0,3,305},{6,2,2665},{2,7,369},{3,5,181}, 
+{1,5,117},{6,2,2665},{5,4,2665},{1,5,117},{0,4,2749},{5,4,2665},{0,4,2749},{1,0,1685},{1,0,1685},{1,0,1685},{1,0,1685},{0,4,0},{0,4,0},{0,4,0},{0,2,16},{0,2,977},{0,2,977},{2,7,9153},{1,7,3525},{1,6,3220},{1,5,2513},{1,7,11833},{0,7,3686},{0,6,548},{0,4,3986},{0,5,15577},{0,4,7235},{3,7,4141},{2,7,481},{2,6,309},{2,5,428},{6,0,9674}, +{0,7,3686},{0,6,548},{0,4,3986},{3,4,9674},{0,4,3986},{1,7,2436},{1,7,2436},{1,7,2436},{1,5,2189},{0,7,2210},{0,5,272},{0,5,272},{0,3,233},{0,3,5364},{0,3,2169},{2,7,40},{2,7,40},{2,7,40},{2,5,104},{2,4,2178},{0,5,272},{0,5,272},{0,3,233},{7,0,2178},{0,3,233},{7,1,2669},{2,7,477},{2,6,305},{0,6,292},{7,1,2669},{4,5,2669},{0,6,292}, +{0,4,3145},{4,5,2669},{0,4,3145},{1,0,2180},{1,0,2180},{1,0,2180},{1,0,2180},{0,5,16},{0,5,16},{0,5,16},{0,3,64},{0,2,1130},{0,2,1130},{2,7,10154},{1,7,4946},{1,6,4049},{1,6,2853},{1,7,12838},{0,7,3719},{0,6,133},{0,4,4211},{0,6,16286},{0,4,7346},{3,7,4061},{2,7,881},{2,6,325},{2,6,629},{4,4,9669},{0,7,3718},{0,6,132},{0,4,4210},{7,2,9669}, +{0,4,4210},{1,7,3265},{1,7,3265},{1,7,3265},{1,6,2789},{0,7,2467},{0,6,69},{0,6,69},{0,3,442},{0,4,5602},{0,3,2290},{2,7,40},{2,7,40},{2,7,40},{2,5,40},{3,3,2180},{0,6,68},{0,6,68},{0,3,441},{6,1,2180},{0,3,441},{5,5,2665},{3,7,617},{3,6,100},{0,6,68},{5,5,2665},{3,6,2665},{0,6,68},{0,5,2885},{3,6,2665},{0,5,2885},{1,0,2689}, +{1,0,2689},{1,0,2689},{1,0,2689},{0,5,17},{0,5,17},{0,5,17},{0,3,1},{0,2,1341},{0,2,1341},{2,7,10666},{2,7,5146},{2,6,4206},{1,6,2933},{2,7,13606},{0,7,4215},{0,6,181},{0,5,3914},{0,6,15454},{0,4,7122},{3,7,4365},{3,7,1097},{3,6,404},{2,6,421},{5,3,9669},{1,7,4133},{0,6,100},{0,5,3833},{6,3,9669},{0,5,3833},{2,7,3777},{2,7,3777},{2,7,3777}, 
+{1,6,2933},{1,7,2805},{0,6,181},{0,6,181},{0,4,275},{0,4,5282},{0,3,2130},{2,7,296},{2,7,296},{2,7,296},{2,5,232},{4,2,2180},{0,6,100},{0,6,100},{0,4,194},{5,2,2180},{0,4,194},{6,4,2665},{3,7,697},{3,6,4},{1,6,4},{6,4,2665},{7,4,2665},{1,6,4},{0,5,2677},{7,4,2665},{0,5,2677},{1,0,2929},{1,0,2929},{1,0,2929},{1,0,2929},{0,6,81}, +{0,6,81},{0,6,81},{0,3,145},{0,3,1289},{0,3,1289},{2,7,11690},{2,7,5434},{2,7,4085},{2,6,3198},{2,7,13510},{1,7,4470},{1,6,585},{0,5,3690},{0,6,15134},{0,5,6210},{3,7,5053},{3,7,1177},{3,6,308},{2,6,597},{6,2,9670},{1,7,4181},{1,6,296},{0,5,3401},{5,4,9670},{0,5,3401},{2,7,3409},{2,7,3409},{2,7,3409},{2,6,3134},{1,7,2565},{0,7,323},{0,7,323}, +{0,4,323},{0,5,5085},{0,4,1634},{3,7,153},{3,7,153},{3,7,153},{3,5,145},{5,1,2186},{0,7,34},{0,7,34},{0,4,34},{7,1,2186},{0,4,34},{7,3,2665},{4,7,937},{3,6,164},{1,6,100},{7,3,2665},{6,5,2665},{1,6,100},{0,5,2725},{6,5,2665},{0,5,2725},{2,0,3085},{2,0,3085},{2,0,3085},{2,0,3085},{1,5,290},{1,5,290},{1,5,290},{1,3,298},{0,3,985}, +{0,3,985},{3,7,12062},{2,7,6370},{2,7,3743},{2,6,3018},{2,7,14014},{1,7,4758},{1,6,603},{1,5,4049},{0,7,14638},{0,5,5310},{4,7,4958},{3,7,1726},{3,7,334},{3,6,419},{7,1,9669},{1,7,4694},{0,7,341},{0,5,3374},{4,5,9669},{0,5,3374},{2,7,3454},{2,7,3454},{2,7,3454},{2,6,2729},{1,7,2754},{1,6,314},{1,6,314},{1,4,289},{0,5,4626},{0,4,1121},{3,7,45}, +{3,7,45},{3,7,45},{3,5,109},{6,0,2180},{0,7,52},{0,7,52},{0,4,160},{3,4,2180},{0,4,160},{5,7,2665},{4,7,1009},{4,6,293},{2,6,293},{5,7,2665},{5,6,2665},{2,6,293},{0,5,3085},{5,6,2665},{0,5,3085},{2,0,2725},{2,0,2725},{2,0,2725},{2,0,2725},{1,6,89},{1,6,89},{1,6,89},{1,4,145},{0,3,949},{0,3,949},{3,7,12414},{3,7,7246},{2,7,3983}, 
+{2,7,2879},{2,7,15006},{1,7,5558},{1,7,163},{1,5,4161},{0,7,14190},{0,5,5054},{4,7,5246},{4,7,2186},{3,7,302},{3,6,659},{5,5,9670},{2,7,4926},{1,7,163},{0,5,3758},{3,6,9670},{0,5,3758},{2,7,3902},{2,7,3902},{2,7,3902},{2,6,2777},{1,7,3330},{1,7,82},{1,7,82},{1,4,401},{0,6,4490},{0,4,1073},{3,7,221},{3,7,221},{3,7,221},{3,6,34},{5,2,2186}, +{1,7,82},{1,7,82},{0,5,277},{7,2,2186},{0,5,277},{6,6,2665},{5,7,1313},{4,7,130},{1,7,82},{6,6,2665},{4,7,2665},{1,7,82},{0,6,2929},{4,7,2665},{0,6,2929},{2,0,2677},{2,0,2677},{2,0,2677},{2,0,2677},{1,6,9},{1,6,9},{1,6,9},{1,4,1},{0,4,673},{0,4,673},{3,7,13278},{3,7,7502},{3,7,4254},{2,7,2895},{3,7,15045},{2,7,6114},{1,7,147}, +{1,6,3978},{0,7,14254},{0,6,4818},{4,7,5918},{4,7,2378},{4,7,443},{3,7,426},{6,4,9670},{2,7,5438},{1,7,83},{0,6,3218},{7,4,9670},{0,6,3218},{3,7,4253},{3,7,4253},{3,7,4253},{2,7,2894},{2,7,3054},{1,7,146},{1,7,146},{1,5,296},{0,6,3978},{0,5,821},{4,7,442},{4,7,442},{4,7,442},{3,6,194},{6,1,2186},{1,7,82},{1,7,82},{0,5,37},{6,3,2186}, +{0,5,37},{7,5,2665},{5,7,1361},{4,7,2},{2,7,10},{7,5,2665},{6,6,2689},{2,7,10},{0,6,2689},{6,6,2689},{0,6,2689},{2,0,2885},{2,0,2885},{2,0,2885},{2,0,2885},{1,7,65},{1,7,65},{1,7,65},{1,4,113},{0,4,625},{0,4,625},{3,7,13261},{3,7,7409},{3,7,4045},{3,7,3209},{3,7,14116},{2,7,5615},{2,7,574},{1,6,3165},{0,7,13437},{0,6,3429},{5,7,5269}, +{4,7,2275},{4,7,250},{3,7,505},{5,6,8712},{3,7,4724},{2,7,250},{0,6,2405},{4,6,8712},{0,6,2405},{3,7,4045},{3,7,4045},{3,7,4045},{3,7,3209},{2,7,3150},{2,7,574},{2,7,574},{1,5,296},{0,6,3850},{0,5,453},{4,7,250},{4,7,250},{4,7,250},{4,6,169},{7,0,2180},{2,7,250},{2,7,250},{1,5,40},{3,5,2180},{1,5,40},{6,7,2180},{5,7,1168},{4,7,81}, 
+{2,7,25},{6,7,2180},{5,7,2180},{2,7,25},{0,6,2180},{5,7,2180},{0,6,2180},{3,0,3145},{3,0,3145},{3,0,3145},{3,0,3145},{1,7,305},{1,7,305},{1,7,305},{1,5,292},{0,5,449},{0,5,449},{4,7,11894},{3,7,7634},{3,7,4270},{3,7,2750},{3,7,13315},{2,7,5354},{2,7,313},{1,6,2634},{1,7,12414},{0,6,2250},{5,7,4369},{4,7,2365},{4,7,340},{4,7,160},{7,3,7590}, +{3,7,4094},{2,7,232},{0,6,1721},{6,5,7590},{0,6,1721},{3,7,4270},{3,7,4270},{3,7,4270},{3,7,2750},{2,7,3717},{2,7,313},{2,7,313},{2,5,302},{0,7,3546},{0,5,498},{4,7,340},{4,7,340},{4,7,340},{4,6,97},{7,1,2186},{2,7,232},{2,7,232},{1,5,130},{7,3,2186},{1,5,130},{6,7,1649},{6,7,1025},{5,7,9},{3,7,1},{6,7,1649},{7,6,1625},{3,7,1}, +{0,6,1685},{7,6,1625},{0,6,1685},{3,0,2749},{3,0,2749},{3,0,2749},{3,0,2749},{2,7,117},{2,7,117},{2,7,117},{2,5,181},{0,5,377},{0,5,377},{4,7,11078},{4,7,7478},{3,7,4878},{3,7,2750},{4,7,12662},{3,7,5031},{2,7,489},{2,6,1911},{1,7,11470},{0,6,1610},{5,7,3841},{5,7,1949},{4,7,692},{4,7,32},{5,7,6662},{4,7,3641},{3,7,442},{0,6,1385},{5,6,6662}, +{0,6,1385},{3,7,4878},{3,7,4878},{3,7,4878},{3,7,2750},{3,7,3795},{2,7,489},{2,7,489},{2,5,366},{0,7,3354},{0,6,454},{4,7,692},{4,7,692},{4,7,692},{4,7,32},{6,3,2180},{3,7,442},{3,7,442},{0,6,229},{6,4,2180},{0,6,229},{7,6,1201},{6,7,689},{5,7,25},{4,7,16},{7,6,1201},{7,6,1225},{4,7,16},{0,6,1381},{7,6,1225},{0,6,1381},{3,0,2669}, +{3,0,2669},{3,0,2669},{3,0,2669},{2,7,5},{2,7,5},{2,7,5},{2,5,5},{0,6,229},{0,6,229},{4,7,10646},{4,7,7046},{4,7,5021},{3,7,3134},{4,7,11526},{3,7,4503},{2,7,1049},{2,6,1479},{1,7,10910},{0,6,1354},{5,7,3569},{5,7,1677},{5,7,653},{4,7,160},{7,4,5829},{4,7,3065},{3,7,410},{1,6,1209},{7,5,5829},{1,6,1209},{4,7,5021},{4,7,5021},{4,7,5021}, 
+{3,7,3134},{3,7,3875},{2,7,1049},{2,7,1049},{2,6,323},{0,7,3546},{0,6,198},{5,7,653},{5,7,653},{5,7,653},{4,7,160},{7,2,2180},{3,7,410},{3,7,410},{1,6,53},{5,5,2180},{1,6,53},{7,6,865},{6,7,481},{6,7,81},{4,7,16},{7,6,865},{6,7,841},{4,7,16},{0,6,1205},{6,7,841},{0,6,1205},{3,0,2845},{3,0,2845},{3,0,2845},{3,0,2845},{2,7,149}, +{2,7,149},{2,7,149},{2,5,85},{0,6,149},{0,6,149},{4,7,10598},{4,7,6998},{4,7,4973},{4,7,3353},{4,7,10774},{3,7,4359},{3,7,995},{2,6,1431},{2,7,10294},{0,7,1242},{6,7,3329},{5,7,1661},{5,7,637},{5,7,197},{6,6,5085},{4,7,2745},{3,7,634},{0,7,1098},{4,7,5085},{0,7,1098},{4,7,4973},{4,7,4973},{4,7,4973},{4,7,3353},{3,7,4339},{3,7,995},{3,7,995}, +{2,6,275},{1,7,3845},{0,6,326},{5,7,637},{5,7,637},{5,7,637},{5,7,197},{5,6,2178},{3,7,634},{3,7,634},{1,6,37},{4,6,2178},{1,6,37},{7,7,605},{6,7,401},{6,7,1},{5,7,1},{7,7,605},{6,7,569},{5,7,1},{0,7,1089},{6,7,569},{0,7,1089},{4,0,3209},{4,0,3209},{4,0,3209},{4,0,3209},{3,7,370},{3,7,370},{3,7,370},{2,6,274},{0,7,153}, +{0,7,153},{5,7,9925},{4,7,7403},{4,7,5378},{4,7,3218},{4,7,10387},{4,7,4627},{3,7,1292},{3,6,1477},{2,7,9727},{0,7,621},{6,7,2609},{5,7,1949},{5,7,925},{5,7,89},{7,5,4347},{5,7,2384},{4,7,666},{0,7,612},{6,6,4371},{0,7,612},{4,7,5378},{4,7,5378},{4,7,5378},{4,7,3218},{4,7,4762},{3,7,1292},{3,7,1292},{3,6,321},{1,7,4106},{1,6,469},{5,7,925}, +{5,7,925},{5,7,925},{5,7,89},{6,5,2180},{4,7,666},{4,7,666},{2,6,104},{3,7,2180},{2,6,104},{7,7,290},{7,7,218},{6,7,64},{6,7,16},{7,7,290},{7,7,338},{6,7,16},{0,7,576},{7,7,338},{0,7,576},{4,0,2777},{4,0,2777},{4,0,2777},{4,0,2777},{3,7,136},{3,7,136},{3,7,136},{3,5,200},{0,7,45},{0,7,45},{5,7,9269},{5,7,7377},{4,7,6146}, 
+{4,7,3506},{5,7,10044},{4,7,4211},{3,7,1964},{3,7,836},{3,7,9185},{0,7,477},{6,7,2241},{6,7,1617},{6,7,1217},{5,7,265},{6,7,3780},{5,7,2064},{4,7,890},{1,7,257},{5,7,3780},{1,7,257},{4,7,6146},{4,7,6146},{4,7,6146},{4,7,3506},{4,7,4826},{3,7,1964},{3,7,1964},{3,6,337},{2,7,4590},{0,7,477},{6,7,1217},{6,7,1217},{6,7,1217},{5,7,265},{7,4,2178}, +{4,7,890},{4,7,890},{1,7,257},{7,5,2178},{1,7,257},{7,7,146},{7,7,74},{7,7,25},{6,7,16},{7,7,146},{7,7,130},{6,7,16},{0,7,256},{7,7,130},{0,7,256},{4,0,2665},{4,0,2665},{4,0,2665},{4,0,2665},{3,7,200},{3,7,200},{3,7,200},{3,6,13},{0,7,221},{0,7,221},{5,7,8997},{5,7,7105},{5,7,6081},{4,7,4178},{5,7,9276},{4,7,4179},{4,7,2154}, +{3,7,356},{3,7,8721},{1,7,209},{6,7,2129},{6,7,1505},{6,7,1105},{6,7,673},{7,6,3332},{5,7,2000},{5,7,976},{2,7,73},{7,6,3300},{2,7,73},{5,7,6081},{5,7,6081},{5,7,6081},{4,7,4178},{4,7,5274},{4,7,2154},{4,7,2154},{3,7,356},{2,7,4878},{1,7,209},{6,7,1105},{6,7,1105},{6,7,1105},{6,7,673},{7,5,2210},{5,7,976},{5,7,976},{2,7,73},{6,6,2178}, +{2,7,73},{7,7,130},{7,7,58},{7,7,9},{7,7,1},{7,7,130},{7,7,50},{7,7,1},{0,7,64},{7,7,50},{0,7,64},{4,0,2809},{4,0,2809},{4,0,2809},{4,0,2809},{3,7,520},{3,7,520},{3,7,520},{3,6,61},{1,7,145},{1,7,145},{5,7,9109},{5,7,7217},{5,7,6193},{5,7,4301},{5,7,8892},{4,7,4531},{4,7,2506},{3,7,260},{4,7,8587},{1,7,289},{6,7,2273}, +{6,7,1649},{6,7,1249},{6,7,625},{7,6,2916},{6,7,1956},{5,7,1168},{2,7,25},{6,7,2916},{2,7,25},{5,7,6193},{5,7,6193},{5,7,6193},{5,7,4301},{5,7,5976},{4,7,2506},{4,7,2506},{3,7,260},{3,7,5277},{1,7,289},{6,7,1249},{6,7,1249},{6,7,1249},{6,7,625},{6,7,2180},{5,7,1168},{5,7,1168},{2,7,25},{5,7,2180},{2,7,25},{7,7,242},{7,7,170},{7,7,121}, 
+{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,3209},{4,0,3209},{4,0,3209},{4,0,3209},{4,7,481},{4,7,481},{4,7,481},{3,7,260},{1,7,289},{1,7,289},{6,7,8325},{5,7,6749},{5,7,5725},{5,7,3833},{5,7,7866},{5,7,4082},{4,7,2470},{4,7,265},{4,7,7219},{2,7,365},{7,7,1842},{6,7,1460},{6,7,1060},{6,7,436},{7,6,2241}, +{6,7,1425},{6,7,1025},{3,7,1},{6,7,2169},{3,7,1},{5,7,5725},{5,7,5725},{5,7,5725},{5,7,3833},{5,7,4950},{4,7,2470},{4,7,2470},{4,7,265},{3,7,4521},{2,7,365},{6,7,1060},{6,7,1060},{6,7,1060},{6,7,436},{6,7,1649},{6,7,1025},{6,7,1025},{3,7,1},{7,6,1625},{3,7,1},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49}, +{0,7,0},{7,7,98},{0,7,0},{5,0,2809},{5,0,2809},{5,0,2809},{5,0,2809},{4,7,445},{4,7,445},{4,7,445},{4,6,202},{2,7,365},{2,7,365},{6,7,7093},{6,7,6469},{5,7,5581},{5,7,3689},{5,7,7226},{5,7,3442},{5,7,2418},{4,7,25},{4,7,6275},{2,7,509},{7,7,1266},{7,7,1194},{6,7,1028},{6,7,404},{7,7,1686},{6,7,1089},{6,7,689},{4,7,16},{6,7,1641}, +{4,7,16},{5,7,5581},{5,7,5581},{5,7,5581},{5,7,3689},{5,7,4310},{5,7,2418},{5,7,2418},{4,7,25},{3,7,4121},{2,7,509},{6,7,1028},{6,7,1028},{6,7,1028},{6,7,404},{7,6,1201},{6,7,689},{6,7,689},{4,7,16},{7,6,1225},{4,7,16},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,2665}, +{5,0,2665},{5,0,2665},{5,0,2665},{4,7,685},{4,7,685},{4,7,685},{4,7,25},{2,7,509},{2,7,509},{6,7,6117},{6,7,5493},{6,7,5093},{5,7,3801},{6,7,6098},{5,7,3058},{5,7,2034},{4,7,41},{4,7,5587},{3,7,613},{7,7,818},{7,7,746},{7,7,697},{6,7,500},{7,7,1142},{6,7,881},{6,7,481},{4,7,16},{6,7,1241},{4,7,16},{6,7,5093},{6,7,5093},{6,7,5093}, +{5,7,3801},{5,7,3926},{5,7,2034},{5,7,2034},{4,7,41},{4,7,3562},{3,7,613},{7,7,697},{7,7,697},{7,7,697},{6,7,500},{7,6,865},{6,7,481},{6,7,481},{4,7,16},{6,7,841},{4,7,16},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,2777},{5,0,2777},{5,0,2777},{5,0,2777},{5,7,1010}, 
+{5,7,1010},{5,7,1010},{4,7,41},{3,7,613},{3,7,613},{6,7,5397},{6,7,4773},{6,7,4373},{6,7,3749},{6,7,5074},{5,7,2930},{5,7,1906},{4,7,313},{5,7,4753},{3,7,725},{7,7,498},{7,7,426},{7,7,377},{7,7,305},{7,7,726},{7,7,582},{6,7,401},{5,7,1},{7,7,774},{5,7,1},{6,7,4373},{6,7,4373},{6,7,4373},{6,7,3749},{5,7,3798},{5,7,1906},{5,7,1906}, +{4,7,313},{4,7,3130},{3,7,725},{7,7,377},{7,7,377},{7,7,377},{7,7,305},{7,7,605},{6,7,401},{6,7,401},{5,7,1},{6,7,569},{5,7,1},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,3145},{5,0,3145},{5,0,3145},{5,0,3145},{5,7,882},{5,7,882},{5,7,882},{4,7,313},{3,7,725}, +{3,7,725},{1,7,34142},{0,7,5184},{0,5,609},{0,5,4841},{1,7,46442},{0,7,24449},{0,5,9741},{0,4,24761},{0,5,65162},{0,3,40820},{0,7,10048},{0,7,2880},{0,5,545},{0,4,3204},{1,4,18070},{0,4,13297},{0,4,6453},{0,2,13857},{2,2,18070},{0,2,13857},{0,3,9},{0,3,9},{0,3,9},{0,2,36},{0,1,1145},{0,1,617},{0,1,617},{0,1,977},{0,0,1505},{0,0,1036},{0,3,9}, +{0,3,9},{0,3,9},{0,2,36},{1,0,1129},{0,1,617},{0,1,617},{0,1,977},{1,0,1105},{0,1,977},{4,2,9250},{0,7,2880},{0,5,545},{0,4,3204},{4,2,9250},{5,2,9250},{0,4,3204},{0,3,9280},{5,2,9250},{0,3,9280},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,38782},{0,7,6720},{0,5,305}, +{0,5,3801},{1,7,51082},{0,7,24961},{0,5,8701},{0,4,25849},{0,5,65535},{0,4,42094},{1,7,10502},{0,7,2624},{0,5,241},{0,4,3044},{3,1,19334},{0,4,14065},{0,4,6293},{0,3,14756},{4,1,19334},{0,3,14756},{0,4,4},{0,4,4},{0,4,4},{0,2,4},{0,2,1537},{0,1,937},{0,1,937},{0,1,977},{0,1,1898},{0,1,1338},{0,4,4},{0,4,4},{0,4,4},{0,2,4},{1,0,1513}, +{0,1,937},{0,1,937},{0,1,977},{0,1,1537},{0,1,977},{5,1,9256},{0,7,2624},{0,5,241},{0,4,3044},{5,1,9256},{7,1,9256},{0,4,3044},{0,3,9280},{7,1,9256},{0,3,9280},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,44190},{0,7,9024},{0,5,769},{0,5,3529},{1,7,56490},{0,7,26241},{0,5,8429}, 
+{0,4,27705},{0,5,65535},{0,4,43950},{1,7,10854},{0,7,2624},{0,5,193},{0,5,2953},{2,3,20689},{0,5,14598},{0,4,6389},{0,3,15012},{6,0,20689},{0,3,15012},{0,4,36},{0,4,36},{0,4,36},{0,2,100},{0,2,1985},{0,2,1061},{0,2,1061},{0,1,1105},{0,1,2346},{0,1,1466},{0,4,36},{0,4,36},{0,4,36},{0,2,100},{0,2,1985},{0,2,1061},{0,2,1061},{0,1,1105},{0,1,1985}, +{0,1,1105},{6,0,9256},{0,7,2624},{0,5,193},{0,5,2953},{6,0,9256},{6,2,9256},{0,5,2953},{0,3,9536},{6,2,9256},{0,3,9536},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,50366},{0,7,12096},{0,6,1101},{0,5,4025},{1,7,62666},{0,7,28289},{0,5,8925},{0,5,28912},{0,6,65535},{0,4,46574},{1,7,11462}, +{0,7,2880},{0,6,317},{0,5,2425},{4,0,22137},{0,5,15206},{0,4,6741},{0,3,15524},{3,2,22137},{0,3,15524},{0,5,1},{0,5,1},{0,5,1},{0,3,9},{0,2,2561},{0,2,1285},{0,2,1285},{0,1,1361},{0,1,2922},{0,1,1722},{0,5,1},{0,5,1},{0,5,1},{0,3,9},{1,1,2561},{0,2,1285},{0,2,1285},{0,1,1361},{2,0,2521},{0,1,1361},{5,2,9250},{0,7,2880},{0,6,317}, +{0,5,2425},{5,2,9250},{5,3,9250},{0,5,2425},{0,4,9640},{5,3,9250},{0,4,9640},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,58232},{0,7,16470},{0,6,129},{0,6,3489},{1,7,65535},{0,7,31511},{0,6,10213},{0,5,29110},{0,6,65535},{0,4,50444},{1,7,12452},{0,7,3474},{0,6,29},{0,5,2137},{2,4,23851}, +{0,6,16172},{0,5,7037},{0,3,16406},{7,0,23851},{0,3,16406},{0,6,4},{0,6,4},{0,6,4},{0,3,36},{0,3,3232},{0,2,1690},{0,2,1690},{0,1,1802},{0,1,3723},{0,1,2163},{0,6,4},{0,6,4},{0,6,4},{0,3,36},{1,1,3200},{0,2,1690},{0,2,1690},{0,1,1802},{2,0,3232},{0,1,1802},{6,1,9256},{1,7,3232},{0,6,29},{0,5,2137},{6,1,9256},{6,3,9256},{0,5,2137}, 
+{0,4,9298},{6,3,9256},{0,4,9298},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,65535},{0,7,21174},{0,6,81},{0,6,2545},{1,7,65535},{0,7,35191},{0,6,9269},{0,5,30102},{0,6,65535},{0,5,54602},{1,7,13604},{0,7,4274},{0,6,45},{0,5,2153},{4,1,25472},{0,6,16620},{0,5,7053},{0,3,17462},{4,2,25472}, +{0,3,17462},{0,6,36},{0,6,36},{0,6,36},{0,4,49},{0,3,3872},{0,3,2132},{0,3,2132},{0,1,2330},{0,1,4571},{0,1,2691},{0,6,36},{0,6,36},{0,6,36},{0,4,49},{0,3,3872},{0,3,2132},{0,3,2132},{0,1,2330},{1,1,3872},{0,1,2330},{7,0,9250},{1,7,3488},{0,6,45},{0,5,2153},{7,0,9250},{3,5,9250},{0,5,2153},{0,4,9266},{3,5,9250},{0,4,9266},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{2,7,65535},{1,7,25204},{0,6,805},{0,6,2373},{1,7,65535},{0,7,39263},{0,6,8769},{0,5,31402},{0,6,65535},{0,5,55902},{1,7,14880},{1,7,5024},{0,6,321},{0,6,1889},{5,0,26756},{0,6,17000},{0,5,7049},{0,4,18139},{6,1,26756},{0,4,18139},{0,7,5},{0,7,5},{0,7,5}, +{0,4,5},{0,3,4468},{0,3,2248},{0,3,2248},{0,2,2722},{0,2,5411},{0,1,3227},{0,7,5},{0,7,5},{0,7,5},{0,4,5},{2,0,4420},{0,3,2248},{0,3,2248},{0,2,2722},{3,0,4420},{0,2,2722},{5,4,9250},{1,7,4000},{1,6,185},{0,6,1885},{5,4,9250},{7,3,9250},{0,6,1885},{0,4,9490},{7,3,9250},{0,4,9490},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,16}, +{0,0,16},{0,0,16},{0,0,64},{0,0,100},{0,0,100},{2,7,65535},{1,7,29620},{0,7,1306},{0,6,3061},{2,7,65535},{0,7,43055},{0,6,8145},{0,6,31878},{0,7,65535},{0,5,56670},{2,7,14924},{1,7,5344},{0,7,406},{0,6,1617},{4,2,26756},{0,6,16744},{0,5,6553},{0,4,17195},{5,2,26756},{0,4,17195},{0,7,181},{0,7,181},{0,7,181},{0,4,181},{0,4,4418},{0,3,2056},{0,3,2056}, 
+{0,2,2210},{0,2,5411},{0,2,3171},{0,7,181},{0,7,181},{0,7,181},{0,4,181},{0,4,4418},{0,3,2056},{0,3,2056},{0,2,2210},{2,1,4418},{0,2,2210},{6,3,9248},{2,7,4292},{0,7,306},{0,6,1517},{6,3,9248},{6,4,9248},{0,6,1517},{0,5,9698},{6,4,9248},{0,5,9698},{0,0,100},{0,0,100},{0,0,100},{0,0,100},{0,0,16},{0,0,16},{0,0,16},{0,0,0},{0,0,36}, +{0,0,36},{2,7,65535},{1,7,35659},{0,7,865},{0,7,2657},{2,7,65535},{1,7,48315},{0,7,8072},{0,6,29745},{0,7,65535},{0,5,58605},{2,7,15347},{1,7,6163},{1,7,270},{0,6,1770},{5,1,26756},{0,7,15992},{0,6,5378},{0,4,16592},{6,2,26756},{0,4,16592},{0,7,685},{0,7,685},{0,7,685},{0,5,370},{0,5,4450},{0,4,1768},{0,4,1768},{0,2,1940},{0,2,5717},{0,2,2901},{1,7,234}, +{1,7,234},{1,7,234},{1,4,250},{1,3,4420},{0,4,1768},{0,4,1768},{0,2,1940},{1,2,4420},{0,2,1940},{7,2,9250},{3,7,4820},{1,7,45},{0,6,1409},{7,2,9250},{5,5,9250},{0,6,1409},{0,5,9320},{5,5,9250},{0,5,9320},{0,0,361},{0,0,361},{0,0,361},{0,0,361},{0,1,0},{0,1,0},{0,1,0},{0,0,81},{0,0,117},{0,0,117},{2,7,65535},{1,7,40299},{0,7,1405}, +{0,7,2173},{2,7,65535},{1,7,50747},{0,7,6068},{0,6,28101},{0,7,65535},{0,5,59897},{2,7,15659},{1,7,6963},{1,7,74},{1,6,2046},{6,0,26264},{0,7,15284},{0,6,4470},{0,4,16052},{3,4,26264},{0,4,16052},{1,7,1230},{1,7,1230},{1,7,1230},{0,5,754},{0,5,4450},{0,4,1576},{0,4,1576},{0,2,1972},{0,2,6261},{0,2,2933},{1,7,74},{1,7,74},{1,7,74},{1,5,113},{2,2,4418}, +{0,4,1576},{0,4,1576},{0,2,1972},{5,0,4418},{0,2,1972},{5,6,8980},{3,7,4808},{1,7,25},{0,7,1444},{5,6,8980},{4,6,8980},{0,7,1444},{0,5,8980},{4,6,8980},{0,5,8980},{0,0,729},{0,0,729},{0,0,729},{0,0,729},{0,2,16},{0,2,16},{0,2,16},{0,1,16},{0,0,325},{0,0,325},{2,7,65535},{1,7,40395},{0,7,2381},{0,7,2125},{2,7,65535},{1,7,48635},{0,7,4500}, 
+{0,6,24853},{0,7,65535},{0,5,57545},{3,7,14605},{2,7,6211},{1,7,170},{1,6,1598},{5,2,24379},{0,7,13716},{0,6,3446},{0,5,14549},{5,3,24379},{0,5,14549},{1,7,1326},{1,7,1326},{1,7,1326},{1,5,1157},{0,6,4418},{0,5,1394},{0,5,1394},{0,3,1621},{0,3,6482},{0,2,3221},{1,7,170},{1,7,170},{1,7,170},{1,5,1},{3,1,4418},{0,5,1394},{0,5,1394},{0,3,1621},{4,1,4418}, +{0,3,1621},{7,3,7940},{3,7,4264},{2,7,81},{0,7,900},{7,3,7940},{6,5,7940},{0,7,900},{0,5,7988},{6,5,7940},{0,5,7988},{1,0,1157},{1,0,1157},{1,0,1157},{1,0,1157},{0,2,16},{0,2,16},{0,2,16},{0,1,16},{0,1,377},{0,1,377},{2,7,65535},{1,7,40747},{0,7,3613},{0,7,2333},{2,7,65535},{1,7,46779},{0,7,3188},{0,6,21861},{0,7,65535},{0,5,55449},{3,7,13181}, +{2,7,5667},{1,7,522},{1,7,1306},{6,1,22571},{0,7,12404},{0,6,2678},{0,5,12453},{4,4,22571},{0,5,12453},{1,7,1678},{1,7,1678},{1,7,1678},{1,5,1301},{0,7,4468},{0,5,1170},{0,5,1170},{0,3,1285},{0,3,6866},{0,3,3221},{1,7,522},{1,7,522},{1,7,522},{1,5,145},{4,0,4420},{0,5,1170},{0,5,1170},{0,3,1285},{3,2,4420},{0,3,1285},{6,5,6964},{3,7,3848},{2,7,1}, +{0,7,484},{6,5,6964},{3,7,6964},{0,7,484},{0,5,7124},{3,7,6964},{0,5,7124},{1,0,1237},{1,0,1237},{1,0,1237},{1,0,1237},{0,3,1},{0,3,1},{0,3,1},{0,2,100},{0,1,505},{0,1,505},{2,7,65535},{1,7,41449},{0,7,5305},{0,7,2873},{2,7,65535},{1,7,44997},{0,7,2018},{0,6,18801},{0,7,65535},{0,6,52421},{3,7,11885},{2,7,5361},{2,7,320},{1,7,1000},{5,3,20645}, +{0,7,11234},{0,7,2018},{0,5,10401},{6,3,20645},{0,5,10401},{1,7,2380},{1,7,2380},{1,7,2380},{1,6,1496},{0,7,4450},{0,6,964},{0,6,964},{0,3,1213},{0,3,7604},{0,3,3149},{2,7,320},{2,7,320},{2,7,320},{2,5,272},{2,4,4418},{0,6,964},{0,6,964},{0,3,1213},{7,0,4418},{0,3,1213},{5,7,5941},{4,7,3181},{2,7,64},{0,7,169},{5,7,5941},{5,6,5941},{0,7,169}, 
+{0,5,6305},{5,6,5941},{0,5,6305},{1,0,1480},{1,0,1480},{1,0,1480},{1,0,1480},{0,4,25},{0,4,25},{0,4,25},{0,2,1},{0,1,802},{0,1,802},{2,7,65535},{1,7,42345},{0,7,7081},{0,7,3625},{2,7,65535},{1,7,43685},{0,7,1250},{0,6,16353},{0,7,65535},{0,6,49973},{3,7,11005},{3,7,5153},{2,7,320},{2,7,964},{7,0,19026},{1,7,10349},{0,7,1250},{0,5,8849},{3,5,19026}, +{0,5,8849},{1,7,3276},{1,7,3276},{1,7,3276},{1,6,1848},{0,7,4706},{0,6,740},{0,6,740},{0,4,1226},{0,4,7955},{0,3,3357},{2,7,320},{2,7,320},{2,7,320},{2,6,145},{3,3,4420},{0,6,740},{0,6,740},{0,4,1226},{6,1,4420},{0,4,1226},{7,4,5105},{4,7,2701},{3,7,25},{0,7,25},{7,4,5105},{7,5,5105},{0,7,25},{0,6,5645},{7,5,5105},{0,6,5645},{1,0,1832}, +{1,0,1832},{1,0,1832},{1,0,1832},{0,4,9},{0,4,9},{0,4,9},{0,2,49},{0,2,1010},{0,2,1010},{2,7,65535},{1,7,43497},{1,7,9052},{0,7,4633},{2,7,65535},{1,7,42629},{0,7,738},{0,6,14161},{0,7,65535},{0,6,47781},{3,7,10381},{3,7,4529},{2,7,576},{2,7,484},{5,4,17490},{1,7,9293},{0,7,738},{0,5,7553},{7,3,17490},{0,5,7553},{1,7,4428},{1,7,4428},{1,7,4428}, +{1,7,2412},{0,7,5218},{0,7,738},{0,7,738},{0,4,810},{0,4,8467},{0,3,3821},{2,7,576},{2,7,576},{2,7,576},{2,6,1},{4,2,4420},{0,7,738},{0,7,738},{0,4,810},{5,2,4420},{0,4,810},{6,6,4329},{4,7,2349},{3,7,9},{0,7,9},{6,6,4329},{4,7,4329},{0,7,9},{0,6,4637},{4,7,4329},{0,6,4637},{1,0,2312},{1,0,2312},{1,0,2312},{1,0,2312},{0,5,4}, +{0,5,4},{0,5,4},{0,3,36},{0,2,1186},{0,2,1186},{2,7,65535},{1,7,44905},{1,7,10460},{0,7,5897},{2,7,65535},{1,7,41829},{0,7,482},{0,6,12225},{0,7,65535},{0,6,45845},{4,7,9325},{3,7,4161},{3,7,797},{2,7,260},{7,1,16034},{1,7,8493},{0,7,482},{0,5,6513},{4,5,16034},{0,5,6513},{2,7,5712},{2,7,5712},{2,7,5712},{1,7,2924},{1,7,5672},{0,7,482},{0,7,482}, 
+{0,4,650},{0,4,9235},{0,4,3899},{3,7,797},{3,7,797},{3,7,797},{2,6,113},{5,1,4426},{0,7,482},{0,7,482},{0,4,650},{7,1,4426},{0,4,650},{6,6,3625},{5,7,1985},{4,7,100},{1,7,4},{6,6,3625},{6,6,3617},{1,7,4},{0,6,3757},{6,6,3617},{0,6,3757},{1,0,2920},{1,0,2920},{1,0,2920},{1,0,2920},{0,5,36},{0,5,36},{0,5,36},{0,3,4},{0,2,1490}, +{0,2,1490},{2,7,65535},{1,7,46795},{1,7,12350},{0,7,7625},{2,7,65535},{1,7,41235},{0,7,500},{0,6,10353},{0,7,65535},{0,6,43973},{4,7,8227},{3,7,4053},{3,7,689},{2,7,314},{6,3,14507},{2,7,7875},{0,7,500},{0,5,5649},{6,4,14507},{0,5,5649},{2,7,6594},{2,7,6594},{2,7,6594},{1,7,3806},{1,7,6086},{0,7,500},{0,7,500},{0,4,776},{0,5,9830},{0,4,4025},{3,7,689}, +{3,7,689},{3,7,689},{3,6,298},{6,0,4420},{0,7,500},{0,7,500},{0,4,776},{3,4,4420},{0,4,776},{7,5,2890},{5,7,1508},{4,7,1},{2,7,25},{7,5,2890},{6,6,2906},{2,7,25},{0,6,2920},{6,6,2906},{0,6,2920},{1,0,3757},{1,0,3757},{1,0,3757},{1,0,3757},{0,6,9},{0,6,9},{0,6,9},{0,4,100},{0,2,1985},{0,2,1985},{2,7,65535},{1,7,48747},{1,7,14302}, +{0,7,9433},{2,7,65535},{1,7,40979},{0,7,788},{0,6,8961},{0,7,65535},{0,6,42581},{4,7,7523},{4,7,3923},{3,7,865},{3,7,181},{7,2,13243},{2,7,7075},{1,7,754},{0,6,4337},{5,5,13243},{0,6,4337},{2,7,7650},{2,7,7650},{2,7,7650},{1,7,4862},{1,7,6726},{0,7,788},{0,7,788},{0,5,529},{0,5,10470},{0,4,4409},{3,7,865},{3,7,865},{3,7,865},{3,7,181},{5,2,4426}, +{1,7,754},{1,7,754},{0,5,529},{7,2,4426},{0,5,529},{6,7,2320},{5,7,1220},{4,7,49},{2,7,9},{6,7,2320},{5,7,2320},{2,7,9},{0,6,2312},{5,7,2320},{0,6,2312},{1,0,4637},{1,0,4637},{1,0,4637},{1,0,4637},{0,7,4},{0,7,4},{0,7,4},{0,4,4},{0,3,2297},{0,3,2297},{2,7,65535},{1,7,50955},{1,7,16510},{1,7,10798},{2,7,65535},{1,7,40979},{0,7,1332}, 
+{0,7,6964},{0,7,65535},{0,6,41445},{4,7,7075},{4,7,3475},{3,7,1297},{3,7,5},{6,4,12051},{2,7,6531},{1,7,754},{0,6,3201},{7,4,12051},{0,6,3201},{2,7,8962},{2,7,8962},{2,7,8962},{2,7,5834},{1,7,7622},{0,7,1332},{0,7,1332},{0,5,289},{0,6,11342},{0,4,5049},{3,7,1297},{3,7,1297},{3,7,1297},{3,7,5},{6,1,4426},{1,7,754},{1,7,754},{0,5,289},{6,3,4426}, +{0,5,289},{6,7,1808},{5,7,1060},{5,7,36},{3,7,4},{6,7,1808},{7,6,1808},{3,7,4},{0,6,1832},{7,6,1808},{0,6,1832},{1,0,5645},{1,0,5645},{1,0,5645},{1,0,5645},{0,7,36},{0,7,36},{0,7,36},{0,4,36},{0,3,2665},{0,3,2665},{2,7,65535},{1,7,53419},{1,7,18974},{1,7,12366},{2,7,65535},{1,7,41235},{0,7,2132},{0,7,5204},{0,7,65535},{0,6,40565},{5,7,6641}, +{4,7,3283},{4,7,1258},{3,7,85},{5,6,10952},{3,7,5900},{1,7,1010},{0,6,2321},{4,6,10952},{0,6,2321},{2,7,10530},{2,7,10530},{2,7,10530},{2,7,6666},{1,7,8774},{0,7,2132},{0,7,2132},{0,5,305},{0,6,11790},{0,5,5205},{4,7,1258},{4,7,1258},{4,7,1258},{3,7,85},{7,0,4420},{1,7,1010},{1,7,1010},{0,5,305},{3,5,4420},{0,5,305},{7,6,1360},{6,7,800},{5,7,4}, +{3,7,36},{7,6,1360},{7,6,1360},{3,7,36},{0,6,1480},{7,6,1360},{0,6,1480},{2,0,6305},{2,0,6305},{2,0,6305},{2,0,6305},{0,7,196},{0,7,196},{0,7,196},{0,5,49},{0,3,3161},{0,3,3161},{2,7,65535},{2,7,56301},{1,7,22052},{1,7,14436},{2,7,65535},{1,7,41829},{0,7,3338},{0,7,3530},{0,7,65535},{0,6,39881},{5,7,5741},{4,7,3373},{4,7,1348},{4,7,328},{7,3,9830}, +{3,7,5270},{2,7,1184},{0,6,1637},{6,5,9830},{0,6,1637},{2,7,12600},{2,7,12600},{2,7,12600},{2,7,7908},{1,7,10376},{1,7,2760},{1,7,2760},{0,6,481},{0,6,12600},{0,5,5529},{4,7,1348},{4,7,1348},{4,7,1348},{4,7,328},{7,1,4426},{2,7,1184},{2,7,1184},{0,6,481},{7,3,4426},{0,6,481},{7,6,937},{6,7,521},{5,7,121},{4,7,4},{7,6,937},{6,7,929},{4,7,4}, 
+{0,6,1237},{6,7,929},{0,6,1237},{2,0,7124},{2,0,7124},{2,0,7124},{2,0,7124},{0,7,529},{0,7,529},{0,7,529},{0,5,4},{0,4,3778},{0,4,3778},{2,7,65535},{2,7,58413},{1,7,25060},{1,7,16548},{2,7,65535},{1,7,42629},{0,7,4682},{0,7,2314},{0,7,65535},{0,6,39545},{5,7,5213},{5,7,3321},{4,7,1700},{4,7,200},{5,7,8902},{3,7,4982},{2,7,1440},{0,6,1301},{5,6,8902}, +{0,6,1301},{3,7,14701},{3,7,14701},{3,7,14701},{2,7,9284},{2,7,11492},{1,7,3560},{1,7,3560},{0,6,145},{0,6,13592},{0,5,6089},{4,7,1700},{4,7,1700},{4,7,1700},{4,7,200},{6,3,4420},{2,7,1440},{2,7,1440},{0,6,145},{6,4,4420},{0,6,145},{7,6,697},{6,7,409},{6,7,9},{5,7,9},{7,6,697},{6,7,625},{5,7,9},{0,6,1157},{6,7,625},{0,6,1157},{2,0,7988}, +{2,0,7988},{2,0,7988},{2,0,7988},{0,7,961},{0,7,961},{0,7,961},{0,5,100},{0,4,4210},{0,4,4210},{3,7,65535},{2,7,60781},{1,7,28324},{1,7,18916},{2,7,65535},{1,7,43685},{0,7,6282},{0,7,1354},{0,7,65535},{0,6,39465},{5,7,4941},{5,7,3049},{5,7,2025},{4,7,328},{7,4,8069},{4,7,4465},{3,7,1586},{0,6,1221},{7,5,8069},{0,6,1221},{3,7,16189},{3,7,16189},{3,7,16189}, +{2,7,10916},{2,7,12740},{1,7,4616},{1,7,4616},{0,6,65},{0,7,14411},{0,6,6789},{5,7,2025},{5,7,2025},{5,7,2025},{4,7,328},{7,2,4420},{3,7,1586},{3,7,1586},{0,6,65},{5,5,4420},{0,6,65},{7,7,377},{7,7,305},{6,7,25},{5,7,25},{7,7,377},{6,7,449},{5,7,25},{0,7,729},{6,7,449},{0,7,729},{2,0,8980},{2,0,8980},{2,0,8980},{2,0,8980},{1,7,1480}, +{1,7,1480},{1,7,1480},{0,6,16},{0,4,4770},{0,4,4770},{3,7,65535},{2,7,59505},{1,7,29984},{1,7,19680},{2,7,65535},{1,7,43137},{0,7,8318},{0,7,830},{0,7,65535},{0,6,34901},{5,7,4925},{5,7,3033},{5,7,2009},{4,7,712},{6,6,7325},{4,7,4145},{3,7,1810},{0,7,650},{4,7,7325},{0,7,650},{3,7,16745},{3,7,16745},{3,7,16745},{2,7,12024},{2,7,13464},{1,7,5556},{1,7,5556}, 
+{0,6,277},{0,7,14139},{0,6,6017},{5,7,2009},{5,7,2009},{5,7,2009},{4,7,712},{5,6,4418},{3,7,1810},{3,7,1810},{0,6,241},{4,6,4418},{0,6,241},{7,7,185},{7,7,113},{7,7,64},{6,7,1},{7,7,185},{7,7,193},{6,7,1},{0,7,361},{7,7,193},{0,7,361},{2,0,9320},{2,0,9320},{2,0,9320},{2,0,9320},{1,7,1460},{1,7,1460},{1,7,1460},{0,6,52},{0,5,4772}, +{0,5,4772},{3,7,65535},{2,7,57588},{1,7,32135},{1,7,20823},{3,7,65535},{1,7,42804},{1,7,8359},{0,7,1289},{0,7,65535},{0,6,28970},{6,7,4317},{5,7,3321},{5,7,2297},{5,7,845},{7,5,6587},{5,7,4008},{4,7,2066},{0,7,164},{6,6,6611},{0,7,164},{3,7,17366},{3,7,17366},{3,7,17366},{3,7,12274},{2,7,14427},{2,7,6699},{2,7,6699},{0,7,389},{0,7,13860},{0,6,5234},{5,7,2297}, +{5,7,2297},{5,7,2297},{5,7,845},{6,5,4420},{4,7,2066},{4,7,2066},{0,7,164},{3,7,4420},{0,7,164},{7,7,122},{7,7,50},{7,7,1},{7,7,9},{7,7,122},{7,7,58},{7,7,9},{0,7,100},{7,7,58},{0,7,100},{2,0,9698},{2,0,9698},{2,0,9698},{2,0,9698},{1,7,1586},{1,7,1586},{1,7,1586},{0,7,289},{0,5,4250},{0,5,4250},{3,7,65535},{2,7,56836},{2,7,31631}, +{1,7,22791},{3,7,65535},{2,7,40532},{1,7,9015},{1,7,778},{0,7,65535},{0,6,24650},{6,7,3949},{6,7,3325},{5,7,2825},{5,7,1021},{6,7,6020},{5,7,3688},{4,7,2290},{0,7,4},{5,7,6020},{0,7,4},{3,7,18326},{3,7,18326},{3,7,18326},{3,7,12626},{3,7,15077},{2,7,7227},{2,7,7227},{1,7,294},{0,7,14020},{0,6,4946},{5,7,2825},{5,7,2825},{5,7,2825},{5,7,1021},{7,4,4418}, +{4,7,2290},{4,7,2290},{0,7,4},{7,5,4418},{0,7,4},{7,7,202},{7,7,130},{7,7,81},{7,7,25},{7,7,202},{7,7,74},{7,7,25},{0,7,4},{7,7,74},{0,7,4},{3,0,9490},{3,0,9490},{3,0,9490},{3,0,9490},{1,7,1970},{1,7,1970},{1,7,1970},{1,6,202},{0,6,3922},{0,6,3922},{3,7,65535},{3,7,55466},{2,7,30335},{2,7,21687},{3,7,65535},{2,7,37932},{1,7,9535}, 
+{1,7,70},{0,7,65535},{0,7,20544},{6,7,3417},{6,7,2793},{6,7,2393},{5,7,1033},{6,7,5184},{5,7,3204},{5,7,2180},{1,7,25},{7,6,5168},{1,7,25},{4,7,17611},{4,7,17611},{4,7,17611},{3,7,12630},{3,7,14321},{2,7,7251},{2,7,7251},{1,7,34},{0,7,13376},{0,7,4160},{6,7,2393},{6,7,2393},{6,7,2393},{5,7,1033},{6,6,3874},{5,7,2180},{5,7,2180},{1,7,25},{4,7,3874}, +{1,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{3,0,9266},{3,0,9266},{3,0,9266},{3,0,9266},{2,7,2210},{2,7,2210},{2,7,2210},{1,7,34},{0,6,3442},{0,6,3442},{3,7,65535},{3,7,51210},{2,7,29343},{2,7,20695},{3,7,65535},{2,7,35820},{1,7,10495},{1,7,134},{1,7,65535},{0,7,15936},{6,7,2889}, +{6,7,2265},{6,7,1865},{5,7,1049},{7,6,4288},{5,7,2724},{5,7,1700},{1,7,9},{7,6,4272},{1,7,9},{4,7,16555},{4,7,16555},{4,7,16555},{3,7,12662},{3,7,13441},{2,7,7251},{2,7,7251},{1,7,34},{0,7,12608},{0,7,3392},{6,7,1865},{6,7,1865},{6,7,1865},{5,7,1049},{7,5,3218},{5,7,1700},{5,7,1700},{1,7,9},{6,6,3202},{1,7,9},{7,7,242},{7,7,170},{7,7,121}, +{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{3,0,9298},{3,0,9298},{3,0,9298},{3,0,9298},{2,7,2210},{2,7,2210},{2,7,2210},{1,7,34},{0,6,3218},{0,6,3218},{4,7,65535},{3,7,47340},{2,7,29145},{2,7,20497},{3,7,65535},{2,7,34362},{2,7,9157},{1,7,1124},{1,7,64598},{0,7,11670},{6,7,2448},{6,7,1824},{6,7,1424},{6,7,800},{7,6,3361}, +{6,7,2321},{5,7,1313},{2,7,0},{6,7,3401},{2,7,0},{4,7,15673},{4,7,15673},{4,7,15673},{4,7,12073},{3,7,12757},{3,7,6905},{3,7,6905},{1,7,340},{1,7,11657},{0,7,2834},{6,7,1424},{6,7,1424},{6,7,1424},{6,7,800},{7,5,2525},{5,7,1313},{5,7,1313},{2,7,0},{5,7,2545},{2,7,0},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49}, 
+{0,7,0},{7,7,98},{0,7,0},{3,0,9640},{3,0,9640},{3,0,9640},{3,0,9640},{2,7,2516},{2,7,2516},{2,7,2516},{1,7,340},{0,7,2834},{0,7,2834},{4,7,65535},{3,7,44716},{3,7,27896},{2,7,21137},{4,7,65535},{3,7,31853},{2,7,8677},{2,7,784},{1,7,59734},{0,7,8694},{6,7,2192},{6,7,1568},{6,7,1168},{6,7,544},{7,6,2673},{6,7,1761},{5,7,1105},{3,7,25},{6,7,2649}, +{3,7,25},{4,7,15161},{4,7,15161},{4,7,15161},{4,7,11561},{4,7,12169},{3,7,6569},{3,7,6569},{2,7,208},{1,7,10889},{0,7,2610},{6,7,1168},{6,7,1168},{6,7,1168},{6,7,544},{6,7,1985},{5,7,1105},{5,7,1105},{3,7,25},{5,7,1985},{3,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9536}, +{4,0,9536},{4,0,9536},{4,0,9536},{2,7,3060},{2,7,3060},{2,7,3060},{2,7,208},{0,7,2610},{0,7,2610},{4,7,63766},{3,7,42860},{3,7,26040},{3,7,20188},{4,7,60070},{3,7,29085},{2,7,8965},{2,7,336},{1,7,55638},{0,7,6486},{7,7,1686},{6,7,1440},{6,7,1040},{6,7,416},{7,6,2113},{6,7,1329},{6,7,929},{3,7,9},{6,7,2025},{3,7,9},{4,7,14905},{4,7,14905},{4,7,14905}, +{4,7,11305},{4,7,11209},{3,7,6489},{3,7,6489},{2,7,272},{1,7,10377},{0,7,2642},{6,7,1040},{6,7,1040},{6,7,1040},{6,7,416},{7,6,1537},{6,7,929},{6,7,929},{3,7,9},{7,6,1513},{3,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9280},{4,0,9280},{4,0,9280},{4,0,9280},{3,7,3125}, +{3,7,3125},{3,7,3125},{2,7,272},{0,7,2642},{0,7,2642},{4,7,59414},{4,7,41414},{3,7,24952},{3,7,19100},{4,7,55014},{3,7,27085},{2,7,10021},{2,7,656},{1,7,52310},{0,7,5046},{7,7,1142},{7,7,1070},{7,7,1021},{6,7,416},{7,7,1538},{6,7,1025},{6,7,625},{4,7,4},{6,7,1529},{4,7,4},{5,7,13964},{5,7,13964},{5,7,13964},{4,7,11305},{4,7,10505},{3,7,6665},{3,7,6665}, 
+{2,7,592},{2,7,9973},{0,7,2930},{7,7,1021},{7,7,1021},{7,7,1021},{6,7,416},{7,6,1105},{6,7,625},{6,7,625},{4,7,4},{6,7,1129},{4,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9280},{4,0,9280},{4,0,9280},{4,0,9280},{3,7,3301},{3,7,3301},{3,7,3301},{2,7,592},{0,7,2930}, +{0,7,2930}, diff --git a/src/deps/basis-universal/transcoder/basisu_transcoder_uastc.h b/src/deps/basis-universal/transcoder/basisu_transcoder_uastc.h new file mode 100644 index 0000000000..d501a2af6e --- /dev/null +++ b/src/deps/basis-universal/transcoder/basisu_transcoder_uastc.h @@ -0,0 +1,297 @@ +// basisu_transcoder_uastc.h +#pragma once +#include "basisu_transcoder_internal.h" + +namespace basist +{ + struct color_quad_u8 + { + uint8_t m_c[4]; + }; + + const uint32_t TOTAL_UASTC_MODES = 19; + const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8; + + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30; + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11; + const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19; + + extern const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES]; + + struct astc_bc7_common_partition2_desc + { + uint8_t m_bc7; + uint16_t m_astc; + bool m_invert; + }; + + extern const astc_bc7_common_partition2_desc 
g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2]; + + struct bc73_astc2_common_partition_desc + { + uint8_t m_bc73; + uint16_t m_astc2; + uint8_t k; // 0-5 - how to modify the BC7 3-subset pattern to match the ASTC pattern (LSB=invert) + }; + + extern const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS]; + + struct astc_bc7_common_partition3_desc + { + uint8_t m_bc7; + uint16_t m_astc; + uint8_t m_astc_to_bc7_perm; // converts ASTC to BC7 partition using g_astc_bc7_partition_index_perm_tables[][] + }; + + extern const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3]; + + extern const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16]; + extern const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16]; + extern const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16]; + + extern const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3]; + extern const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3]; + extern const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3]; + + extern const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2]; + + extern const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3]; + extern const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3]; // inverse of g_astc_to_bc7_partition_index_perm_tables + + extern const uint8_t* s_uastc_to_bc1_weights[6]; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k); + + inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w, bool srgb) + { + if (srgb) + { + l = (l << 8) | 0x80; + h = (h << 8) | 0x80; + } + else + { + l = (l << 8) | l; + h = (h << 8) | h; + } + + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; + + return k >> 8; + } + + struct astc_block_desc + { + int m_weight_range; // weight BISE range + + int 
m_subsets; // number of ASTC partitions + int m_partition_seed; // partition pattern seed + int m_cem; // color endpoint mode used by all subsets + + int m_ccs; // color component selector (dual plane only) + bool m_dual_plane; // true if dual plane + + // Weight and endpoint BISE values. + // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order + }; + + const uint32_t BC7ENC_TOTAL_ASTC_RANGES = 21; + + // See tables 81, 93, 18.13.Endpoint Unquantization + const uint32_t TOTAL_ASTC_RANGES = 21; + extern const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3]; + + struct astc_quant_bin + { + uint8_t m_unquant; // unquantized value + uint8_t m_index; // sorted index + }; + + extern astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + int astc_get_levels(int range); + bool astc_is_valid_endpoint_range(uint32_t range); + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range); + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range); + + const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern); + + // BC7 + const uint32_t BC7ENC_BLOCK_SIZE = 16; + + struct bc7_block + { + uint64_t m_qwords[2]; + }; + + struct bc7_optimization_results + { + uint32_t m_mode; + uint32_t m_partition; + uint8_t m_selectors[16]; + uint8_t m_alpha_selectors[16]; + color_quad_u8 m_low[3]; + color_quad_u8 m_high[3]; + uint32_t m_pbits[3][2]; + uint32_t m_index_selector; + uint32_t m_rotation; + }; + + extern const uint32_t g_bc7_weights1[2]; + extern const uint32_t g_bc7_weights2[4]; + extern const uint32_t g_bc7_weights3[8]; + extern const uint32_t g_bc7_weights4[16]; + extern const 
uint32_t g_astc_weights4[16]; + extern const uint32_t g_astc_weights5[32]; + extern const uint32_t g_astc_weights_3levels[3]; + extern const uint8_t g_bc7_partition1[16]; + extern const uint8_t g_bc7_partition2[64 * 16]; + extern const uint8_t g_bc7_partition3[64 * 16]; + extern const uint8_t g_bc7_table_anchor_index_second_subset[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_1[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_2[64]; + extern const uint8_t g_bc7_num_subsets[8]; + extern const uint8_t g_bc7_partition_bits[8]; + extern const uint8_t g_bc7_color_index_bitcount[8]; + extern const uint8_t g_bc7_mode_has_p_bits[8]; + extern const uint8_t g_bc7_mode_has_shared_p_bits[8]; + extern const uint8_t g_bc7_color_precision_table[8]; + extern const int8_t g_bc7_alpha_precision_table[8]; + extern const uint8_t g_bc7_alpha_index_bitcount[8]; + + inline bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || (mode == 5); } + inline int get_bc7_color_index_size(int mode, int index_selection_bit) { return g_bc7_color_index_bitcount[mode] + index_selection_bit; } + inline int get_bc7_alpha_index_size(int mode, int index_selection_bit) { return g_bc7_alpha_index_bitcount[mode] - index_selection_bit; } + + struct endpoint_err + { + uint16_t m_error; uint8_t m_lo; uint8_t m_hi; + }; + + extern endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + const uint32_t BC7ENC_MODE_6_OPTIMAL_INDEX = 5; + + extern endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + const uint32_t BC7ENC_MODE_5_OPTIMAL_INDEX = 1; + + // Packs a BC7 block from a high-level description. Handles all BC7 modes. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults); + + // Packs an ASTC block + // Constraints: Always 4x4, all subset CEM's must be equal, only tested with LDR CEM's. 
+ bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t mode); + + void pack_astc_solid_block(void* pDst_block, const color32& color); + +#ifdef _DEBUG + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); +#endif + + struct uastc_block + { + union + { + uint8_t m_bytes[16]; + uint32_t m_dwords[4]; + +#ifndef __EMSCRIPTEN__ + uint64_t m_qwords[2]; +#endif + }; + }; + + struct unpacked_uastc_block + { + astc_block_desc m_astc; + + uint32_t m_mode; + uint32_t m_common_pattern; + + color32 m_solid_color; + + bool m_bc1_hint0; + bool m_bc1_hint1; + + bool m_etc1_flip; + bool m_etc1_diff; + uint32_t m_etc1_inten0; + uint32_t m_etc1_inten1; + + uint32_t m_etc1_bias; + + uint32_t m_etc2_hints; + + uint32_t m_etc1_selector; + uint32_t m_etc1_r, m_etc1_g, m_etc1_b; + }; + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); + + struct decoder_etc_block; + struct eac_block; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb); + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool undo_blue_contract, bool read_hints = true); + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst); + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst); + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst); + bool 
transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel); + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst); + + // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); + + enum + { + cEncodeBC1HighQuality = 1, + cEncodeBC1HigherQuality = 2, + cEncodeBC1UseSelectors = 4, + }; + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); + + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); + + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst); + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality); + + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); + bool transcode_uastc_to_pvrtc1_4_rgba(const 
uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality); + + // uastc_init() MUST be called before using this module. + void uastc_init(); + +} // namespace basist diff --git a/src/deps/basis-universal/zstd/zstd.c b/src/deps/basis-universal/zstd/zstd.c new file mode 100644 index 0000000000..eaf1373835 --- /dev/null +++ b/src/deps/basis-universal/zstd/zstd.c @@ -0,0 +1,38717 @@ +/** + * \file zstd.c + * Single-file Zstandard library. + * + * Generate using: + * \code + * combine.sh -r ../../lib -o zstd.c zstd-in.c + * \endcode + */ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/* + * Settings to bake for the single library file. + * + * Note: It's important that none of these affects 'zstd.h' (only the + * implementation files we're amalgamating). + * + * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also + * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). + * + * Note: multithreading is enabled for all platforms apart from Emscripten. + */ +#define DEBUGLEVEL 0 +#define MEM_MODULE +#undef XXH_NAMESPACE +#define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API +#define XXH_PRIVATE_API +#undef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#define ZSTD_LEGACY_SUPPORT 0 +#ifndef __EMSCRIPTEN__ +#define ZSTD_MULTITHREAD +#endif +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. 
*/ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. 
+ * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +/**** start inlining debug.h ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 
1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ +/**** ended inlining debug.h ****/ + +int g_debuglevel = DEBUGLEVEL; +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +/**** start inlining mem.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. 
+ * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || 
(__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +#if ZSTD_MEMORY_SANITIZER +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +#if ZSTD_ADDRESS_SANITIZER +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. 
This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. 
*/ +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#endif + +#endif /* ZSTD_COMPILER_H */ +/**** ended inlining compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: zstd_deps.h ****/ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else +# include +#if CHAR_BIT != 8 +# error "this implementation requires char to be exactly 8-bit type" +#endif + typedef unsigned char BYTE; +#if USHRT_MAX != 65535 +# error "this implementation requires short to be exactly 16-bit type" +#endif + typedef unsigned short U16; + typedef signed short S16; +#if UINT_MAX != 4294967295 +# error "this implementation requires int to be exactly 32-bit type" +#endif + typedef unsigned int U32; + typedef signed int S32; +/* note : there are no limits defined for long long type in C90. 
+ * limits exist in C99, however, in such case, is preferred */ + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + 
+/*-************************************************************** +* Memory I/O Implementation +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { 
U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) 
+ return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void 
MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ +/**** ended inlining mem.h ****/ +/**** start inlining error_private.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ +/**** start inlining zstd_errors.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ +/**** ended inlining zstd_errors.h ****/ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const 
char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/**** ended inlining error_private.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +/**** start inlining fse.h ****/ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif 
+ +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. 
+*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. 
decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. 
+ @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. 
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). 
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. 
+ @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
(ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +/**** start inlining bitstream.h ****/ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +/**** skipping file: mem.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ + + +/*========================================= +* Target specific +=========================================*/ +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? 
STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. 
+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
+*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream 
encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! 
BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. 
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! 
BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. 
+ * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). 
+ */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/**** ended inlining bitstream.h ****/ + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. 
+ */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, 
unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. 
+*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. 
+ BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). 
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value 
+ symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + 
assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining fse.h ****/ +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +/**** start inlining huf.h ****/ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +/**** skipping file: zstd_deps.h ****/ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. 
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +/**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, 
void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. 
+ */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. 
+ * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t 
maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining huf.h ****/ + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + 
return _BitScanForward(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; + int previous0 = 0; + + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 8); + + /* init */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; + } else { + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * 
repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. 
+ */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + 
+/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +/**** skipping file: error_private.h ****/ + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case 
PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: debug.h ****/ +/**** skipping file: bitstream.h ****/ +/**** skipping file: compiler.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + ZSTD_free(dt); +} + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 
32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if 
(BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DTable* const dtable = (FSE_DTable*)workSpace; + + /* normal FSE decoding mode */ + size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = 
dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + /* Static analyzer seems unable to 
understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); +} +#endif + + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/threading.c ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/**** start inlining threading.h ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +/**** skipping file: debug.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#if DEBUGLEVEL < 1 + +#define 
ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* DEBUGLEVEL >= 1 */ + +/* Debug implementation of threading. + * In this implementation we use pointers for mutexes and condition variables. + * This way, if we forget to init/destroy them the program will crash or ASAN + * will report leaks. 
+ */ + +#define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + +#define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#endif + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ +/**** ended inlining threading.h ****/ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useless_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + 
*/ + + +/* === Dependencies === */ +#include +#include + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) +{ + *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; + return pthread_mutex_init(*mutex, attr); +} + +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) +{ + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + ZSTD_free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) +{ + *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) +{ + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + ZSTD_free(*cond); + return ret; + } +} + +#endif +/**** ended inlining 
common/threading.c ****/ +/**** start inlining common/pool.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Dependencies ======= */ +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: debug.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include +#endif +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). 
+ + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 9 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. 
+ * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). 
+ * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. 
+ * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. 
+ * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supersedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. 
+ * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. 
+ * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. 
+ */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. 
+ * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! 
ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. 
+ */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. 
+ * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. 
+* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). 
+* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + * note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. 
+ * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. 
+ * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). 
+ */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. 
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. 
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
*/ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! 
ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. 
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. 
+ * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1< 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. 
*/ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. 
+ * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/*************************************** +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
+ * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. 
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. 
It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. 
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. 
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! 
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. 
+ * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). 
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always decompress directly into the output buffer, instead of decompressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ *
+ */
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
+/*! ZSTD_CCtx_getParameter() :
+ * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ * and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ * Quick howto :
+ * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ * an existing ZSTD_CCtx_params structure.
+ * This is similar to
+ * ZSTD_CCtx_setParameter().
+ * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ * an existing CCtx.
+ * These parameters will be applied to
+ * all subsequent frames.
+ * - ZSTD_compressStream2() : Do compression using the CCtx.
+ * - ZSTD_freeCCtxParams() : Free the memory.
+ *
+ * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
+ * for static allocation of CCtx for single-threaded compression.
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. 
+ * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
+
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
+ */
+#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
+ *
+ * Param has values of type ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
+ *
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
+ */
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
+
+
+/*! ZSTD_DCtx_setFormat() :
+ * Instruct the decoder context about what kind of data to decode next.
+ * This instruction is mandatory to decode data without a fully-formed header,
+ * such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
*/ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). 
+ ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! 
ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
+ But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. 
+*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. 
+ +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. 
+*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. 
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. 
+For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 
+*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. 
*/ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). 
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + 
((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? 
*(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, 
XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, 
good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + 
return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); 
p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) 
return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if 
(state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) 
{ + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. 
*/ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. 
*/ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* Offset code of the sequence */ + U16 litLength; + U16 matchLength; +} seqDef; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. 
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! 
*/ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ +/**** start inlining pool.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/**** skipping file: zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +/**** skipping file: ../zstd.h ****/ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. 
+*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. 
+ */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif +/**** ended inlining pool.h ****/ + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +/**** skipping file: threading.h ****/ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. 
+ */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { + return POOL_create (numThreads, 0); +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job 
queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) { POOL_free(ctx); return NULL; } + } + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); +} + +void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { + POOL_free (pool); +} + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_customFree(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity 
= threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, 
opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_poolCtx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_poolCtx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_poolCtx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ +/**** ended inlining common/pool.c ****/ +/**** start inlining common/zstd_common.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** skipping file: zstd_internal.h ****/ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! 
ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining compress/fse_compress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** start inlining hist.h ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. 
+ */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). 
+ */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); +/**** ended inlining hist.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
+ * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + + U32 highThreshold = tableSize-1; + + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } + + assert(position==0); /* Must have initialized all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)ZSTD_malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 
+{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. 
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ZSTD_div64((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = 
(short)weight; + tmpTotal = end; + } } } + + return 0; +} + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue, unsigned useLowProbCount) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; + U64 const scale = 62 - tableLog; + U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, 
"%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` size must be `(1< not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) ); + + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + union { + U32 hist_wksp[HIST_WKSP_SIZE_U32]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + } workspace; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} +#endif + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining compress/fse_compress.c ****/ +/**** start inlining 
compress/hist.c ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: hist.h ****/ + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. 
+ * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + /* safety checks */ + assert(*maxSymbolValuePtr <= 255); + if (!sourceSize) { + ZSTD_memset(count, 0, countSize); + *maxSymbolValuePtr = 0; + return 0; + } + ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip max) max = Counting1[s]; + } } + + { unsigned maxSymbolValue = 255; + while 
(!Counting1[maxSymbolValue]) maxSymbolValue--; + if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); + *maxSymbolValuePtr = maxSymbolValue; + ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ + } + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. 
+ * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); +} +#endif +/**** ended inlining compress/hist.c ****/ +/**** start inlining compress/huf_compress.c ****/ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. 
+ * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return 
FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. + */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 +static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); + { CHECK_V_F(cSize, 
FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return (size_t)(op-ostart); +} + + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; + + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; n 0); + + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 curr = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = curr; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + 
} } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; + + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. 
+ */ + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; + + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } + + while (totalCost > 0) { + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
*/ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ + totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. + * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). 
+ */ + if (rankLast[1] == noSymbol) { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + +typedef struct { + U32 base; + U32 curr; +} rankPos; + +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/** + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. 
+ */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; + } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). 
+ */ +static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) +{ + nodeElt* const huffNode0 = huffNode - 1; + int nonNullRank; + int lowS, lowN; + int nodeNb = STARTNODE; + int n, nodeRoot; + /* init for parents */ + nonNullRank = (int)maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + return nonNullRank; +} + +/** + * HUF_buildCTableFromTree(): + * Build the CTable given the Huffman tree in huffNode. + * + * @param[out] CTable The output Huffman CTable. + * @param huffNode The Huffman tree. + * @param nonNullRank The last and smallest node in the Huffman tree. + * @param maxSymbolValue The maximum symbol value. + * @param maxNbBits The exact maximum number of bits used in the Huffman tree. 
 */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+}
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                     void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+
+    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+
+    return maxNbBits;
+}
+
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int bad = 0;
+  int s;
+  for (s = 0; s <=
(int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if 
DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= 
oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + HUF_buildCTable_wksp_tables buildCTable_wksp; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace_align4, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + + /* checks & inits */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* 
Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize 
|| hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
+ * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +{ + HUF_buildCTable_wksp_tables workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} +#endif +/**** ended inlining compress/huf_compress.c ****/ +/**** start inlining compress/zstd_compress_literals.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_literals.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +/**** start inlining zstd_compress_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** start inlining ../common/zstd_trace.h ****/ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. 
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_TRACE_H
+#define ZSTD_TRACE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* weak symbol support */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
+    !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
+    !defined(__CYGWIN__)
+# define ZSTD_HAVE_WEAK_SYMBOLS 1
+#else
+# define ZSTD_HAVE_WEAK_SYMBOLS 0
+#endif
+#if ZSTD_HAVE_WEAK_SYMBOLS
+# define ZSTD_WEAK_ATTR __attribute__((__weak__))
+#else
+# define ZSTD_WEAK_ATTR
+#endif
+
+/* Only enable tracing when weak symbols are available. */
+#ifndef ZSTD_TRACE
+# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
+#endif
+
+#if ZSTD_TRACE
+
+struct ZSTD_CCtx_s;
+struct ZSTD_DCtx_s;
+struct ZSTD_CCtx_params_s;
+
+typedef struct {
+    /**
+     * ZSTD_VERSION_NUMBER
+     *
+     * This is guaranteed to be the first member of ZSTD_trace.
+     * Otherwise, this struct is not stable between versions. If
+     * the version number does not match your expectation, you
+     * should not interpret the rest of the struct.
+     */
+    unsigned version;
+    /**
+     * Non-zero if streaming (de)compression is used.
+     */
+    unsigned streaming;
+    /**
+     * The dictionary ID.
+     */
+    unsigned dictionaryID;
+    /**
+     * Is the dictionary cold?
+     * Only set on decompression.
+     */
+    unsigned dictionaryIsCold;
+    /**
+     * The dictionary size or zero if no dictionary.
+     */
+    size_t dictionarySize;
+    /**
+     * The uncompressed size of the data.
+     */
+    size_t uncompressedSize;
+    /**
+     * The compressed size of the data.
+     */
+    size_t compressedSize;
+    /**
+     * The fully resolved CCtx parameters (NULL on decompression).
+     */
+    struct ZSTD_CCtx_params_s const* params;
+    /**
+     * The ZSTD_CCtx pointer (NULL on decompression).
+ */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. 
+ */ +void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ +/**** ended inlining ../common/zstd_trace.h ****/ +/**** start inlining zstd_cwksp.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. 
+ */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... 
] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. 
+ */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. (Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. 
+ */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} + +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; + + if (bytes == 0) + return NULL; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on sizeof(unsigned). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} + +/** + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so 
there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. */ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. 
+ */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } +#endif + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. 
+ */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). + */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). 
+ */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ +/**** ended inlining zstd_cwksp.h ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining zstdmt_compress.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * These APIs used to be exposed with ZSTDLIB_API, + * because it used to be the only way to invoke MT compression. + * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. + * + * This API requires ZSTD_MULTITHREAD to be defined during compilation, + * otherwise ZSTDMT_createCCtx*() will fail. + */ + +/* === Dependencies === */ +/**** skipping file: ../common/zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +/**** skipping file: ../zstd.h ****/ + + +/* === Constants === */ +#ifndef ZSTDMT_NBWORKERS_MAX +# define ZSTDMT_NBWORKERS_MAX 200 +#endif +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1 MB) +#endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem, + ZSTD_threadPool *pool); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + +/* === Streaming functions === */ + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); + +/*! 
ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + /*! ZSTDMT_toFlushNow() + * Tell how many bytes are ready to be flushed immediately. + * Probe the oldest active job (not yet entirely flushed) and check its output buffer. + * If return 0, it means there is no active job, + * or, it means oldest job is still active, but everything produced has been flushed so far, + * therefore flushing is limited by speed of oldest job. */ +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/*! ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. 
+ */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ +/**** ended inlining zstdmt_compress.h ****/ +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. + This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + 
FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. */ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 
litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. 
+ */ + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? 
ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? 
(U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} + +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. + */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). + * `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. 
+*/ +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +{ + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _tzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanForward64( &r, (U64)val ) ? 
(unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _lzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + return _BitScanReverse( &r, (unsigned long)val ) ? 
(unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static 
size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} + +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 + +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} + +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} + +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. + */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +{ + return ZSTD_ipow(prime8bytes, length - 1); +} + +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. 
+ */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +{ + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} + +/*-************************************* +* Round buffer management +***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. + */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : + ZSTD_noDict; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. 
+ */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + return curr > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const currentCycle0 = curr & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; + U32 const newCurrent = currentCycle1 + maxDist; + U32 const correction = curr - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(curr > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + + window->base += correction; + window->dictBase += correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. 
*/ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. + * + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. + * + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. 
+ In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. + In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. 
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } +} + +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + ZSTD_memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. 
+ */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); + /* Check if blocks follow each other */ + if (src != window->nextSrc) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + /* ms->nextToUpdate = window->dictLimit; */ + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? 
curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary + * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't + * valid for the entire block. So this check is sufficient to find the lowest valid match index. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When computing the lowest prefix index we need to take the dictionary into account to handle + * the edge case where the dictionary and the source are contiguous in memory. + */ + U32 const matchLowest = isDictionary ? 
lowestValid : withinWindow; + return matchLowest; +} + + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + + +#if defined (__cplusplus) +} +#endif + +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. 
To be called from zstdmt_compress.c. */ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params); + + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small ( 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); + +/** ZSTD_CCtx_trace() : + * Trace the end of a compression call. + */ +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); + +#endif /* ZSTD_COMPRESS_H */ +/**** ended inlining zstd_compress_internal.h ****/ + + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2); + +#endif /* ZSTD_COMPRESS_LITERALS_H */ +/**** ended inlining zstd_compress_literals.h ****/ + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : 
flSize is {1,2,3} */ + assert(0); + } + + ZSTD_memcpy(ostart + flSize, src, srcSize); + DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); + return srcSize + flSize; +} + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); + return flSize+1; +} + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? 
don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + 
MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} +/**** ended inlining compress/zstd_compress_literals.c ****/ +/**** start inlining compress/zstd_compress_sequences.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_sequences.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ +/**** ended inlining zstd_compress_sequences.h ****/ + +/** + * -log2(x / 256) lookup table for x in [0, 256). 
+ * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * comprssibility. 
+ */ + return nbSeq >= 2048; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
+ */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? 
(unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. 
When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch 
(type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i 
(dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", 
(int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* 
CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} +/**** ended inlining compress/zstd_compress_sequences.c ****/ +/**** start inlining compress/zstd_compress_superblock.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_superblock.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ +/**** ended inlining zstd_compress_superblock.h ****/ + +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: hist.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ + +/*-************************************* +* Superblock entropy buffer structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. 
+ * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? 
don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + 
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * 
sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + ZSTD_memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), 
+ cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; +} + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. 
+ * + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... 
*/ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. 
*/ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. 
+ * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = 
(BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. 
+ */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 
= lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? 
ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, 
+ const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. 
*/ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. 
+ * For example, it would recount literal distribution and symbol codes everytime. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. 
+ */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} +/**** ended inlining compress/zstd_compress_superblock.c ****/ +/**** start inlining compress/zstd_compress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** start inlining ../common/cpu.h ****/ +/* + * Copyright (c) 2018-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +/**** skipping file: mem.h ****/ + +#ifdef _MSC_VER +#include +#endif + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl 
%%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. 
*/ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ +/**** start inlining zstd_fast.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_fast.h ****/ +/**** start inlining zstd_double_fast.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ +/**** ended inlining zstd_double_fast.h ****/ +/**** start inlining zstd_lazy.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. 
+ */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, 
seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ +/**** ended inlining zstd_lazy.h ****/ +/**** start inlining zstd_opt.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_OPT_H */ +/**** ended inlining zstd_opt.h ****/ +/**** start inlining zstd_ldm.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. 
+ * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. + */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). 
+ * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_ldm.h ****/ +/**** skipping file: zstd_compress_superblock.h ****/ + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ +#ifndef ZSTD_COMPRESS_HEAPMODE +# define ZSTD_COMPRESS_HEAPMODE 0 +#endif + + +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). 
+ */ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) 
return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_customFree(cctx, cctx->customMem); + } + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. 
+ */ +static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); + cctxParams.cParams = cParams; + + if (ZSTD_CParams_shouldEnableLdm(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); + cctxParams.ldmParams.enableLdm = 1; + /* LDM is enabled by default for optimal parser and window size >= 128MB */ + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + + assert(!ZSTD_checkCParams(cParams)); + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); + params->customMem = customMem; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_customFree(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + 
cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. + */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_init_internal(cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return 0; +} + +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param param Validated zstd parameters. + */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +{ + assert(!ZSTD_checkCParams(params->cParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. 
+ */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; +} + +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_NBWORKERS_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_jobSize: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_JOBSIZE_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + 
bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. 
+ */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not 
compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return 
CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { 
+ const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; +#endif + + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif + + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; +#endif + + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; +#endif + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + + case 
ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound, "Param out of bounds!"); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return 
CCtxParams->validateSequences; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif 
+ case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; +#endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. 
 */
+size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
+    /* Parameter overrides are only legal before a session starts, and only
+     * when no cdict is attached (a cdict pins some of these parameters). */
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "The context is in the wrong stage!");
+    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
+                    "Can't override parameters with cdict attached (some must "
+                    "be inherited from the cdict).");
+
+    cctx->requestedParams = *params;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't set pledgedSrcSize when not in init stage.");
+    /* Stored as size+1 so that 0 can encode "unknown" -- presumably decoded
+     * elsewhere as pledgedSrcSizePlusOne - 1; confirm against the readers. */
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    return 0;
+}
+
+/* Forward declarations: dedicated-dict-search variants defined later in this file. */
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
+        int const compressionLevel,
+        size_t const dictSize);
+static int ZSTD_dedicatedDictSearch_isSupported(
+        const ZSTD_compressionParameters* cParams);
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams);
+
+/**
+ * Initializes the local dict using the requested parameters.
+ * NOTE: This does not use the pledged src size, because it may be used for more
+ * than one compression.
+ */
+static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+{
+    ZSTD_localDict* const dl = &cctx->localDict;
+    if (dl->dict == NULL) {
+        /* No local dictionary: the asserts check the struct is fully cleared. */
+        assert(dl->dictBuffer == NULL);
+        assert(dl->cdict == NULL);
+        assert(dl->dictSize == 0);
+        return 0;
+    }
+    if (dl->cdict != NULL) {
+        assert(cctx->cdict == dl->cdict);
+        /* Local dictionary already initialized.
*/ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + 
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} + + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. 
+ @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_dictAndWindowLog() : + * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * The zstd format says that the entire dictionary is valid if one byte of the dictionary + * is within the window. So the hashLog and chainLog should be large enough to reference both + * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing + * the hashLog and windowLog. 
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. + */ +static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +{ + const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; + /* No dictionary ==> No change */ + if (dictSize == 0) { + return windowLog; + } + assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ + { + U64 const windowSize = 1ULL << windowLog; + U64 const dictAndWindowSize = dictSize + windowSize; + /* If the window size is already large enough to fit both the source and the dictionary + * then just use the window size. Otherwise adjust so that it fits the dictionary and + * the window. + */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize, + ZSTD_cParamMode_e mode) +{ + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. 
We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; + case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t 
dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = chainSize * sizeof(U32)
+                            + hSize * sizeof(U32)
+                            + h3Size * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                          ? optPotentialSpace
+                          : 0;
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace;
+}
+
+static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        const ZSTD_compressionParameters* cParams,
+        const ldmParams_t* ldmParams,
+        const int isStatic,
+        const size_t buffInSize,
+        const size_t buffOutSize,
+        const U64 pledgedSrcSize)
+{
+    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+    U32 const divider = (cParams->minMatch==3) ? 3 : 4;
+    size_t const maxNbSeq = blockSize / divider;
+    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
+    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+
+    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
+    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
+    size_t const ldmSeqSpace = ldmParams->enableLdm ?
+ ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + + + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. 
*/ + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCCtxSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) + ? ((size_t)1 << cParams.windowLog) + blockSize + : 0; + size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) + ? 
ZSTD_compressBound(blockSize) + 1 + : 0; + + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, + ZSTD_CONTENTSIZE_UNKNOWN); + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! 
ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). + */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). 
+ */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4, "reserving optimal parser space");
+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    }
+
+    ms->cParams = *cParams;
+
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    return 0;
+}
+
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than
reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). + * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + +/*! ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, + U64 const pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + zc->isFirstBlock = 1; + + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + ? 
windowSize + blockSize + : 0; + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + + int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + ZSTD_indexResetPolicy_e needsIndexReset = + (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset; + + size_t const neededSpace = + ZSTD_estimateCCtxSize_usingCCtxParams_internal( + ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, + buffInSize, buffOutSize, pledgedSrcSize); + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); + + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); + + /* Check if workspace is large enough, alloc a new one if needed */ + { + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); + + DEBUGLOG(4, "Need %zu B workspace", neededSpace); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (workspaceTooSmall || workspaceWasteful) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, + neededSpace >> 10); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + + needsIndexReset = ZSTDirp_reset; + + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); + + DEBUGLOG(5, "reserving object space"); + /* Statically sized space. 
+ * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } + + ZSTD_cwksp_clear(ws); + + /* init params */ + zc->appliedParams = params; + zc->blockState.matchState.cParams = params.cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->dictContentSize = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
+ */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + /* buffers */ + zc->bufferedPolicy = zbuff; + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const numBuckets = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms.cParams, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); + + /* ldm hash table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/
+        size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+        zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+        ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+        zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+        zc->maxNbLdmSequences = maxNbLdmSeq;
+
+        ZSTD_window_init(&zc->ldmState.window);
+        ZSTD_window_clear(&zc->ldmState.window);
+        zc->ldmState.loadedDictEnd = 0;
+    }
+
+    /* Due to alignment, when reusing a workspace, we can actually consume
+     * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
+     */
+    assert(ZSTD_cwksp_used(ws) >= neededSpace &&
+           ZSTD_cwksp_used(ws) <= neededSpace + 3);
+
+    DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+    zc->initialized = 1;
+
+    return 0;
+    }
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+}
+
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
+ */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0! 
*/ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. 
*/ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + + DEBUGLOG(4, "copying dictionary into context"); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + + /* copy tables */ + { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cdict_cParams->chainLog); + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. 
*/ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. 
*/ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! 
ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. + * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. 
*/ + __msan_unpoison(table, size * sizeof(U32)); +#endif + + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; columncParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (params->cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. 
*/ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + +/* ZSTD_entropyCompressSequences_internal(): + * actually compresses both literals and sequences */ +MEM_STATIC size_t +ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + BYTE* seqHead; + BYTE* lastNCount = NULL; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + 
&prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + assert(op <= oend); + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, + count, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->fse.litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + LL_defaultNorm, 
LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + count, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->fse.offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + 
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + count, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->fse.matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. 
+ */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); +} + +MEM_STATIC size_t +ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_entropyCompressSequences_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
+ */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, 
+ NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = 
zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + 
ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; + repcodes_t updatedRepcodes; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; + } else { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + } + } + } + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and 
ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. + */ + assert(seqStoreLiteralsSize >= literalsRead); + lastLLSize = seqStoreLiteralsSize - literalsRead; + outSeqs[i].litLength = (U32)lastLLSize; + outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; + seqStoreSeqSize++; + zc->seqCollector.seqIndex += seqStoreSeqSize; +} + +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. 
*/ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; + size_t i; + size_t u; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } + } + } + return 1; +} + +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +{ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. 
+ */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_confirmRepcodesAndEntropyTables(zc); + return 0; + } + + /* encode sequences and literals */ + cSize = ZSTD_entropyCompressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. 
+ */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. 
+ */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, 
params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, 
&cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 
0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + if (params->format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are 
supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. + */ +size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant) { + BYTE* op = (BYTE*)dst; + RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, + dstSize_tooSmall, "Not enough room for skippable frame"); + RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame"); + RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported"); + + MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); + MEM_writeLE32(op+4, (U32)srcSize); + ZSTD_memcpy(op+8, src, srcSize); + return srcSize + ZSTD_SKIPPABLEHEADERSIZE; +} + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong, + "wrong cctx stage"); + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + parameter_unsupported, + "incompatible with ldm"); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + cctx->externSeqStore.posInSequence = 0; + return 0; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + size_t fhSize = 0; + + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", + cctx->stage, (unsigned)srcSize); + RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, + "missing init 
(ZSTD_compressBegin)"); + + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, + cctx->pledgedSrcSizePlusOne-1, cctx->dictID); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + assert(fhSize <= dstCapacity); + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (!srcSize) return fhSize; /* do not generate an empty block if no input */ + + if (!ZSTD_window_update(&ms->window, src, srcSize)) { + ms->nextToUpdate = ms->window.dictLimit; + } + if (cctx->appliedParams.ldmParams.enableLdm) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + } + + if (!frame) { + /* overflow check and correction for block mode */ + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, + src, (BYTE const*)src + srcSize); + } + + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); + { size_t const cSize = frame ? + ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); + FORWARD_IF_ERROR(cSize, "%s", frame ? 
"ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! 
ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + ZSTD_window_update(&ms->window, src, srcSize); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + } + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + if (srcSize <= HASH_READ_SIZE) return 0; + + while (iend - ip > HASH_READ_SIZE) { + size_t const remaining = (size_t)(iend - ip); + size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); + const BYTE* const ichunk = ip + chunk; + + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + + if (params->ldmParams.enableLdm && ls != NULL) + ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); + + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, ichunk, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { + assert(chunk == remaining); /* must load everything in one go */ + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + } else if (chunk >= HASH_READ_SIZE) { + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + } + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + if (chunk >= HASH_READ_SIZE) + ZSTD_updateTree(ms, 
ichunk-HASH_READ_SIZE, ichunk); + break; + + default: + assert(0); /* not possible : not a valid strategy id */ + } + + ip = ichunk; + } + + ms->nextToUpdate = (U32)(iend - ms->window.base); + return 0; +} + + +/* Dictionaries that assign zero probability to symbols that show up causes problems + * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check + * and only dictionaries with 100% valid symbols can be assumed valid. + */ +static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) +{ + U32 s; + if (dictMaxSymbolValue < maxSymbolValue) { + return FSE_repeat_check; + } + for (s = 0; s <= maxSymbolValue; ++s) { + if (normalizedCounter[s] == 0) { + return FSE_repeat_check; + } + } + return FSE_repeat_valid; +} + +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize) +{ + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ + const BYTE* const dictEnd = dictPtr + dictSize; + dictPtr += 8; + bs->entropy.huf.repeatMode = HUF_repeat_check; + + { unsigned maxSymbolValue = 255; + unsigned hasZeroWeights = 1; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); + + /* We only set the loaded table as valid if it contains all non-zero + * weights. 
Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), 
dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format + */ +/*! 
ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { + size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); + } + return dictID; +} + +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + return 0; + } + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content 
dictionary detected"); + return ZSTD_loadDictionaryContent( + ms, ls, ws, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); +} + +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) + +/*! ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ +#if ZSTD_TRACE + cctx->traceCtx = ZSTD_trace_compress_begin(cctx); +#endif + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? 
+ ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! 
ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); +} + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); + } + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx 
= cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + ZSTD_CCtx_trace(cctx, endResult); + return cSize + endResult; +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctxParams); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", 
(unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + } + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; +#if ZSTD_COMPRESS_HEAPMODE + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); +#else + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ +#endif + return result; +} + + +/* ===== Dictionary API ===== */ + 
+/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 
0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params params) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { + cdict->matchState.dedicatedDictSearch = 0; + } + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; + + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + ¶ms.cParams, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes. 
+ */ + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = 
customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + cctxParams.cParams = cParams; + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 
0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_customFree(cdict, cMem); + } + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. 
+ */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + ZSTD_CCtx_params params; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + ZSTD_CCtxParams_init(¶ms, 0); + params.cParams = cParams; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + params) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
*/ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
+ * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 
+ * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. + * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + +/* note : cdict must outlive compression session 
*/ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. 
+ * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? 
ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + assert(output->pos <= output->size); + assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current 
block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); + } +#endif + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( 
ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. + */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); + if (endOp != ZSTD_e_end) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. 
*/ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict) + params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ + { + size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + ¶ms, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { + /* Enable LDM by default for optimal parser and window size >= 128MB */ + DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); + params.ldmParams.enableLdm = 1; + } + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = ZSTD_trace_compress_begin(cctx); +#endif + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict 
? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + cctx->streamStage = zcss_load; + cctx->appliedParams = params; + } else +#endif + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require to add a 3-bytes null block to end frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} + +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } + /* end of transparent 
initialization stage */ + + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + size_t flushMin; + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + for (;;) { + size_t const ipos = input->pos; + size_t const opos = output->pos; + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + + if (endOp == ZSTD_e_continue) { + /* We only require some progress with ZSTD_e_continue, not maximal progress. + * We're done if we've consumed or produced any bytes, or either buffer is + * full. + */ + if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) + break; + } else { + assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + /* We require maximal progress. We're done when the flush is complete or the + * output buffer is full. + */ + if (flushMin == 0 || output->pos == output->size) + break; + } + } + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. 
+ */ + assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); + ZSTD_setBufferExpectations(cctx, output, input); + return flushMin; + } +#endif + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. */ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + /* Reset to the original values. 
*/ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} + +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +/* Returns a ZSTD error code if sequence is not valid */ +static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { + size_t offsetBound; + U32 windowSize = 1 << windowLog; + /* posInSrc represents the amount of data the the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + offsetBound = posInSrc > windowSize ? 
(size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. 
So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. 
We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. 
+ */ +static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + size_t cSize = 0; + U32 lastBlock; + size_t blockSize; + size_t compressedSeqsSize; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t cBlockSize; + size_t additionalByteAdjustment; + lastBlock = remaining <= cctx->blockSize; + blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(4, "Working on new block. 
Blocksize: %zu", blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(cctx); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + DEBUGLOG(4, "cSize running total: %zu", cSize); + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + } + + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(3, "ZSTD_compressSequences()"); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx 
initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + XXH64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(3, "Final compressed size: %zu", cSize); + return cSize; +} + +/*====== Finalize ======*/ + +/*! ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for 
negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* 
level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + +static ZSTD_compressionParameters 
ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
+ * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + + /* row */ + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else row = compressionLevel; + + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + /* acceleration factor */ + if (compressionLevel < 0) { + int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); + cp.targetLength = (unsigned)(-clampedCompressionLevel); + } + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); + } +} + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
+ * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + ZSTD_memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} +/**** ended inlining compress/zstd_compress.c ****/ +/**** start inlining compress/zstd_double_fast.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_double_fast.h ****/ + + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = curr + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = curr + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + 
srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? 
+ dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + } + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) 
== MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(curr - 
matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip 
= istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = curr - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = curr + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = curr+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = curr - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* move to next sequence start */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (repIndex2 > dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_double_fast.c ****/ +/**** start inlining compress/zstd_fast.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_fast.h ****/ + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = 
ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 
1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor 
= ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; 
+ const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = curr; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + 
/* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const 
ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + assert(offset_1 <= curr +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = curr - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? 
dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_fast.c ****/ +/**** start inlining compress/zstd_lazy.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_lazy.h ****/ + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, 
commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? 
+ base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + 
const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? 
dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + 
dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 
0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart 
= base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. 
*/ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, 
offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. 
+ */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || countBeyondMinChain++ > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. 
*/ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... */ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = 
ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 
ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + 
ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +typedef enum { search_hashChain, search_binaryTree } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + + /** + * This table is indexed first by the four ZSTD_dictMode_e 
values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). + */ + const searchMax_f searchFuncs[4][2] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS + }, + { + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (isDxS) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. 
*/ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (isDxS) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) 
+ matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (isDxS) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? 
dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t 
ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* 
ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + + /* init */ + ip += (ip == prefixStart); + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? 
dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +} +/**** ended inlining compress/zstd_lazy.c ****/ +/**** start inlining compress/zstd_ldm.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_ldm.h ****/ + +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: zstd_fast.h ****/ +/**** skipping file: zstd_double_fast.h ****/ +/**** start inlining zstd_ldm_geartab.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 
0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 
0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 
0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ +/**** ended inlining zstd_ldm_geartab.h ****/ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. 
*/ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm ? 
totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); + +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. 
+ * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. 
*/ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +void ZSTD_ldm_fillHashTable( + ldmState_t* ldmState, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash >> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; + } +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). 
*/ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + curr - MIN(512, curr - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U32 const entsPerBucket = 1U << params->bucketSizeLog; + U32 const hBits = params->hashLog - params->bucketSizeLog; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? 
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + { + size_t n = 0; + + while (n < minMatchLength) { + numSplits = 0; + n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, + splits, &numSplits); + } + ip += minMatchLength; + } + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = 
checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + { + U32 const offset = (U32)(split - base) - 
bestEntry->offset; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. 
+ */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. 
*/ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. 
+ */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: 
srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} +/**** ended inlining compress/zstd_ldm.c ****/ +/**** start inlining compress/zstd_opt.c ****/ +/* + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. 
+ * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: hist.h ****/ +/**** skipping file: zstd_opt.h ****/ + + +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) + +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + + +/*-************************************* +* Price functions for optimal parser +***************************************/ + +#if 0 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) (opt ? 
ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +#endif + +MEM_STATIC U32 ZSTD_bitWeight(U32 stat) +{ + return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); +} + +MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) +{ + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * BITCOST_MULTIPLIER; + U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + BITCOST_ACCURACY < 31); + return weight; +} + +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ +MEM_STATIC double ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} +#endif + +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} + +static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) +{ + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); + optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); + optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +} + + +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +{ + U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); + assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); + for (s=0; s<lastEltIndex+1; s++) { + table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); + sum += table[s]; + } + return sum; +} + +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed
for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). 
+ * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + 
optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + *nextToUpdate3 = target; + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . 
 * @return : nb of positions added */
static U32 ZSTD_insertBt1(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hashLog = cParams->hashLog;
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const bt = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = ms->window.lowLimit;
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    /* speculative prefetch of the sorted tree's children for position curr-1 */
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

    assert(windowLow > 0);
    while (nbCompares-- && (matchIndex >= windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }  /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}

/* Inserts every position in [ms->nextToUpdate, ip) into the binary tree,
 * advancing by the skip amount returned by ZSTD_insertBt1. */
FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

/* Public wrapper: update the tree using cParams' minMatch, no dictionary. */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

/* Inserts the current position into the tree and collects into `matches` all
 * candidates of length >= lengthToBeat (repcodes first, then optional HC3
 * 3-byte finder, then binary-tree and dictMatchState lookups), stored in
 * order of strictly increasing length. Returns the number of matches found. */
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
                    ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                    ZSTD_matchState_t* ms,
                    U32* nextToUpdate3,
                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
                    const U32 rep[ZSTD_REP_NUM],
                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                    const U32 lengthToBeat,
                    U32 const mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex = hashTable[h];
    U32* const bt = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask = (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = repCode - ll0;
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }

    hashTable[h] = curr;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex >= matchLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
                            (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
            }
        }
    }

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}

/* Dispatch on cParams->minMatch so each case instantiates the `mls` template
 * parameter of ZSTD_insertBtAndGetAllMatches as a compile-time constant. */
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
                        ZSTD_matchState_t* ms,
                        U32* nextToUpdate3,
                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
                        const U32 rep[ZSTD_REP_NUM],
                        U32 const ll0,
                        U32 const lengthToBeat)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const matchLengthSearch = cParams->minMatch;
    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
    default :
    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
    case 7 :
    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
    }
}

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;         /* External match candidates store for this block */
    U32 startPosInBlock;            /* Start position of the current match candidate */
    U32 endPosInBlock;              /* End position of the current match candidate */
    U32 offset;                     /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    /* consume whole sequences while the remaining distance covers them */
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                                   U32 blockBytesRemaining) {
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
       based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    /* posInSequence < litLength: still inside the literal run; otherwise the
       literal run is consumed and posInSequence points into the match part */
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
 * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;

    /* Ensure that current block position is not outside of the match */
    /* candidate must cover currPosInBlock and still be at least MINMATCH long */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    /* only append when it is the new longest candidate, preserving the
       increasing-length ordering of 'matches' */
    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
                 candidateOffCode, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffCode;
        (*nbMatches)++;
    }
}

/*
ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const 
base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; + ZSTD_optLdm_t optLdm; + + optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; + optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; + ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(ip-istart), (U32)(iend - ip)); + if (!nbMatches) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = 
ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + } } + last_pos = pos-1; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? 
opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. 
+ */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + } + + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, (unsigned)llen, (unsigned)mlen); + + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } + + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + } /* while (ip < 
ilimit) */ + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} + + +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} + +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its contract must be respected. 
+ */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. 
+ * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. */ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * 
and is only specific for the first block (no prefix) */ +/**** ended inlining compress/zstd_opt.c ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining compress/zstdmt_compress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Constants ====== */ +#define ZSTDMT_OVERLAPLOG_DEFAULT 0 + + +/* ====== Dependencies ====== */ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_ldm.h ****/ +/**** skipping file: zstdmt_compress.h ****/ + +/* Guards code to support resizing the SeqPool. + * We will want to resize the SeqPool to save memory in the future. + * Until then, comment the code out since it is unused. 
+ */ +#define ZSTD_RESIZE_SEQPOOL 0 + +/* ====== Debug ====== */ +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \ + && !defined(_MSC_VER) \ + && !defined(__MINGW32__) + +# include +# include +# include + +# define DEBUG_PRINTHEX(l,p,n) { \ + unsigned debug_u; \ + for (debug_u=0; debug_u<(n); debug_u++) \ + RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ + RAWLOG(l, " \n"); \ +} + +static unsigned long long GetCurrentClockTimeMicroseconds(void) +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + { struct tms junk; clock_t newTicks = (clock_t) times(&junk); + return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); +} } + +#define MUTEX_WAIT_TIME_DLEVEL 6 +#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \ + if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \ + unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ + ZSTD_pthread_mutex_lock(mutex); \ + { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ + unsigned long long const elapsedTime = (afterTime-beforeTime); \ + if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ + elapsedTime, #mutex); \ + } } \ + } else { \ + ZSTD_pthread_mutex_lock(mutex); \ + } \ +} + +#else + +# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m) +# define DEBUG_PRINTHEX(l,p,n) {} + +#endif + + +/* ===== Buffer Pool ===== */ +/* a single Buffer Pool can be invoked from multiple threads in parallel */ + +typedef struct buffer_s { + void* start; + size_t capacity; +} buffer_t; + +static const buffer_t g_nullBuffer = { NULL, 0 }; + +typedef struct ZSTDMT_bufferPool_s { + ZSTD_pthread_mutex_t poolMutex; + size_t bufferSize; + unsigned totalBuffers; + unsigned nbBuffers; + ZSTD_customMem cMem; + buffer_t bTable[1]; /* variable size */ +} ZSTDMT_bufferPool; + +static ZSTDMT_bufferPool* 
ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc( + sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); + if (bufPool==NULL) return NULL; + if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { + ZSTD_customFree(bufPool, cMem); + return NULL; + } + bufPool->bufferSize = 64 KB; + bufPool->totalBuffers = maxNbBuffers; + bufPool->nbBuffers = 0; + bufPool->cMem = cMem; + return bufPool; +} + +static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) +{ + unsigned u; + DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool); + if (!bufPool) return; /* compatibility with free on NULL */ + for (u=0; utotalBuffers; u++) { + DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start); + ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem); + } + ZSTD_pthread_mutex_destroy(&bufPool->poolMutex); + ZSTD_customFree(bufPool, bufPool->cMem); +} + +/* only works at initialization, not during compression */ +static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) +{ + size_t const poolSize = sizeof(*bufPool) + + (bufPool->totalBuffers - 1) * sizeof(buffer_t); + unsigned u; + size_t totalBufferSize = 0; + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + for (u=0; utotalBuffers; u++) + totalBufferSize += bufPool->bTable[u].capacity; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + + return poolSize + totalBufferSize; +} + +/* ZSTDMT_setBufferSize() : + * all future buffers provided by this buffer pool will have _at least_ this size + * note : it's better for all buffers to have same size, + * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */ +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) +{ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, 
"ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); + bufPool->bufferSize = bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); +} + + +static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + if (srcBufPool==NULL) return NULL; + if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */ + return srcBufPool; + /* need a larger buffer pool */ + { ZSTD_customMem const cMem = srcBufPool->cMem; + size_t const bSize = srcBufPool->bufferSize; /* forward parameters */ + ZSTDMT_bufferPool* newBufPool; + ZSTDMT_freeBufferPool(srcBufPool); + newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (newBufPool==NULL) return newBufPool; + ZSTDMT_setBufferSize(newBufPool, bSize); + return newBufPool; + } +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid + * @return : a buffer, with start pointer and size + * note: allocation may fail, in this case, start==NULL and size==0 */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; + size_t const availBufferSize = buf.capacity; + bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { + /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.capacity); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return buf; + } + /* size conditions not respected : scratch this buffer, create new one */ + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); + ZSTD_customFree(buf.start, bufPool->cMem); + } + 
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* create new buffer */ + DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); + { buffer_t buffer; + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 
0 : bSize; + if (start != NULL) { + assert(newBuffer.capacity >= buffer.capacity); + ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity); + DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize); + return newBuffer; + } + DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!"); + } + return buffer; +} +#endif + +/* store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) +{ + DEBUGLOG(5, "ZSTDMT_releaseBuffer"); + if (buf.start == NULL) return; /* compatible with release on NULL */ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers < bufPool->totalBuffers) { + bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", + (U32)buf.capacity, (U32)(bufPool->nbBuffers-1)); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return; + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* Reached bufferPool capacity (should not happen) */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); + ZSTD_customFree(buf.start, bufPool->cMem); +} + + +/* ===== Seq Pool Wrapper ====== */ + +typedef ZSTDMT_bufferPool ZSTDMT_seqPool; + +static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool) +{ + return ZSTDMT_sizeof_bufferPool(seqPool); +} + +static rawSeqStore_t bufferToSeq(buffer_t buffer) +{ + rawSeqStore_t seq = kNullRawSeqStore; + seq.seq = (rawSeq*)buffer.start; + seq.capacity = buffer.capacity / sizeof(rawSeq); + return seq; +} + +static buffer_t seqToBuffer(rawSeqStore_t seq) +{ + buffer_t buffer; + buffer.start = seq.seq; + buffer.capacity = seq.capacity * sizeof(rawSeq); + return buffer; +} + +static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool) +{ + if (seqPool->bufferSize == 0) { + return kNullRawSeqStore; + } + return bufferToSeq(ZSTDMT_getBuffer(seqPool)); +} + +#if ZSTD_RESIZE_SEQPOOL +static rawSeqStore_t 
ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq))); +} +#endif + +static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq)); +} + +static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq) +{ + ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq)); +} + +static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (seqPool == NULL) return NULL; + ZSTDMT_setNbSeq(seqPool, 0); + return seqPool; +} + +static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool) +{ + ZSTDMT_freeBufferPool(seqPool); +} + +static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers) +{ + return ZSTDMT_expandBufferPool(pool, nbWorkers); +} + + +/* ===== CCtx Pool ===== */ +/* a single CCtx Pool can be invoked from multiple threads in parallel */ + +typedef struct { + ZSTD_pthread_mutex_t poolMutex; + int totalCCtx; + int availCCtx; + ZSTD_customMem cMem; + ZSTD_CCtx* cctx[1]; /* variable size */ +} ZSTDMT_CCtxPool; + +/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + int cid; + for (cid=0; cidtotalCCtx; cid++) + ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */ + ZSTD_pthread_mutex_destroy(&pool->poolMutex); + ZSTD_customFree(pool, pool->cMem); +} + +/* ZSTDMT_createCCtxPool() : + * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */ +static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers, + ZSTD_customMem cMem) +{ + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc( + sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem); + assert(nbWorkers > 0); + if (!cctxPool) return NULL; + if 
(ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) { + ZSTD_customFree(cctxPool, cMem); + return NULL; + } + cctxPool->cMem = cMem; + cctxPool->totalCCtx = nbWorkers; + cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ + cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem); + if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } + DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers); + return cctxPool; +} + +static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool, + int nbWorkers) +{ + if (srcPool==NULL) return NULL; + if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */ + /* need a larger cctx pool */ + { ZSTD_customMem const cMem = srcPool->cMem; + ZSTDMT_freeCCtxPool(srcPool); + return ZSTDMT_createCCtxPool(nbWorkers, cMem); + } +} + +/* only works during initialization phase, not during compression */ +static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool) +{ + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + { unsigned const nbWorkers = cctxPool->totalCCtx; + size_t const poolSize = sizeof(*cctxPool) + + (nbWorkers-1) * sizeof(ZSTD_CCtx*); + unsigned u; + size_t totalCCtxSize = 0; + for (u=0; ucctx[u]); + } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + assert(nbWorkers > 0); + return poolSize + totalCCtxSize; + } +} + +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool) +{ + DEBUGLOG(5, "ZSTDMT_getCCtx"); + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + if (cctxPool->availCCtx) { + cctxPool->availCCtx--; + { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx]; + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + return cctx; + } } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + DEBUGLOG(5, "create one more CCtx"); + return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! 
*/ +} + +static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return; /* compatibility with release on NULL */ + ZSTD_pthread_mutex_lock(&pool->poolMutex); + if (pool->availCCtx < pool->totalCCtx) + pool->cctx[pool->availCCtx++] = cctx; + else { + /* pool overflow : should not happen, since totalCCtx==nbWorkers */ + DEBUGLOG(4, "CCtx pool overflow : free cctx"); + ZSTD_freeCCtx(cctx); + } + ZSTD_pthread_mutex_unlock(&pool->poolMutex); +} + +/* ==== Serial State ==== */ + +typedef struct { + void const* start; + size_t size; +} range_t; + +typedef struct { + /* All variables in the struct are protected by mutex. */ + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + ZSTD_CCtx_params params; + ldmState_t ldmState; + XXH64_state_t xxhState; + unsigned nextJobID; + /* Protects ldmWindow. + * Must be acquired after the main mutex when acquiring both. + */ + ZSTD_pthread_mutex_t ldmWindowMutex; + ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */ + ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ +} serialState_t; + +static int +ZSTDMT_serialState_reset(serialState_t* serialState, + ZSTDMT_seqPool* seqPool, + ZSTD_CCtx_params params, + size_t jobSize, + const void* dict, size_t const dictSize, + ZSTD_dictContentType_e dictContentType) +{ + /* Adjust parameters */ + if (params.ldmParams.enableLdm) { + DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10); + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + } else { + ZSTD_memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); + } + serialState->nextJobID = 0; + if (params.fParams.checksumFlag) + XXH64_reset(&serialState->xxhState, 0); + if (params.ldmParams.enableLdm) { + ZSTD_customMem cMem = params.customMem; + unsigned const hashLog = params.ldmParams.hashLog; + size_t const hashSize = 
((size_t)1 << hashLog) * sizeof(ldmEntry_t); + unsigned const bucketLog = + params.ldmParams.hashLog - params.ldmParams.bucketSizeLog; + unsigned const prevBucketLog = + serialState->params.ldmParams.hashLog - + serialState->params.ldmParams.bucketSizeLog; + size_t const numBuckets = (size_t)1 << bucketLog; + /* Size the seq pool tables */ + ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); + /* Reset the window */ + ZSTD_window_init(&serialState->ldmState.window); + /* Resize tables and output space if necessary. */ + if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem); + } + if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem); + } + if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) + return 1; + /* Zero the tables */ + ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize); + ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets); + + /* Update window state and fill hash table with dict */ + serialState->ldmState.loadedDictEnd = 0; + if (dictSize > 0) { + if (dictContentType == ZSTD_dct_rawContent) { + BYTE const* const dictEnd = (const BYTE*)dict + dictSize; + ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); + ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, ¶ms.ldmParams); + serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base); + } else { + /* don't even load anything */ + } + } + + /* Initialize serialState's copy of ldmWindow. 
*/ + serialState->ldmWindow = serialState->ldmState.window; + } + + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + ZSTD_memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + DEBUGLOG(5, "wait for serialState->cond"); + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + 
&serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. + */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx 
*/ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +#define JOB_ERROR(e) { \ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ + job->cSize = e; \ + ZSTD_pthread_mutex_unlock(&job->job_mutex); \ + goto _endJob; \ +} + +/* ZSTDMT_compressionJob() is a POOL_function type */ +static void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! 
copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + size_t lastCBlockSize = 0; + + /* resources */ + if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation)); + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) + JOB_ERROR(ERROR(memory_allocation)); + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + /* Correct nbWorkers to 0. */ + jobParams.nbWorkers = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? 
job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); + } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + &jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) JOB_ERROR(hSize); + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + ip += chunkSize; + op += cSize; assert(op < oend); + /* stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * 
chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); + assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? + ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + lastCBlockSize = cSize; + } } + ZSTD_CCtx_trace(cctx, 0); + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0); + job->cSize += lastCBlockSize; + job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */ + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + BYTE* buffer; /* The 
round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end if the inBuff once + * the inBuff is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +#define RSYNC_LENGTH 32 + +typedef struct { + U64 hash; + U64 hitMask; + U64 primePower; +} rsyncState_t; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */ + inBuff_t inBuff; + roundBuff_t roundBuff; + serialState_t serial; + rsyncState_t rsync; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + unsigned allJobsCompleted; + unsigned long long frameContentSize; + unsigned long long consumed; + unsigned long long produced; + ZSTD_customMem cMem; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; + unsigned providedFactory: 1; +}; + +static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem) +{ + U32 jobNb; + if (jobTable == NULL) return; + for (jobNb=0; jobNb mtctx->jobIDMask+1) { /* need more job capacity */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + mtctx->jobIDMask = 0; + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem); + if (mtctx->jobs==NULL) return ERROR(memory_allocation); + assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + } + return 0; +} 
+ + +/* ZSTDMT_CCtxParam_setNbWorkers(): + * Internal use only */ +static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) +{ + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); +} + +MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool) +{ + ZSTDMT_CCtx* mtctx; + U32 nbJobs = nbWorkers + 2; + int initError; + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers); + + if (nbWorkers < 1) return NULL; + nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX); + if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL)) + /* invalid custom allocator */ + return NULL; + + mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem); + if (!mtctx) return NULL; + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + mtctx->cMem = cMem; + mtctx->allJobsCompleted = 1; + if (pool != NULL) { + mtctx->factory = pool; + mtctx->providedFactory = 1; + } + else { + mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + mtctx->providedFactory = 0; + } + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem); + assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem); + mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem); + initError = ZSTDMT_serialState_init(&mtctx->serial); + mtctx->roundBuff = kNullRoundBuff; + if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers); + return mtctx; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool); 
+#else + (void)nbWorkers; + (void)cMem; + (void)pool; + return NULL; +#endif +} + + +/* ZSTDMT_releaseAllJobResources() : + * note : ensure all workers are killed first ! */ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + /* Copy the mutex/cond out */ + ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; + ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; + + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + + /* Clear the job description, but keep the mutex/cond */ + ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + mtctx->jobs[jobID].job_mutex = mutex; + mtctx->jobs[jobID].job_cond = cond; + } + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + if (!mtctx->providedFactory) + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + 
ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_customFree(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates a selected set of compression parameters, remaining compatible with currently active frame. + * New parameters will be applied to next compression job. 
*/ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection inside workers. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(5, "ZSTDMT_getFrameProgression"); + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + fps.consumed = mtctx->consumed; + fps.produced = fps.flushed = mtctx->produced; + fps.currentJobID = mtctx->nextJobID; + fps.nbActiveWorkers = 0; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 
0 : jobPtr->dstFlushed; + assert(flushed <= produced); + fps.ingested += jobPtr->src.size; + fps.consumed += jobPtr->consumed; + fps.produced += produced; + fps.flushed += flushed; + fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) +{ + size_t toFlush; + unsigned const jobID = mtctx->doneJobID; + assert(jobID <= mtctx->nextJobID); + if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */ + + /* look into oldest non-fully-flushed job */ + { unsigned const wJobID = jobID & mtctx->jobIDMask; + ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); + toFlush = produced - flushed; + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. */ + if (toFlush==0) { + assert(jobPtr->consumed < jobPtr->src.size); + } + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + + return toFlush; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) +{ + unsigned jobLog; + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on cycleLog instead. 
*/ + jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3); + } else { + jobLog = MAX(20, params->cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); +} + +static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) +{ + switch(strat) + { + case ZSTD_btultra2: + return 9; + case ZSTD_btultra: + case ZSTD_btopt: + return 8; + case ZSTD_btlazy2: + case ZSTD_lazy2: + return 7; + case ZSTD_lazy: + case ZSTD_greedy: + case ZSTD_dfast: + case ZSTD_fast: + default:; + } + return 6; +} + +static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) +{ + assert(0 <= ovlog && ovlog <= 9); + if (ovlog == 0) return ZSTDMT_overlapLog_default(strat); + return ovlog; +} + +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) +{ + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); + assert(0 <= overlapRLog && overlapRLog <= 8); + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. + * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + - overlapRLog; + } + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); + DEBUGLOG(4, "overlap size : %i", 1 << ovLog); + return (ovLog==0) ? 
0 : (size_t)1 << ovLog; +} + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +size_t ZSTDMT_initCStream_internal( + ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)", + (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx); + + /* params supposed partially fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + /* init */ + if (params.nbWorkers != mtctx->params.nbWorkers) + FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , ""); + + if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; + if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; + + DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); + + if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + mtctx->allJobsCompleted = 1; + } + + mtctx->params = params; + mtctx->frameContentSize = pledgedSrcSize; + if (dict) { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ + params.cParams, mtctx->cMem); + mtctx->cdict = mtctx->cdictLocal; + if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = NULL; + mtctx->cdict = cdict; + } + + mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(¶ms); + DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, 
(U32)(mtctx->targetPrefixSize>>10)); + mtctx->targetSectionSize = params.jobSize; + if (mtctx->targetSectionSize == 0) { + mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(¶ms); + } + assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); + + if (params.rsyncable) { + /* Aim for the targetsectionSize as the average job size. */ + U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); + U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20; + assert(jobSizeMB >= 1); + DEBUGLOG(4, "rsyncLog = %u", rsyncBits); + mtctx->rsync.hash = 0; + mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1; + mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH); + } + if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize); + DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10)); + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize)); + { + /* If ldm is enabled we need windowSize space. */ + size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0; + /* Two buffers of slack, plus extra space for the overlap + * This is the minimum slack that LDM works with. One extra because + * flush might waste up to targetSectionSize-1 bytes. Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. 
+ */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) + return ERROR(memory_allocation); + return 0; +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. 
+ */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? 
mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0); + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { 
+ mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * flush whatever data has been produced but not yet flushed in current job. + * move to next job if current one is fully flushed. + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . + * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = 
mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + if (toFlush > 0) { + ZSTD_memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job is completed */ + && 
(mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + DEBUGLOG(5, "dstBuffer released"); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. 
+ */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. + */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = range.size != 0 ? 
rangeStart + range.size : rangeStart; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + DEBUGLOG(5, "ZSTDMT_doesOverlapWindow"); + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "ZSTDMT_waitForLdmComplete"); + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(5, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. 
+ */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + DEBUGLOG(5, "ZSTDMT_tryGetInputRange"); + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. + */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + +typedef struct { + size_t toLoad; /* The number of bytes to load from the input. */ + int flush; /* Boolean declaring if we must flush because we found a synchronization point. 
*/ +} syncPoint_t; + +/** + * Searches through the input for a synchronization point. If one is found, we + * will instruct the caller to flush, and return the number of bytes to load. + * Otherwise, we will load as many bytes as possible and instruct the caller + * to continue as normal. + */ +static syncPoint_t +findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) +{ + BYTE const* const istart = (BYTE const*)input.src + input.pos; + U64 const primePower = mtctx->rsync.primePower; + U64 const hitMask = mtctx->rsync.hitMask; + + syncPoint_t syncPoint; + U64 hash; + BYTE const* prev; + size_t pos; + + syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + syncPoint.flush = 0; + if (!mtctx->params.rsyncable) + /* Rsync is disabled. */ + return syncPoint; + if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH) + /* Not enough to compute the hash. + * We will miss any synchronization points in this RSYNC_LENGTH byte + * window. However, since it depends only in the internal buffers, if the + * state is already synchronized, we will remain synchronized. + * Additionally, the probability that we miss a synchronization point is + * low: RSYNC_LENGTH / targetSectionSize. + */ + return syncPoint; + /* Initialize the loop variables. */ + if (mtctx->inBuff.filled >= RSYNC_LENGTH) { + /* We have enough bytes buffered to initialize the hash. + * Start scanning at the beginning of the input. + */ + pos = 0; + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + if ((hash & hitMask) == hitMask) { + /* We're already at a sync point so don't load any more until + * we're able to flush this sync point. + * This likely happened because the job table was full so we + * couldn't add our job. 
+ */ + syncPoint.toLoad = 0; + syncPoint.flush = 1; + return syncPoint; + } + } else { + /* We don't have enough bytes buffered to initialize the hash, but + * we know we have at least RSYNC_LENGTH bytes total. + * Start scanning after the first RSYNC_LENGTH bytes less the bytes + * already buffered. + */ + pos = RSYNC_LENGTH - mtctx->inBuff.filled; + prev = (BYTE const*)mtctx->inBuff.buffer.start - pos; + hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled); + hash = ZSTD_rollingHash_append(hash, istart, pos); + } + /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll + * through the input. If we hit a synchronization point, then cut the + * job off, and tell the compressor to flush the job. Otherwise, load + * all the bytes and continue as normal. + * If we go too long without a synchronization point (targetSectionSize) + * then a block will be emitted anyways, but this is okay, since if we + * are already synchronized we will remain synchronized. + */ + for (; pos < syncPoint.toLoad; ++pos) { + BYTE const toRemove = pos < RSYNC_LENGTH ? 
prev[pos] : istart[pos - RSYNC_LENGTH]; + /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */ + hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower); + if ((hash & hitMask) == hitMask) { + syncPoint.toLoad = pos + 1; + syncPoint.flush = 1; + break; + } + } + return syncPoint; +} + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx) +{ + size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled; + if (hintInSize==0) hintInSize = mtctx->targetSectionSize; + return hintInSize; +} + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. 
+ */ + DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed"); + assert(mtctx->doneJobID != mtctx->nextJobID); + } else + DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start); + } + if (mtctx->inBuff.buffer.start != NULL) { + syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input); + if (syncPoint.flush && endOp == ZSTD_e_continue) { + endOp = ZSTD_e_flush; + } + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + input->pos += syncPoint.toLoad; + mtctx->inBuff.filled += syncPoint.toLoad; + forwardInputProgress = syncPoint.toLoad>0; + } + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) { + /* Can't end yet because the input is not fully consumed. + * We are in one of these cases: + * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job. + * - We filled the input buffer: flush this job but don't end the frame. + * - We hit a synchronization point: flush this job but don't end the frame. 
+ */ + assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable); + endOp = ZSTD_e_flush; + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush); + return remainingToFlush; + } +} +/**** ended inlining compress/zstdmt_compress.c ****/ +#endif + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" 
+#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + 
return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+ */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; wrankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); + int s; + int u; + switch (length) { + case 1: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; ssymbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } + return iSize; +} + +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time 
*/ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); 
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* 
check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return 
HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + 
U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX2 DElt; + U32 rankVal[HUF_TABLELOG_MAX + 1]; + + /* get pre-calculated rankVal */ + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s= 1 */ + + rankVal[weight] += length; + } } +} + + +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_MAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX2 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to 
avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; + + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; + + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + rankStart = rankStart0 + 1; + ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; w> consumed; + } } } } + + HUF_fillDTableX2(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, 
dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; 
(endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > 
opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t 
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + 
{{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t 
cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + 
+/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} 
ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V01_H_28739879432 */
+/**** ended inlining zstd_v01.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+/**** start inlining zstd_v02.h ****/
+/*
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V02_H_4174539423
+#define ZSTD_V02_H_4174539423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V02_H_4174539423 */
+/**** ended inlining zstd_v02.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+/**** start inlining zstd_v03.h ****/
+/*
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V03_H_298734209782
+#define ZSTD_V03_H_298734209782
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V03_H_298734209782 */
+/**** ended inlining zstd_v03.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+/**** start inlining zstd_v04.h ****/
+/*
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V04_H_91868324769238
+#define ZSTD_V04_H_91868324769238
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + + +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); + +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. 
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); + + +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); + + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef ZSTDv05_H
+#define ZSTDv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stddef.h>   /* size_t */
+/**** skipping file: ../common/mem.h ****/
+
+
+/* *************************************
+* Simple functions
+***************************************/
+/*! ZSTDv05_decompress() :
+ `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+ `dstCapacity` must be large enough, equal or larger than originalSize.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
+size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
+ const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
+ srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+ cSize (output parameter) : the number of bytes that would be read to decompress this frame
+ or an error code if it fails (which can be tested using ZSTDv01_isError())
+ dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+ or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+ note : assumes `cSize` and `dBound` are _not_ NULL.
+ */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. 
+* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, and tend to offer better latency */
+size_t ZBUFFv05_recommendedDInSize(void);
+size_t ZBUFFv05_recommendedDOutSize(void);
+
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTDv0505_H */
+/**** ended inlining zstd_v05.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+/**** start inlining zstd_v06.h ****/
+/*
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDv06_H
+#define ZSTDv06_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*====== Dependency ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*====== Export for Windows ======*/
+/*!
+* ZSTDv06_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1)
+# define ZSTDLIBv06_API __declspec(dllexport)
+#else
+# define ZSTDLIBv06_API
+#endif
+
+
+/* *************************************
+* Simple functions
+***************************************/
+/*! ZSTDv06_decompress() :
+ `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+ `dstCapacity` must be large enough, equal or larger than originalSize.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. +*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t 
ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + + +/* ************************************* +* ZBUFF API +***************************************/ + +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv06_API size_t 
ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. +* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
+* *******************************************************************************/
+
+
+/* *************************************
+* Tool functions
+***************************************/
+ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode);
+ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+* These sizes are just hints, they tend to offer better latency */
+ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void);
+ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void);
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTDv06_BUFFERED_H */
+/**** ended inlining zstd_v06.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+/**** start inlining zstd_v07.h ****/
+/*
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDv07_H_235446
+#define ZSTDv07_H_235446
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*====== Dependency ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*====== Export for Windows ======*/
+/*!
+* ZSTDv07_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1)
+# define ZSTDLIBv07_API __declspec(dllexport)
+#else
+# define ZSTDLIBv07_API
+#endif
+
+
+/* *************************************
+* Simple API
+***************************************/
+/*!
ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! + always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! 
ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; + +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ + + + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) 
return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, 
dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} + 
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - 
output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + 
DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t 
dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. 
+ * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? 
ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! 
+ * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** start inlining zstd_decompress_block.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. 
+ */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ +/**** ended inlining zstd_decompress_block.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** skipping file: ../legacy/zstd_legacy.h ****/ +#endif + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
+ */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! 
Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. 
+ */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. 
 */
static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
    /* NOTE(review): left-to-right this evaluates as
     * ((count * COUNT_MULT) / tableSize) * SIZE_MULT != 0, i.e. expand once
     * count * COUNT_MULT >= tableSize — confirm this matches the intended
     * maximum load factor encoded by the two multiplier constants. */
    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
    }
    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
    return 0;
}

/*-*************************************************************
* Context management
***************************************************************/
size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
{
    if (dctx==NULL) return 0;   /* support sizeof NULL */
    /* size of the struct plus everything it owns: local ddict + both stream buffers */
    return sizeof(*dctx)
           + ZSTD_sizeof_DDict(dctx->ddictLocal)
           + dctx->inBuffSize + dctx->outBuffSize;
}

size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }


/* Minimum number of input bytes required to begin decoding a frame header
 * for the given format (magicless format needs fewer bytes). */
static size_t ZSTD_startingInputLength(ZSTD_format_e format)
{
    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
    /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
    return startingInputLength;
}

/* Restores all advanced decompression parameters to their default values.
 * Only legal while the stream is in its initial stage (asserted). */
static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
{
    assert(dctx->streamStage == zdss_init);
    dctx->format = ZSTD_f_zstd1;
    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
    dctx->outBufferMode = ZSTD_bm_buffered;
    dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
}

/* One-time initialization of a freshly allocated (or static) DCtx:
 * clears all ownership pointers and resets parameters to defaults. */
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
{
    dctx->staticSize = 0;
    dctx->ddict = NULL;
    dctx->ddictLocal = NULL;
    dctx->dictEnd = NULL;
    dctx->ddictIsCold = 0;
    dctx->dictUses = ZSTD_dont_use;
    dctx->inBuff = NULL;
    dctx->inBuffSize = 0;
    dctx->outBuffSize = 0;
    dctx->streamStage = zdss_init;
    dctx->legacyContext = NULL;
    dctx->previousLegacyVersion = 0;
    dctx->noForwardProgress = 0;
    dctx->oversizedDuration = 0;
    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    dctx->ddictSet = NULL;
    ZSTD_DCtx_resetParameters(dctx);
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentEndForFuzzing = NULL;
#endif
}

/* Builds a DCtx inside caller-provided memory; no heap allocation is done here.
 * Returns NULL if the workspace is misaligned or too small. */
ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
{
    ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */

    ZSTD_initDCtx_internal(dctx);
    /* non-zero staticSize marks this DCtx as static (not freeable, no realloc) */
    dctx->staticSize = workspaceSize;
    /* the workspace bytes beyond the struct serve as the input buffer */
    dctx->inBuff = (char*)(dctx+1);
    return dctx;
}

ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
{
    /* customAlloc and customFree must be provided together (or neither) */
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
        if (!dctx) return NULL;
        dctx->customMem = customMem;
        ZSTD_initDCtx_internal(dctx);
        return dctx;
    }
}

ZSTD_DCtx* ZSTD_createDCtx(void)
{
    DEBUGLOG(3, "ZSTD_createDCtx");
    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
}

/* Drops any dictionary reference: frees the locally-owned ddict (if any)
 * and clears the borrowed ddict pointer. */
static void ZSTD_clearDict(ZSTD_DCtx* dctx)
{
    ZSTD_freeDDict(dctx->ddictLocal);
    dctx->ddictLocal = NULL;
    dctx->ddict = NULL;
    dctx->dictUses = ZSTD_dont_use;
}

size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
{
    if (dctx==NULL) return 0;   /* support free on NULL */
    /* static DCtx memory belongs to the caller, refuse to free it */
    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
    {   ZSTD_customMem const cMem = dctx->customMem;
        ZSTD_clearDict(dctx);
        ZSTD_customFree(dctx->inBuff, cMem);
        dctx->inBuff = NULL;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
        if (dctx->legacyContext)
            ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
#endif
        if (dctx->ddictSet) {
            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
            dctx->ddictSet = NULL;
        }
        /* free the struct last — cMem was copied out above for this reason */
        ZSTD_customFree(dctx, cMem);
        return 0;
    }
}

/* no longer useful */
void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
{
    /* copy only the members laid out before inBuff; buffers are not shared */
    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
    ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
}

/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
 * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
 * accordingly sets the ddict to be used to decompress the frame.
 *
 * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
 *
 * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
 */
static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
    assert(dctx->refMultipleDDicts && dctx->ddictSet);
    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
    if (dctx->ddict) {
        const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
        if (frameDDict) {
            DEBUGLOG(4, "DDict found!");
            ZSTD_clearDict(dctx);
            dctx->dictID = dctx->fParams.dictID;
            dctx->ddict = frameDDict;
            /* hash-set entries are borrowed, so they stay referenced across frames */
            dctx->dictUses = ZSTD_use_indefinitely;
        }
    }
}


/*-*************************************************************
 * Frame header decoding
 ***************************************************************/

/*! ZSTD_isFrame() :
 *  Tells if the content of `buffer` starts with a valid Frame Identifier.
 *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
 *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
 *  Note 3 : Skippable Frame Identifiers are considered valid.
 */
unsigned ZSTD_isFrame(const void* buffer, size_t size)
{
    if (size < ZSTD_FRAMEIDSIZE) return 0;
    {   U32 const magic = MEM_readLE32(buffer);
        if (magic == ZSTD_MAGICNUMBER) return 1;
        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
    }
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
    if (ZSTD_isLegacy(buffer, size)) return 1;
#endif
    return 0;
}

/** ZSTD_frameHeaderSize_internal() :
 *  srcSize must be large enough to reach header size fields.
 *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
 * @return : size of the Frame Header
 *           or an error code, which can be tested with ZSTD_isError() */
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
{
    size_t const minInputSize = ZSTD_startingInputLength(format);
    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");

    {   /* the Frame_Header_Descriptor byte follows the (optional) magic number */
        BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
        U32 const dictID= fhd & 3;                   /* Dictionary_ID_flag: 0/1/2/4-byte field */
        U32 const singleSegment = (fhd >> 5) & 1;    /* Single_Segment_flag: no Window_Descriptor byte */
        U32 const fcsId = fhd >> 6;                  /* Frame_Content_Size_flag */
        return minInputSize + !singleSegment
             + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
             + (singleSegment && !fcsId);            /* single-segment with fcsId==0 still carries a 1-byte FCS */
    }
}

/** ZSTD_frameHeaderSize() :
 *  srcSize must be >= ZSTD_frameHeaderSize_prefix.
 * @return : size of the Frame Header,
 *           or an error code (if srcSize is too small) */
size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
{
    return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
}


/** ZSTD_getFrameHeader_advanced() :
 *  decode Frame Header, or require larger `srcSize`.
 *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
 * @return : 0, `zfhPtr` is correctly filled,
 *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
 *           or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
{
    const BYTE* ip = (const BYTE*)src;
    size_t const minInputSize = ZSTD_startingInputLength(format);

    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
    if (srcSize < minInputSize) return minInputSize;
    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");

    if ( (format != ZSTD_f_zstd1_magicless)
      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
        if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
            /* skippable frame */
            if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
                return ZSTD_SKIPPABLEHEADERSIZE;   /* magic number + frame length */
            ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
            zfhPtr->frameType = ZSTD_skippableFrame;
            return 0;
        }
        RETURN_ERROR(prefix_unknown, "");
    }

    /* ensure there is enough `srcSize` to fully read/decode frame header */
    {   size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
        if (srcSize < fhsize) return fhsize;
        zfhPtr->headerSize = (U32)fhsize;
    }

    {   BYTE const fhdByte = ip[minInputSize-1];
        size_t pos = minInputSize;
        U32 const dictIDSizeCode = fhdByte&3;        /* selects 0/1/2/4-byte dictID field */
        U32 const checksumFlag = (fhdByte>>2)&1;
        U32 const singleSegment = (fhdByte>>5)&1;
        U32 const fcsID = fhdByte>>6;                /* selects 0/2/4/8-byte content-size field */
        U64 windowSize = 0;
        U32 dictID = 0;
        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
                        "reserved bits, must be zero");

        if (!singleSegment) {
            /* Window_Descriptor: exponent in the high 5 bits, mantissa (eighths) in the low 3 */
            BYTE const wlByte = ip[pos++];
            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
            windowSize = (1ULL << windowLog);
            windowSize += (windowSize >> 3) * (wlByte&7);
        }
        switch(dictIDSizeCode)
        {
            default: assert(0);  /* impossible */
            case 0 : break;
            case 1 : dictID = ip[pos]; pos++; break;
            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
        }
        switch(fcsID)
        {
            default: assert(0);  /* impossible */
            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;   /* 2-byte field is biased by 256 */
            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
        }
        if (singleSegment) windowSize = frameContentSize;

        zfhPtr->frameType = ZSTD_frame;
        zfhPtr->frameContentSize = frameContentSize;
        zfhPtr->windowSize = windowSize;
        zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
        zfhPtr->dictID = dictID;
        zfhPtr->checksumFlag = checksumFlag;
    }
    return 0;
}

/** ZSTD_getFrameHeader() :
 *  decode Frame Header, or require larger `srcSize`.
 *  note : this function does not consume input, it only reads it.
 * @return : 0, `zfhPtr` is correctly filled,
 *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
 *           or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
{
    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
}


/** ZSTD_getFrameContentSize() :
 *  compatible with legacy mode
 * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
 *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
 *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g.
invalid magic number, srcSize too small) */
unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
{
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
    if (ZSTD_isLegacy(src, srcSize)) {
        /* legacy API reports 0 for "unknown"; translate to the modern sentinel */
        unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
        return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
    }
#endif
    {   ZSTD_frameHeader zfh;
        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
            return ZSTD_CONTENTSIZE_ERROR;
        if (zfh.frameType == ZSTD_skippableFrame) {
            return 0;   /* skippable frames regenerate no content */
        } else {
            return zfh.frameContentSize;
        }
    }
}

/* Returns the total size (header + content) of the skippable frame at `src`,
 * or an error code if `srcSize` is too small or the declared size overflows. */
static size_t readSkippableFrameSize(void const* src, size_t srcSize)
{
    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
    U32 sizeU32;

    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");

    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
    /* reject a declared size so large that adding the header wraps around */
    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
                    frameParameter_unsupported, "");
    {
        size_t const skippableSize = skippableHeaderSize + sizeU32;
        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
        return skippableSize;
    }
}

/** ZSTD_findDecompressedSize() :
 *  compatible with legacy mode
 *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
 *      skippable frames
 * @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
    unsigned long long totalDstSize = 0;

    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
        U32 const magicNumber = MEM_readLE32(src);

        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
            if (ZSTD_isError(skippableSize)) {
                return ZSTD_CONTENTSIZE_ERROR;
            }
            assert(skippableSize <= srcSize);

            src = (const BYTE *)src + skippableSize;
            srcSize -= skippableSize;
            continue;
        }

        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
            /* ERROR and UNKNOWN are the two largest sentinel values; propagate either */
            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;

            /* check for overflow */
            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
            totalDstSize += ret;
        }
        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
            if (ZSTD_isError(frameSrcSize)) {
                return ZSTD_CONTENTSIZE_ERROR;
            }

            src = (const BYTE *)src + frameSrcSize;
            srcSize -= frameSrcSize;
        }
    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */

    /* trailing bytes that don't form a complete frame are an error */
    if (srcSize) return ZSTD_CONTENTSIZE_ERROR;

    return totalDstSize;
}

/** ZSTD_getDecompressedSize() :
 *  compatible with legacy mode
 * @return : decompressed size if known, 0 otherwise
             note : 0 can mean any of the following :
                  - frame content is empty
                  - decompressed size field is not present in frame header
                  - frame header unknown / not supported
                  - frame header not complete (`srcSize` too small) */
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
{
    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
    return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
}


/** ZSTD_decodeFrameHeader() :
 *  `headerSize` must be the size provided by ZSTD_frameHeaderSize().
 *  If multiple DDict references are enabled, also will choose the correct DDict to use.
 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
{
    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
    if (ZSTD_isError(result)) return result;    /* invalid header */
    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");

    /* Reference DDict requested by frame if dctx references multiple ddicts */
    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
        ZSTD_DCtx_selectFrameDDict(dctx);
    }

#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    /* Skip the dictID check in fuzzing mode, because it makes the search
     * harder.
     */
    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
                    dictionary_wrong, "");
#endif
    /* checksum is computed only when the frame carries one AND the user hasn't disabled it */
    dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
    if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0);
    dctx->processedCSize += headerSize;
    return 0;
}

/* Packs an error code into a ZSTD_frameSizeInfo result. */
static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
{
    ZSTD_frameSizeInfo frameSizeInfo;
    frameSizeInfo.compressedSize = ret;
    frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
    return frameSizeInfo;
}

/* Walks the frame at `src` block-by-block (without decompressing) to compute
 * its total compressed size and an upper bound on its decompressed size. */
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
{
    ZSTD_frameSizeInfo frameSizeInfo;
    ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));

#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
    if (ZSTD_isLegacy(src, srcSize))
        return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
#endif

    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
               frameSizeInfo.compressedSize <= srcSize);
        return frameSizeInfo;
    } else {
        const BYTE* ip = (const BYTE*)src;
        const BYTE* const ipstart = ip;
        size_t remainingSize = srcSize;
        size_t nbBlocks = 0;
        ZSTD_frameHeader zfh;

        /* Extract Frame Header */
        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
            if (ZSTD_isError(ret))
                return ZSTD_errorFrameSizeInfo(ret);
            if (ret > 0)
                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
        }

        ip += zfh.headerSize;
        remainingSize -= zfh.headerSize;

        /* Iterate over each block */
        while (1) {
            blockProperties_t blockProperties;
            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
            if (ZSTD_isError(cBlockSize))
                return ZSTD_errorFrameSizeInfo(cBlockSize);

            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));

            ip += ZSTD_blockHeaderSize + cBlockSize;
            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
            nbBlocks++;

            if (blockProperties.lastBlock) break;
        }

        /* Final frame content checksum */
        if (zfh.checksumFlag) {
            if (remainingSize < 4)
                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
            ip += 4;
        }

        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
        /* NOTE(review): nbBlocks * blockSizeMax is computed in 64 bits via the
         * U64 decompressedBound, but nbBlocks is a size_t — confirm the product
         * cannot overflow for adversarial inputs with very many tiny blocks. */
        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
                                        ? zfh.frameContentSize
                                        : nbBlocks * zfh.blockSizeMax;
        return frameSizeInfo;
    }
}

/** ZSTD_findFrameCompressedSize() :
 *  compatible with legacy mode
 *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
 *  `srcSize` must be at least as large as the frame contained
 * @return : the compressed size of the frame starting at `src` */
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{
    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
    return frameSizeInfo.compressedSize;
}

/** ZSTD_decompressBound() :
 *  compatible with legacy mode
 *  `src` must point to the start of a ZSTD frame or a skippeable frame
 *  `srcSize` must be at least as large as the frame contained
 * @return : the maximum decompressed size of the compressed source
 */
unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
{
    unsigned long long bound = 0;
    /* Iterate over each frame */
    while (srcSize > 0) {
        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
        size_t const compressedSize = frameSizeInfo.compressedSize;
        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
            return ZSTD_CONTENTSIZE_ERROR;
        assert(srcSize >= compressedSize);
        src = (const BYTE*)src + compressedSize;
        srcSize -= compressedSize;
        bound += decompressedBound;
    }
    return bound;
}


/*-*************************************************************
 * Frame decoding
 ***************************************************************/

/** ZSTD_insertBlock() :
 *  insert `src` block into `dctx` history. Useful to track uncompressed blocks.
 */
size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
{
    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
    ZSTD_checkContinuity(dctx, blockStart, blockSize);
    dctx->previousDstEnd = (const char*)blockStart + blockSize;
    return blockSize;
}


/* Copies a raw (uncompressed) block into `dst`; a NULL dst is tolerated
 * only for zero-length blocks. Returns the number of bytes copied. */
static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_copyRawBlock");
    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
    if (dst == NULL) {
        if (srcSize == 0) return 0;
        RETURN_ERROR(dstBuffer_null, "");
    }
    ZSTD_memcpy(dst, src, srcSize);
    return srcSize;
}

/* Expands an RLE block: fills `regenSize` bytes of `dst` with byte `b`. */
static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
                               BYTE b,
                               size_t regenSize)
{
    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
    if (dst == NULL) {
        if (regenSize == 0) return 0;
        RETURN_ERROR(dstBuffer_null, "");
    }
    ZSTD_memset(dst, b, regenSize);
    return regenSize;
}

/* Emits an end-of-decompression trace event (no-op unless ZSTD_TRACE is enabled). */
static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
{
#if ZSTD_TRACE
    if (dctx->traceCtx) {
        ZSTD_Trace trace;
        ZSTD_memset(&trace, 0, sizeof(trace));
        trace.version = ZSTD_VERSION_NUMBER;
        trace.streaming = streaming;
        if (dctx->ddict) {
            trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict);
            trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict);
            trace.dictionaryIsCold = dctx->ddictIsCold;
        }
        trace.uncompressedSize = (size_t)uncompressedSize;
        trace.compressedSize = (size_t)compressedSize;
        trace.dctx = dctx;
        ZSTD_trace_decompress_end(dctx->traceCtx, &trace);
    }
#else
    (void)dctx;
    (void)uncompressedSize;
    (void)compressedSize;
    (void)streaming;
#endif
}


/*! ZSTD_decompressFrame() :
 * @dctx must be properly initialized
 *  will update *srcPtr and *srcSizePtr,
 *  to make *srcPtr progress by one frame.
 */
static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
                                   void* dst, size_t dstCapacity,
                             const void** srcPtr, size_t *srcSizePtr)
{
    const BYTE* const istart = (const BYTE*)(*srcPtr);
    const BYTE* ip = istart;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
    BYTE* op = ostart;
    size_t remainingSrcSize = *srcSizePtr;

    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);

    /* check */
    RETURN_ERROR_IF(
        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
        srcSize_wrong, "");

    /* Frame Header */
    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
                        srcSize_wrong, "");
        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
    }

    /* Loop on each block */
    while (1) {
        size_t decodedSize;
        blockProperties_t blockProperties;
        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
        if (ZSTD_isError(cBlockSize)) return cBlockSize;

        ip += ZSTD_blockHeaderSize;
        remainingSrcSize -= ZSTD_blockHeaderSize;
        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");

        switch(blockProperties.blockType)
        {
        case bt_compressed:
            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
            break;
        case bt_raw :
            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
            break;
        case bt_rle :
            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
            break;
        case bt_reserved :
        default:
            RETURN_ERROR(corruption_detected, "invalid block type");
        }

        if (ZSTD_isError(decodedSize)) return decodedSize;
        if (dctx->validateChecksum)
            XXH64_update(&dctx->xxhState, op, decodedSize);
        if (decodedSize != 0)
            op += decodedSize;
        assert(ip != NULL);
        ip += cBlockSize;
        remainingSrcSize -= cBlockSize;
        if (blockProperties.lastBlock) break;
    }

    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        /* a declared content size must match exactly what was regenerated */
        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
                        corruption_detected, "");
    }
    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
        if (!dctx->forceIgnoreChecksum) {
            U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
            U32 checkRead;
            checkRead = MEM_readLE32(ip);
            RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
        }
        ip += 4;
        remainingSrcSize -= 4;
    }
    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
    /* Allow caller to get size read */
    *srcPtr = ip;
    *srcSizePtr = remainingSrcSize;
    return (size_t)(op-ostart);
}

/* Decompresses every frame found in `src` (zstd, legacy, skippable) into `dst`,
 * advancing through the input until it is fully consumed. */
static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                                        void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  const void* dict, size_t dictSize,
                                  const ZSTD_DDict* ddict)
{
    void* const dststart = dst;
    int moreThan1Frame = 0;

    DEBUGLOG(5, "ZSTD_decompressMultiFrame");
    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */

    if (ddict) {
        dict = ZSTD_DDict_dictContent(ddict);
        dictSize = ZSTD_DDict_dictSize(ddict);
    }

    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {

#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
        if (ZSTD_isLegacy(src, srcSize)) {
            size_t decodedSize;
            size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
            if (ZSTD_isError(frameSize)) return frameSize;
            RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
                "legacy support is not compatible with static dctx");

            decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
            if (ZSTD_isError(decodedSize)) return decodedSize;

            assert(decodedSize <= dstCapacity);
            dst = (BYTE*)dst + decodedSize;
            dstCapacity -= decodedSize;

            src = (const BYTE*)src + frameSize;
            srcSize -= frameSize;

            continue;
        }
#endif

        {   U32 const magicNumber = MEM_readLE32(src);
            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
                assert(skippableSize <= srcSize);

                src = (const BYTE *)src + skippableSize;
                srcSize -= skippableSize;
                continue;
            }
        }

        if (ddict) {
            /* we were called from ZSTD_decompress_usingDDict */
            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
        } else {
            /* this will initialize correctly with no dict if dict == NULL, so
             * use this in all cases but ddict */
            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
        }
        ZSTD_checkContinuity(dctx, dst, dstCapacity);

        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
                                                    &src, &srcSize);
            RETURN_ERROR_IF(
                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
             && (moreThan1Frame==1),
                srcSize_wrong,
                "At least one frame successfully completed, "
                "but following bytes are garbage: "
                "it's more likely to be a srcSize error, "
                "specifying more input bytes than size of frame(s). "
                "Note: one could be unlucky, it might be a corruption error instead, "
                "happening right at the place where we expect zstd magic bytes. "
                "But this is _much_ less likely than a srcSize field error.");
            if (ZSTD_isError(res)) return res;
            assert(res <= dstCapacity);
            if (res != 0)
                dst = (BYTE*)dst + res;
            dstCapacity -= res;
        }
        moreThan1Frame = 1;
    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */

    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");

    return (size_t)((BYTE*)dst - (BYTE*)dststart);
}

size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                 void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize,
                           const void* dict, size_t dictSize)
{
    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
}


/* Resolves which DDict (if any) the dctx should use for the next frame,
 * honoring the one-shot vs. indefinite dictionary-use policy. */
static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
{
    switch (dctx->dictUses) {
    default:
        assert(0 /* Impossible */);
        /* fall-through */
    case ZSTD_dont_use:
        ZSTD_clearDict(dctx);
        return NULL;
    case ZSTD_use_indefinitely:
        return dctx->ddict;
    case ZSTD_use_once:
        /* consume the single-use reference */
        dctx->dictUses = ZSTD_dont_use;
        return dctx->ddict;
    }
}

size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
}


size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
    size_t regenSize;
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeDCtx(dctx);
    return regenSize;
#else   /* stack mode */
    ZSTD_DCtx dctx;
    ZSTD_initDCtx_internal(&dctx);
    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
#endif
}


/*-**************************************
*   Advanced Streaming Decompression API
*   Bufferless and synchronous
****************************************/
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) {
return dctx->expected; }

/**
 * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
 * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
 * be streamed.
 *
 * For blocks that can be streamed, this allows us to reduce the latency until we produce
 * output, and avoid copying the input.
 *
 * @param inputSize - The total amount of input that the caller currently has.
 */
static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
    if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
        return dctx->expected;
    if (dctx->bType != bt_raw)
        return dctx->expected;
    /* raw blocks can be consumed in partial chunks: accept anything from
     * 1 byte up to the remainder of the current block */
    return MIN(MAX(inputSize, 1), dctx->expected);
}

/* Maps the dctx's internal decode stage onto the public ZSTD_nextInputType_e enum. */
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
    switch(dctx->stage)
    {
    default:   /* should not happen */
        assert(0);
    case ZSTDds_getFrameHeaderSize:
    case ZSTDds_decodeFrameHeader:
        return ZSTDnit_frameHeader;
    case ZSTDds_decodeBlockHeader:
        return ZSTDnit_blockHeader;
    case ZSTDds_decompressBlock:
        return ZSTDnit_block;
    case ZSTDds_decompressLastBlock:
        return ZSTDnit_lastBlock;
    case ZSTDds_checkChecksum:
        return ZSTDnit_checksum;
    case ZSTDds_decodeSkippableHeader:
    case ZSTDds_skipFrame:
        return ZSTDnit_skippableFrame;
    }
}

static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }

/** ZSTD_decompressContinue() :
 *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
 * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
 *           or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
    /* Sanity check */
    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx,
srcSize), srcSize_wrong, "not allowed");
    ZSTD_checkContinuity(dctx, dst, dstCapacity);

    dctx->processedCSize += srcSize;

    /* State machine : each case consumes exactly the bytes announced by
     * dctx->expected, then sets (expected, stage) for the next step. */
    switch (dctx->stage)
    {
    case ZSTDds_getFrameHeaderSize :
        assert(src != NULL);
        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
                dctx->stage = ZSTDds_decodeSkippableHeader;
                return 0;
        }   }
        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
        dctx->expected = dctx->headerSize - srcSize;
        dctx->stage = ZSTDds_decodeFrameHeader;
        return 0;

    case ZSTDds_decodeFrameHeader:
        assert(src != NULL);
        /* complete the header accumulated in headerBuffer, then decode it */
        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
        dctx->expected = ZSTD_blockHeaderSize;
        dctx->stage = ZSTDds_decodeBlockHeader;
        return 0;

    case ZSTDds_decodeBlockHeader:
        {   blockProperties_t bp;
            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
            if (ZSTD_isError(cBlockSize)) return cBlockSize;
            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
            dctx->expected = cBlockSize;
            dctx->bType = bp.blockType;
            dctx->rleSize = bp.origSize;
            if (cBlockSize) {
                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
                return 0;
            }
            /* empty block */
            if (bp.lastBlock) {
                if (dctx->fParams.checksumFlag) {
                    dctx->expected = 4;
                    dctx->stage = ZSTDds_checkChecksum;
                } else {
                    dctx->expected = 0; /* end of frame */
                    dctx->stage = ZSTDds_getFrameHeaderSize;
                }
            } else {
                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
                dctx->stage = ZSTDds_decodeBlockHeader;
            }
            return 0;
        }

    case ZSTDds_decompressLastBlock:
    case ZSTDds_decompressBlock:
        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
        {   size_t rSize;
            switch(dctx->bType)
            {
            case bt_compressed:
                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
                dctx->expected = 0;  /* Streaming not supported */
                break;
            case bt_raw :
                /* raw blocks may be fed in pieces : only decrement expected */
                assert(srcSize <= dctx->expected);
                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
                FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
                assert(rSize == srcSize);
                dctx->expected -= rSize;
                break;
            case bt_rle :
                rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
                dctx->expected = 0;  /* Streaming not supported */
                break;
            case bt_reserved :   /* should never happen */
            default:
                RETURN_ERROR(corruption_detected, "invalid block type");
            }
            FORWARD_IF_ERROR(rSize, "");
            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
            dctx->decodedSize += rSize;
            if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
            dctx->previousDstEnd = (char*)dst + rSize;

            /* Stay on the same stage until we are finished streaming the block. */
            if (dctx->expected > 0) {
                return rSize;
            }

            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
                RETURN_ERROR_IF(
                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
                 && dctx->decodedSize != dctx->fParams.frameContentSize,
                    corruption_detected, "");
                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
                    dctx->expected = 4;
                    dctx->stage = ZSTDds_checkChecksum;
                } else {
                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
                    dctx->expected = 0;   /* ends here */
                    dctx->stage = ZSTDds_getFrameHeaderSize;
                }
            } else {
                dctx->stage = ZSTDds_decodeBlockHeader;
                dctx->expected = ZSTD_blockHeaderSize;
            }
            return rSize;
        }

    case ZSTDds_checkChecksum:
        assert(srcSize == 4);  /* guaranteed by dctx->expected */
        {
            if (dctx->validateChecksum) {
                /* frame checksum = low 32 bits of the running XXH64 digest */
                U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
                U32 const check32 = MEM_readLE32(src);
                DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
                RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
            }
            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
            dctx->expected = 0;
            dctx->stage = ZSTDds_getFrameHeaderSize;
            return 0;
        }

    case ZSTDds_decodeSkippableHeader:
        assert(src != NULL);
        assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
        ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
        dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
        dctx->stage = ZSTDds_skipFrame;
        return 0;

    case ZSTDds_skipFrame:
        dctx->expected = 0;
        dctx->stage = ZSTDds_getFrameHeaderSize;
        return 0;

    default:
        assert(0);   /* impossible */
        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
    }
}


/* References @dict content as the decoding prefix/window : no copy is made,
 * so the dictionary buffer must outlive the decompression session.
 * virtualStart is shifted so that relative offsets across the
 * prefix/dict boundary keep working. */
static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
    dctx->dictEnd = dctx->previousDstEnd;
    dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
    dctx->prefixStart = dict;
    dctx->previousDstEnd = (const char*)dict + dictSize;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif
    return 0;
}

/*! ZSTD_loadDEntropy() :
 *  dict : must point at beginning of a valid zstd dictionary.
 * @return : size of entropy tables read */
size_t
ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
                  const void* const dict, size_t const dictSize)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;

    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
    dictPtr += 8;   /* skip header = magic + dictID */

    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
    ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
    {   void* const workspace = &entropy->LLTable;   /* use fse tables as temporary workspace; implies fse tables are grouped together */
        size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
#ifdef HUF_FORCE_DECOMPRESS_X1
        /* in minimal huffman, we always use X1 variants */
        size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
                                                dictPtr, dictEnd - dictPtr,
                                                workspace,
workspaceSize);
#else
        size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
                                                dictPtr, (size_t)(dictEnd - dictPtr),
                                                workspace, workspaceSize);
#endif
        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
        dictPtr += hSize;
    }

    /* Offset codes : read normalized counts, then build the FSE decoding table */
    {   short offcodeNCount[MaxOff+1];
        unsigned offcodeMaxValue = MaxOff, offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        ZSTD_buildFSETable( entropy->OFTable,
                                offcodeNCount, offcodeMaxValue,
                                OF_base, OF_bits,
                                offcodeLog,
                                entropy->workspace, sizeof(entropy->workspace),
                                /* bmi2 */0);
        dictPtr += offcodeHeaderSize;
    }

    /* Match lengths : same scheme as offset codes */
    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        ZSTD_buildFSETable( entropy->MLTable,
                                matchlengthNCount, matchlengthMaxValue,
                                ML_base, ML_bits,
                                matchlengthLog,
                                entropy->workspace, sizeof(entropy->workspace),
                                /* bmi2 */ 0);
        dictPtr += matchlengthHeaderSize;
    }

    /* Literal lengths : same scheme again */
    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
        ZSTD_buildFSETable( entropy->LLTable,
                                litlengthNCount, litlengthMaxValue,
                                LL_base, LL_bits,
                                litlengthLog,
                                entropy->workspace, sizeof(entropy->workspace),
                                /* bmi2 */ 0);
        dictPtr += litlengthHeaderSize;
    }

    /* 3 initial repcodes (4 bytes each) precede the dictionary content;
     * each must be non-zero and no larger than the content size */
    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
    {   int i;
        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
        for (i=0; i<3; i++) {
            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
                            dictionary_corrupted, "");
            entropy->rep[i] = rep;
    }   }

    return (size_t)(dictPtr - (const BYTE*)dict);
}

/* Loads @dict into @dctx : if it carries the ZSTD_MAGIC_DICTIONARY magic,
 * entropy tables are parsed first and the remainder is referenced as content;
 * otherwise the whole buffer is treated as raw content ("pure content mode"). */
static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
    {   U32 const magic = MEM_readLE32(dict);
        if (magic != ZSTD_MAGIC_DICTIONARY) {
            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
    }   }
    dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);

    /* load entropy tables */
    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
        dict = (const char*)dict + eSize;
        dictSize -= eSize;
    }
    dctx->litEntropy = dctx->fseEntropy = 1;

    /* reference dictionary content */
    return ZSTD_refDictContent(dctx, dict, dictSize);
}

/* Resets @dctx to start decoding a new frame : rewinds the state machine to
 * ZSTDds_getFrameHeaderSize and clears all per-frame state (window pointers,
 * counters, entropy/repcode state). */
size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
{
    assert(dctx != NULL);
#if ZSTD_TRACE
    dctx->traceCtx = ZSTD_trace_decompress_begin(dctx);
#endif
    dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
    dctx->stage = ZSTDds_getFrameHeaderSize;
    dctx->processedCSize = 0;
    dctx->decodedSize = 0;
    dctx->previousDstEnd = NULL;
    dctx->prefixStart = NULL;
    dctx->virtualStart = NULL;
    dctx->dictEnd = NULL;
    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little
and big endian */
    dctx->litEntropy = dctx->fseEntropy = 0;
    dctx->dictID = 0;
    dctx->bType = bt_reserved;
    ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
    ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
    dctx->LLTptr = dctx->entropy.LLTable;
    dctx->MLTptr = dctx->entropy.MLTable;
    dctx->OFTptr = dctx->entropy.OFTable;
    dctx->HUFptr = dctx->entropy.hufTable;
    return 0;
}

/* Like ZSTD_decompressBegin(), then loads a raw dictionary (if provided).
 * Any failure while inserting the dictionary is reported as dictionary_corrupted. */
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
    if (dict && dictSize)
        RETURN_ERROR_IF(
            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
            dictionary_corrupted, "");
    return 0;
}


/* ======   ZSTD_DDict   ====== */

/* Like ZSTD_decompressBegin(), but copies pre-digested parameters from @ddict.
 * Also records whether the ddict content is likely out of CPU cache
 * ("cold") by comparing against the previous session's dictEnd. */
size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
    assert(dctx != NULL);
    if (ddict) {
        const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
        size_t const dictSize = ZSTD_DDict_dictSize(ddict);
        const void* const dictEnd = dictStart + dictSize;
        dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
        DEBUGLOG(4, "DDict is %s",
                dctx->ddictIsCold ? "~cold~" : "hot!");
    }
    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
        ZSTD_copyDDictParameters(dctx, ddict);
    }
    return 0;
}

/*! ZSTD_getDictID_fromDict() :
 *  Provides the dictID stored within dictionary.
 *  if @return == 0, the dictionary is not conformant with Zstandard specification.
 *  It can still be loaded, but as a content-only dictionary. */
unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
{
    if (dictSize < 8) return 0;
    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
}

/*! ZSTD_getDictID_fromFrame() :
 *  Provides the dictID required to decompress frame stored within `src`.
 *  If @return == 0, the dictID could not be decoded.
 *  This could be for one of the following reasons :
 *  - The frame does not require a dictionary (most common case).
 *  - The frame was built with dictID intentionally removed.
 *    Needed dictionary is a hidden information.
 *    Note : this use case also happens when using a non-conformant dictionary.
 *  - `srcSize` is too small, and as a result, frame header could not be decoded.
 *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
 *  - This is not a Zstandard frame.
 *  When identifying the exact failure cause, it's possible to use
 *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{
    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
    if (ZSTD_isError(hError)) return 0;   /* header unreadable : report "no dictID" */
    return zfp.dictID;
}


/*! ZSTD_decompress_usingDDict() :
*   Decompression using a pre-digested Dictionary
*   Use dictionary without significant overhead.
*/
size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  const ZSTD_DDict* ddict)
{
    /* pass content and size in case legacy frames are encountered */
    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
                                     NULL, 0,
                                     ddict);
}


/*=====================================
*   Streaming decompression
*=====================================*/

/* ZSTD_DStream is the same object as ZSTD_DCtx : all creation/destruction
 * entry points below simply forward to their DCtx counterparts. */
ZSTD_DStream* ZSTD_createDStream(void)
{
    DEBUGLOG(3, "ZSTD_createDStream");
    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
}

ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
{
    return ZSTD_initStaticDCtx(workspace, workspaceSize);
}

ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
{
    return ZSTD_createDCtx_advanced(customMem);
}

size_t ZSTD_freeDStream(ZSTD_DStream* zds)
{
    return ZSTD_freeDCtx(zds);
}


/* ***  Initialization  *** */

/* Recommended buffer sizes : one full block (+ header) in, one full block out */
size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }

/* Digests @dict into an internal DDict (by copy or by reference, per
 * @dictLoadMethod) and attaches it for all subsequent frames.
 * Only legal before the first byte of a session is consumed (zdss_init). */
size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
                                   const void* dict, size_t dictSize,
                                   ZSTD_dictLoadMethod_e dictLoadMethod,
                                   ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
    ZSTD_clearDict(dctx);
    if (dict && dictSize != 0) {
        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
        dctx->ddict = dctx->ddictLocal;
        dctx->dictUses = ZSTD_use_indefinitely;
    }
    return 0;
}

size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}

size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}

/* References @prefix (raw content, by reference) for the NEXT frame only :
 * dictUses is set to ZSTD_use_once, so it auto-detaches afterwards. */
size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
    dctx->dictUses = ZSTD_use_once;
    return 0;
}

size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
}


/* ZSTD_initDStream_usingDict() :
 * return : expected size, aka ZSTD_startingInputLength().
 * this function cannot fail */
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
{
    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
    return ZSTD_startingInputLength(zds->format);
}

/* note : this variant can't fail */
size_t ZSTD_initDStream(ZSTD_DStream* zds)
{
    DEBUGLOG(4, "ZSTD_initDStream");
    return ZSTD_initDStream_usingDDict(zds, NULL);
}

/* ZSTD_initDStream_usingDDict() :
 * ddict will just be referenced, and must outlive decompression session
 * this function cannot fail */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
    return ZSTD_startingInputLength(dctx->format);
}

/* ZSTD_resetDStream() :
 * return : expected size, aka ZSTD_startingInputLength().
 * this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
    return ZSTD_startingInputLength(dctx->format);
}


/* Attaches an externally-owned DDict for all subsequent frames.
 * When ZSTD_rmd_refMultipleDDicts is enabled, the ddict is additionally
 * stored in a hash set (lazily allocated) so the right dictionary can later
 * be selected per-frame by dictID. Only legal in zdss_init. */
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
    ZSTD_clearDict(dctx);
    if (ddict) {
        dctx->ddict = ddict;
        dctx->dictUses = ZSTD_use_indefinitely;
        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
            if (dctx->ddictSet == NULL) {
                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
                if (!dctx->ddictSet) {
                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
                }
            }
            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
        }
    }
    return 0;
}

/* ZSTD_DCtx_setMaxWindowSize() :
 * note : no direct equivalence in ZSTD_DCtx_setParameter,
 * since this version sets windowSize, and the other sets windowLog */
size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
{
    /* bounds come from the windowLog parameter, converted to byte sizes */
    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
    size_t const min = (size_t)1 << bounds.lowerBound;
    size_t const max = (size_t)1 << bounds.upperBound;
    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
    dctx->maxWindowSize = maxWindowSize;
    return 0;
}

size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
{
    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
}

/* Returns the valid [lowerBound, upperBound] range for each decompression
 * parameter; bounds.error is set for unknown parameters. */
ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
{
    ZSTD_bounds bounds = { 0, 0, 0 };
    switch(dParam) {
        case ZSTD_d_windowLogMax:
            bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
            bounds.upperBound = ZSTD_WINDOWLOG_MAX;
            return bounds;
        case ZSTD_d_format:
            bounds.lowerBound = (int)ZSTD_f_zstd1;
            bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
            ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
            return bounds;
        case ZSTD_d_stableOutBuffer:
            bounds.lowerBound = (int)ZSTD_bm_buffered;
            bounds.upperBound = (int)ZSTD_bm_stable;
            return bounds;
        case ZSTD_d_forceIgnoreChecksum:
            bounds.lowerBound = (int)ZSTD_d_validateChecksum;
            bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
            return bounds;
        case ZSTD_d_refMultipleDDicts:
            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
            return bounds;
        default:;
    }
    bounds.error = ERROR(parameter_unsupported);
    return bounds;
}

/* ZSTD_dParam_withinBounds:
 * @return 1 if value is within dParam bounds,
 * 0 otherwise */
static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* Shared range check for ZSTD_DCtx_setParameter() cases */
#define CHECK_DBOUNDS(p,v) { \
    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
}

/* Reads back the current value of a decompression parameter into *value */
size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
{
    switch (param) {
        case ZSTD_d_windowLogMax:
            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
            return 0;
        case ZSTD_d_format:
            *value = (int)dctx->format;
            return 0;
        case ZSTD_d_stableOutBuffer:
            *value = (int)dctx->outBufferMode;
            return 0;
        case ZSTD_d_forceIgnoreChecksum:
            *value = (int)dctx->forceIgnoreChecksum;
            return 0;
        case ZSTD_d_refMultipleDDicts:
            *value = (int)dctx->refMultipleDDicts;
            return 0;
        default:;
    }
    RETURN_ERROR(parameter_unsupported, "");
}

/* Sets a decompression parameter; only legal before a session starts (zdss_init).
 * Each case validates the value against ZSTD_dParam_getBounds(). */
size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
{
    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
    switch(dParam) {
        case ZSTD_d_windowLogMax:
            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;  /* 0 means "default" */
            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
            dctx->maxWindowSize = ((size_t)1) << value;
            return 0;
        case ZSTD_d_format:
            CHECK_DBOUNDS(ZSTD_d_format, value);
            dctx->format = (ZSTD_format_e)value;
            return 0;
        case ZSTD_d_stableOutBuffer:
            CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
            return 0;
        case ZSTD_d_forceIgnoreChecksum:
            CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
            dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
            return 0;
        case ZSTD_d_refMultipleDDicts:
            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
            if (dctx->staticSize != 0) {
                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
            }
            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
            return 0;
        default:;
    }
    RETURN_ERROR(parameter_unsupported, "");
}

/* Resets the session, the parameters, or both, per @reset.
 * Parameter reset is refused mid-session (stage must be zdss_init). */
size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
{
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        dctx->streamStage = zdss_init;
        dctx->noForwardProgress = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
        ZSTD_clearDict(dctx);
        ZSTD_DCtx_resetParameters(dctx);
    }
    return 0;
}


size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
{
    return ZSTD_sizeof_DCtx(dctx);
}

/* Minimum rolling output-buffer size for buffered streaming :
 * window + one block + overwrite margin, capped by frameContentSize.
 * Errors out if the result does not fit in a size_t. */
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
    size_t const minRBSize = (size_t) neededSize;
    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
                    frameParameter_windowTooLarge, "");
    return minRBSize;
}

/* Upper-bound memory estimate for a DStream handling the given window size */
size_t ZSTD_estimateDStreamSize(size_t windowSize)
{
    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
    size_t const inBuffSize = blockSize;  /* no block can be larger */
    size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
    return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
}

/* Same estimate, but sized from an actual frame header read out of @src */
size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
{
    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;   /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
    ZSTD_frameHeader zfh;
    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
    if (ZSTD_isError(err)) return err;
    RETURN_ERROR_IF(err>0, srcSize_wrong, "");  /* err>0 means "need more input" */
    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
                    frameParameter_windowTooLarge, "");
    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
}


/* *****   Decompression   ***** */

/* Heuristics that decide when the internal in/out buffers are so much larger
 * than needed that they should be shrunk (workspace-too-large policy). */
static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
{
    return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
}

static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
{
    if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
        zds->oversizedDuration++;
    else
        zds->oversizedDuration = 0;
}

static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
{
    return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
}

/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
{
    ZSTD_outBuffer const expect = zds->expectedOutBuffer;
    /* No requirement when ZSTD_obm_stable is not enabled. */
    if (zds->outBufferMode != ZSTD_bm_stable)
        return 0;
    /* Any buffer is allowed in zdss_init, this must be the same for every other call until
     * the context is reset.
     */
    if (zds->streamStage == zdss_init)
        return 0;
    /* The buffer must match our expectation exactly. */
    if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
        return 0;
    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
}

/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
 * and updates the stage and the output buffer state. This call is extracted so it can be
 * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
 * NOTE: You must break after calling this function since the streamStage is modified.
 */
static size_t ZSTD_decompressContinueStream(
            ZSTD_DStream* zds, char** op, char* oend,
            void const* src, size_t srcSize) {
    int const isSkipFrame = ZSTD_isSkipFrame(zds);
    if (zds->outBufferMode == ZSTD_bm_buffered) {
        /* decode into the internal rolling buffer, flush later */
        size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
        size_t const decodedSize = ZSTD_decompressContinue(zds,
                zds->outBuff + zds->outStart, dstSize, src, srcSize);
        FORWARD_IF_ERROR(decodedSize, "");
        if (!decodedSize && !isSkipFrame) {
            zds->streamStage = zdss_read;
        } else {
            zds->outEnd = zds->outStart + decodedSize;
            zds->streamStage = zdss_flush;
        }
    } else {
        /* Write directly into the output buffer */
        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
        size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
        FORWARD_IF_ERROR(decodedSize, "");
        *op += decodedSize;
        /* Flushing is not needed.
*/ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = 
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode 
opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + 
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= 
neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + 
zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, 
dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** skipping file: zstd_decompress_block.h ****/ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. 
+ */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! 
ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + 
dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return 
lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. 
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + 
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), 
corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, 
iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! 
ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. 
+ * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, 
+ BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if 
(LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
+ * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. 
*/ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + size_t error = 0; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__GNUC__) && defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. 
The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. 
+ * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; + /* gcc and clang both don't like early returns in this loop. + * Instead break and check for an error at the end of the loop. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } + if (UNLIKELY(!--nbSeq)) break; + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, 
frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, 
lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return 
ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} +#endif + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* 
isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. + * (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} +/**** ended inlining decompress/zstd_decompress_block.c ****/ + +/**** start inlining dictBuilder/cover.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* ***************************************************************************** + * Constructs a dictionary using a heuristic based on the following paper: + * + * Liao, Petri, Moffat, Wirth + * Effective Construction of Relative Lempel-Ziv Dictionaries + * Published in WWW 2016. + * + * Adapted from code originally written by @ot (Giuseppe Ottaviano). + ******************************************************************************/ + +/*-************************************* +* Dependencies +***************************************/ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** start inlining cover.h ****/ +/* + * Copyright (c) 2017-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +/**** start inlining zdict.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DICTBUILDER_H_001 +#define DICTBUILDER_H_001 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*====== Dependencies ======*/ +#include /* size_t */ + + +/* ===== ZDICTLIB_API : control library symbols visibility ===== */ +#ifndef ZDICTLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZDICTLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZDICTLIB_API ZDICTLIB_VISIBILITY +#endif + + +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, + * f=20, and accel=1. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: Dictionary training will fail if there are not enough samples to construct a + * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). + * If dictionary training fails, you should use zstd without a dictionary, as the dictionary + * would've been ineffective anyways. If you believe your samples would benefit from a dictionary + * please open an issue with details, and we can look into it. 
+ * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples); + +typedef struct { + int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */ +} ZDICT_params_t; + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics according to the zstd + * dictionary format. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each + * sample in order. The samples are used to construct the statistics, so they + * should be representative of what you will compress with this dictionary. + * + * The compression level can be set in `parameters`. You should pass the + * compression level you expect to use in production. The statistics for each + * compression level differ, so tuning the dictionary for the compression level + * can help quite a bit. + * + * You can set an explicit dictionary ID in `parameters`, or allow us to pick + * a random dictionary ID for you, but we can't guarantee no collisions. 
+ * + * The dstDictBuffer and the dictContent may overlap, and the content will be + * appended to the end of the header. If the header + the content doesn't fit in + * maxDictSize the beginning of the content is truncated to make room, since it + * is presumed that the most profitable content is at the end of the dictionary, + * since that is the cheapest to reference. + * + * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes. + * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN). + * + * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if + * instructed to, using notificationLevel>0. + * NOTE: This function currently may fail in several edge cases including: + * * Not enough samples + * * Samples are uncompressible + * * Samples are all exactly the same + */ +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. 
+ * Use them only in association with static linking. + * ==================================================================================== */ + +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 + +/*! ZDICT_cover_params_t: + * k and d are the only required parameters. + * For others, value 0 means default. + */ +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. 
*/ + ZDICT_params_t zParams; +} ZDICT_cover_params_t; + +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ + unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ + + ZDICT_params_t zParams; +} ZDICT_fastCover_params_t; + +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. 
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. + * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. 
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); + +/*! ZDICT_trainFromBuffer_fastCover(): + * Train a dictionary from an array of samples using a modified version of COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * d and k are required. + * All other parameters are optional, will use default values if not provided + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, + size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_fastCover(): + * The same requirements as above hold for all the parameters except `parameters`. 
+ * This function tries many parameter combinations (specifically, k and d combinations) + * and picks the best parameters. `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * All of the parameters d, k, steps, f, and accel are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * If f is zero, default value of 20 is used. + * If accel is zero, default value of 1 is used. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, + size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters); + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; + +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". 
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. 
+ Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ +/**** ended inlining zdict.h ****/ + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * A segment is a range in the source as well as the score of the segment. 
+ */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + *Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. + * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + +/** + * Checks total compressed size of a dictionary + */ +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity); + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best); + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best); + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best); + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. 
+ */ +void COVER_best_start(COVER_best_t *best); + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. + */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); +/**** ended inlining cover.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +/**** skipping file: zdict.h ****/ + +/*-************************************* +* Constants +***************************************/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? 
((unsigned)-1) : ((unsigned)1 GB)) +#define COVER_DEFAULT_SPLITPOINT 1.0 + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 2; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. 
+ * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. + */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 COVER_prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * COVER_prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. + */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. + */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. 
+ */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroys a map that is inited with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... */ +static COVER_ctx_t *g_coverCtx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + unsigned i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Return 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. 
+ */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. + */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_coverCtx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. + */ +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, + size_t value) { + size_t count = last - first; + while (count != 0) { + size_t step = count / 2; + const size_t *ptr = first; + ptr += step; + if (*ptr < value) { + first = ++ptr; + count -= step + 1; + } else { + count = step; + } + } + return first; +} + +/** + * Generic groupBy function. + * Groups an array sorted by cmp into groups with equivalent values. + * Calls grp for each group. 
+ */ +static void +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, + int (*cmp)(COVER_ctx_t *, const void *, const void *), + void (*grp)(COVER_ctx_t *, const void *, const void *)) { + const BYTE *ptr = (const BYTE *)data; + size_t num = 0; + while (num < count) { + const BYTE *grpEnd = ptr + size; + ++num; + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { + grpEnd += size; + ++num; + } + grp(ctx, ptr, grpEnd); + ptr = grpEnd; + } +} + +/*-************************************* +* Cover functions +***************************************/ + +/** + * Called on each group of positions with the same dmer. + * Counts the frequency of each dmer and saves it in the suffix array. + * Fills `ctx->dmerAt`. + */ +static void COVER_group(COVER_ctx_t *ctx, const void *group, + const void *groupEnd) { + /* The group consists of all the positions with the same first d bytes. */ + const U32 *grpPtr = (const U32 *)group; + const U32 *grpEnd = (const U32 *)groupEnd; + /* The dmerId is how we will reference this dmer. + * This allows us to map the whole dmer space to a much smaller space, the + * size of the suffix array. + */ + const U32 dmerId = (U32)(grpPtr - ctx->suffix); + /* Count the number of samples this dmer shows up in */ + U32 freq = 0; + /* Details */ + const size_t *curOffsetPtr = ctx->offsets; + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a + * different sample than the last. + */ + size_t curSampleEnd = ctx->offsets[0]; + for (; grpPtr != grpEnd; ++grpPtr) { + /* Save the dmerId for this position so we can get back to it. */ + ctx->dmerAt[*grpPtr] = dmerId; + /* Dictionaries only help for the first reference to the dmer. + * After that zstd can reference the match from the previous reference. + * So only count each dmer once for each sample it is in. 
+ */ + if (*grpPtr < curSampleEnd) { + continue; + } + freq += 1; + /* Binary search to find the end of the sample *grpPtr is in. + * In the common case that grpPtr + 1 == grpEnd we can skip the binary + * search because the loop is over. + */ + if (grpPtr + 1 != grpEnd) { + const size_t *sampleEndPtr = + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); + curSampleEnd = *sampleEndPtr; + curOffsetPtr = sampleEndPtr + 1; + } + } + /* At this point we are never going to look at this segment of the suffix + * array again. We take advantage of this fact to save memory. + * We store the frequency of the dmer in the first position of the group, + * which is dmerId. + */ + ctx->suffix[dmerId] = freq; +} + + +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of dmer d. + * Let S_i be the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, U32 begin, + U32 end, + ZDICT_cover_params_t parameters) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 dmersInK = k - d + 1; + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + /* Reset the activeDmers in the segment */ + COVER_map_clear(activeDmers); + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. 
+ */ + while (activeSegment.end < end) { + /* The dmerId for the dmer at the next position */ + U32 newDmer = ctx->dmerAt[activeSegment.end]; + /* The entry in activeDmers for this dmerId */ + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); + /* If the dmer isn't already present in the segment add its score. */ + if (*newDmerOcc == 0) { + /* The paper suggest using the L-0.5 norm, but experiments show that it + * doesn't help. + */ + activeSegment.score += freqs[newDmer]; + } + /* Add the dmer to the segment */ + activeSegment.end += 1; + *newDmerOcc += 1; + + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + U32 delDmer = ctx->dmerAt[activeSegment.begin]; + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); + activeSegment.begin += 1; + *delDmerOcc -= 1; + /* If this is the last occurrence of the dmer, subtract its score */ + if (*delDmerOcc == 0) { + COVER_map_remove(activeDmers, delDmer); + activeSegment.score -= freqs[delDmer]; + } + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + { + /* Trim off the zero frequency head and tail from the segment. */ + U32 newBegin = bestSegment.end; + U32 newEnd = bestSegment.begin; + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + U32 freq = freqs[ctx->dmerAt[pos]]; + if (freq != 0) { + newBegin = MIN(newBegin, pos); + newEnd = pos + 1; + } + } + bestSegment.begin = newBegin; + bestSegment.end = newEnd; + } + { + /* Zero out the frequency of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + freqs[ctx->dmerAt[pos]] = 0; + } + } + return bestSegment; +} + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. 
 + */
+static int COVER_checkParameters(ZDICT_cover_params_t parameters,
+ size_t maxDictSize) {
+ /* k and d are required parameters */
+ if (parameters.d == 0 || parameters.k == 0) {
+ return 0;
+ }
+ /* k <= maxDictSize */
+ if (parameters.k > maxDictSize) {
+ return 0;
+ }
+ /* d <= k */
+ if (parameters.d > parameters.k) {
+ return 0;
+ }
+ /* 0 < splitPoint <= 1 */
+ if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ */
+static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
+ if (!ctx) {
+ return;
+ }
+ if (ctx->suffix) {
+ free(ctx->suffix);
+ ctx->suffix = NULL;
+ }
+ if (ctx->freqs) {
+ free(ctx->freqs);
+ ctx->freqs = NULL;
+ }
+ if (ctx->dmerAt) {
+ free(ctx->dmerAt);
+ ctx->dmerAt = NULL;
+ }
+ if (ctx->offsets) {
+ free(ctx->offsets);
+ ctx->offsets = NULL;
+ }
+}
+
+/**
+ * Prepare a context for dictionary building.
+ * The context is only dependent on the parameter `d` and can be used multiple
+ * times.
+ * Returns 0 on success or error code on error.
+ * The context must be destroyed with `COVER_ctx_destroy()`.
+ */
+static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+ const size_t *samplesSizes, unsigned nbSamples,
+ unsigned d, double splitPoint) {
+ const BYTE *const samples = (const BYTE *)samplesBuffer;
+ const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+ /* Split samples into testing and training sets */
+ const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
+ const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
+ const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
+ const size_t testSamplesSize = splitPoint < 1.0 ? 
COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); + return ERROR(srcSize_wrong); + } + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); + return ERROR(srcSize_wrong); + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + /* Partial suffix array */ + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + 
DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. + * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global. + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. + */ + g_coverCtx = ctx; +#if defined(__OpenBSD__) + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 0; +} + +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) +{ + const double ratio = (double)nbDmers / maxDictSize; + if (ratio >= 10) { + return; + } + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! 
\n", (U32)maxDictSize, + (U32)nbDmers, ratio); +} + +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, + U32 nbDmers, U32 k, U32 passes) +{ + const U32 minEpochSize = k * 10; + COVER_epoch_info_t epochs; + epochs.num = MAX(1, maxDictSize / k / passes); + epochs.size = nbDmers / epochs.num; + if (epochs.size >= minEpochSize) { + assert(epochs.size * epochs.num <= nbDmers); + return epochs; + } + epochs.size = MIN(minEpochSize, nbDmers); + epochs.num = nbDmers / epochs.size; + assert(epochs.size * epochs.num <= nbDmers); + return epochs; +} + +/** + * Given the prepared context build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); + const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. 
+ */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + COVER_ctx_t ctx; + COVER_map_t activeDmers; + parameters.splitPoint = 1.0; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Checks */ + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Initialize context and activeDmers */ + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to 
allocate dmer map: out of memory\n"); + COVER_ctx_destroy(&ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, + dictBufferCapacity, parameters); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + COVER_ctx_destroy(&ctx); + COVER_map_destroy(&activeDmers); + return dictionarySize; + } +} + + + +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity) { + size_t totalCompressedSize = ERROR(GENERIC); + /* Pointers */ + ZSTD_CCtx *cctx; + ZSTD_CDict *cdict; + void *dst; + /* Local variables */ + size_t dstCapacity; + size_t i; + /* Allocate dst with enough space to compress the maximum sized sample */ + { + size_t maxSampleSize = 0; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + i = parameters.splitPoint < 1.0 ? 
nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, samples + offsets[i], + samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = size; + goto _compressCleanup; + } + totalCompressedSize += size; + } +_compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + return totalCompressedSize; +} + + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best) { + if (best==NULL) return; /* compatible with init on NULL */ + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); + best->liveJobs = 0; + best->dict = NULL; + best->dictSize = 0; + best->compressedSize = (size_t)-1; + memset(&best->parameters, 0, sizeof(best->parameters)); +} + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + while (best->liveJobs != 0) { + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); + } + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best) { + if (!best) { + return; + } + COVER_best_wait(best); + if (best->dict) { + free(best->dict); + } + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); +} + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + ++best->liveJobs; + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
+ */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) { + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; + if (!best) { + return; + } + { + size_t liveJobs; + ZSTD_pthread_mutex_lock(&best->mutex); + --best->liveJobs; + liveJobs = best->liveJobs; + /* If the new dictionary is better */ + if (compressedSize < best->compressedSize) { + /* Allocate space if necessary */ + if (!best->dict || best->dictSize < dictSize) { + if (best->dict) { + free(best->dict); + } + best->dict = malloc(dictSize); + if (!best->dict) { + best->compressedSize = ERROR(GENERIC); + best->dictSize = 0; + ZSTD_pthread_cond_signal(&best->cond); + ZSTD_pthread_mutex_unlock(&best->mutex); + return; + } + } + /* Save the dictionary, parameters, and size */ + if (dict) { + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; + } + } + if (liveJobs == 0) { + ZSTD_pthread_cond_broadcast(&best->cond); + } + ZSTD_pthread_mutex_unlock(&best->mutex); + } +} + +COVER_dictSelection_t COVER_dictSelectionError(size_t error) { + COVER_dictSelection_t selection = { NULL, 0, error }; + return selection; +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = 
customDictContent + dictContentSize; + + BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity); + BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity); + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( + largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } + + largestDict = dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( + candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + 
free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (totalCompressedSize <= largestCompressed * regressionTolerance) { + COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; + free(largestDictbuffer); + return selection; + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } +} + +/** + * Parameters for COVER_tryParameters(). + */ +typedef struct COVER_tryParameters_data_s { + const COVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} COVER_tryParameters_data_t; + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
+ */ +static void COVER_tryParameters(void *opaque) +{ + /* Save parameters as local variables */ + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque; + const COVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Allocate space for hash table, dict, and freqs */ + COVER_map_t activeDmers; + BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + goto _cleanup; + } + if (!dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, + dictBufferCapacity, parameters); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + COVER_map_destroy(&activeDmers); + COVER_dictSelectionFree(selection); + free(freqs); +} + +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters) +{ + /* constants */ 
+ const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 
0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + COVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( + sizeof(COVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + COVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = *parameters; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &COVER_tryParameters, data); + } else { + COVER_tryParameters(data); + } + /* Print status */ + 
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + COVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + *parameters = best.parameters; + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } +} +/**** ended inlining dictBuilder/cover.c ****/ +/**** start inlining dictBuilder/divsufsort.c ****/ +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include +#include +#include + +/**** start inlining divsufsort.h ****/ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. 
+ * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ +/**** ended inlining divsufsort.h ****/ + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define 
SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 
244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. 
*/ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < 
last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + 
+/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + 
else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, 
middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= 
k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. 
*/ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. 
*/ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; 
(++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, 
first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. 
*/ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of type B* suffixes. 
*/ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. 
*/ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} +/**** ended inlining dictBuilder/divsufsort.c ****/ +/**** start inlining dictBuilder/fastcover.c ****/ +/* + * Copyright (c) 2018-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/*-************************************* +* Dependencies +***************************************/ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: cover.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../compress/zstd_compress_internal.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +/**** skipping file: zdict.h ****/ + + +/*-************************************* +* Constants +***************************************/ +#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) +#define FASTCOVER_MAX_F 31 +#define FASTCOVER_MAX_ACCEL 10 +#define FASTCOVER_DEFAULT_SPLITPOINT 0.75 +#define DEFAULT_F 20 +#define DEFAULT_ACCEL 1 + + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 2; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) 
LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + + +/*-************************************* +* Hash Functions +***************************************/ +/** + * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector + */ +static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) { + if (d == 6) { + return ZSTD_hash6Ptr(p, f); + } + return ZSTD_hash8Ptr(p, f); +} + + +/*-************************************* +* Acceleration +***************************************/ +typedef struct { + unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */ + unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */ +} FASTCOVER_accel_t; + + +static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { + { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ + { 100, 0 }, /* accel = 1 */ + { 50, 1 }, /* accel = 2 */ + { 34, 2 }, /* accel = 3 */ + { 25, 3 }, /* accel = 4 */ + { 20, 4 }, /* accel = 5 */ + { 17, 5 }, /* accel = 6 */ + { 14, 6 }, /* accel = 7 */ + { 13, 7 }, /* accel = 8 */ + { 11, 8 }, /* accel = 9 */ + { 10, 9 }, /* accel = 10 */ +}; + + +/*-************************************* +* Context +***************************************/ +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + size_t nbDmers; + U32 *freqs; + unsigned d; + unsigned f; + FASTCOVER_accel_t accelParams; +} FASTCOVER_ctx_t; + + +/*-************************************* +* Helper functions +***************************************/ +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of all dmers with hash value d. + * Let S_i be hash value of the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... 
+ F(S_{k-d+1}) + * + * Once the dmer with hash value d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, + U32 *freqs, U32 begin, U32 end, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 f = ctx->f; + const U32 dmersInK = k - d + 1; + + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + + /* Reset the activeDmers in the segment */ + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* Get hash value of current dmer */ + const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); + + /* Add frequency of this index to score if this is the first occurrence of index in active segment */ + if (segmentFreqs[idx] == 0) { + activeSegment.score += freqs[idx]; + } + /* Increment end of segment and segmentFreqs*/ + activeSegment.end += 1; + segmentFreqs[idx] += 1; + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + /* Get hash value of the dmer to be eliminated from active segment */ + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ + if (segmentFreqs[delIndex] == 0) { + activeSegment.score -= freqs[delIndex]; + } + /* Increment start of segment */ + activeSegment.begin += 1; + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + 
bestSegment = activeSegment; + } + } + + /* Zero out rest of segmentFreqs array */ + while (activeSegment.begin < end) { + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + activeSegment.begin += 1; + } + + { + /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d); + freqs[i] = 0; + } + } + + return bestSegment; +} + + +static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize, unsigned f, + unsigned accel) { + /* k, d, and f are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* d has to be 6 or 8 */ + if (parameters.d != 6 && parameters.d != 8) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < f <= FASTCOVER_MAX_F*/ + if (f > FASTCOVER_MAX_F || f == 0) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { + return 0; + } + /* 0 < accel <= 10 */ + if (accel > 10 || accel == 0) { + return 0; + } + return 1; +} + + +/** + * Clean up a context initialized with `FASTCOVER_ctx_init()`. 
+ */ +static void +FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx) +{ + if (!ctx) return; + + free(ctx->freqs); + ctx->freqs = NULL; + + free(ctx->offsets); + ctx->offsets = NULL; +} + + +/** + * Calculate for frequency of hash value of each dmer in ctx->samples + */ +static void +FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) +{ + const unsigned f = ctx->f; + const unsigned d = ctx->d; + const unsigned skip = ctx->accelParams.skip; + const unsigned readLength = MAX(d, 8); + size_t i; + assert(ctx->nbTrainSamples >= 5); + assert(ctx->nbTrainSamples <= ctx->nbSamples); + for (i = 0; i < ctx->nbTrainSamples; i++) { + size_t start = ctx->offsets[i]; /* start of current dmer */ + size_t const currSampleEnd = ctx->offsets[i+1]; + while (start + readLength <= currSampleEnd) { + const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d); + freqs[dmerIndex]++; + start = start + skip + 1; + } + } +} + + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `FASTCOVER_ctx_destroy()`. + */ +static size_t +FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint, unsigned f, + FASTCOVER_accel_t accelParams) +{ + const BYTE* const samples = (const BYTE*)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? 
COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); + return ERROR(srcSize_wrong); + } + + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); + return ERROR(srcSize_wrong); + } + + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->d = d; + ctx->f = f; + ctx->accelParams = accelParams; + + /* The offsets of each file */ + ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t)); + if (ctx->offsets == NULL) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + /* Fill offsets from the samplesSizes */ + { U32 i; + ctx->offsets[0] = 0; + assert(nbSamples >= 5); + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + + /* Initialize frequency array of size 2^f */ + ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32)); + if (ctx->freqs == NULL) { + 
DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Computing frequencies\n"); + FASTCOVER_computeFrequency(ctx->freqs, ctx); + + return 0; +} + + +/** + * Given the prepared context build the dictionary. + */ +static size_t +FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, + U32* freqs, + void* dictBuffer, size_t dictBufferCapacity, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) +{ + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); + const size_t maxZeroScoreRun = 10; + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = FASTCOVER_selectSegment( + ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); + + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. 
+ */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +/** + * Parameters for FASTCOVER_tryParameters(). + */ +typedef struct FASTCOVER_tryParameters_data_s { + const FASTCOVER_ctx_t* ctx; + COVER_best_t* best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} FASTCOVER_tryParameters_data_t; + + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. + */ +static void FASTCOVER_tryParameters(void* opaque) +{ + /* Save parameters as local variables */ + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque; + const FASTCOVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); + /* Allocate space for hash table, dict, and freqs */ + BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + if (!segmentFreqs || !dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); + /* Build the dictionary */ + { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, + parameters, segmentFreqs); + + const unsigned 
nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + free(segmentFreqs); + COVER_dictSelectionFree(selection); + free(freqs); +} + + +static void +FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, + ZDICT_cover_params_t* coverParams) +{ + coverParams->k = fastCoverParams.k; + coverParams->d = fastCoverParams.d; + coverParams->steps = fastCoverParams.steps; + coverParams->nbThreads = fastCoverParams.nbThreads; + coverParams->splitPoint = fastCoverParams.splitPoint; + coverParams->zParams = fastCoverParams.zParams; + coverParams->shrinkDict = fastCoverParams.shrinkDict; +} + + +static void +FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, + ZDICT_fastCover_params_t* fastCoverParams, + unsigned f, unsigned accel) +{ + fastCoverParams->k = coverParams.k; + fastCoverParams->d = coverParams.d; + fastCoverParams->steps = coverParams.steps; + fastCoverParams->nbThreads = coverParams.nbThreads; + fastCoverParams->splitPoint = coverParams.splitPoint; + fastCoverParams->f = f; + fastCoverParams->accel = accel; + fastCoverParams->zParams = coverParams.zParams; + fastCoverParams->shrinkDict = coverParams.shrinkDict; +} + + +ZDICTLIB_API size_t +ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + FASTCOVER_ctx_t ctx; + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t 
accelParams; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Assign splitPoint and f if not provided */ + parameters.splitPoint = 1.0; + parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; + parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel; + /* Convert to cover parameter */ + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(parameters, &coverParams); + /* Checks */ + if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, + parameters.accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Assign corresponding FASTCOVER_accel_t to accelParams*/ + accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; + /* Initialize context */ + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + coverParams.d, parameters.splitPoint, parameters.f, + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); + /* Build the dictionary */ + DISPLAYLEVEL(2, "Building dictionary\n"); + { + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); + const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, + dictBufferCapacity, coverParams, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); + const size_t dictionarySize = 
ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + FASTCOVER_ctx_destroy(&ctx); + free(segmentFreqs); + return dictionarySize; + } +} + + +ZDICTLIB_API size_t +ZDICT_optimizeTrainFromBuffer_fastCover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters) +{ + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; + const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); + return ERROR(parameter_outOfBound); + } + if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(*parameters, &coverParams); + accelParams = FASTCOVER_defaultAccelParameters[accel]; + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 
0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + FASTCOVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( + sizeof(FASTCOVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + FASTCOVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = coverParams; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, + data->ctx->f, accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &FASTCOVER_tryParameters, 
data); + } else { + FASTCOVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + FASTCOVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel); + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } + +} +/**** ended inlining dictBuilder/fastcover.c ****/ +/**** start inlining dictBuilder/zdict.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************** +* Tuning parameters +****************************************/ +#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */ +#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20) +#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO) + + +/*-************************************** +* Compiler Options +****************************************/ +/* Unix Large Files support (>4GB) */ +#define _FILE_OFFSET_BITS 64 +#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE +# endif +#elif ! 
defined(__LP64__) /* No point defining Large file for 64 bit */ +# ifndef _LARGEFILE64_SOURCE +# define _LARGEFILE64_SOURCE +# endif +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc, free */ +#include /* memset */ +#include /* fprintf, fopen, ftello64 */ +#include /* clock */ + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: divsufsort.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif +/**** skipping file: zdict.h ****/ +/**** skipping file: ../compress/zstd_compress_internal.h ****/ + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define DICTLISTSIZE_DEFAULT 10000 + +#define NOISELENGTH 32 + +static const U32 g_selectivity_default = 9; + + +/*-************************************* +* Console display +***************************************/ +#undef DISPLAY +#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) 
if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ + +static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } + +static void ZDICT_printHex(const void* ptr, size_t length) +{ + const BYTE* const b = (const BYTE*)ptr; + size_t u; + for (u=0; u126) c = '.'; /* non-printable char */ + DISPLAY("%c", c); + } +} + + +/*-******************************************************** +* Helper functions +**********************************************************/ +unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + +unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dictBuffer + 4); +} + +size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) +{ + size_t headerSize; + if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); + + { ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); + if (!bs || !wksp) { + headerSize = ERROR(memory_allocation); + } else { + ZSTD_reset_compressedBlockState(bs); + headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize); + } + + free(bs); + free(wksp); + } + + return headerSize; +} + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static unsigned ZDICT_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return 
(__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. 
+*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio, U32 notificationLevel) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 const pattern16 = MEM_read16(b+pos+4); + U32 u, patternEnd = 6; + while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; + if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; + for (u=1; u= MINMATCHLENGTH); + } + + /* look backward */ + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %i at pos %7u ", (unsigned)(end-start), 
MINMATCHLENGTH, (unsigned)pos); + DISPLAYLEVEL(4, "\n"); + + for (mml = MINMATCHLENGTH ; ; mml++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[suffix[id] + mml] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + mml]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new dict */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } + + /* look backward */ + { size_t length = MINMATCHLENGTH; + while ((length >= MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { U32 l = (U32)maxLength; + BYTE const c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < MINMATCHLENGTH) return solution; /* 
skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n", + (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { U32 id; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 eltEnd = elt.pos + elt.length; + const char* const buf = (const char*) buffer; + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ + /* append */ + U32 const addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + elt = table[u]; + /* sort : improve rank */ + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ + /* append */ + int const addedLength = (int)eltEnd - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx bonus */ + if (addedLength > 0) { /* otherwise, elt fully included into existing */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + /* sort : improve rank */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } + + if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) { + 
if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) { + size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 ); + table[u].pos = elt.pos; + table[u].savings += (U32)(elt.savings * addedLength / elt.length); + table[u].length = MIN(elt.length, table[u].length + 1); + return u; + } + } + } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : table[0].pos stores nb of elts */ + U32 const max = table[0].pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer) +{ + /* merge if possible */ + U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer); + if (mergeId) { + U32 newMerge = 1; + while (newMerge) { + newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u=l) { \ + if (ZDICT_clockSpan(displayClock) > refreshRate) \ + { displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (notificationLevel>=4) fflush(stderr); } } + + /* init */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { + result = ERROR(memory_allocation); + goto _cleanup; + } + if (minRatio < MINRATIO) minRatio = MINRATIO; + memset(doneMarks, 0, bufferSize+16); + + /* limit sample set size (divsufsort limitation)*/ + if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", 
(unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20)); + while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20)); + { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + /* build reverse suffix sort */ + { size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* note filePos tracks borders between samples. + It's not used at this stage, but planned to become useful in a later update */ + filePos[0] = 0; + for (pos=1; pos> 21); + } +} + + +typedef struct +{ + ZSTD_CDict* dict; /* dictionary */ + ZSTD_CCtx* zc; /* working context */ + void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ +} EStats_ress_t; + +#define MAXREPOFFSET 1024 + +static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, + unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize, + U32 notificationLevel) +{ + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); + size_t cSize; + + if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ + { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; } + + } + cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; } + + if (cSize) { /* if == 0; block is not compressible */ + const 
seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + ZSTD_seqToCodes(seqStorePtr); + + { const BYTE* codePtr = seqStorePtr->ofCode; + U32 u; + for (u=0; umlCode; + U32 u; + for (u=0; ullCode; + U32 u; + for (u=0; u= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } +} + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total=0; + unsigned u; + for (u=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + +/* ZDICT_flatLit() : + * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. + * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. 
+ */ +static void ZDICT_flatLit(unsigned* countLit) +{ + int u; + for (u=1; u<256; u++) countLit[u] = 2; + countLit[0] = 4; + countLit[253] = 1; + countLit[254] = 1; +} + +#define OFFCODE_MAX 30 /* only applicable to first block */ +static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, + int compressionLevel, + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + unsigned notificationLevel) +{ + unsigned countLit[256]; + HUF_CREATE_STATIC_CTABLE(hufTable, 255); + unsigned offcodeCount[OFFCODE_MAX+1]; + short offcodeNCount[OFFCODE_MAX+1]; + U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); + unsigned matchLengthCount[MaxML+1]; + short matchLengthNCount[MaxML+1]; + unsigned litLengthCount[MaxLL+1]; + short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET]; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; + EStats_ress_t esr = { NULL, NULL, NULL }; + ZSTD_parameters params; + U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; + size_t pos = 0, errorCode; + size_t eSize = 0; + size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + BYTE* dstPtr = (BYTE*)dstBuffer; + + /* init */ + DEBUGLOG(4, "ZDICT_analyzeEntropy"); + if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ + for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ + for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; + for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1; + for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1; + memset(repOffset, 0, sizeof(repOffset)); + repOffset[1] = repOffset[4] = repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); + if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT; + params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); + + 
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); + if (!esr.dict || !esr.zc || !esr.workPlace) { + eSize = ERROR(memory_allocation); + DISPLAYLEVEL(1, "Not enough memory \n"); + goto _cleanup; + } + + /* collect stats on all samples */ + for (u=0; u dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + +static size_t ZDICT_addEntropyTablesFromBuffer_advanced( + void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + size_t hSize = 8; + + /* calculate entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... \n"); + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* add dictionary header (after entropy tables) */ + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); + { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? 
params.dictID : compliantID; + MEM_writeLE32((char*)dictBuffer+4, dictID); + } + + if (hSize + dictContentSize < dictBufferCapacity) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); + return MIN(dictBufferCapacity, hSize+dictContentSize); +} + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : +* Warning : `samplesBuffer` must be followed by noisy guard band !!! +* @return : size of dictionary, or an error code which can be tested with ZDICT_isError() +*/ +static size_t ZDICT_trainFromBuffer_unsafe_legacy( + void* dictBuffer, size_t maxDictSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); + unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; + unsigned const minRep = (selectivity > 30) ? 
MINRATIO : nbSamples >> selectivity; + size_t const targetDictSize = maxDictSize; + size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + size_t dictSize = 0; + U32 const notificationLevel = params.zParams.notificationLevel; + + /* checks */ + if (!dictList) return ERROR(memory_allocation); + if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */ + if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */ + + /* init */ + ZDICT_initDictItem(dictList); + + /* build dictionary */ + ZDICT_trainBuffer_legacy(dictList, dictListSize, + samplesBuffer, samplesBuffSize, + samplesSizes, nbSamples, + minRep, notificationLevel); + + /* display best matches */ + if (params.zParams.notificationLevel>= 3) { + unsigned const nb = MIN(25, dictList[0].pos); + unsigned const dictContentSize = ZDICT_dictSize(dictList); + unsigned u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb-1); + for (u=1; u samplesBuffSize) || ((pos + length) > samplesBuffSize)) { + free(dictList); + return ERROR(GENERIC); /* should never happen */ + } + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, length, pos, (unsigned)dictList[u].savings); + ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); + DISPLAYLEVEL(3, "| \n"); + } } + + + /* create dictionary */ + { unsigned dictContentSize = ZDICT_dictSize(dictList); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */ + if (dictContentSize < targetDictSize/4) { + DISPLAYLEVEL(2, "! 
warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize); + if (samplesBuffSize < 10 * targetDictSize) + DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20)); + if (minRep > MINRATIO) { + DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); + DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); + } + } + + if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + unsigned proposedSelectivity = selectivity-1; + while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } + DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize); + DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! 
always test dictionary efficiency on real samples \n"); + } + + /* limit dictionary size */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n targetDictSize) { currentSize -= dictList[n].length; break; } + } + dictList->pos = n; + dictContentSize = currentSize; + } + + /* build dict content */ + { U32 u; + BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; + if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } } + + dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, + samplesBuffer, samplesSizes, nbSamples, + params.zParams); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* ZDICT_trainFromBuffer_legacy() : + * issue : samplesBuffer need to be followed by a noisy guard band. + * work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + size_t result; + void* newBuff; + size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + result = + ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, + samplesSizes, nbSamples, params); + free(newBuff); + return result; +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* 
samplesSizes, unsigned nbSamples) +{ + ZDICT_fastCover_params_t params; + DEBUGLOG(3, "ZDICT_trainFromBuffer"); + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + /* Use default level since no compression level information is available */ + params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) + params.zParams.notificationLevel = DEBUGLEVEL; +#endif + return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + ¶ms); +} + +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} +/**** ended inlining dictBuilder/zdict.c ****/ diff --git a/src/deps/basis-universal/zstd/zstd.h b/src/deps/basis-universal/zstd/zstd.h new file mode 100644 index 0000000000..222339d71a --- /dev/null +++ b/src/deps/basis-universal/zstd/zstd.h @@ -0,0 +1,2450 @@ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. 
Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 9 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. 
+ * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). 
+ * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. 
+ * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. 
+ * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. 
+ * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. 
+ * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. 
+ */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. 
+ * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! 
ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. 
+ */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. 
+ * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. 
+* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). 
+* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. 
+ * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. 
+ * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). 
+ */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. 
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. 
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
*/ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! 
ZSTD_getDictID_fromDDict() :
+ * Provides the dictID of the dictionary loaded into `ddict`.
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ * Provides the dictID required to decompress the frame stored within `src`.
+ * If @return == 0, the dictID could not be decoded.
+ * This could be for one of the following reasons :
+ * - The frame does not require a dictionary to be decoded (most common case).
+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *   Note : this use case also happens when using a non-conformant dictionary.
+ * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ * - This is not a Zstandard frame.
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/*******************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ * Create an internal CDict from `dict` buffer.
+ * Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. 
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. 
+ * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1< 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. 
*/ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. 
+ * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/*************************************** +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
+ * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *          in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+ * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
+ * simply acts as a block delimiter.
+ *
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
+ */
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. 
It has a minimum value of ZSTD_MINMATCH_MIN.
+ * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *   is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+                                          const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                          const void* src, size_t srcSize);
+
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. 
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! 
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. 
+ * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). 
+ * + * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, compression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST + * not be modified during compression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_inBuffer to find + * matches. Normally zstd maintains its own window buffer for this purpose, + * but passing this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 + +/* ZSTD_c_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_outBuffer will not be resized between + * calls. Specifically: (out.size - out.pos) will never grow. This gives the + * compressor the freedom to say: If the compressed data doesn't fit in the + * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to + * always decompress directly into the output buffer, instead of decompressing + * into an internal buffer and copying to the output buffer. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer. It will still allocate the + * input window buffer (see ZSTD_c_stableInBuffer). + * + * Zstd will check that (out.size - out.pos) never grows and return an error + * if it does. While not strictly necessary, this should prevent surprises. + */ +#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10 + +/* ZSTD_c_blockDelimiters + * Default is 0 == ZSTD_sf_noBlockDelimiters. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * + * Designates whether or not the given array of ZSTD_Sequence contains block delimiters + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. 
+ * See the definition of ZSTD_Sequence for more specifics. + */ +#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 + +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. 
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. 
+ * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around. + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. 
+ */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
*/ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). 
+ ZSTD_compressContinue() doesn't guarantee recovery after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! 
ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
+ But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. 
+*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif From 70da698fabc29e8a8564fd1d5b20cc5a4d56baa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 16 Dec 2021 18:15:17 +0100 Subject: [PATCH 42/53] Strange things happening with external Magnum if I don't do this. --- src/esp/core/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/esp/core/CMakeLists.txt b/src/esp/core/CMakeLists.txt index 25a931d8e0..dfe2b02b80 100644 --- a/src/esp/core/CMakeLists.txt +++ b/src/esp/core/CMakeLists.txt @@ -31,6 +31,8 @@ configure_file( ) find_package(Corrade REQUIRED Utility) +find_package(Magnum REQUIRED) +find_package(MagnumIntegration REQUIRED Eigen) add_library( core STATIC From 98c5efbdf72b19611636dd3ed6df2897268cb5f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 16 Dec 2021 18:12:13 +0100 Subject: [PATCH 43/53] Why those bugs? Nothing works. Everything broken. 
--- src/esp/assets/GenericMeshData.cpp | 14 ++++++++++---- src/esp/assets/ResourceManager.cpp | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/esp/assets/GenericMeshData.cpp b/src/esp/assets/GenericMeshData.cpp index bb8d063907..036321c78a 100644 --- a/src/esp/assets/GenericMeshData.cpp +++ b/src/esp/assets/GenericMeshData.cpp @@ -65,10 +65,16 @@ void GenericMeshData::setMeshData(Magnum::Trade::MeshData&& meshData) { /* For collision data we need indices as UnsignedInt. If the mesh already has those, just make the collision data reference them. If not, unpack them and store them here. */ - if (meshData_->indexType() == Mn::MeshIndexType::UnsignedInt) - collisionMeshData_.indices = meshData_->mutableIndices(); - else - collisionMeshData_.indices = indexData_ = meshData_->indicesAsArray(); + if(meshData_->isIndexed()) { // TODO how did this even work for non-indexed meshes???? + if (meshData_->indexType() == Mn::MeshIndexType::UnsignedInt) + collisionMeshData_.indices = meshData_->mutableIndices(); + else + collisionMeshData_.indices = indexData_ = meshData_->indicesAsArray(); + } else { + collisionMeshData_.indices = indexData_ = Cr::Containers::Array{Mn::NoInit, meshData_->vertexCount()}; + for(std::size_t i = 0; i != indexData_.size(); ++i) + indexData_[i] = i; + } } // setMeshData void GenericMeshData::importAndSetMeshData( diff --git a/src/esp/assets/ResourceManager.cpp b/src/esp/assets/ResourceManager.cpp index c97b7edb32..3d7e85c922 100644 --- a/src/esp/assets/ResourceManager.cpp +++ b/src/esp/assets/ResourceManager.cpp @@ -1377,8 +1377,8 @@ bool ResourceManager::loadRenderAssetGeneral(const AssetInfo& info) { meshes_.at(meshMetaData.meshIndex.first)) { // no default scene --- standalone OBJ/PLY files, for example // take a wild guess and load the first mesh with the first material - // addMeshToDrawables(metaData, *parent, drawables, 0, 0); - loadMeshHierarchy(*fileImporter_, meshMetaData.root, 0); + 
meshMetaData.root.children.emplace_back(); + meshMetaData.root.children.back().meshIDLocal = 0; } else { ESP_ERROR() << "No default scene available and no meshes found, exiting"; return false; From 05ae5bc2c0e06ab32c6075966db6c5442837a3c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Wed, 8 Dec 2021 17:09:59 +0100 Subject: [PATCH 44/53] ye --- src/cmake/dependencies.cmake | 6 + src/deps/corrade | 2 +- src/deps/magnum | 2 +- src/deps/magnum-bindings | 2 +- src/deps/magnum-integration | 2 +- src/deps/magnum-plugins | 2 +- src/esp/assets/GenericMeshData.cpp | 2 +- src/esp/batched_sim/BatchedSimAssert.h | 8 +- src/esp/batched_sim/BatchedSimulator.cpp | 295 ++++++- src/esp/batched_sim/BatchedSimulator.h | 43 +- src/esp/batched_sim/BpsImporter.conf | 30 + src/esp/batched_sim/BpsImporter.cpp | 718 ++++++++++++++++++ src/esp/batched_sim/CMakeLists.txt | 144 +++- src/esp/batched_sim/EpisodeGenerator.cpp | 11 +- src/esp/batched_sim/EpisodeGenerator.h | 4 + src/esp/batched_sim/EpisodeSet.cpp | 8 + src/esp/batched_sim/EpisodeSet.h | 14 +- src/esp/batched_sim/GlmUtils.cpp | 6 + src/esp/batched_sim/GlmUtils.h | 8 + src/esp/batched_sim/MagnumRenderer.cpp | 538 +++++++++++++ src/esp/batched_sim/MagnumRenderer.h | 79 ++ .../batched_sim/MagnumRendererConverter.cpp | 377 +++++++++ src/esp/batched_sim/MagnumRendererDemo.cpp | 112 +++ .../batched_sim/MagnumRendererStandalone.cpp | 137 ++++ .../batched_sim/MagnumRendererStandalone.h | 32 + src/esp/batched_sim/PlacementHelper.cpp | 3 + src/esp/batched_sim/PlacementHelper.h | 2 +- src/esp/batched_sim/README.md | 106 +++ src/esp/batched_sim/configure.h.cmake | 2 + src/esp/bindings/BatchedSimBindings.cpp | 6 + src/esp/geo/VoxelGrid.cpp | 6 +- src/esp/sim/Simulator.cpp | 8 +- src/tests/BatchedSimTest.cpp | 58 +- src/tests/CMakeLists.txt | 7 + src/tests/configure.h.cmake | 2 + 35 files changed, 2703 insertions(+), 79 deletions(-) create mode 100644 src/esp/batched_sim/BpsImporter.conf create mode 100644 
src/esp/batched_sim/BpsImporter.cpp create mode 100644 src/esp/batched_sim/MagnumRenderer.cpp create mode 100644 src/esp/batched_sim/MagnumRenderer.h create mode 100644 src/esp/batched_sim/MagnumRendererConverter.cpp create mode 100644 src/esp/batched_sim/MagnumRendererDemo.cpp create mode 100644 src/esp/batched_sim/MagnumRendererStandalone.cpp create mode 100644 src/esp/batched_sim/MagnumRendererStandalone.h create mode 100644 src/esp/batched_sim/README.md create mode 100644 src/esp/batched_sim/configure.h.cmake diff --git a/src/cmake/dependencies.cmake b/src/cmake/dependencies.cmake index 59ed1ad41e..18e033358c 100644 --- a/src/cmake/dependencies.cmake +++ b/src/cmake/dependencies.cmake @@ -207,6 +207,12 @@ if(NOT USE_SYSTEM_MAGNUM) set(WITH_OPENGLTESTER ON CACHE BOOL "" FORCE) endif() + # Extra plugins for batch renderer asset import / conversion + set(WITH_GLTFIMPORTER ON CACHE BOOL "" FORCE) + set(WITH_GLTFSCENECONVERTER ON CACHE BOOL "" FORCE) + set(WITH_KTXIMPORTER ON CACHE BOOL "" FORCE) + set(WITH_KTXIMAGECONVERTER ON CACHE BOOL "" FORCE) + # Basis Universal. 
The repo is extremely huge and so instead of a Git # submodule we bundle just the transcoder files, and only a subset of the # formats (BC7 mode 6 has > 1 MB tables, ATC/FXT1/PVRTC2 are quite rare and diff --git a/src/deps/corrade b/src/deps/corrade index 3acef468e5..72ee390afa 160000 --- a/src/deps/corrade +++ b/src/deps/corrade @@ -1 +1 @@ -Subproject commit 3acef468e5bf982f3fc506c0895995139bf91f88 +Subproject commit 72ee390afa8dd1f9d94355595ff4dc74408977fc diff --git a/src/deps/magnum b/src/deps/magnum index da27aaca3b..21ef0f66eb 160000 --- a/src/deps/magnum +++ b/src/deps/magnum @@ -1 +1 @@ -Subproject commit da27aaca3ba30731cbf1ac43d0c60be7648570ac +Subproject commit 21ef0f66ebbd96f8c3a120a007e8feba20a9f95d diff --git a/src/deps/magnum-bindings b/src/deps/magnum-bindings index 5994150a68..24f2242842 160000 --- a/src/deps/magnum-bindings +++ b/src/deps/magnum-bindings @@ -1 +1 @@ -Subproject commit 5994150a68a216621582f76ecf394d1b42758abc +Subproject commit 24f22428428355e983f1cad2030369415b641d55 diff --git a/src/deps/magnum-integration b/src/deps/magnum-integration index da3c1ab2fb..5a6aa83561 160000 --- a/src/deps/magnum-integration +++ b/src/deps/magnum-integration @@ -1 +1 @@ -Subproject commit da3c1ab2fb19e547d7da6dfcd942a52958b8bd5a +Subproject commit 5a6aa83561d4e411ddd968ed50c92aa9443e8191 diff --git a/src/deps/magnum-plugins b/src/deps/magnum-plugins index 9e30f511ee..ee1a20981a 160000 --- a/src/deps/magnum-plugins +++ b/src/deps/magnum-plugins @@ -1 +1 @@ -Subproject commit 9e30f511ee37d88c95367acda3ed72be707dd725 +Subproject commit ee1a20981ad5fd8670b98969d54c8ed47bd446db diff --git a/src/esp/assets/GenericMeshData.cpp b/src/esp/assets/GenericMeshData.cpp index 036321c78a..62d00be776 100644 --- a/src/esp/assets/GenericMeshData.cpp +++ b/src/esp/assets/GenericMeshData.cpp @@ -67,7 +67,7 @@ void GenericMeshData::setMeshData(Magnum::Trade::MeshData&& meshData) { and store them here. 
*/ if(meshData_->isIndexed()) { // TODO how did this even work for non-indexed meshes???? if (meshData_->indexType() == Mn::MeshIndexType::UnsignedInt) - collisionMeshData_.indices = meshData_->mutableIndices(); + collisionMeshData_.indices = meshData_->mutableIndices().asContiguous(); else collisionMeshData_.indices = indexData_ = meshData_->indicesAsArray(); } else { diff --git a/src/esp/batched_sim/BatchedSimAssert.h b/src/esp/batched_sim/BatchedSimAssert.h index 0c3d8f1585..ab822c850e 100644 --- a/src/esp/batched_sim/BatchedSimAssert.h +++ b/src/esp/batched_sim/BatchedSimAssert.h @@ -5,13 +5,9 @@ #ifndef ESP_BATCHEDSIM_BATCHEDSIMASSERT_H_ #define ESP_BATCHEDSIM_BATCHEDSIMASSERT_H_ -#include +#include -#ifdef NDEBUG -#define BATCHED_SIM_ASSERT(expr) do {} while(0) -#else -#define BATCHED_SIM_ASSERT(expr) CORRADE_INTERNAL_ASSERT(expr) -#endif +#define BATCHED_SIM_ASSERT(expr) CORRADE_INTERNAL_DEBUG_ASSERT(expr) #define BATCHED_SIM_ASSERT_VECTOR_ACCESS(vec, i) BATCHED_SIM_ASSERT(i >= 0 && i < vec.size()) diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index 63e22181d7..da5a85cbb1 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -3,35 +3,50 @@ // LICENSE file in the root directory of this source tree. 
#include "BatchedSimulator.h" +#ifndef MAGNUM_RENDERER #include "esp/batched_sim/BatchedSimAssert.h" +#endif #include "esp/batched_sim/GlmUtils.h" #include "esp/batched_sim/PlacementHelper.h" #include "esp/batched_sim/ProfilingScope.h" #include "esp/gfx/replay/Keyframe.h" #include "esp/io/json.h" - +#ifndef MAGNUM_RENDERER // #include +#endif #include #include #include #include +#include #ifndef NDEBUG #define ENABLE_DEBUG_INSTANCES #endif +#ifdef MAGNUM_RENDERER +#include +#endif + namespace esp { namespace batched_sim { +#ifdef MAGNUM_RENDERER +using namespace Cr::Containers::Literals; +using namespace Mn::Math::Literals; +#endif + namespace { -static Mn::Vector3 INVALID_VEC3 = Mn::Vector3(NAN, NAN, NAN); +#ifndef MAGNUM_RENDERER +static Mn::Vector3 INVALID_VEC3 = Mn::Vector3{Mn::Constants::nan()}; static constexpr glm::mat4x3 identityGlMat_ = glm::mat4x3(1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f); +#endif float remapAction(float action, float stepMin, float stepMax) { // map (-1..+1) to (stepMin..stepMax) and clamp @@ -40,6 +55,7 @@ float remapAction(float action, float stepMin, float stepMax) { return Mn::Math::lerp(stepMin, stepMax, clampedNormalizedAction); } +#ifndef MAGNUM_RENDERER Mn::Vector3 toMagnumVector3(const btVector3& src) { return {src.x(), src.y(), src.z()}; } @@ -53,6 +69,7 @@ Mn::Matrix4 toMagnumMatrix4(const btTransform& btTrans) { Mn::Matrix4 tmp2{btTrans}; return tmp2; } +#endif Mn::Vector3 groundPositionToVector3(const Mn::Vector2& src) { constexpr float groundY = 0.f; @@ -61,24 +78,26 @@ Mn::Vector3 groundPositionToVector3(const Mn::Vector2& src) { std::string getMeshNameFromURDFVisualFilepath(const std::string& filepath) { // "../meshes/base_link.dae" => "base_link" - auto index0 = filepath.rfind('/'); - if (index0 == -1) { - // this is okay; the substring will start at 0 - } - auto index1 = filepath.rfind('.'); - BATCHED_SIM_ASSERT(index1 != -1); - - auto retval = filepath.substr(index0 + 1, index1 - index0 - 1); - return 
retval; + return Cr::Utility::Path::splitExtension(Cr::Utility::Path::split(filepath).second()).first(); } } // namespace RobotInstanceSet::RobotInstanceSet(Robot* robot, const BatchedSimulatorConfig* config, + #ifdef MAGNUM_RENDERER + MagnumRendererStandalone& renderer, + #else std::vector* envs, + #endif RolloutRecord* rollouts) - : config_(config), envs_(envs), robot_(robot), rollouts_(rollouts) { + : config_(config), + #ifdef MAGNUM_RENDERER + renderer_{&renderer}, + #else + envs_(envs), + #endif + robot_(robot), rollouts_(rollouts) { const int numLinks = robot->artObj->getNumLinks(); const int numNodes = numLinks + 1; // include base const int numEnvs = config_->numEnvs; @@ -94,7 +113,9 @@ RobotInstanceSet::RobotInstanceSet(Robot* robot, int baseInstanceIndex = 0; for (int b = 0; b < numEnvs; b++) { + #ifndef MAGNUM_RENDERER auto& env = (*envs_)[b]; + #endif // sloppy: pass -1 to getLinkVisualSceneNodes to get base for (int i = -1; i < numLinks; i++) { @@ -107,14 +128,21 @@ RobotInstanceSet::RobotInstanceSet(Robot* robot, int instanceId = -1; if (!visualAttachments.empty()) { const std::string linkVisualFilepath = visualAttachments[0].second; + + #ifdef MAGNUM_RENDERER + if(i != -1) // TODO ?! + // TODO why the heck is this stil treating meshes and materials + // separate?! + instanceId = renderer.add(b, visualAttachments[0].second); + #else const std::string nodeName = getMeshNameFromURDFVisualFilepath(linkVisualFilepath); auto instanceBlueprint = robot_->sceneMapping_->findInstanceBlueprint(nodeName); - instanceId = env.addInstance(instanceBlueprint.meshIdx_, instanceBlueprint.mtrlIdx_, identityGlMat_); + #endif } else { // these are camera links // sloppy: we should avoid having these items in the nodeInstanceIds_ @@ -220,7 +248,12 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, mb->forwardKinematics(robots_.scratch_q_, robots_.scratch_m_); + #ifdef MAGNUM_RENDERER +// TODO wtf!! 
where is this used +// Cr::Containers::StridedArrayView1D environmentTransforms = renderer_->transformations(b); + #else auto& env = bpsWrapper_->envs_[b]; + #endif int baseInstanceIndex = b * numNodes; const int baseSphereIndex = b * robots_.robot_->numCollisionSpheres_; @@ -247,7 +280,13 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, // todo: avoid btTransform copy for case of i != -1 const auto btTrans = i == -1 ? mb->getBaseWorldTransform() : mb->getLink(i).m_cachedWorldTransform; - Mn::Matrix4 mat = toMagnumMatrix4(btTrans); + Mn::Matrix4 mat = + #ifdef MAGNUM_RENDERER + Mn::Matrix4{btTrans} + #else + toMagnumMatrix4(btTrans) + #endif + ; auto tmp = robots_.robot_->nodeTransformFixups[nodeIndex]; // auto vec = tmp[3]; @@ -255,9 +294,13 @@ void BatchedSimulator::updateLinkTransforms(int currRolloutSubstep, // tmp[3] = Mn::Vector4(vec.xyz() * scale, 1.f); mat = mat * tmp; +// #ifdef MAGNUM_RENDERER +// environmentTransforms[instanceId] = mat; // TODO where is this gone?! 
+// #else if (robots_.robot_->gripperLink_ == i) { robotInstance.cachedGripperLinkMat_ = mat; } +// #endif if (updateForPhysics) { // perf todo: loop through collision spheres (and look up link id), @@ -441,7 +484,9 @@ void BatchedSimulator::updateGripping() { continue; } + #ifndef MAGNUM_RENDERER auto& env = bpsWrapper_->envs_[b]; + #endif auto& robotInstance = robots_.robotInstances_[b]; auto& envState = safeVectorGet(pythonEnvStates_, b); @@ -747,7 +792,9 @@ void BatchedSimulator::updateCollision() { safeVectorGet(episodeSet_.episodes_, episodeInstance.episodeIndex_); const int baseSphereIndex = b * robot_.numCollisionSpheres_; bool hit = false; + #ifndef MAGNUM_RENDERER auto& env = bpsWrapper_->envs_[b]; + #endif const auto& robotInstance = robots_.robotInstances_[b]; // perf todo: if there was a hit last frame, cache that sphere and test it @@ -938,7 +985,9 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { for (int b = 0; b < numEnvs; b++) { auto& robotInstance = robots_.robotInstances_[b]; + #ifndef MAGNUM_RENDERER auto& env = bpsWrapper_->envs_[b]; + #endif // temp hack: we don't currently have bookeeping to know if a robot moved // over several substeps, so we assume it did here. 
perf todo: fix this @@ -962,9 +1011,13 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { continue; } + #ifdef MAGNUM_RENDERER + renderer_->transformations(b)[instanceId] = safeVectorGet(robots_.nodeNewTransforms_, instanceIndex); + #else const auto glMat = toGlmMat4x3( safeVectorGet(robots_.nodeNewTransforms_, instanceIndex)); env.updateInstanceTransform(instanceId, glMat); + #endif } } @@ -972,9 +1025,13 @@ void BatchedSimulator::updateRenderInstances(bool forceUpdate) { if (didRobotMove && robotInstance.grippedFreeObjectIndex_ != -1) { int freeObjectIndex = robotInstance.grippedFreeObjectIndex_; auto mat = getHeldObjectTransform(b); - glm::mat4x3 glMat = toGlmMat4x3(mat); int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + #ifdef MAGNUM_RENDERER + renderer_->transformations(b)[instanceId] = mat; + #else + glm::mat4x3 glMat = toGlmMat4x3(mat); env.updateInstanceTransform(instanceId, glMat); + #endif auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = @@ -1112,8 +1169,11 @@ Mn::Matrix4 BatchedSimulator::getHeldObjectTransform(int b) const { } Robot::Robot(const serialize::Collection& serializeCollection, - esp::sim::Simulator* sim, - BpsSceneMapping* sceneMapping) { + esp::sim::Simulator* sim + #ifndef MAGNUM_RENDERER + , BpsSceneMapping* sceneMapping + #endif + ) { ESP_CHECK(!serializeCollection.robots.empty(), "no robot found in collection.json"); const serialize::Robot& serializeRobot = serializeCollection.robots.front(); @@ -1124,7 +1184,9 @@ Robot::Robot(const serialize::Collection& serializeCollection, sim->getArticulatedObjectManager()->addBulletArticulatedObjectFromURDF( filepath); + #ifndef MAGNUM_RENDERER sceneMapping_ = sceneMapping; + #endif artObj = static_cast( managedObj->hackGetBulletObjectReference().get()); @@ -1252,6 +1314,7 @@ void Robot::updateFromSerializeCollection( numCollisionSpheres_ = numCollisionSpheres; } +#ifndef MAGNUM_RENDERER 
BpsWrapper::BpsWrapper(int gpuId, int numEnvs, bool includeDepth, @@ -1301,6 +1364,7 @@ BpsWrapper::~BpsWrapper() { loader_ = nullptr; renderer_ = nullptr; } +#endif BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { ESP_CHECK(config.numDebugEnvs <= config.numEnvs, @@ -1309,23 +1373,47 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { config_ = config; const int numEnvs = config_.numEnvs; + #ifndef MAGNUM_RENDERER /* BpsImporter handles this */ const std::string sceneMappingFilepath = config_.renderAssetCompositeFilepath + ".mapping.json"; sceneMapping_ = BpsSceneMapping::loadFromFile(sceneMappingFilepath); + #endif serializeCollection_ = serialize::Collection::loadFromFile(config_.collectionFilepath); + #ifdef MAGNUM_RENDERER + renderer_.emplace(MagnumRendererConfiguration{} +// .setFilename("combined_Stage_v3_sc0_staging_trimesh.bps"_s) + .setTileSizeCount({config_.sensor0.width, config_.sensor0.height}, + // TODO better way to specify this + {16, (config_.numEnvs + 15)/16}) + ); + // TODO GPU ID, if include depth/color + renderer_->addFile(config_.renderAssetCompositeFilepath); +// #ifdef MAGNUM_RENDERER +// /* Hardcode camera position + projection for all views (taken from +// BpsWrapper) */ +// for(Mn::Matrix4& projection: renderer_->cameras()) projection = +// Mn::Matrix4::perspectiveProjection(Mn::Deg(config_.sensor0.hfov), Mn::Vector2{Mn::Float(config_.sensor0.width), Mn::Float(config_.sensor0.height)}.aspectRatio(), 0.01f, 1000.0f)* +// (Mn::Matrix4::from(Mn::Quaternion{{0.0f, -0.529178f, 0.0f}, 0.848511f}.toMatrix(), {-1.61004f, 1.5f, 3.5455f}).inverted()); +// #endif + #else bpsWrapper_ = std::make_unique( config_.gpuId, config_.numEnvs, config_.includeDepth, config_.includeColor, config_.sensor0, config_.renderAssetCompositeFilepath); + #endif if (config_.numDebugEnvs > 0) { + #ifdef MAGNUM_RENDERER + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); + #else // perf todo: separate renderAssetsComposite for debug 
models debugBpsWrapper_ = std::make_unique( config_.gpuId, config_.numDebugEnvs, /*includeDepth*/ false, /*includeColor*/ true, config_.debugSensor, config_.renderAssetCompositeFilepath); + #endif } if (config_.numDebugEnvs > 0) { @@ -1344,15 +1432,24 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { legacySim_ = esp::sim::Simulator::create_unique(simConfig); - robot_ = Robot(serializeCollection_, legacySim_.get(), &sceneMapping_); + robot_ = Robot(serializeCollection_, legacySim_.get() + #ifndef MAGNUM_RENDERER + , &sceneMapping_ + #endif + ); checkDisableRobotAndFreeObjectsCollision(); int numLinks = robot_.artObj->getNumLinks(); int numNodes = numLinks + 1; // include base - robots_ = - RobotInstanceSet(&robot_, &config_, &bpsWrapper_->envs_, &rollouts_); + robots_ = RobotInstanceSet(&robot_, &config_, + #ifdef MAGNUM_RENDERER + *renderer_, + #else + &bpsWrapper_->envs_, + #endif + &rollouts_); actionDim_ = getNumActions(); @@ -1411,6 +1508,7 @@ int BatchedSimulator::getNumActions() const { return serializeCollection_.robots[0].actionMap.numActions; } +#ifndef MAGNUM_RENDERER bps3D::Environment& BatchedSimulator::getBpsEnvironment(int envIndex) { BATCHED_SIM_ASSERT(envIndex < config_.numEnvs); return bpsWrapper_->envs_[envIndex]; @@ -1421,6 +1519,7 @@ bps3D::Environment& BatchedSimulator::getDebugBpsEnvironment(int envIndex) { BATCHED_SIM_ASSERT(envIndex < config_.numDebugEnvs); return debugBpsWrapper_->envs_[envIndex]; } +#endif // one-time init for envs void BatchedSimulator::initEpisodeInstances() { @@ -1449,7 +1548,9 @@ void BatchedSimulator::initEpisodeInstances() { } void BatchedSimulator::clearEpisodeInstance(int b) { + #ifndef MAGNUM_RENDERER auto& env = getBpsEnvironment(b); + #endif auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); @@ -1458,13 +1559,18 @@ void BatchedSimulator::clearEpisodeInstance(int b) { } if (b < config_.numDebugEnvs) { + #ifndef MAGNUM_RENDERER for (auto& instanceId : 
episodeInstance.persistentDebugInstanceIds_) { auto& env = getDebugBpsEnvironment(b); env.deleteInstance(instanceId); } + #else + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); // TODO + #endif episodeInstance.persistentDebugInstanceIds_.clear(); } + #ifndef MAGNUM_RENDERER // Remove free object bps instances **in reverse order**. This is so bps3D // will later allocate us new instance IDs (from its free list) in ascending // order (see assert in spawnFreeObject). @@ -1475,13 +1581,20 @@ void BatchedSimulator::clearEpisodeInstance(int b) { int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); env.deleteInstance(instanceId); } + #else + renderer_->clear(b); // lol + #endif episodeInstance.firstFreeObjectInstanceId_ = -1; + #ifndef MAGNUM_RENDERER // remove staticScene bps instances for (const auto id : episodeInstance.staticSceneInstanceIds_) { env.deleteInstance(id); } + #else + renderer_->clear(b); // no-op, it's all cleared from above already + #endif episodeInstance.staticSceneInstanceIds_.clear(); // remove all free objects from collision grid @@ -1495,7 +1608,9 @@ void BatchedSimulator::resetEpisodeInstance(int b) { << resets_[b] << " is invalid for getNumEpisodes()=" << getNumEpisodes()); + #ifndef MAGNUM_RENDERER // TODO useless variable auto& env = getBpsEnvironment(b); + #endif clearEpisodeInstance(b); @@ -1516,10 +1631,14 @@ void BatchedSimulator::resetEpisodeInstance(int b) { for (const auto& instance : staticScene.renderAssetInstances_) { const auto& renderAsset = safeVectorGet(episodeSet_.renderAssets_, instance.renderAssetIndex_); + #ifndef MAGNUM_RENDERER const auto& blueprint = renderAsset.instanceBlueprint_; - episodeInstance.staticSceneInstanceIds_.push_back(env.addInstance( blueprint.meshIdx_, blueprint.mtrlIdx_, instance.glMat_)); + #else + // TODO i hope the name is what the blueprint corresponds to?? 
+ episodeInstance.staticSceneInstanceIds_.push_back(renderer_->add(b, renderAsset.name_)); + #endif } auto& envState = safeVectorGet(pythonEnvStates_, b); @@ -1535,7 +1654,13 @@ void BatchedSimulator::resetEpisodeInstance(int b) { { auto& robotInstance = robots_.robotInstances_[b]; - const int numEnvs = bpsWrapper_->envs_.size(); + const int numEnvs = + #ifndef MAGNUM_RENDERER + bpsWrapper_->envs_.size() + #else + renderer_->sceneCount() + #endif + ; const int numPosVars = robot_.numPosVars; float* yaws = &safeVectorGet(rollouts_.yaws_, currStorageStep_ * numEnvs); Mn::Vector2* positions = @@ -1707,7 +1832,9 @@ bool BatchedSimulator::isEnvResetting(int b) const { void BatchedSimulator::spawnFreeObject(int b, int freeObjectIndex, bool reinsert) { + #ifndef MAGNUM_RENDERER auto& env = getBpsEnvironment(b); + #endif auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); const auto& episode = @@ -1721,7 +1848,9 @@ void BatchedSimulator::spawnFreeObject(int b, // indirection const auto& renderAsset = safeVectorGet(episodeSet_.renderAssets_, freeObject.renderAssetIndex_); + #ifndef MAGNUM_RENDERER const auto& blueprint = renderAsset.instanceBlueprint_; + #endif const auto& rotation = safeVectorGet(freeObject.startRotations_, freeObjectSpawn.startRotationIndex_); @@ -1730,9 +1859,14 @@ void BatchedSimulator::spawnFreeObject(int b, // sloppy: this matrix gets created differently on episode reset Mn::Matrix4 mat = Mn::Matrix4::from(rotation.toMatrix(), freeObjectSpawn.startPos_); + #ifndef MAGNUM_RENDERER glm::mat4x3 glMat = toGlmMat4x3(mat); int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); + #else + // TODO is the name actually corresponding to the blueprint?? 
+ int instanceId = renderer_->add(b, renderAsset.name_, mat); + #endif // store the first free object's bps instanceId and assume the rest will be // contiguous if (freeObjectIndex == 0) { @@ -1764,9 +1898,10 @@ void BatchedSimulator::removeFreeObjectFromCollisionGrid(int b, // perf todo: remove this auto& envState = safeVectorGet(pythonEnvStates_, b); - safeVectorGet(envState.obj_positions, freeObjectIndex) = INVALID_VEC3; + safeVectorGet(envState.obj_positions, freeObjectIndex) = Mn::Vector3{Mn::Constants::nan()}; } +// TODO eh what is this doing actually? i am no B P S int BatchedSimulator::getFreeObjectBpsInstanceId(int b, int freeObjectIndex) const { auto& episodeInstance = @@ -1779,16 +1914,22 @@ void BatchedSimulator::reinsertFreeObject(int b, int freeObjectIndex, const Magnum::Vector3& pos, const Magnum::Quaternion& rotation) { + #ifndef MAGNUM_RENDERER auto& env = getBpsEnvironment(b); + #endif auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); episodeInstance.colGrid_.reinsertObstacle(freeObjectIndex, pos, rotation); // sloppy quat to Matrix3x3 Mn::Matrix4 mat = Mn::Matrix4::from(rotation.toMatrix(), pos); - glm::mat4x3 glMat = toGlmMat4x3(mat); int instanceId = getFreeObjectBpsInstanceId(b, freeObjectIndex); + #ifndef MAGNUM_RENDERER + glm::mat4x3 glMat = toGlmMat4x3(mat); env.updateInstanceTransform(instanceId, glMat); + #else + renderer_->transformations(b)[instanceId] = mat; + #endif auto& envState = safeVectorGet(pythonEnvStates_, b); safeVectorGet(envState.obj_positions, freeObjectIndex) = pos; @@ -1804,14 +1945,22 @@ void BatchedSimulator::initEpisodeSet() { "specify episodeSetFilepath"); episodeSet_ = generateBenchmarkEpisodeSet( - config_.episodeGeneratorConfig, sceneMapping_, serializeCollection_); + config_.episodeGeneratorConfig, + #ifndef MAGNUM_RENDERER + sceneMapping_, + #endif + serializeCollection_); episodeSet_.saveToFile("../data/generated.episode_set.json"); } else { 
ESP_CHECK(!config_.episodeSetFilepath.empty(), "For BatchedSimulatorConfig::doProceduralEpisodeSet==false, you " "must specify episodeSetFilepath"); episodeSet_ = EpisodeSet::loadFromFile(config_.episodeSetFilepath); - postLoadFixup(episodeSet_, sceneMapping_, serializeCollection_); + postLoadFixup(episodeSet_, + #ifndef MAGNUM_RENDERER + sceneMapping_, + #endif + serializeCollection_); } } @@ -1878,7 +2027,13 @@ void BatchedSimulator::reset(std::vector&& resets) { // called within step to reset whatever envs are requested to reset void BatchedSimulator::resetHelper() { - const int numEnvs = bpsWrapper_->envs_.size(); + const int numEnvs = + #ifndef MAGNUM_RENDERER + bpsWrapper_->envs_.size() + #else + renderer_->sceneCount() + #endif + ; for (int b = 0; b < numEnvs; b++) { if (!isEnvResetting(b)) { @@ -1914,7 +2069,13 @@ void BatchedSimulator::startStepPhysicsOrReset(std::vector&& actions, void BatchedSimulator::stepPhysics() { ProfilingScope scope("step physics"); - const int numEnvs = bpsWrapper_->envs_.size(); + const int numEnvs = + #ifndef MAGNUM_RENDERER + bpsWrapper_->envs_.size() + #else + renderer_->sceneCount() + #endif + ; BATCHED_SIM_ASSERT(config_.numSubsteps > 0); for (substep_ = 0; substep_ < config_.numSubsteps; substep_++) { @@ -1946,7 +2107,13 @@ void BatchedSimulator::substepPhysics() { prevStorageStep_ = currStorageStep_; currStorageStep_ = (currStorageStep_ + 1) % maxStorageSteps_; - int numEnvs = bpsWrapper_->envs_.size(); + const int numEnvs = + #ifndef MAGNUM_RENDERER + bpsWrapper_->envs_.size() + #else + renderer_->sceneCount() + #endif + ; int numPosVars = robot_.numPosVars; auto& robots = robots_; @@ -2011,8 +2178,8 @@ void BatchedSimulator::substepPhysics() { float remappedBaseMovementAction = remapAction( baseMoveAction, baseMoveSetup.stepMin, baseMoveSetup.stepMax); positions[b] = - prevPositions[b] + Mn::Vector2(Mn::Math::cos(Mn::Math::Rad(yaws[b])), - -Mn::Math::sin(Mn::Math::Rad(yaws[b]))) * + prevPositions[b] + 
Mn::Vector2(Mn::Math::cos(Mn::Rad(yaws[b])), + -Mn::Math::sin(Mn::Rad(yaws[b]))) * remappedBaseMovementAction; // sloppy: copy over all jointPositions, then process actionJointDegreePairs @@ -2159,9 +2326,14 @@ void BatchedSimulator::setFreeCamera(const Mn::Vector3& pos, const Mn::Quaternio } #endif +// TODO i am not B.P.S. void BatchedSimulator::setBpsCameraHelper(bool isDebug, int b, + #ifndef MAGNUM_RENDERER const glm::mat4& glCameraInvTransform, + #else + const Mn::Matrix4& glCameraInvTransform, + #endif float hfov) { const CameraSensorConfig& sensor = isDebug ? config_.debugSensor : config_.sensor0; @@ -2169,16 +2341,24 @@ void BatchedSimulator::setBpsCameraHelper(bool isDebug, BATCHED_SIM_ASSERT(hfov > 0.f && hfov < 180.f); constexpr float near = 0.01; constexpr float far = 1000.f; + #ifndef MAGNUM_RENDERER auto& env = isDebug ? getDebugBpsEnvironment(b) : getBpsEnvironment(b); env.setCamera(glCameraInvTransform, hfov, aspectRatio, near, far); + #else + renderer_->camera(b) = Mn::Matrix4::perspectiveProjection(Mn::Rad{hfov}, aspectRatio, near, far)*glCameraInvTransform; } +// TODO i am not B.P.S. void BatchedSimulator::updateBpsCameras(bool isDebug) { const Camera& cam = isDebug ? debugCam_ : mainCam_; const int numEnvs = isDebug ? 
config_.numDebugEnvs : config_.numEnvs; if (cam.attachNodeIndex == -1) { + #ifndef MAGNUM_RENDERER glm::mat4 world_to_camera(glm::inverse(toGlmMat4(cam.transform))); + #else + Mn::Matrix4 world_to_camera = cam.transform.inverted(); + #endif for (int b = 0; b < numEnvs; b++) { setBpsCameraHelper(isDebug, b, world_to_camera, cam.hfov); @@ -2197,12 +2377,16 @@ void BatchedSimulator::updateBpsCameras(bool isDebug) { safeVectorGet(robots_.nodeNewTransforms_, instanceIndex); auto cameraTransform = cameraAttachNodeTransform * cameraAttachTransform; + #ifndef MAGNUM_RENDERER auto glCameraNewInvTransform = glm::inverse(toGlmMat4(cameraTransform)); - setBpsCameraHelper(isDebug, b, glCameraNewInvTransform, cam.hfov); + #else + setBpsCameraHelper(isDebug, b, cameraTransform.inverted(), cam.hfov); + #endif } } } +#endif void BatchedSimulator::startRender() { BATCHED_SIM_ASSERT(!isPhysicsThreadActive()); @@ -2210,11 +2394,19 @@ void BatchedSimulator::startRender() { BATCHED_SIM_ASSERT(isOkToRender_); updateBpsCameras(/*isDebug*/ false); + #ifdef MAGNUM_RENDERER + renderer_->draw(); + #else bpsWrapper_->renderer_->render(bpsWrapper_->envs_.data()); + #endif if (config_.numDebugEnvs > 0 && enableDebugSensor_) { updateBpsCameras(/*isDebug*/ true); + #ifdef MAGNUM_RENDERER + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); // TODO + #else debugBpsWrapper_->renderer_->render(debugBpsWrapper_->envs_.data()); + #endif } isOkToRender_ = false; @@ -2224,24 +2416,39 @@ void BatchedSimulator::startRender() { void BatchedSimulator::waitRender() { ProfilingScope scope("wait for GPU render"); BATCHED_SIM_ASSERT(isRenderStarted_); + #ifdef MAGNUM_RENDERER + /* Nothing, all blocking will happen when retrieving the CUDA device + pointer */ + #else bpsWrapper_->renderer_->waitForFrame(); if (config_.numDebugEnvs > 0 && enableDebugSensor_) { debugBpsWrapper_->renderer_->waitForFrame(); } + #endif isRenderStarted_ = false; isOkToRender_ = true; } +#ifdef MAGNUM_RENDERER +MagnumRendererStandalone& 
BatchedSimulator::getMagnumRenderer() { + return *renderer_; +} +#else bps3D::Renderer& BatchedSimulator::getBpsRenderer() { BATCHED_SIM_ASSERT(bpsWrapper_->renderer_.get()); return *bpsWrapper_->renderer_.get(); } +#endif +#ifdef MAGNUM_RENDERER +// TODO +#else bps3D::Renderer& BatchedSimulator::getDebugBpsRenderer() { BATCHED_SIM_ASSERT(config_.numDebugEnvs > 0); BATCHED_SIM_ASSERT(debugBpsWrapper_->renderer_.get()); return *debugBpsWrapper_->renderer_.get(); } +#endif RolloutRecord::RolloutRecord(int numRolloutSubsteps, int numEnvs, @@ -2260,6 +2467,7 @@ RolloutRecord::RolloutRecord(int numRolloutSubsteps, void BatchedSimulator::deleteDebugInstances() { if (config_.numDebugEnvs > 0) { + #ifndef MAGNUM_RENDERER const int numEnvs = config_.numDebugEnvs; for (int b = 0; b < numEnvs; b++) { auto& env = getDebugBpsEnvironment(b); @@ -2269,6 +2477,9 @@ void BatchedSimulator::deleteDebugInstances() { safeVectorGet(debugInstancesByEnv_, b).clear(); } + #else + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); // TODO + #endif } } @@ -2278,6 +2489,7 @@ int BatchedSimulator::addDebugInstance(const std::string& name, bool persistent) { BATCHED_SIM_ASSERT(config_.numDebugEnvs > 0); + #ifndef MAGNUM_RENDERER glm::mat4x3 glMat = toGlmMat4x3(transform); const auto& blueprint = sceneMapping_.findInstanceBlueprint(name); BATCHED_SIM_ASSERT(envIndex < config_.numEnvs); @@ -2288,6 +2500,9 @@ int BatchedSimulator::addDebugInstance(const std::string& name, safeVectorGet(debugInstancesByEnv_, envIndex).push_back(instanceId); } return instanceId; + #else + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); // TODO + #endif } std::string BatchedSimulator::getRecentStatsAndReset() const { @@ -2403,7 +2618,9 @@ void BatchedSimulator::debugRenderColumnGrids(int b, int maxProgress) { BATCHED_SIM_ASSERT(b < config_.numDebugEnvs); + #ifndef MAGNUM_RENDERER auto& env = getDebugBpsEnvironment(b); + #endif auto& episodeInstance = safeVectorGet(episodeInstanceSet_.episodeInstanceByEnv_, b); auto& episode = @@ -2412,8 
+2629,10 @@ void BatchedSimulator::debugRenderColumnGrids(int b, safeVectorGet(episodeSet_.staticScenes_, episode.staticSceneIndex_); const auto& source = staticScene.columnGridSet_.getColumnGrid(0); + #ifndef MAGNUM_RENDERER const auto& blueprint = sceneMapping_.findInstanceBlueprint("cube_gray_shaded"); + #endif // note off by one const float maxOccludedY = 3.f; @@ -2449,17 +2668,27 @@ void BatchedSimulator::debugRenderColumnGrids(int b, Mn::Matrix4 localToBox = Mn::Matrix4::translation(aabb.center()) * Mn::Matrix4::scaling(aabb.size() * 0.5); + #ifndef MAGNUM_RENDERER glm::mat4x3 glMat = toGlmMat4x3(localToBox); int instanceId = env.addInstance(blueprint.meshIdx_, blueprint.mtrlIdx_, glMat); episodeInstance.persistentDebugInstanceIds_.push_back(instanceId); + #else + CORRADE_INTERNAL_ASSERT_UNREACHABLE(); // TODO + #endif } } } } void BatchedSimulator::reloadSerializeCollection() { - int numEnvs = bpsWrapper_->envs_.size(); + const int numEnvs = + #ifndef MAGNUM_RENDERER + bpsWrapper_->envs_.size() + #else + renderer_->sceneCount() + #endif + ; serializeCollection_ = serialize::Collection::loadFromFile(config_.collectionFilepath); diff --git a/src/esp/batched_sim/BatchedSimulator.h b/src/esp/batched_sim/BatchedSimulator.h index 9e88db16bd..5f4686d7a6 100644 --- a/src/esp/batched_sim/BatchedSimulator.h +++ b/src/esp/batched_sim/BatchedSimulator.h @@ -5,16 +5,22 @@ #ifndef ESP_BATCHEDSIM_BATCHED_SIMULATOR_H_ #define ESP_BATCHEDSIM_BATCHED_SIMULATOR_H_ -#include "esp/batched_sim/BpsSceneMapping.h" #include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/EpisodeGenerator.h" #include "esp/batched_sim/EpisodeSet.h" #include "esp/batched_sim/SerializeCollection.h" #include "esp/core/random.h" + +#include "esp/batched_sim/configure.h" #include "esp/physics/bullet/BulletArticulatedObject.h" #include "esp/sim/Simulator.h" +#ifdef MAGNUM_RENDERER +#include "MagnumRendererStandalone.h" +#else +#include "esp/batched_sim/BpsSceneMapping.h" #include +#endif #include 
#include @@ -92,6 +98,7 @@ struct PythonEnvironmentState { ESP_SMART_POINTERS(PythonEnvironmentState); }; +#ifndef MAGNUM_RENDERER struct BpsWrapper { BpsWrapper(int gpuId, int numEnvs, @@ -106,11 +113,15 @@ struct BpsWrapper { std::unique_ptr renderer_; std::unique_ptr loader_; }; +#endif struct Robot { Robot(const serialize::Collection& serializeCollection, - esp::sim::Simulator* sim, - BpsSceneMapping* sceneMapping); + esp::sim::Simulator* sim + #ifndef MAGNUM_RENDERER + , BpsSceneMapping* sceneMapping + #endif + ); Robot() = default; void updateFromSerializeCollection( @@ -122,7 +133,9 @@ struct Robot { // todo: delete artObj esp::physics::BulletArticulatedObject* artObj = nullptr; + #ifndef MAGNUM_RENDERER BpsSceneMapping* sceneMapping_ = nullptr; + #endif std::vector nodeTransformFixups; std::pair, std::vector> jointPositionLimits; @@ -176,7 +189,11 @@ class RobotInstanceSet { RobotInstanceSet() = default; RobotInstanceSet(Robot* robot, const BatchedSimulatorConfig* config, + #ifdef MAGNUM_RENDERER + MagnumRendererStandalone& renderer_, + #else std::vector* envs, + #endif RolloutRecord* rollouts); void applyActionPenalties(const std::vector& actions); @@ -204,7 +221,11 @@ class RobotInstanceSet { btAlignedObjectArray scratch_q_; btAlignedObjectArray scratch_m_; + #ifdef MAGNUM_RENDERER + MagnumRendererStandalone* renderer_ = nullptr; + #else std::vector* envs_; + #endif std::vector robotInstances_; }; @@ -242,11 +263,16 @@ class BatchedSimulator { std::vector&& resets); void waitStepPhysicsOrReset(); + #ifdef MAGNUM_RENDERER + MagnumRendererStandalone& getMagnumRenderer(); + // TODO debug renderer?! 
+ #else bps3D::Renderer& getBpsRenderer(); bps3D::Renderer& getDebugBpsRenderer(); bps3D::Environment& getBpsEnvironment(int envIndex); bps3D::Environment& getDebugBpsEnvironment(int envIndex); + #endif void deleteDebugInstances(); // probably call at start of frame render // probably add these every frame ("immediate mode", not persistent) @@ -303,10 +329,15 @@ class BatchedSimulator { void updateGripping(); void resetHelper(); void updatePythonEnvironmentState(); + // TODO i am not B.P.S. void updateBpsCameras(bool isDebug); void setBpsCameraHelper(bool isDebug, int b, + #ifndef MAGNUM_RENDERER const glm::mat4& glCameraInvTransform, + #else + const Mn::Matrix4& glCameraInvTransform, + #endif float hfov); // uses episode spawn location @@ -348,8 +379,12 @@ class BatchedSimulator { int substep_ = -1; RolloutRecord rollouts_; std::unique_ptr legacySim_; + #ifdef MAGNUM_RENDERER + Corrade::Containers::Optional renderer_; + #else std::unique_ptr bpsWrapper_; std::unique_ptr debugBpsWrapper_; + #endif int actionDim_ = -1; std::vector actions_; std::vector resets_; // episode index, or -1 if not resetting @@ -361,7 +396,9 @@ class BatchedSimulator { EpisodeSet episodeSet_; EpisodeInstanceSet episodeInstanceSet_; + #ifndef MAGNUM_RENDERER BpsSceneMapping sceneMapping_; + #endif std::vector> debugInstancesByEnv_; mutable StatRecord recentStats_; esp::core::Random random_{0}; diff --git a/src/esp/batched_sim/BpsImporter.conf b/src/esp/batched_sim/BpsImporter.conf new file mode 100644 index 0000000000..0c381be542 --- /dev/null +++ b/src/esp/batched_sim/BpsImporter.conf @@ -0,0 +1,30 @@ +depends=BasisImporter + +[configuration] +# If enabled, imports just one mesh with all vertex and index data together and +# the scene containing mesh index offsets and count +meshViews=false + +# If enabled together with meshViews, the mesh will have a second level with +# MeshPrimitive::Meshlets, containing view info together with bounding spheres +# and other metadata +meshlets=true + +# 
If enabled, combines all 2D textures together into a single 2D array texture +# with mip levels, throwing away levels above given size. +textureArrays=false +textureArrayMaxLevelSize=128 +# If enabled together with textureArrays, creates one additional all-white +# array layer and makes all texture-less materials use it to avoid having a +# dedicated second draw for texture-less objects +textureArraysForceAllMaterialsTextured=false + +# Include the instance scene in the output. If disabled, only the second scene +# without transformation or parent info is included. +instanceScene=true + +# Same as the format option in BasisImporter +basisFormat= + +# Provide basic Phong material attributes as a fallback to PBR +phongMaterialFallback=true diff --git a/src/esp/batched_sim/BpsImporter.cpp b/src/esp/batched_sim/BpsImporter.cpp new file mode 100644 index 0000000000..76d54ed852 --- /dev/null +++ b/src/esp/batched_sim/BpsImporter.cpp @@ -0,0 +1,718 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Magnum; + +class CORRADE_VISIBILITY_EXPORT BpsImporter: public Trade::AbstractImporter { + public: + explicit BpsImporter(PluginManager::AbstractManager& manager, const Containers::StringView& plugin): Trade::AbstractImporter{manager, plugin} {} + + ~BpsImporter() = default; + + private: + CORRADE_VISIBILITY_LOCAL Trade::ImporterFeatures doFeatures() const override { return {}; } + CORRADE_VISIBILITY_LOCAL bool doIsOpened() const override { return _in; } + CORRADE_VISIBILITY_LOCAL void doClose() override { _in = nullptr; } + CORRADE_VISIBILITY_LOCAL void doOpenFile(Containers::StringView filename) override; + + CORRADE_VISIBILITY_LOCAL UnsignedInt doMeshCount() const override; + CORRADE_VISIBILITY_LOCAL UnsignedInt doMeshLevelCount(UnsignedInt id) override; + CORRADE_VISIBILITY_LOCAL 
Containers::String doMeshAttributeName(UnsignedShort name) override; + CORRADE_VISIBILITY_LOCAL Trade::MeshAttribute doMeshAttributeForName(Containers::StringView name) override; + CORRADE_VISIBILITY_LOCAL Containers::Optional doMesh(UnsignedInt id, UnsignedInt level) override; + + CORRADE_VISIBILITY_LOCAL Containers::String doObjectName(UnsignedLong id) override; +// CORRADE_VISIBILITY_LOCAL Long doObjectForName(Containers::StringView name) override; + + CORRADE_VISIBILITY_LOCAL UnsignedInt doMaterialCount() const override; + CORRADE_VISIBILITY_LOCAL Containers::Optional doMaterial(UnsignedInt id) override; + + CORRADE_VISIBILITY_LOCAL UnsignedInt doTextureCount() const override; + CORRADE_VISIBILITY_LOCAL Containers::Optional doTexture(UnsignedInt id) override; + + CORRADE_VISIBILITY_LOCAL UnsignedInt doImage2DCount() const override; + CORRADE_VISIBILITY_LOCAL UnsignedInt doImage2DLevelCount(UnsignedInt id) override; + CORRADE_VISIBILITY_LOCAL Containers::Optional doImage2D(UnsignedInt id, UnsignedInt level) override; + + CORRADE_VISIBILITY_LOCAL UnsignedInt doImage3DCount() const override; + CORRADE_VISIBILITY_LOCAL UnsignedInt doImage3DLevelCount(UnsignedInt id) override; + CORRADE_VISIBILITY_LOCAL Containers::Optional doImage3D(UnsignedInt id, UnsignedInt level) override; + + CORRADE_VISIBILITY_LOCAL UnsignedLong doObjectCount() const override; + CORRADE_VISIBILITY_LOCAL UnsignedInt doSceneCount() const override; + CORRADE_VISIBILITY_LOCAL Int doDefaultScene() const override; + CORRADE_VISIBILITY_LOCAL Containers::String doSceneFieldName(UnsignedInt name) override; + CORRADE_VISIBILITY_LOCAL Trade::SceneField doSceneFieldForName(Containers::StringView name) override; + CORRADE_VISIBILITY_LOCAL Containers::Optional doScene(UnsignedInt id) override; + + UnsignedInt _lightCount, _instanceCount; + std::size_t _lightOffset, _instanceOffset, _dataOffset; + Containers::StringView _textureDir; + Containers::Array _textureNames; + Containers::Array _in; + 
Containers::String _basename; + + struct Mapping { + Containers::String name; + UnsignedInt mesh, material; + }; + Containers::Array _mappings; +}; + +namespace { + +/* + Offset Size Content Value / type + ------ ------- -------------- ---------------- + 0 64 header BpsHeader + 64 padding + 256 n*20 meshes BpsMeshInfo[n] + 4 light count + n*24 lights BpsLightProperties[n] + 1+ texture dir char[] (null-terminated) + 4 texture count + n*1+ texture names char[][n] (null-terminated) + 4 instance count + n*56 instances BpsInstanceProperties[n] + padding + ------ ------- -------------- ---------------- + n*256 mesh data(?) +*/ + +struct BpsHeader { + UnsignedInt magic; /* 0x55555555 */ + UnsignedInt version; /* 1 */ + + UnsignedInt numMeshes; + UnsignedInt numVertices; + UnsignedInt numIndices; + UnsignedInt numChunks; + UnsignedInt numMaterials; + + UnsignedLong indexOffset; + UnsignedLong chunkOffset; + UnsignedLong materialOffset; + + UnsignedLong totalBytes; /* ... after the base data offset */ +}; + +struct BpsMeshInfo { + UnsignedInt indexOffset; + UnsignedInt chunkOffset; + UnsignedInt numTriangles; + UnsignedInt numVertices; /* what's this for if there's no vertexOffset? */ + UnsignedInt numChunks; +}; + +struct BpsLightProperties { + Vector3 position; + Color3 color; +}; + +struct BpsInstanceProperties { + UnsignedInt meshIndex; + Int materialIndex; /* used to be an UnsignedInt, we have Int for unassigned materials */ + Matrix4x3 transformation; +}; + +struct BpsMaterialParams { + Vector3 baseAlbedo; + Float roughness; + /* unsigned in BPS source, but contains values like -1 so i suppose it was + meant to be signed. Magnum expects unsigned for texture IDs, leaving + signed only for the raw texIds attribute. 
*/ + Vector4ui texIdxs; +}; + +} + +void BpsImporter::doOpenFile(const Containers::StringView filename) { + Containers::Optional> data = Utility::Path::read(filename); + if(!data) { + Error{} << "BpsImporter::openFile(): cannot open" << filename; + return; + } + + if(data->size() < sizeof(BpsHeader)) { + Error{} << "BpsImporter::openFile(): thing too small, expected at least" << sizeof(BpsHeader) << "but got" << data->size(); + return; + } + + /* Save basename for loading other files */ + _basename = Utility::Path::split(filename).first(); + + /* Header, mesh infos */ + Containers::ArrayView f = *data; + const auto& header = *reinterpret_cast(f.data()); + f = f.exceptPrefix(256 + header.numMeshes*sizeof(BpsMeshInfo)); + + /* Lights */ + _lightCount = *reinterpret_cast(f.data()); + f = f.exceptPrefix(4); + _lightOffset = f.data() - data->data(); + f = f.exceptPrefix(_lightCount*sizeof(BpsLightProperties)); + + /* Texture dir */ + _textureDir = f.data(); + f = f.exceptPrefix(_textureDir.size() + 1); + + /* Textures */ + const auto textureCount = *reinterpret_cast(f.data()); + _textureNames = Containers::Array{textureCount}; + f = f.exceptPrefix(4); + for(std::size_t i = 0; i != textureCount; ++i) { + _textureNames[i] = f.data(); + f = f.exceptPrefix(_textureNames[i].size() + 1); + } + + /* Instances */ + _instanceCount = *reinterpret_cast(f.data()); + f = f.exceptPrefix(4); + _instanceOffset = f.data() - data->data(); + f = f.exceptPrefix(_instanceCount*sizeof(BpsInstanceProperties)); + + /* Padding after */ + _dataOffset = 256*((f.data() - data->data() + 255)/256); + + if(_dataOffset >= data->size()) { + Error{} << "BpsImporter::openFile(): ended up at" << _dataOffset << "bytes for a" << data->size() << "byte file"; + return; + } + + if(data->size() != _dataOffset + header.totalBytes) { + Error{} << "BpsImporter::openFile(): calculated data size" << (data->size() - _dataOffset) << "differs from" << header.totalBytes; + return; + } + + /* Extra material/mesh mapping, 
added as additional scene objects without + parent or transformation fields */ + Containers::Optional mapping = Utility::Json::fromFile(filename + ".mapping.json", Utility::Json::Option::ParseLiterals|Utility::Json::Option::ParseStrings); + if(!mapping || !mapping->parseUnsignedInts(mapping->root())) { + Error{} << "BpsImporter::openFile(): cannot parse the mapping file"; + return; + } + for(const Utility::JsonToken& i: mapping->root()["mapping"]["meshMappings"].asArray()) + arrayAppend(_mappings, InPlaceInit, + i["name"].asString(), + i["meshIdx"].asUnsignedInt(), + i["mtrlIdx"].asUnsignedInt()); + + _in = *std::move(data); +} + +UnsignedInt BpsImporter::doMeshCount() const { + return configuration().value("meshViews") ? + 1 : reinterpret_cast(_in.data())->numMeshes; +} + +UnsignedInt BpsImporter::doMeshLevelCount(const UnsignedInt id) { + if(configuration().value("meshViews") && configuration().value("meshlets")) { + CORRADE_INTERNAL_ASSERT(id == 0); + return 2; + } + + return 1; +} + +namespace { + constexpr Trade::MeshAttribute MeshAttributeIndexOffset = Trade::meshAttributeCustom(0); + constexpr Trade::MeshAttribute MeshAttributeChunkOffset = Trade::meshAttributeCustom(1); + constexpr Trade::MeshAttribute MeshAttributeTriangleCount = Trade::meshAttributeCustom(2); + constexpr Trade::MeshAttribute MeshAttributeVertexCount = Trade::meshAttributeCustom(3); + constexpr Trade::MeshAttribute MeshAttributeChunkCount = Trade::meshAttributeCustom(4); +} + +Containers::String BpsImporter::doMeshAttributeName(const UnsignedShort name) { + if(configuration().value("meshViews")) { + switch(name) { + #define _c(name) \ + case meshAttributeCustom(MeshAttribute ## name): return #name; + _c(IndexOffset) + _c(ChunkOffset) + _c(TriangleCount) + _c(VertexCount) + _c(ChunkCount) + #undef _c + } + } + + return {}; +} + +Trade::MeshAttribute BpsImporter::doMeshAttributeForName(const Containers::StringView name) { + if(configuration().value("meshViews")) { + #define _c(name_) \ + 
if(name == #name_) return MeshAttribute ## name_; + _c(IndexOffset) + _c(ChunkOffset) + _c(TriangleCount) + _c(VertexCount) + _c(ChunkCount) + #undef _c + } + + return {}; +} + +Containers::Optional BpsImporter::doMesh(const UnsignedInt id, UnsignedInt level) { + const auto& header = *reinterpret_cast(_in.data()); + + if(configuration().value("meshViews") && level == 1) { + Containers::Array data{NoInit, header.numMeshes*sizeof(BpsMeshInfo)}; + Containers::StridedArrayView1D meshInfos = Containers::arrayCast(data); + Utility::copy(Containers::StridedArrayView1D{reinterpret_cast(_in.data() + 256), header.numMeshes}, meshInfos); + + return Trade::MeshData{MeshPrimitive::Meshlets, std::move(data), { + Trade::MeshAttributeData{MeshAttributeIndexOffset, + meshInfos.slice(&BpsMeshInfo::indexOffset)}, + Trade::MeshAttributeData{MeshAttributeChunkOffset, + meshInfos.slice(&BpsMeshInfo::chunkOffset)}, + Trade::MeshAttributeData{MeshAttributeTriangleCount, + meshInfos.slice(&BpsMeshInfo::numTriangles)}, + Trade::MeshAttributeData{MeshAttributeVertexCount, + meshInfos.slice(&BpsMeshInfo::numVertices)}, + Trade::MeshAttributeData{MeshAttributeChunkCount, + meshInfos.slice(&BpsMeshInfo::numChunks)}, + }}; + } + + Containers::Array indexData; + Containers::ArrayView indices; + if(configuration().value("meshViews")) { + indexData = Containers::Array{NoInit, header.numIndices*4}; + indices = Containers::arrayCast(indexData); + Utility::copy(Containers::arrayCast(_in.exceptPrefix(_dataOffset + header.indexOffset).prefix(header.numIndices*4)), indices); + } else { + const auto& meshInfo = reinterpret_cast(_in.data() + 256)[id]; + indexData = Containers::Array{NoInit, meshInfo.numTriangles*3*4}; + indices = Containers::arrayCast(indexData); + Utility::copy(Containers::arrayCast(_in.exceptPrefix(_dataOffset + header.indexOffset + meshInfo.indexOffset*4).prefix(meshInfo.numTriangles*3*4)), indices); + } + + struct Vertex { + Vector3 position; + Vector3 normal; + Vector2 
textureCoordinates; + }; + // TODO header.numVertices means, if not using views, each chunk has the + // whole copy of the mesh, not exactly great + // TODO OTOH this could be quite a common use case, introduce + // `DataFlag::Shared` and a unique data index (with data retrievable + // through AbstractImporter::data()?) so the app can just upload the data + // once for all references + Containers::Array vertexData{NoInit, header.numVertices*sizeof(Vertex)}; + Containers::StridedArrayView1D vertices = Containers::arrayCast(vertexData); + Utility::copy(Containers::arrayView(reinterpret_cast(_in + _dataOffset), header.numVertices), vertices); + + return Trade::MeshData{MeshPrimitive::Triangles, + std::move(indexData), Trade::MeshIndexData{indices}, + std::move(vertexData), { + Trade::MeshAttributeData{Trade::MeshAttribute::Position, + vertices.slice(&Vertex::position)}, + Trade::MeshAttributeData{Trade::MeshAttribute::Normal, + vertices.slice(&Vertex::normal)}, + Trade::MeshAttributeData{Trade::MeshAttribute::TextureCoordinates, + vertices.slice(&Vertex::textureCoordinates)}, + } + }; +} + +UnsignedInt BpsImporter::doMaterialCount() const { + return reinterpret_cast(_in.data())->numMaterials; +} + +Containers::Optional BpsImporter::doMaterial(const UnsignedInt id) { + const auto& material = reinterpret_cast(_in.data() + _dataOffset + reinterpret_cast(_in.data())->materialOffset)[id]; + + Containers::Array attributes; + arrayAppend(attributes, { + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColor, + Color4{material.baseAlbedo}}, + Trade::MaterialAttributeData{Trade::MaterialAttribute::Roughness, + material.roughness}, + /* Just for introspection purposes, in case the other 3 values have + something important */ + Trade::MaterialAttributeData{"texIdxs", Trade::MaterialAttributeType::Vector4i, + &material.texIdxs} + }); + if(configuration().value("phongMaterialFallback")) + arrayAppend(attributes, InPlaceInit, + Trade::MaterialAttribute::DiffuseColor, + 
Color4{material.baseAlbedo}); + + if(configuration().value("textureArrays")) { + if(configuration().value("textureArraysForceAllMaterialsTextured") || material.texIdxs[0] != ~UnsignedInt{}) { + arrayAppend(attributes, { + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTexture, + 0u}, + // TODO builtin attributes + Trade::MaterialAttributeData{"baseColorTextureLayer", + material.texIdxs[0] + (configuration().value("textureArraysForceAllMaterialsTextured") ? 1 : 0)} + }); + if(configuration().value("phongMaterialFallback")) { + arrayAppend(attributes, { + Trade::MaterialAttributeData{Trade::MaterialAttribute::DiffuseTexture, + 0u}, + // TODO builtin attributes + Trade::MaterialAttributeData{"diffuseTextureLayer", + material.texIdxs[0] + (configuration().value("textureArraysForceAllMaterialsTextured") ? 1 : 0)} + }); + } + } + } else if(material.texIdxs[0] != ~UnsignedInt{}) { + arrayAppend(attributes, InPlaceInit, + Trade::MaterialAttribute::BaseColorTexture, + material.texIdxs[0]); + if(configuration().value("phongMaterialFallback")) + arrayAppend(attributes, InPlaceInit, + Trade::MaterialAttribute::DiffuseTexture, + material.texIdxs[0]); + } + + arrayShrink(attributes, DefaultInit); // TODO allow growable deleters + + Trade::MaterialTypes types = Trade::MaterialType::Flat|Trade::MaterialType::PbrMetallicRoughness; + if(configuration().value("phongMaterialFallback")) + types |= Trade::MaterialType::Phong; + return Trade::MaterialData{types, std::move(attributes)}; +} + +UnsignedInt BpsImporter::doTextureCount() const { + return configuration().value("textureArrays") ? 1 : _textureNames.size(); +} + +Containers::Optional BpsImporter::doTexture(const UnsignedInt id) { + return Trade::TextureData{ + configuration().value("textureArrays") ? 
+ Trade::TextureType::Texture2DArray : Trade::TextureType::Texture2D, + SamplerFilter::Linear, + SamplerFilter::Linear, + SamplerMipmap::Linear, + SamplerWrapping::ClampToEdge, + configuration().value("textureArrays") ? + 0 : id + }; +} + +UnsignedInt BpsImporter::doImage2DCount() const { + return configuration().value("textureArrays") ? 0 : _textureNames.size(); +} + +UnsignedInt BpsImporter::doImage2DLevelCount(UnsignedInt) { + // TODO + return 1; +} + +Containers::Optional BpsImporter::doImage2D(const UnsignedInt id, const UnsignedInt level) { + // TODO reuse the importer, let config be propagated to it from global plugin mgr (how???) + + Trade::BasisImporter importer; + importer.configuration().setValue("format", configuration().value("basisFormat")); + if(!importer.openFile(Utility::Path::join({_basename, _textureDir, _textureNames[id]}))) + return {}; + return importer.image2D(0, level); +} + +UnsignedInt BpsImporter::doImage3DCount() const { + return configuration().value("textureArrays") ? 1 : 0; +} + +UnsignedInt BpsImporter::doImage3DLevelCount(UnsignedInt) { + return Math::log2(configuration().value("textureArrayMaxLevelSize")) + 1; +} + +Containers::Optional BpsImporter::doImage3D(const UnsignedInt, const UnsignedInt level) { + const Int maxLevelSize = configuration().value("textureArrayMaxLevelSize"); + if(Math::popcount(UnsignedInt(maxLevelSize)) != 1) { + Error{} << "BpsImporter::image3D(): the textureArrayMaxLevelSize option has to be a power of two, got" << configuration().value("textureArrayMaxLevelSize"); + return {}; + } + + const Vector3i imageSize{Vector2i{maxLevelSize, maxLevelSize} >> level, Int(_textureNames.size() + (configuration().value("textureArraysForceAllMaterialsTextured") ? 
1 : 0))}; + + Trade::BasisImporter importer; + importer.configuration().setValue("format", configuration().value("basisFormat")); + + Containers::Optional out; + for(std::size_t i = 0; i != _textureNames.size(); ++i) { + if(!importer.openFile(Utility::Path::join({_basename, _textureDir, _textureNames[i]}))) + return {}; + + /* Assuming the last level is 1x1, pick the Nth level from the end */ + const UnsignedInt levelFromTheEndToLoad = Math::log2(maxLevelSize) - level; + const UnsignedInt levelCount = importer.image2DLevelCount(0); + if(levelFromTheEndToLoad >= levelCount) { + Error{} << "BpsImporter::image3D(): image" << i << "has only" << levelCount << "levels which is not enough to get images of size" << imageSize.xy() << Debug::nospace << ", ignoring"; + continue; + } + + /* Import desired level */ + Containers::Optional image = importer.image2D(0, levelCount - levelFromTheEndToLoad - 1); + if(!image) return {}; + + if(!image->isCompressed()) { + Error{} << "BpsImporter::image3D(): expected compressed image, please set basisFormat"; + return {}; + } + + /* Allocate the data as appropriate once we know the format and size */ + // TODO figure out a way to know the block size beforehand + if(!out) { + out = Trade::ImageData3D{image->compressedFormat(), imageSize, Containers::Array{ValueInit, ((imageSize + compressedBlockSize(image->compressedFormat()) - Vector3i{1})/compressedBlockSize(image->compressedFormat())).product()*compressedBlockDataSize(image->compressedFormat())}}; + + /* fill the first image with 1s to use for texture-less objects */ + if(configuration().value("textureArraysForceAllMaterialsTextured")) for(char& c: out->mutableData().prefix(out->data().size()/imageSize.z())) { + c = '\xff'; // TODO ffs need an astc white block + } + } + + /* Copy the image data over */ + // TODO implement blocks(), sigh + const std::size_t levelDataSize = out->data().size()/imageSize.z(); + Utility::copy(image->data(), out->mutableData().slice((i + 1)*levelDataSize, (i 
+ 2)*levelDataSize)); + } + + return out; +} + +UnsignedInt BpsImporter::doSceneCount() const { + return configuration().value("instanceScene") ? 2 : 1; +} + +UnsignedLong BpsImporter::doObjectCount() const { + return _instanceCount + _mappings.size(); +} + +Containers::String BpsImporter::doObjectName(UnsignedLong id) { + if(id < _instanceCount) return {}; + return _mappings[id - _instanceCount].name; +} + +Int BpsImporter::doDefaultScene() const { + return 0; +} + +namespace { + constexpr Trade::SceneField SceneFieldMeshView = Trade::sceneFieldCustom(0); + constexpr Trade::SceneField SceneFieldMeshViewMaterial = Trade::sceneFieldCustom(1); + constexpr Trade::SceneField SceneFieldMeshViewIndexOffset = Trade::sceneFieldCustom(2); + constexpr Trade::SceneField SceneFieldMeshViewIndexCount = Trade::sceneFieldCustom(3); +} + +Containers::String BpsImporter::doSceneFieldName(UnsignedInt name) { + if(configuration().value("meshViews")) { + switch(name) { + #define _c(field, name) \ + case sceneFieldCustom(SceneField ## field): return #name; + _c(MeshViewIndexOffset, meshViewIndexOffset) + _c(MeshViewIndexCount, meshViewIndexCount) + _c(MeshView, meshView) + _c(MeshViewMaterial, meshViewMaterial) + #undef _c + } + } + + return {}; +} + +Trade::SceneField BpsImporter::doSceneFieldForName(const Containers::StringView name) { + if(configuration().value("meshViews")) { + #define _c(name_, field) \ + if(name == #name_) return SceneField ## field; + _c(meshViewIndexOffset, MeshViewIndexOffset) + _c(meshViewIndexCount, MeshViewIndexCount) + _c(meshView, MeshView) + _c(meshViewMaterial, MeshViewMaterial) + #undef _c + } + + return {}; +} + +Containers::Optional BpsImporter::doScene(const UnsignedInt id) { + const Containers::StridedArrayView1D instances{reinterpret_cast(_in.data() + _instanceOffset), _instanceCount}; + + if((configuration().value("instanceScene") && id == 1) || + (!configuration().value("instanceScene") && id == 0)) { + if(configuration().value("meshViews")) { + 
Containers::ArrayView objects; + Containers::StridedArrayView1D mesh; + Containers::ArrayView meshIndexOffsets; + Containers::ArrayView meshIndexCounts; + Containers::ArrayView materials; + + Containers::Array data = Containers::ArrayTuple{ + {NoInit, _mappings.size(), objects}, + {NoInit, 1, mesh}, + {NoInit, _mappings.size(), meshIndexOffsets}, + {NoInit, _mappings.size(), meshIndexCounts}, + {NoInit, _mappings.size(), materials}, + }; + + const auto* meshViews = reinterpret_cast(_in.data() + 256); + + mesh[0] = 0; + for(std::size_t i = 0; i != _mappings.size(); ++i) { + objects[i] = _instanceCount + i; + meshIndexOffsets[i] = meshViews[_mappings[i].mesh].indexOffset*4; + meshIndexCounts[i] = meshViews[_mappings[i].mesh].numTriangles*3; + materials[i] = _mappings[i].material; + } + + return Trade::SceneData{Trade::SceneMappingType::UnsignedInt, _instanceCount + _mappings.size(), std::move(data), { + Trade::SceneFieldData{Trade::SceneField::Mesh, + objects, + mesh.broadcasted<0>(_mappings.size()), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{SceneFieldMeshViewIndexOffset, + objects, + meshIndexOffsets, + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{SceneFieldMeshViewIndexCount, + objects, + meshIndexCounts, + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{SceneFieldMeshViewMaterial, + objects, + materials, + Trade::SceneFieldFlag::ImplicitMapping}, + /* To mark the scene as 3D */ + Trade::SceneFieldData{Trade::SceneField::Transformation, + Trade::SceneMappingType::UnsignedInt, nullptr, + Trade::SceneFieldType::Matrix4x4, nullptr} + }}; + } + + Containers::ArrayView objects; + Containers::ArrayView meshes; + Containers::ArrayView materials; + + Containers::Array data = Containers::ArrayTuple{ + {NoInit, _mappings.size(), objects}, + {NoInit, _mappings.size(), meshes}, + {NoInit, _mappings.size(), materials}, + }; + + for(std::size_t i = 0; i != _mappings.size(); ++i) { + objects[i] = _instanceCount + i; + 
meshes[i] = _mappings[i].mesh; + materials[i] = _mappings[i].material; + } + + return Trade::SceneData{Trade::SceneMappingType::UnsignedInt, _instanceCount + _mappings.size(), std::move(data), { + Trade::SceneFieldData{Trade::SceneField::Mesh, + objects, + meshes, + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{Trade::SceneField::MeshMaterial, + objects, + materials, + Trade::SceneFieldFlag::ImplicitMapping}, + /* To mark the scene as 3D */ + Trade::SceneFieldData{Trade::SceneField::Transformation, + Trade::SceneMappingType::UnsignedInt, nullptr, + Trade::SceneFieldType::Matrix4x4, nullptr} + }}; + } + + if(configuration().value("meshViews")) { + Containers::ArrayView objects; + Containers::StridedArrayView1D parent; + Containers::StridedArrayView1D mesh; + // TODO change to index offset + count also + Containers::StridedArrayView1D instancesCopy; + Containers::Array data = Containers::ArrayTuple{ + {NoInit, _instanceCount, objects}, + {NoInit, 1, parent}, + {NoInit, 1, mesh}, + {NoInit, _instanceCount, instancesCopy}, + }; + + for(std::size_t i = 0; i != _instanceCount; ++i) + objects[i] = i; + mesh[0] = 0; + parent[0] = -1; + Utility::copy(instances, instancesCopy); + + // TODO empty if no instances + return Trade::SceneData{Trade::SceneMappingType::UnsignedInt, _instanceCount, std::move(data), { + Trade::SceneFieldData{Trade::SceneField::Parent, + objects, + parent.broadcasted<0>(_instanceCount), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{Trade::SceneField::Mesh, + objects, + mesh.broadcasted<0>(_instanceCount), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{SceneFieldMeshView, + objects, + instancesCopy.slice(&BpsInstanceProperties::meshIndex), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{SceneFieldMeshViewMaterial, + objects, + instancesCopy.slice(&BpsInstanceProperties::materialIndex), + Trade::SceneFieldFlag::ImplicitMapping}, + 
Trade::SceneFieldData{Trade::SceneField::Transformation, + objects, + instancesCopy.slice(&BpsInstanceProperties::transformation), + Trade::SceneFieldFlag::ImplicitMapping} + }}; + } + + Containers::ArrayView objects; + Containers::StridedArrayView1D parent; + Containers::StridedArrayView1D instancesCopy; + Containers::Array data = Containers::ArrayTuple{ + {NoInit, _instanceCount, objects}, + {NoInit, 1, parent}, + {NoInit, _instanceCount, instancesCopy}, + }; + + for(std::size_t i = 0; i != _instanceCount; ++i) + objects[i] = i; + parent[0] = -1; + Utility::copy(instances, instancesCopy); + + return Trade::SceneData{Trade::SceneMappingType::UnsignedInt, _instanceCount, std::move(data), { + Trade::SceneFieldData{Trade::SceneField::Parent, + objects, + parent.broadcasted<0>(_instanceCount), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{Trade::SceneField::Mesh, + objects, + instancesCopy.slice(&BpsInstanceProperties::meshIndex), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{Trade::SceneField::MeshMaterial, + objects, + instancesCopy.slice(&BpsInstanceProperties::materialIndex), + Trade::SceneFieldFlag::ImplicitMapping}, + Trade::SceneFieldData{Trade::SceneField::Transformation, + objects, + instancesCopy.slice(&BpsInstanceProperties::transformation), + Trade::SceneFieldFlag::ImplicitMapping} + }}; +} + +CORRADE_PLUGIN_REGISTER(BpsImporter, BpsImporter, + "cz.mosra.magnum.Trade.AbstractImporter/0.5") + +// kate: indent-width 4 diff --git a/src/esp/batched_sim/CMakeLists.txt b/src/esp/batched_sim/CMakeLists.txt index a7a65e8fc0..5b1580d95a 100644 --- a/src/esp/batched_sim/CMakeLists.txt +++ b/src/esp/batched_sim/CMakeLists.txt @@ -1,13 +1,44 @@ -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED 17) +option(MAGNUM_RENDERER "Use Magnum renderer instead of BPS3D" ON) -add_library( - batched_sim STATIC +# Required by Magnum +if(MAGNUM_RENDERER) + if(NOT BUILD_WITH_CUDA) + message(FATAL_ERROR "BUILD_WITH_CUDA has to be 
enabled for the Magnum Batch Renderer") + endif() +# Required by BPS3D +else() + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD_REQUIRED 17) +endif() + +# BpsImporter plugin +if(MAGNUM_RENDERER) + find_package(Corrade REQUIRED PluginManager) + find_package(Magnum REQUIRED GL GlfwApplication SceneTools Trade MeshTools Shaders DebugTools WindowlessEglApplication) + if(MAGNUM_BUILD_STATIC) + find_package(MagnumPlugins REQUIRED BasisImporter) + corrade_add_static_plugin(BpsImporter ";" BpsImporter.conf BpsImporter.cpp) + else() + corrade_add_plugin(BpsImporter "/" "/" BpsImporter.conf BpsImporter.cpp) + endif() + target_link_libraries(BpsImporter PUBLIC Magnum::Trade) +endif() + +if(MAGNUM_RENDERER AND NOT MAGNUM_BUILD_STATIC) + set(BPSIMPORTER_PLUGIN_FILENAME $) +endif() + +# First replace ${} variables, then $<> generator expressions +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/configure.h.cmake + ${CMAKE_CURRENT_BINARY_DIR}/configure.h.in) +file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/configure.h + INPUT ${CMAKE_CURRENT_BINARY_DIR}/configure.h.in) + +# The batched_sim library +set(batched_sim_SRCS BatchedSimAssert.h BatchedSimulator.cpp BatchedSimulator.h - BpsSceneMapping.cpp - BpsSceneMapping.h ColumnGrid.cpp ColumnGrid.h CollisionBroadphaseGrid.cpp @@ -18,7 +49,7 @@ add_library( EpisodeGenerator.h EpisodeSet.cpp EpisodeSet.h - GlmUtils.cpp + GlmUtils.cpp # TODO the GLM-less stuff should be split away from here GlmUtils.h PlacementHelper.cpp PlacementHelper.h @@ -27,14 +58,95 @@ add_library( SerializeCollection.cpp SerializeCollection.h ) - +if(MAGNUM_RENDERER) + list(APPEND batched_sim_SRCS + MagnumRenderer.cpp + MagnumRenderer.h + MagnumRendererStandalone.cpp + MagnumRendererStandalone.h + ) +else() + list(APPEND batched_sim_SRCS + BpsSceneMapping.cpp + BpsSceneMapping.h + ) +endif() +add_library(batched_sim STATIC ${batched_sim_SRCS}) target_link_libraries( batched_sim - PUBLIC core - sim - io - bps3D - bps3D_core - bps3D_vulkan - stb -) + PUBLIC core 
sim) +target_include_directories(batched_sim PRIVATE ${CMAKE_BINARY_DIR}) +if(MAGNUM_RENDERER) + target_include_directories(batched_sim PRIVATE + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ${CMAKE_CURRENT_LIST_DIR}/../gfx/cuda_helpers + ) + target_link_libraries(batched_sim PUBLIC ${CUDART_LIBRARY}) + target_link_libraries(batched_sim PRIVATE + Magnum::GL + Magnum::Magnum + Magnum::Shaders + Magnum::Trade + Magnum::MeshTools + Magnum::WindowlessEglApplication) + if(MAGNUM_BUILD_STATIC) + target_link_libraries(batched_sim PRIVATE + MagnumPlugins::BasisImporter + BpsImporter) + else() + add_dependencies(batched_sim BpsImporter) + endif() +else() + target_link_libraries( + batched_sim + PUBLIC + io + bps3D + bps3D_core + bps3D_vulkan + stb + ) +endif() + +# Converter and standalone demo for Magnum renderer +if(MAGNUM_RENDERER) + add_executable(MagnumRendererConverter + MagnumRendererConverter.cpp) + target_link_libraries(MagnumRendererConverter PRIVATE + Magnum::SceneTools + Magnum::Trade + Magnum::MeshTools) + if(MAGNUM_BUILD_STATIC) + target_link_libraries(MagnumRendererConverter PRIVATE + Magnum::AnySceneImporter + MagnumPlugins::StbImageImporter + MagnumPlugins::KtxImageConverter + MagnumPlugins::GltfSceneConverter + MagnumPlugins::GltfImporter) + endif() + + add_executable(MagnumRendererDemo + MagnumRenderer.cpp + MagnumRendererDemo.cpp) + target_link_libraries(MagnumRendererDemo PRIVATE + Magnum::Application + Magnum::DebugTools + Magnum::GL + Magnum::Magnum + Magnum::Shaders + Magnum::Trade + Magnum::MeshTools) + target_include_directories(MagnumRendererDemo PRIVATE + ${PROJECT_BINARY_DIR} + ) + if(MAGNUM_BUILD_STATIC) + target_link_libraries(MagnumRendererDemo PRIVATE + Magnum::AnySceneImporter + MagnumPlugins::BasisImporter + MagnumPlugins::GltfImporter + MagnumPlugins::KtxImporter + BpsImporter) + else() + add_dependencies(MagnumRendererDemo BpsImporter) + endif() +endif() diff --git a/src/esp/batched_sim/EpisodeGenerator.cpp 
b/src/esp/batched_sim/EpisodeGenerator.cpp index b094fd5805..44184f772d 100644 --- a/src/esp/batched_sim/EpisodeGenerator.cpp +++ b/src/esp/batched_sim/EpisodeGenerator.cpp @@ -12,6 +12,7 @@ #include "esp/io/JsonAllTypes.h" #include "esp/io/json.h" +#include #include #include @@ -45,7 +46,7 @@ Keyframe loadKeyframe(const std::string& filepath) { int findOrInsertRenderAsset(std::vector& renderAssets, const std::string& filepathOrName) { - auto name = std::filesystem::path(filepathOrName).stem(); + auto name = Cr::Utility::Path::splitExtension(Cr::Utility::Path::split(filepathOrName).second()).first(); auto it = std::find_if(renderAssets.begin(), renderAssets.end(), [&](const auto& item) { return item.name_ == name; }); if (it != renderAssets.end()) { @@ -391,7 +392,9 @@ void addEpisode(const EpisodeGeneratorConfig& config, EpisodeSet generateBenchmarkEpisodeSet( const EpisodeGeneratorConfig& config, + #ifndef MAGNUM_RENDERER const BpsSceneMapping& sceneMapping, + #endif const serialize::Collection& collection) { int numEpisodes = config.numEpisodes; @@ -465,7 +468,11 @@ EpisodeSet generateBenchmarkEpisodeSet( // sloppy: call postLoadFixup before adding episodes; this means that // set.maxFreeObjects_ gets computed incorrectly in here (but it will get // computed correctly, incrementally, in addEpisode). 
- postLoadFixup(set, sceneMapping, collection); + postLoadFixup(set, + #ifndef MAGNUM_RENDERER + sceneMapping, + #endif + collection); const auto robotProxy = createFreeObjectProxyForRobot(collection); diff --git a/src/esp/batched_sim/EpisodeGenerator.h b/src/esp/batched_sim/EpisodeGenerator.h index 59a8c7f4c8..c43838d9cf 100644 --- a/src/esp/batched_sim/EpisodeGenerator.h +++ b/src/esp/batched_sim/EpisodeGenerator.h @@ -6,6 +6,8 @@ #define ESP_BATCHEDSIM_EPISODEGENERATOR_H_ #include "esp/batched_sim/EpisodeSet.h" +#include "esp/core/esp.h" +#include "esp/batched_sim/configure.h" namespace esp { namespace batched_sim { @@ -24,7 +26,9 @@ struct EpisodeGeneratorConfig { }; EpisodeSet generateBenchmarkEpisodeSet(const EpisodeGeneratorConfig& config, + #ifndef MAGNUM_RENDERER const BpsSceneMapping& sceneMapping, + #endif const serialize::Collection& collection); } // namespace batched_sim diff --git a/src/esp/batched_sim/EpisodeSet.cpp b/src/esp/batched_sim/EpisodeSet.cpp index 21d78c48dc..b7c80d30ad 100644 --- a/src/esp/batched_sim/EpisodeSet.cpp +++ b/src/esp/batched_sim/EpisodeSet.cpp @@ -201,11 +201,15 @@ void updateFromSerializeCollection(EpisodeSet& set, } void postLoadFixup(EpisodeSet& set, + #ifndef MAGNUM_RENDERER const BpsSceneMapping& sceneMapping, + #endif const serialize::Collection& collection) { for (auto& renderAsset : set.renderAssets_) { + #ifndef MAGNUM_RENDERER renderAsset.instanceBlueprint_ = sceneMapping.findInstanceBlueprint(renderAsset.name_); + #endif renderAsset.needsPostLoadFixup_ = false; } @@ -238,7 +242,11 @@ void postLoadFixup(EpisodeSet& set, Mn::Matrix4 mat = Mn::Matrix4::from(transform.rotation_.toMatrix(), transform.origin_); mat = mat * Mn::Matrix4::scaling(instance.transform_.scale_); + #ifdef MAGNUM_RENDERER + instance.glMat_ = mat; + #else instance.glMat_ = toGlmMat4x3(mat); + #endif } // sloppy: copy-pasted from addStageFixedObject diff --git a/src/esp/batched_sim/EpisodeSet.h b/src/esp/batched_sim/EpisodeSet.h index 
c6d70f5d03..6eb286ab06 100644 --- a/src/esp/batched_sim/EpisodeSet.h +++ b/src/esp/batched_sim/EpisodeSet.h @@ -6,12 +6,16 @@ #define ESP_BATCHEDSIM_EPISODESET_H_ #include "esp/batched_sim/BatchedSimAssert.h" -#include "esp/batched_sim/BpsSceneMapping.h" #include "esp/batched_sim/CollisionBroadphaseGrid.h" #include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/SerializeCollection.h" +#include "esp/batched_sim/configure.h" +#ifndef MAGNUM_RENDERER +#include "esp/batched_sim/BpsSceneMapping.h" +#endif #include +#include #include #include @@ -32,7 +36,9 @@ struct CollisionSphere { class RenderAsset { public: std::string name_; + #ifndef MAGNUM_RENDERER BpsSceneMapping::InstanceBlueprint instanceBlueprint_; + #endif bool needsPostLoadFixup_ = true; }; @@ -57,7 +63,11 @@ class RenderAssetInstance { public: int renderAssetIndex_; Transform transform_; // for serialization + #ifdef MAGNUM_RENDERER + Magnum::Matrix4 glMat_; + #else glm::mat4x3 glMat_; // computed from transform_ + #endif }; class StaticScene { @@ -138,7 +148,9 @@ void updateFromSerializeCollection(FreeObject& freeObject, const serialize::Collection& collection); void postLoadFixup(EpisodeSet& set, + #ifndef MAGNUM_RENDERER const BpsSceneMapping& sceneMapping, + #endif const serialize::Collection& collection); } // namespace batched_sim diff --git a/src/esp/batched_sim/GlmUtils.cpp b/src/esp/batched_sim/GlmUtils.cpp index c5323ca6a2..c46bdc7cbb 100644 --- a/src/esp/batched_sim/GlmUtils.cpp +++ b/src/esp/batched_sim/GlmUtils.cpp @@ -12,6 +12,7 @@ namespace Mn = Magnum; namespace esp { namespace batched_sim { +#ifndef MAGNUM_RENDERER glm::mat4 toGlmMat4(const Mn::Vector3& pos, const Mn::Quaternion& rot) { Mn::Matrix3x3 r = rot.toMatrix(); return glm::mat4(r[0][0], r[0][1], r[0][2], 0.f, r[1][0], r[1][1], r[1][2], @@ -76,6 +77,7 @@ Magnum::Vector3 inverseTransformPoint(const glm::mat4x3& glMat, const Magnum::Ve return result; } +#endif Magnum::Vector3 getRangeCorner(const Magnum::Range3D& range, int 
cornerIdx) { @@ -130,6 +132,7 @@ bool sphereBoxContactTest(const Magnum::Vector3& sphereOrigin, float sphereRadiu return (distanceSq < sphereRadiusSq); } +#ifndef MAGNUM_RENDERER template bool batchSphereOrientedBoxContactTest(const glm::mat4x3** orientedBoxTransforms, const Magnum::Vector3** positions, @@ -173,6 +176,7 @@ bool batchSphereOrientedBoxContactTest(const glm::mat4x3** orientedBoxTransforms return resultBits; } +#endif Mn::Quaternion yawToRotation(float yawRadians) { constexpr Mn::Vector3 upAxis(0.f, 1.f, 0.f); @@ -201,12 +205,14 @@ Mn::Vector3 getSphericalCoordinates( return {rho, theta, phi}; } +#ifndef MAGNUM_RENDERER template bool batchSphereOrientedBoxContactTest<64, true>(const glm::mat4x3** orientedBoxTransforms, const Magnum::Vector3** positions, float sphereRadiusSq, const Magnum::Range3D** boxRanges, int numTests); template bool batchSphereOrientedBoxContactTest<64, false>(const glm::mat4x3** orientedBoxTransforms, const Magnum::Vector3** positions, float sphereRadiusSq, const Magnum::Range3D** boxRanges, int numTests); +#endif } // namespace batched_sim } // namespace esp diff --git a/src/esp/batched_sim/GlmUtils.h b/src/esp/batched_sim/GlmUtils.h index ce4e794a33..c30cbd4b23 100644 --- a/src/esp/batched_sim/GlmUtils.h +++ b/src/esp/batched_sim/GlmUtils.h @@ -5,7 +5,11 @@ #ifndef ESP_BATCHEDSIM_GLMUTILS_H_ #define ESP_BATCHEDSIM_GLMUTILS_H_ +#include "esp/batched_sim/configure.h" + +#ifndef MAGNUM_RENDERER #include +#endif #include #include @@ -19,6 +23,7 @@ namespace esp { namespace batched_sim { +#ifndef MAGNUM_RENDERER glm::mat4 toGlmMat4(const Magnum::Vector3& pos, const Magnum::Quaternion& rot); glm::mat4 toGlmMat4(const Magnum::Matrix4& m); @@ -26,15 +31,18 @@ glm::mat4x3 toGlmMat4x3(const Magnum::Matrix4& m); Magnum::Vector3 getMagnumTranslation(const glm::mat4x3& glMat); Magnum::Vector3 inverseTransformPoint(const glm::mat4x3& glMat, const Magnum::Vector3& pos); +#endif Magnum::Vector3 getRangeCorner(const Magnum::Range3D& range, int 
cornerIdx); bool sphereBoxContactTest(const Magnum::Vector3& sphereOrigin, float sphereRadiusSq, const Magnum::Range3D& aabb); +#ifndef MAGNUM_RENDERER template bool batchSphereOrientedBoxContactTest(const glm::mat4x3** orientedBoxTransforms, const Magnum::Vector3** positions, float sphereRadiusSq, const Magnum::Range3D** boxRanges, int numTests); +#endif Magnum::Quaternion yawToRotation(float yawRadians); diff --git a/src/esp/batched_sim/MagnumRenderer.cpp b/src/esp/batched_sim/MagnumRenderer.cpp new file mode 100644 index 0000000000..8ff8263e6b --- /dev/null +++ b/src/esp/batched_sim/MagnumRenderer.cpp @@ -0,0 +1,538 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +#include "MagnumRenderer.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "esp/batched_sim/configure.h" + +#ifdef MAGNUM_BUILD_STATIC +void importStaticPlugins() { + CORRADE_PLUGIN_IMPORT(BpsImporter) +} +#endif + +namespace Cr = Corrade; +namespace Mn = Magnum; + +/* std::hash specialization to be able to use Corrade::String in unordered_map */ +// TODO put directly into Corrade, it's duplicated ON SEVEN PLACES ALREADY!!! 
+namespace std { + template<> struct hash { + std::size_t operator()(const Cr::Containers::String& key) const { + const Cr::Utility::MurmurHash2 hash; + const Cr::Utility::HashDigest digest = hash(key.data(), key.size()); + return *reinterpret_cast(digest.byteArray()); + } + }; +} + +namespace esp { namespace batched_sim { + +using namespace Cr::Containers::Literals; + +struct MagnumRendererConfiguration::State { + MagnumRendererFlags flags; + Magnum::Vector2i tileSize{128, 128}; + Magnum::Vector2i tileCount{16, 12}; + Magnum::UnsignedInt textureArrayMaxLevelSize{128}; +}; + +MagnumRendererConfiguration::MagnumRendererConfiguration(): state{Cr::InPlaceInit} {} +MagnumRendererConfiguration::~MagnumRendererConfiguration() = default; + +MagnumRendererConfiguration& MagnumRendererConfiguration::setFlags(MagnumRendererFlags flags) { + state->flags = flags; + return *this; +} +MagnumRendererConfiguration& MagnumRendererConfiguration::setTileSizeCount(const Magnum::Vector2i& tileSize, const Magnum::Vector2i& tileCount) { + state->tileSize = tileSize; + state->tileCount = tileCount; + return *this; +} +MagnumRendererConfiguration& MagnumRendererConfiguration::setTextureArrayMaxLevelSize(Magnum::UnsignedInt size) { + state->textureArrayMaxLevelSize = size; + return *this; +} + +namespace { + +struct MeshView { + // TODO also mesh ID, once we have multiple meshes + Mn::UnsignedInt indexOffsetInBytes; + Mn::UnsignedInt indexCount; + Mn::Int materialId; /* is never -1 tho */ + // TODO also parent, when we are able to fetch the whole hierarchy for a + // particular root object name + Mn::Matrix4 transformation; +}; + +struct DrawCommand { + std::size_t indexOffsetInBytes; + Mn::UnsignedInt indexCount; +}; + +struct Scene { + /* Appended to with add() */ + Cr::Containers::Array parents; /* parents[i] < i, always */ + Cr::Containers::Array transformations; + // TODO have this temporary and just once for all scenes, doesn't need to + // be stored + Cr::Containers::Array 
absoluteTransformations; + Cr::Containers::Array draws; + Cr::Containers::Array textureTransformations; + // TODO make the layout match GL to avoid a copy in draw() + Cr::Containers::Array drawCommands; + + /* Updated every frame */ + Mn::GL::Buffer transformationUniform; + Mn::GL::Buffer drawUniform; + Mn::GL::Buffer textureTransformationUniform; +}; + +} + +struct MagnumRenderer::State { + MagnumRendererFlags flags; + Mn::Vector2i tileSize, tileCount; + Mn::Shaders::PhongGL shader{Mn::NoCreate}; + + /* Filled at the beginning */ + Mn::GL::Texture2DArray texture{Mn::NoCreate}; + Mn::GL::Mesh mesh{Mn::NoCreate}; + Mn::GL::Buffer materialUniform; + /* Contains texture layer for each material. Used by add() to populate the + draw list. */ + // TODO have the materials deduplicated, and then this should have a layer + // for each mesh view instead + // TODO also the scale + offset, once we have non-shitty atlasing + Cr::Containers::Array textureLayers; + + /* Pairs of mesh views (index byte offset and count), material IDs and + initial transformations for draws. Used by add() to populate the draw + list. 
*/ + Cr::Containers::Array meshViews; + /* Range of mesh views and materials corresponding to a particular name */ + std::unordered_map> meshViewRangeForName; + + /* Updated from camera() */ + Cr::Containers::Array projections; + /* Updated every frame */ + Mn::GL::Buffer projectionUniform; + + Cr::Containers::Array scenes; +}; + +MagnumRenderer::MagnumRenderer(Magnum::NoCreateT) {}; + +void MagnumRenderer::create(const MagnumRendererConfiguration& configurationWrapper) { + #ifdef MAGNUM_BUILD_STATIC + importStaticPlugins(); + #endif + + const MagnumRendererConfiguration::State& configuration = *configurationWrapper.state; + + CORRADE_INTERNAL_ASSERT(!_state); + _state.emplace(); + _state->flags = configuration.flags; + _state->tileSize = configuration.tileSize; + _state->tileCount = configuration.tileCount; + const std::size_t sceneCount = configuration.tileCount.product(); + _state->projections = Cr::Containers::Array{sceneCount}; + _state->scenes = Cr::Containers::Array{sceneCount}; + /* Have one extra transformation slot in each scene for easier transform + calculation in draw() */ + for(Scene& scene: _state->scenes) + arrayAppend(scene.absoluteTransformations, Cr::InPlaceInit); + + // TODO move this outside + Mn::GL::Renderer::enable(Mn::GL::Renderer::Feature::FaceCulling); + Mn::GL::Renderer::enable(Mn::GL::Renderer::Feature::DepthTest); +} + +MagnumRenderer::~MagnumRenderer() = default; + +Mn::Vector2i MagnumRenderer::tileCount() const { + return _state->tileCount; +} + +Mn::Vector2i MagnumRenderer::tileSize() const { + return _state->tileSize; +} + +std::size_t MagnumRenderer::sceneCount() const { + return _state->scenes.size(); +} + +void MagnumRenderer::addFile(const Cr::Containers::StringView filename) { + return addFile(filename, filename.hasSuffix(".bps"_s) ? 
+ #ifdef MAGNUM_BUILD_STATIC + "BpsImporter"_s + #else + BPSIMPORTER_PLUGIN_FILENAME + #endif + : "AnySceneImporter"); +} + +void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr::Containers::StringView importerPlugin) { + CORRADE_ASSERT(!_state->texture.id(), + "MagnumRenderer::addFile(): sorry, only one file is supported at the moment", ); + + Cr::PluginManager::Manager manager; + Cr::Containers::Pointer importer = manager.loadAndInstantiate(importerPlugin); + CORRADE_INTERNAL_ASSERT(importer); + + if(importerPlugin.contains("BpsImporter")) { + // TODO why dafuq is this not propagated from BasisImporter config?? + importer->configuration().setValue("basisFormat", "Astc4x4RGBA"); + importer->configuration().setValue("meshViews", true); + importer->configuration().setValue("instanceScene", false); + importer->configuration().setValue("textureArrays", true); + importer->configuration().setValue("textureArraysForceAllMaterialsTextured", true); + } else if(importerPlugin.contains("GltfImporter") || + importerPlugin.contains("AnySceneImporter")) { + importer->configuration().setValue("ignoreRequiredExtensions", true); + importer->configuration().setValue("experimentalKhrTextureKtx", true); + } + + if(Cr::PluginManager::PluginMetadata* const metadata = manager.metadata("BasisImporter")) { + metadata->configuration().setValue("format", "Astc4x4RGBA"); // TODO + } + + // TODO memory-map self-contained files (have a config option? do implicitly + // for glb, bps and ply?) 
+ CORRADE_INTERNAL_ASSERT_OUTPUT(importer->openFile(filename)); + + /* One texture for the whole scene */ + if(!(_state->flags & MagnumRendererFlag::NoTextures)) { + CORRADE_ASSERT(importer->textureCount() == 1, + "MagnumRenderer::addFile(): expected a file with exactly one texture, got" << importer->textureCount(), ); + const Cr::Containers::Optional texture = importer->texture(0); + CORRADE_INTERNAL_ASSERT(texture && texture->type() == Mn::Trade::TextureType::Texture2DArray); + + const Mn::UnsignedInt levelCount = importer->image3DLevelCount(texture->image()); + Cr::Containers::Optional image = importer->image3D(texture->image()); + CORRADE_INTERNAL_ASSERT(image); + _state->texture = Mn::GL::Texture2DArray{}; + _state->texture + .setMinificationFilter(texture->minificationFilter(), texture->mipmapFilter()) + .setMagnificationFilter(texture->magnificationFilter()) + .setWrapping(texture->wrapping().xy()); + if(image->isCompressed()) { + _state->texture + .setStorage(levelCount, Mn::GL::textureFormat(image->compressedFormat()), image->size()) + .setCompressedSubImage(0, {}, *image); + for(Mn::UnsignedInt level = 1; level != levelCount; ++level) { + Cr::Containers::Optional levelImage = importer->image3D(texture->image(), level); + CORRADE_INTERNAL_ASSERT(levelImage && levelImage->isCompressed() && levelImage->compressedFormat() == image->compressedFormat()); + _state->texture.setCompressedSubImage(level, {}, *levelImage); + } + } else { + _state->texture + .setStorage(levelCount, Mn::GL::textureFormat(image->format()), image->size()) + .setSubImage(0, {}, *image); + } + } + + /* One mesh for the whole scene */ + CORRADE_ASSERT(importer->meshCount() == 1, + "MagnumRenderer::addFile(): expected a file with exactly one mesh, got" << importer->meshCount(), ); + _state->mesh = Mn::MeshTools::compile(*CORRADE_INTERNAL_ASSERT_EXPRESSION(importer->mesh(0))); + + /* Immutable material data. 
Save layers to a temporary array to apply + them to draws instead */ + // TODO have a step that deduplicates materials and puts the layer to the + // scene instead + _state->textureLayers = Cr::Containers::Array{Cr::DefaultInit, importer->materialCount()}; + { + Cr::Containers::Array materialData{Cr::DefaultInit, importer->materialCount()}; + for(std::size_t i = 0; i != materialData.size(); ++i) { + const Cr::Containers::Optional material = importer->material(i); + CORRADE_INTERNAL_ASSERT(material); + const auto& flatMaterial = material->as(); + materialData[i].setAmbientColor(flatMaterial.color()); + + CORRADE_ASSERT(flatMaterial.hasTexture(), + "MagnumRenderer::addFile(): material" << i << "is not textured", ); + CORRADE_ASSERT(flatMaterial.texture() == 0, + "MagnumRenderer::addFile(): expected material" << i << "to reference the only texture, got" << flatMaterial.texture(), ); + + // TODO builtin attribute for this + _state->textureLayers[i] = flatMaterial.attribute("baseColorTextureLayer"_s); + } + + // TODO immutable buffer storage + _state->materialUniform.setData(materialData); + } + + /* Fill initial projection data for each view. Will be uploaded afresh every + draw. 
*/ + _state->projections = Cr::Containers::Array{Cr::DefaultInit, std::size_t(_state->tileCount.product())}; + // TODO (mutable) buffer storage + + { + CORRADE_ASSERT(importer->sceneCount() == 1, + "MagnumRenderer::addFile(): expected exactly one scene, got" << importer->sceneCount(), ); + Cr::Containers::Optional scene = importer->scene(0); + CORRADE_INTERNAL_ASSERT(scene); + + /* Populate the mesh and material list */ + const Cr::Containers::Optional meshViewIndexOffsetFieldId = scene->findFieldId(importer->sceneFieldForName("meshViewIndexOffset")); + CORRADE_ASSERT(meshViewIndexOffsetFieldId, + "MagnumRenderer::addFile(): no meshViewIndexOffset field in the scene", ); + const Cr::Containers::Optional meshViewIndexCountFieldId = scene->findFieldId(importer->sceneFieldForName("meshViewIndexCount")); + CORRADE_ASSERT(meshViewIndexCountFieldId, + "MagnumRenderer::addFile(): no meshViewIndexCount field in the scene", ); + const Cr::Containers::Optional meshViewMaterialFieldId = scene->findFieldId(importer->sceneFieldForName("meshViewMaterial")); + CORRADE_ASSERT(meshViewMaterialFieldId, + "MagnumRenderer::addFile(): no meshViewMaterial field in the scene", ); + /* SceneData and copy() will assert if the types or sizes don't match, so + we don't have to */ + _state->meshViews = Cr::Containers::Array{Cr::NoInit, scene->fieldSize(*meshViewIndexCountFieldId)}; + if(importerPlugin.contains("BpsImporter")) { + Cr::Utility::copy(scene->field(*meshViewIndexOffsetFieldId), + stridedArrayView(_state->meshViews).slice(&MeshView::indexOffsetInBytes)); + Cr::Utility::copy(scene->field(*meshViewIndexCountFieldId), + stridedArrayView(_state->meshViews).slice(&MeshView::indexCount)); + Cr::Utility::copy(scene->field(*meshViewMaterialFieldId), + stridedArrayView(_state->meshViews).slice(&MeshView::materialId)); + } else { + // TODO implement parsing ints in the importer + Mn::Math::castInto( + // TODO make stridedArrayView implicitly convertible to higher + // dimensions, like images 
+ Cr::Containers::arrayCast<2, const Mn::Double>(scene->field(*meshViewIndexOffsetFieldId)), + Cr::Containers::arrayCast<2, Mn::UnsignedInt>(stridedArrayView(_state->meshViews).slice(&MeshView::indexOffsetInBytes))); + Mn::Math::castInto( + // TODO make stridedArrayView implicitly convertible to higher + // dimensions, like images + Cr::Containers::arrayCast<2, const Mn::Double>(scene->field(*meshViewIndexCountFieldId)), + Cr::Containers::arrayCast<2, Mn::UnsignedInt>(stridedArrayView(_state->meshViews).slice(&MeshView::indexCount))); + Mn::Math::castInto( + // TODO make stridedArrayView implicitly convertible to higher + // dimensions, like images + Cr::Containers::arrayCast<2, const Mn::Double>(scene->field(*meshViewMaterialFieldId)), + Cr::Containers::arrayCast<2, Mn::Int>(stridedArrayView(_state->meshViews).slice(&MeshView::materialId))); + } + /* Transformations of all objects in the scene. Objects that don't have + this field default to an indentity transform. */ + Cr::Containers::Array transformations{scene->mappingBound()}; + for(Cr::Containers::Pair transformation: scene->transformations3DAsArray()) { + transformations[transformation.first()] = transformation.second(); + } + + /* Populate transforms of all mesh views. Assuming all three fields have + the same mapping. */ + const Cr::Containers::StridedArrayView1D meshViewMapping = scene->mapping(*meshViewIndexCountFieldId); + for(std::size_t i = 0; i != meshViewMapping.size(); ++i) { + _state->meshViews[i].transformation = transformations[meshViewMapping[i]]; + } + +// /* Populate name mapping. Assuming all three fields have it the same. 
*/ +// for(std::size_t i = 0; i != mapping.size(); ++i) { +// Cr::Containers::String name = importer->objectName(mapping[i]); +// CORRADE_ASSERT(name, "MagnumRenderer::addFile(): node" << i << "has no name", ); +// // TODO this will get an actual range once we fetch the whole hierarchy +// // for each name +// // TODO fetch parents and transformations, order their mapping +// // depth-first (orderParentsDepthFirst()) so each top-level object has +// // its children next to itself, then populate / reshuffle the mesh views +// // to match this mapping (how!?), and ultimately fill +// // meshViewRangeForName only with names of top-level objects (with +// // parent = -1) and the child count (i.e., # of elements until another +// // -1 parent) +// _state->meshViewRangeForName.insert({name, {Mn::UnsignedInt(i), Mn::UnsignedInt(i + 1)}}); +// } + + /* Templates are the root objects with their names. Their immediate + children are the actual meshes. Assumes the order matches the order of + the custom fields. */ + // TODO hacky and brittle! 
doesn't handle nested children properly, doesn't + // account for a different order of the field vs the child lists + Mn::UnsignedInt offset = 0; + for(Mn::UnsignedLong root: scene->childrenFor(-1)) { + Cr::Containers::Array children = scene->childrenFor(root); + + Cr::Containers::String name = importer->objectName(root); + CORRADE_ASSERT(name, "MagnumRenderer::addFile(): node" << root << "has no name", ); + _state->meshViewRangeForName.insert({name, {offset, offset + Mn::UnsignedInt(children.size())}}); + offset += children.size(); + } + CORRADE_INTERNAL_ASSERT(offset == _state->meshViews.size()); + } + + /* Setup a zero-light (flat) shader, bind buffers that don't change + per-view */ + Mn::Shaders::PhongGL::Flags flags = + Mn::Shaders::PhongGL::Flag::MultiDraw| + Mn::Shaders::PhongGL::Flag::UniformBuffers; + if(!(_state->flags >= MagnumRendererFlag::NoTextures)) flags |= + Mn::Shaders::PhongGL::Flag::AmbientTexture| + Mn::Shaders::PhongGL::Flag::TextureArrays| + Mn::Shaders::PhongGL::Flag::TextureTransformation; + // TODO 1024 is 64K divided by 64 bytes needed for one draw uniform, have + // that fetched from actual GL limits instead + _state->shader = Mn::Shaders::PhongGL{flags, 0, importer->materialCount(), 1024}; + _state->shader + .bindMaterialBuffer(_state->materialUniform); + if(!(_state->flags >= MagnumRendererFlag::NoTextures)) _state->shader + .bindAmbientTexture(_state->texture); +} + +std::size_t MagnumRenderer::add(const Mn::UnsignedInt sceneId, const Cr::Containers::StringView name, const Mn::Matrix4& transformation) { + CORRADE_ASSERT(sceneId < _state->scenes.size(), + "MagnumRenderer::add(): index" << sceneId << "out of range for" << _state->scenes.size() << "scenes", {}); + + Scene& scene = _state->scenes[sceneId]; + /* Using a non-owning wrapper over the view to avoid an allocated string copy + because yes hello STL you're uhhmazing */ + const auto found = _state->meshViewRangeForName.find(Cr::Containers::String::nullTerminatedView(name)); + 
CORRADE_ASSERT(found != _state->meshViewRangeForName.end(), + "MagnumRenderer::add(): name" << name << "not found", {}); + + /* Add the whole hierarchy under this name */ + const std::size_t id = scene.transformations.size(); + for(std::size_t i = found->second.first(); i != found->second.second(); ++i) { + const MeshView& meshView = _state->meshViews[i]; + /* The following meshes are children of the first one, inheriting its + transformation */ + arrayAppend(scene.parents, + i == found->second.first() ? -1 : -1); // TODO here should be id, but that doesn't work, why?? + arrayAppend(scene.transformations, + (i == found->second.first() ? transformation : Mn::Matrix4{})*meshView.transformation); + + /* The actual absolute transformation will get filled each time draw() is + called */ + arrayAppend(scene.absoluteTransformations, Cr::InPlaceInit); + + arrayAppend(scene.draws, Cr::InPlaceInit) + .setMaterialId(meshView.materialId); + arrayAppend(scene.textureTransformations, Cr::InPlaceInit) + // TODO flip again once the input is fixed + .setLayer(104 - _state->textureLayers[meshView.materialId]); + arrayAppend(scene.drawCommands, Cr::InPlaceInit, + meshView.indexOffsetInBytes, meshView.indexCount); + } + + /* Assuming add() is called relatively infrequently compared to draw(), + upload the changed draw and texture transform buffers. Transformation + buffer will be updated in draw(). 
*/ + scene.drawUniform.setData(scene.draws); + scene.textureTransformationUniform.setData(scene.textureTransformations); + + return id; +} + +std::size_t MagnumRenderer::add(const Mn::UnsignedInt scene, const Cr::Containers::StringView name) { + return add(scene, name, Mn::Matrix4{}); +} + +void MagnumRenderer::clear(const Mn::UnsignedInt sceneId) { + CORRADE_ASSERT(sceneId < _state->scenes.size(), + "MagnumRenderer::clear(): index" << sceneId << "out of range for" << _state->scenes.size() << "scenes", ); + + Scene& scene = _state->scenes[sceneId]; + // TODO have arrayClear() + /* Resizing instead of `= {}` to not discard the memory */ + arrayResize(scene.parents, 0); + arrayResize(scene.transformations, 0); + /* Keep the root absolute transform here tho (same state as when initially + constructed) */ + arrayResize(scene.absoluteTransformations, 1); + arrayResize(scene.draws, 0); + arrayResize(scene.textureTransformations, 0); + arrayResize(scene.drawCommands, 0); +} + +Mn::Matrix4& MagnumRenderer::camera(const Mn::UnsignedInt scene) { + return _state->projections[scene].projectionMatrix; +} + +Cr::Containers::StridedArrayView1D MagnumRenderer::transformations(const Mn::UnsignedInt scene) { + return _state->scenes[scene].transformations; +} + +void MagnumRenderer::draw(Mn::GL::AbstractFramebuffer& framebuffer) { + /* Calculate absolute transformations */ + for(std::size_t sceneId = 0; sceneId != _state->scenes.size(); ++sceneId) { + Scene& scene = _state->scenes[sceneId]; + + // TODO have a tool for this + scene.absoluteTransformations[0].setTransformationMatrix(Mn::Matrix4{}); +// if(sceneId == 0) !Mn::Debug{} << stridedArrayView(scene.transformations).slice(&Mn::Matrix4::translation); + for(std::size_t i = 0; i != scene.transformations.size(); ++i) + scene.absoluteTransformations[i + 1].setTransformationMatrix( + scene.absoluteTransformations[scene.parents[i] + 1].transformationMatrix *scene.transformations[i]); +// if(sceneId == 0) !Mn::Debug{} << 
stridedArrayView(scene.absoluteTransformations).slice(&Mn::Shaders::TransformationUniform3D::transformationMatrix).slice(&Mn::Matrix4::translation); + } + + /* Upload projection and transformation uniforms, assuming they change every + frame. Do it before the draw loop to minimize stalls. */ + _state->projectionUniform.setData(_state->projections); + for(std::size_t sceneId = 0; sceneId != _state->scenes.size(); ++sceneId) + // TODO have this somehow in a single buffer instead + // TODO needs arrayInsert(), heh + _state->scenes[sceneId].transformationUniform.setData(_state->scenes[sceneId].absoluteTransformations.exceptPrefix(1)); + + for(Mn::Int y = 0; y != _state->tileCount.y(); ++y) { + for(Mn::Int x = 0; x != _state->tileCount.x(); ++x) { + framebuffer.setViewport(Mn::Range2Di::fromSize(Mn::Vector2i{x, y}*_state->tileSize, _state->tileSize)); + + const std::size_t scene = y*_state->tileCount.x() + x; + +// !Debug{} << scene << _state->scenes[scene].textureTransformations + + // TODO bind the actual view + // TODO what is the above TODO about, actually?! 
+ + // TODO split by draw count limit + _state->shader + // TODO bind all buffers together with a multi API + .bindProjectionBuffer(_state->projectionUniform, + scene*sizeof(Mn::Shaders::ProjectionUniform3D), + sizeof(Mn::Shaders::ProjectionUniform3D)) + .bindTransformationBuffer(_state->scenes[scene].transformationUniform) + .bindDrawBuffer(_state->scenes[scene].drawUniform); + if(!(_state->flags & MagnumRendererFlag::NoTextures)) + _state->shader.bindTextureTransformationBuffer(_state->scenes[scene].textureTransformationUniform); + _state->shader + .draw(_state->mesh, + stridedArrayView(_state->scenes[scene].drawCommands).slice(&DrawCommand::indexCount), + nullptr, + stridedArrayView(_state->scenes[scene].drawCommands).slice(&DrawCommand::indexOffsetInBytes)); + } + } +} + +}} diff --git a/src/esp/batched_sim/MagnumRenderer.h b/src/esp/batched_sim/MagnumRenderer.h new file mode 100644 index 0000000000..c02d4dd658 --- /dev/null +++ b/src/esp/batched_sim/MagnumRenderer.h @@ -0,0 +1,79 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef ESP_BATCHEDSIM_MAGNUM_RENDERER_H_ +#define ESP_BATCHEDSIM_MAGNUM_RENDERER_H_ + +#include +#include +#include +#include + +namespace esp { namespace batched_sim { + +enum class MagnumRendererFlag { + NoTextures = 1 << 0, + // TODO memory-map +}; +typedef Corrade::Containers::EnumSet MagnumRendererFlags; +CORRADE_ENUMSET_OPERATORS(MagnumRendererFlags) + +struct MagnumRendererConfiguration { + explicit MagnumRendererConfiguration(); + ~MagnumRendererConfiguration(); + + MagnumRendererConfiguration& setFlags(MagnumRendererFlags flags); + MagnumRendererConfiguration& setTileSizeCount(const Magnum::Vector2i& tileSize, const Magnum::Vector2i& tileCount); + // TODO drop + MagnumRendererConfiguration& setTextureArrayMaxLevelSize(Magnum::UnsignedInt size); + + struct State; + Corrade::Containers::Pointer state; +}; + +class MagnumRenderer { + public: + explicit MagnumRenderer(const MagnumRendererConfiguration& configuration): MagnumRenderer{Magnum::NoCreate} { + create(configuration); + } + + ~MagnumRenderer(); + + Magnum::Vector2i tileSize() const; + Magnum::Vector2i tileCount() const; + /* Same as tileCount().product() */ + std::size_t sceneCount() const; + + // TODO take an importer instead? that way the consumer can configure it + void addFile(Corrade::Containers::StringView filename); + void addFile(Corrade::Containers::StringView filename, Corrade::Containers::StringView importerPlugin); + + // TODO "if there's a scene, name corresponds to a root bject name, + // otherwise it's the whole file" + // TODO or use empty name for the whole scene? but which file??? or use a + // scene name for the whole scene?? 
+ // TODO document this may not return consecutive IDs in case the added name + // is multiple meshes together + std::size_t add(Magnum::UnsignedInt sceneId, Corrade::Containers::StringView name); + std::size_t add(Magnum::UnsignedInt sceneId, Corrade::Containers::StringView name, const Magnum::Matrix4& transformation); + void clear(Magnum::UnsignedInt sceneId); + + Magnum::Matrix4& camera(Magnum::UnsignedInt sceneId); + Corrade::Containers::StridedArrayView1D transformations(Magnum::UnsignedInt sceneId); + + void draw(Magnum::GL::AbstractFramebuffer& framebuffer); + + protected: + /* used by MagnumRendererStandalone */ + explicit MagnumRenderer(Magnum::NoCreateT); + void create(const MagnumRendererConfiguration& configuration); + + private: + struct State; + Corrade::Containers::Pointer _state; +}; + +}} + +#endif diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp new file mode 100644 index 0000000000..d0260fd7ea --- /dev/null +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -0,0 +1,377 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Magnum; +using namespace Containers::Literals; + +/* std::hash specialization to be able to use Corrade::String in unordered_map */ +// TODO put directly into Corrade!! it's duplicated IN EIGHT!! PLACES! ALREADY! 
+namespace std { + template<> struct hash { + std::size_t operator()(const Containers::String& key) const { + const Utility::MurmurHash2 hash; + const Utility::HashDigest digest = hash(key.data(), key.size()); + return *reinterpret_cast(digest.byteArray()); + } + }; +} + +// TODO make these builtin +constexpr Trade::SceneField SceneFieldMeshViewIndexOffset = Trade::sceneFieldCustom(0); +constexpr Trade::SceneField SceneFieldMeshViewIndexCount = Trade::sceneFieldCustom(1); +constexpr Trade::SceneField SceneFieldMeshViewMaterial = Trade::sceneFieldCustom(2); + +int main(int argc, char** argv) { + Utility::Arguments args; + args.addArgument("input").setHelp("input", "input file prefix") + .addArgument("output").setHelp("output", "output file") + .addOption('C', "converter", "GltfSceneConverter").setHelp("converter", "converter plugin to use") + .parse(argc, argv); + + PluginManager::Manager importerManager; + PluginManager::Manager imageConverterManager; + PluginManager::Manager converterManager; + converterManager.registerExternalManager(imageConverterManager); + + /* Reasonable config defaults */ + if(PluginManager::PluginMetadata* m = importerManager.metadata("GltfImporter")) { + /* Don't need this */ + m->configuration().setValue("phongMaterialFallback", false); + } + if(PluginManager::PluginMetadata* m = imageConverterManager.metadata("BasisImageConverter")) { + // TODO set up once GltfSceneConverter uses it directly + } + + Containers::Pointer importer = importerManager.loadAndInstantiate("AnySceneImporter"); + Containers::Pointer converter = converterManager.loadAndInstantiate(args.value("converter")); + + /* To prevent the file from being opened by unsuspecting libraries */ + converter->configuration().addValue("extensionUsed", "MAGNUMX_mesh_views"); + converter->configuration().addValue("extensionRequired", "MAGNUMX_mesh_views"); + + /* Begin file conversion */ + converter->beginFile(args.value("output")); + 
converter->setSceneFieldName(SceneFieldMeshViewIndexOffset, "meshViewIndexOffset"); + converter->setSceneFieldName(SceneFieldMeshViewIndexCount, "meshViewIndexCount"); + converter->setSceneFieldName(SceneFieldMeshViewMaterial, "meshViewMaterial"); + + /* Materials are put into the file directly, meshes and textures get + collected and then joined. */ + Containers::Array inputMeshes; + Containers::Array inputImages; + UnsignedInt indexOffset = 0; + + /* Parent, present for all objects */ + struct Parent { + UnsignedInt mapping; + Int parent; + }; + Containers::Array parents; + + /* Transformation, present only for nested objects */ + struct Transformation { + UnsignedInt mapping; + Matrix4 transformation; + }; + Containers::Array transformations; + + /* Mesh assignment, present for all objects except a "template root" that + contains multiple meshes as children */ + struct Mesh { + UnsignedInt mapping; + UnsignedInt mesh; /* always 0 at the moment */ + UnsignedInt meshIndexOffset; + UnsignedInt meshIndexCount; + Int meshMaterial; + }; + Containers::Array meshes; +// const auto import = [&](Containers::StringView filename) { +// importer->openFile(filename); +// CORRADE_INTERNAL_ASSERT(importer->meshCount() == 1); +// CORRADE_INTERNAL_ASSERT(importer->materialCount() == 1); +// +// Containers::Optional mesh = importer->mesh(0); +// Containers::Optional material = importer->material(0); +// +// /* If the material is textured, extract the image as well */ +// // TODO findAttributeId()! 
+// if(material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture)) { +// UnsignedInt& textureId = material->mutableAttribute(Trade::MaterialAttribute::BaseColorTexture); +// +// Containers::Optional texture = importer->texture(textureId); +// CORRADE_INTERNAL_ASSERT(texture && texture->type() == Trade::TextureType::Texture2D); +// +// /* Patch the material to use the zero texture but add a layer as well and +// make it just Flat */ +// textureId = 0; +// CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); +// Containers::Array attributes = material->releaseAttributeData(); +// arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size())); +// material = Trade::MaterialData{Trade::MaterialType::Flat, std::move(attributes)}; +// +// arrayAppend(inputImages, *importer->image2D(texture->image())); +// +// /* Otherwise just make it Flat */ +// } else { +// CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); +// material = Trade::MaterialData{Trade::MaterialType::Flat, material->releaseAttributeData()}; +// } +// +// /* Object setup */ +// Object o; +// o.id = objects.size(); +// o.mesh = 0; +// // TODO concatenate() should do this on its own +// o.meshIndexOffset = indexOffset; +// o.meshIndexCount = mesh->indexCount(); +// indexOffset += mesh->indexCount(); +// converter->setObjectName(inputMeshes.size(), Utility::Path::split(filename).second()); +// o.meshMaterial = *converter->add(*material); +// +// /* Mesh setup */ +// // TODO convert from a strip/... if not triangles +// arrayAppend(inputMeshes, *std::move(mesh)); +// +// arrayAppend(objects, o); +// }; + + const auto importMultiple = [&](Containers::StringView filename, std::unordered_map>* uniqueMeshes = nullptr) { + CORRADE_INTERNAL_ASSERT_OUTPUT(importer->openFile(filename)); + CORRADE_INTERNAL_ASSERT(importer->sceneCount() == 1); // TODO multi-scene?? 
+ + Containers::Optional scene = importer->scene(0); + CORRADE_INTERNAL_ASSERT(scene); + + /* Top-level object, parent of the others */ + Parent root; + root.mapping = parents.size(); + root.parent = -1; + arrayAppend(parents, root); + converter->setObjectName(root.mapping, Utility::Path::splitExtension(Utility::Path::split(filename).second()).first()); + + /* Meshes are unfortunately named in a useless way, so override them with + names from the objects referencing them instead */ + Containers::Array meshNames{importer->meshCount()}; + for(Containers::Pair> objectMeshMaterial: scene->meshesMaterialsAsArray()) { + const UnsignedInt meshId = objectMeshMaterial.second().first(); + Containers::String objectName = importer->objectName(objectMeshMaterial.first()); + if(!objectName) + Fatal{} << "No name found for object" << objectMeshMaterial.first() << "referencing mesh" << importer->meshName(meshId) << "in" << Utility::Path::split(filename).second(); + + if(meshNames[meshId] && meshNames[meshId] != objectName) + Fatal{} << "Conflicting name for mesh" << importer->meshName(meshId) << Debug::nospace << ":" << meshNames[meshId] << "vs" << objectName << "in" << Utility::Path::split(filename).second(); + + meshNames[meshId] = std::move(objectName); + } + + /* Assuming materials are shared among meshes, remember the ID of already + imported materials */ + Containers::Array> importedMaterialIds{importer->materialCount()}; + + /* Node mesh/material assignments. Each entry will be one child of the + top-level object. */ + for(Containers::Triple transformationMeshMaterial: SceneTools::flattenMeshHierarchy3D(*scene)) { + Containers::Optional mesh = importer->mesh(transformationMeshMaterial.first()); + // TODO convert from a strip/... 
if not triangles + CORRADE_INTERNAL_ASSERT(mesh && mesh->isIndexed() && mesh->primitive() == MeshPrimitive::Triangles); + + Parent o; + o.mapping = parents.size(); + o.parent = root.mapping; + arrayAppend(parents, o); + arrayAppend(transformations, InPlaceInit, + o.mapping, + transformationMeshMaterial.third()); + /* Save the nested object name as well, for debugging purposes. It'll be + duplicated among different scenes but that's no problem. */ + converter->setObjectName(o.mapping, meshNames[transformationMeshMaterial.first()]); + + Mesh m; + m.mapping = o.mapping; + m.mesh = 0; + m.meshIndexCount = mesh->indexCount(); + + /* Check if a mesh of the same name is already present and reuse it in + that case, otherwise add to the map. Not using + `importer->meshName(transformationMeshMaterial.first())` + because the mesh names are useless `Mesh.XYZ` that don't match between + files. */ + // TODO avoid string copies by making the map over StringViews? but then + // it'd have to be changed again once we don't have the extra meshNames + // array + Containers::String meshName = meshNames[transformationMeshMaterial.first()]; + bool duplicate = false; + if(uniqueMeshes) { + std::unordered_map>::iterator found = uniqueMeshes->find(meshName); + if(found != uniqueMeshes->end()) { + if(mesh->indexCount() == found->second.second()) { + m.meshIndexOffset = found->second.first(); + duplicate = true; + } else { + Warning{} << "Mesh" << meshName << "in" << Utility::Path::split(filename).second() << "has" << mesh->indexCount() << "indices but expected" << found->second.second() << Debug::nospace << ", adding a new copy"; + } + } + } + + if(!duplicate) { + if(uniqueMeshes) Debug{} << "New mesh" << meshName << "in" << Utility::Path::split(filename).second(); + + uniqueMeshes->insert({std::move(meshName), {indexOffset, mesh->indexCount()}}); + m.meshIndexOffset = indexOffset; + indexOffset += mesh->indexCount()*4; // TODO ints hardcoded + + // TODO convert from a strip/... 
if not triangles + arrayAppend(inputMeshes, *std::move(mesh)); + } + + /* If the material is already parsed, reuse its ID */ + if(const Containers::Optional materialId = importedMaterialIds[transformationMeshMaterial.second()]) { + m.meshMaterial = *materialId; + + /* Otherwise parse it. If textured, extract the image as well. */ + } else { + Debug{} << "New material for" << meshNames[transformationMeshMaterial.first()] << "in" << Utility::Path::split(filename).second(); + + Containers::Optional material = importer->material(transformationMeshMaterial.second()); + CORRADE_INTERNAL_ASSERT(material); + // TODO findAttributeId()! + if(material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture)) { + UnsignedInt& textureId = material->mutableAttribute(Trade::MaterialAttribute::BaseColorTexture); + + Containers::Optional texture = importer->texture(textureId); + CORRADE_INTERNAL_ASSERT(texture && texture->type() == Trade::TextureType::Texture2D); + + /* Patch the material to use the zero texture but add a layer as well + and make it just Flat */ + textureId = 0; + CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); + Containers::Array attributes = material->releaseAttributeData(); + arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size())); + material = Trade::MaterialData{Trade::MaterialType::Flat, std::move(attributes)}; + + arrayAppend(inputImages, *importer->image2D(texture->image())); + + /* Otherwise just make it Flat */ + } else { + // TODO make it reference a white texture?? + CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); + material = Trade::MaterialData{Trade::MaterialType::Flat, material->releaseAttributeData()}; + } + + importedMaterialIds[transformationMeshMaterial.second()] = m.meshMaterial = *converter->add(*material); + } + + arrayAppend(meshes, m); + } + }; + + /* Import the stuff. Well, some of it. 
*/ + // TODO haha +// Containers::String debugModelsPath = Utility::Path::join(args.value("input"), "extra_source_data_v0/debug_models"); +// import(Utility::Path::join(debugModelsPath, "sphere_green_wireframe.glb")); +// import(Utility::Path::join(debugModelsPath, "sphere_orange_wireframe.glb")); +// import(Utility::Path::join(debugModelsPath, "cube_gray_shaded.glb")); +// import(Utility::Path::join(debugModelsPath, "cube_gray_shaded.glb")); // TODO + + Containers::String replicaPath = Utility::Path::join(args.value("input"), "ReplicaCAD_baked_lighting_v1.5/stages_uncompressed"); + std::unordered_map> uniqueReplicaMeshes; + for(std::size_t i = 0; i <= 4; ++i) { + for(std::size_t j = 0; j <= 20; ++j) { + importMultiple(Utility::Path::join(replicaPath, Utility::format("Baked_sc{}_staging_{:.2}.glb", i, j)), &uniqueReplicaMeshes); + } + } + + /* Target layout for the mesh. So far just for flat rendering, no normals + etc */ + Trade::MeshData mesh{MeshPrimitive::Triangles, nullptr, { + Trade::MeshAttributeData{Trade::MeshAttribute::Position, VertexFormat::Vector3, nullptr}, + Trade::MeshAttributeData{Trade::MeshAttribute::TextureCoordinates, VertexFormat::Vector2, nullptr}, + }}; + Containers::Array> inputMeshReferences; + for(const Trade::MeshData& inputMesh: inputMeshes) + arrayAppend(inputMeshReferences, inputMesh); + MeshTools::concatenateInto(mesh, inputMeshReferences); + CORRADE_INTERNAL_ASSERT(converter->add(mesh)); + + /* A combined 3D image */ + Trade::ImageData3D image{PixelFormat::RGB8Unorm, {2048, 2048, Int(inputImages.size())}, Containers::Array{NoInit, 2048*2048*inputImages.size()*4}}; + for(std::size_t i = 0; i != inputImages.size(); ++i) { + CORRADE_INTERNAL_ASSERT(inputImages[i].format() == PixelFormat::RGB8Unorm); + Utility::copy(inputImages[i].pixels(), image.mutablePixels()[i]); + } + + /* Clear the original images array to relieve the memory pressure a bit -- + Basis is HUNGRY */ + inputImages = {}; + 
CORRADE_INTERNAL_ASSERT(converter->add(image)); + + /* And a texture referencing the only image */ + CORRADE_INTERNAL_ASSERT(converter->add(Trade::TextureData{Trade::TextureType::Texture2DArray, + SamplerFilter::Linear, SamplerFilter::Linear, SamplerMipmap::Linear, + SamplerWrapping::Repeat, 0})); + + /* Combine the SceneData. In case of glTF the SceneData could be just a view + on the whole memory, with no combining, but this future-proofs it for + dumping into a binary representation */ + // TODO use SceneTools::combine() instead once it's public + Containers::StridedArrayView1D outputParents; + Containers::StridedArrayView1D outputTransformations; + Containers::StridedArrayView1D outputMeshes; + Containers::Array data = Containers::ArrayTuple{ + {NoInit, parents.size(), outputParents}, + {NoInit, transformations.size(), outputTransformations}, + {NoInit, meshes.size(), outputMeshes}, + }; + Utility::copy(parents, outputParents); + Utility::copy(transformations, outputTransformations); + Utility::copy(meshes, outputMeshes); + Trade::SceneData scene{Trade::SceneMappingType::UnsignedInt, parents.size(), {}, data, { + Trade::SceneFieldData{Trade::SceneField::Parent, + outputParents.slice(&Parent::mapping), + outputParents.slice(&Parent::parent)}, + Trade::SceneFieldData{Trade::SceneField::Transformation, + outputTransformations.slice(&Transformation::mapping), + outputTransformations.slice(&Transformation::transformation)}, + Trade::SceneFieldData{Trade::SceneField::Mesh, + outputMeshes.slice(&Mesh::mapping), + outputMeshes.slice(&Mesh::mesh)}, + Trade::SceneFieldData{ + SceneFieldMeshViewIndexOffset, + outputMeshes.slice(&Mesh::mapping), + outputMeshes.slice(&Mesh::meshIndexOffset)}, + Trade::SceneFieldData{ + SceneFieldMeshViewIndexCount, + outputMeshes.slice(&Mesh::mapping), + outputMeshes.slice(&Mesh::meshIndexCount)}, + Trade::SceneFieldData{ + SceneFieldMeshViewMaterial, + outputMeshes.slice(&Mesh::mapping), + outputMeshes.slice(&Mesh::meshMaterial)} + }}; + 
CORRADE_INTERNAL_ASSERT(converter->add(scene)); + + return converter->endFile() ? 0 : 1; +} diff --git a/src/esp/batched_sim/MagnumRendererDemo.cpp b/src/esp/batched_sim/MagnumRendererDemo.cpp new file mode 100644 index 0000000000..9446d52072 --- /dev/null +++ b/src/esp/batched_sim/MagnumRendererDemo.cpp @@ -0,0 +1,112 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MagnumRenderer.h" + +namespace esp { namespace batched_sim { + +namespace Cr = Corrade; +namespace Mn = Magnum; + +using namespace Cr::Containers::Literals; +using namespace Mn::Math::Literals; + +class MagnumRendererDemo: public Mn::Platform::Application { + public: + explicit MagnumRendererDemo(const Arguments& arguments); + + private: + void drawEvent() override; + + Cr::Containers::Optional _renderer; + Mn::DebugTools::FrameProfilerGL _profiler; +}; + +MagnumRendererDemo::MagnumRendererDemo(const Arguments& arguments): Mn::Platform::Application{arguments, Mn::NoCreate} { + Cr::Utility::Arguments args; + args.addArgument("file").setHelp("file", "bps file to load") + .addOption('I', "importer") + .setHelp("importer", "importer plugin to use instead of BpsImporter") + .addBooleanOption("profile") + .setHelp("profile", "profile frame times") + .addOption('S', "size", "128 128") + .setHelp("size", "size of one rendered tile") + .addOption('C', "count", "16 12") + .setHelp("count", "tile count") + .addBooleanOption("no-textures") + .setHelp("no-textures", "render without textures") + .addSkippedPrefix("magnum", "engine-specific options") + .parse(arguments.argc, arguments.argv); + + const Mn::Vector2i tileSize = args.value("size"); + const Mn::Vector2i tileCount = args.value("count"); + + /* Create a window with size matching the tile count & size */ + create(Configuration{} + 
.setSize(tileSize*tileCount, Mn::Vector2{1.0f}) + .setTitle("Magnum Renderer Demo")); + + /* Create the renderer */ + MagnumRendererConfiguration configuration; + configuration + .setFlags(args.isSet("no-textures") ? MagnumRendererFlag::NoTextures : MagnumRendererFlags{}) + .setTileSizeCount(tileSize, tileCount); + _renderer.emplace(configuration); + + if(!args.value("importer").empty()) + _renderer->addFile(args.value("file"), args.value("importer")); + else // TODO handle this fallback inside Renderer directly + _renderer->addFile(args.value("file")); + + /* Hardcode camera position + projection for all views to be above the scene */ + for(std::size_t i = 0; i != _renderer->sceneCount(); ++i) { + _renderer->camera(i) = + Mn::Matrix4::perspectiveProjection(35.0_degf, Mn::Vector2{tileSize}.aspectRatio(), 0.01f, 1000.0f)* + (Mn::Matrix4::rotationX(-90.0_degf)*Mn::Matrix4::translation(Mn::Vector3::zAxis(20.0f))).inverted(); + + /* Add stuff */ + _renderer->add(i, Cr::Utility::format("Baked_sc{}_staging_{:.2}", i/21, i%21)); + } + + Mn::Debug{} << "Rendering" << tileCount.product() << tileSize << "tiles every frame"; + + if(args.isSet("profile")) { + _profiler = Mn::DebugTools::FrameProfilerGL{ + Mn::DebugTools::FrameProfilerGL::Value::CpuDuration| + Mn::DebugTools::FrameProfilerGL::Value::GpuDuration, 50}; + } else _profiler.disable(); +} + +void MagnumRendererDemo::drawEvent() { + for(std::size_t i = 0; i != _renderer->sceneCount(); ++i) { + /* Rotate the first-ever object in the scene */ + Mn::Matrix4& transformation = _renderer->transformations(i)[0]; + transformation = transformation*Mn::Matrix4::rotationY(0.05_degf*Mn::Float(i)); + } + + Mn::GL::defaultFramebuffer.clear(Mn::GL::FramebufferClear::Color|Mn::GL::FramebufferClear::Depth); + + _profiler.beginFrame(); + + _renderer->draw(Mn::GL::defaultFramebuffer); + + _profiler.endFrame(); + _profiler.printStatistics(10); + + swapBuffers(); + if(_profiler.isEnabled()) redraw(); +} + +}} + 
+MAGNUM_APPLICATION_MAIN(esp::batched_sim::MagnumRendererDemo) diff --git a/src/esp/batched_sim/MagnumRendererStandalone.cpp b/src/esp/batched_sim/MagnumRendererStandalone.cpp new file mode 100644 index 0000000000..806d66c8fe --- /dev/null +++ b/src/esp/batched_sim/MagnumRendererStandalone.cpp @@ -0,0 +1,137 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +#include "MagnumRendererStandalone.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "esp/batched_sim/configure.h" + +namespace Cr = Corrade; +namespace Mn = Magnum; + +namespace esp { namespace batched_sim { + +struct MagnumRendererStandalone::State { + Mn::Platform::WindowlessGLContext context; + Mn::Platform::GLContext magnumContext{Mn::NoCreate}; + Cr::Containers::Optional renderer; + Mn::GL::Renderbuffer color{Mn::NoCreate}, depth{Mn::NoCreate}; + Mn::GL::Framebuffer framebuffer{Mn::NoCreate}; + Mn::GL::BufferImage2D colorBuffer{Mn::NoCreate}; + Mn::GL::BufferImage2D depthBuffer{Mn::NoCreate}; + cudaGraphicsResource* cudaColorBuffer{}; + cudaGraphicsResource* cudaDepthBuffer{}; + + State(): context{Mn::Platform::WindowlessGLContext::Configuration{} + .setCudaDevice(0)} { + context.makeCurrent(); + magnumContext.create(); + color = Mn::GL::Renderbuffer{}; + depth = Mn::GL::Renderbuffer{}; + } + + ~State() { + /* Should be unmapped before the GL object gets destroyed, I guess? 
*/ + if(cudaColorBuffer) + checkCudaErrors(cudaGraphicsUnmapResources(1, &cudaColorBuffer, 0)); + if(cudaDepthBuffer) + checkCudaErrors(cudaGraphicsUnmapResources(1, &cudaDepthBuffer, 0)); + } +}; + +MagnumRendererStandalone::MagnumRendererStandalone(const MagnumRendererConfiguration& configuration): MagnumRenderer{Mn::NoCreate}, _state{Cr::InPlaceInit} { + /* Create the renderer only once the GL context is ready */ + create(configuration); + + const Mn::Vector2i size = _state->renderer->tileSize()*_state->renderer->tileCount(); + _state->color.setStorage(Mn::GL::RenderbufferFormat::RGBA8, size); + _state->depth.setStorage(Mn::GL::RenderbufferFormat::DepthComponent32F, size); + _state->framebuffer = Mn::GL::Framebuffer{Mn::Range2Di{{}, size}}; + _state->framebuffer + .attachRenderbuffer(Mn::GL::Framebuffer::ColorAttachment{0}, _state->color) + // TODO just GL::Framebuffer::DepthAttachment, this is shit + .attachRenderbuffer(Mn::GL::Framebuffer::BufferAttachment::Depth, _state->depth); + /* Defer the buffer initialization to the point when it's actually read + into */ + _state->colorBuffer = Mn::GL::BufferImage2D{framebufferColorFormat()}; + _state->depthBuffer = Mn::GL::BufferImage2D{framebufferDepthFormat()}; +} + +MagnumRendererStandalone::~MagnumRendererStandalone() = default; + +Mn::PixelFormat MagnumRendererStandalone::framebufferColorFormat() const { + return Mn::PixelFormat::RGBA8Unorm; +} + +Mn::PixelFormat MagnumRendererStandalone::framebufferDepthFormat() const { + return Mn::PixelFormat::Depth32F; +} + +void MagnumRendererStandalone::draw() { + _state->framebuffer.clear(Mn::GL::FramebufferClear::Color|Mn::GL::FramebufferClear::Depth); + _state->renderer->draw(_state->framebuffer); +} + +const void* MagnumRendererStandalone::cudaColorBufferDevicePointer() { + /* If the CUDA buffer exists already, it's mapped from the previous call. + Unmap it first so we can read into it from GL. 
*/ + if(_state->cudaColorBuffer) + checkCudaErrors(cudaGraphicsUnmapResources(1, &_state->cudaColorBuffer, 0)); + + /* Read to the buffer image, allocating it if it's not already. Can't really + return a pointer directly to the renderbuffer because the returned device + pointer is expected to be linearized. */ + _state->framebuffer.read({{}, tileCount()*tileSize()}, _state->colorBuffer, Mn::GL::BufferUsage::DynamicRead); + + /* Initialize the CUDA buffer from the GL buffer image if it's not already */ + if(!_state->cudaColorBuffer) { + checkCudaErrors(cudaGraphicsGLRegisterBuffer(&_state->cudaColorBuffer, _state->colorBuffer.buffer().id(), cudaGraphicsRegisterFlagsReadOnly)); + } + + /* Map the buffer and return the device pointer */ + checkCudaErrors(cudaGraphicsMapResources(1, &_state->cudaColorBuffer, 0)); + void* pointer; + std::size_t size; + checkCudaErrors(cudaGraphicsResourceGetMappedPointer(&pointer, &size, _state->cudaColorBuffer)); + CORRADE_INTERNAL_ASSERT(size == _state->colorBuffer.size().product()*_state->colorBuffer.pixelSize()); + return pointer; +} + +const void* MagnumRendererStandalone::cudaDepthBufferDevicePointer() { + /* If the CUDA buffer exists already, it's mapped from the previous call. + Unmap it first so we can read into it from GL. */ + if(_state->cudaDepthBuffer) + checkCudaErrors(cudaGraphicsUnmapResources(1, &_state->cudaDepthBuffer, 0)); + + /* Read to the buffer image, allocating it if it's not already. Can't really + return a pointer directly to the renderbuffer because the returned device + pointer is expected to be linearized. 
*/ + _state->framebuffer.read({{}, tileCount()*tileSize()}, _state->depthBuffer, Mn::GL::BufferUsage::DynamicRead); + + /* Initialize the CUDA buffer from the GL buffer image if it's not already */ + if(!_state->cudaDepthBuffer) { + checkCudaErrors(cudaGraphicsGLRegisterBuffer(&_state->cudaDepthBuffer, _state->depthBuffer.buffer().id(), cudaGraphicsRegisterFlagsReadOnly)); + } + + /* Map the buffer and return the device pointer */ + checkCudaErrors(cudaGraphicsMapResources(1, &_state->cudaDepthBuffer, 0)); + void* pointer; + std::size_t size; + checkCudaErrors(cudaGraphicsResourceGetMappedPointer(&pointer, &size, _state->cudaDepthBuffer)); + CORRADE_INTERNAL_ASSERT(size == _state->depthBuffer.size().product()*_state->depthBuffer.pixelSize()); + return pointer; +} + +}} diff --git a/src/esp/batched_sim/MagnumRendererStandalone.h b/src/esp/batched_sim/MagnumRendererStandalone.h new file mode 100644 index 0000000000..b80acc2cb7 --- /dev/null +++ b/src/esp/batched_sim/MagnumRendererStandalone.h @@ -0,0 +1,32 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef ESP_BATCHEDSIM_MAGNUM_RENDERER_STANDALONE_H_ +#define ESP_BATCHEDSIM_MAGNUM_RENDERER_STANDALONE_H_ + +#include "MagnumRenderer.h" + +namespace esp { namespace batched_sim { + +class MagnumRendererStandalone: public MagnumRenderer { + public: + explicit MagnumRendererStandalone(const MagnumRendererConfiguration& configuration); + ~MagnumRendererStandalone(); + + Magnum::PixelFormat framebufferColorFormat() const; + Magnum::PixelFormat framebufferDepthFormat() const; + + void draw(); + + const void* cudaColorBufferDevicePointer(); + const void* cudaDepthBufferDevicePointer(); + + private: + struct State; + Corrade::Containers::Pointer _state; +}; + +}} + +#endif diff --git a/src/esp/batched_sim/PlacementHelper.cpp b/src/esp/batched_sim/PlacementHelper.cpp index 2a19b17656..e4027dace7 100644 --- a/src/esp/batched_sim/PlacementHelper.cpp +++ b/src/esp/batched_sim/PlacementHelper.cpp @@ -6,8 +6,11 @@ #include "esp/batched_sim/BatchedSimAssert.h" #include "esp/batched_sim/GlmUtils.h" +#include + #include "esp/core/Check.h" +namespace Cr = Corrade; namespace Mn = Magnum; namespace esp { diff --git a/src/esp/batched_sim/PlacementHelper.h b/src/esp/batched_sim/PlacementHelper.h index 542f0df5a1..c46c9a2d9a 100644 --- a/src/esp/batched_sim/PlacementHelper.h +++ b/src/esp/batched_sim/PlacementHelper.h @@ -19,7 +19,7 @@ class PlacementHelper { PlacementHelper(const ColumnGridSet& columnGridSet, const CollisionBroadphaseGrid& colGrid, const serialize::Collection& collection, esp::core::Random& random, int maxFailedPlacements); - bool place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject, const Mn::Vector3* fallbackPos=nullptr) const; + bool place(Magnum::Matrix4& heldObjMat, const FreeObject& freeObject, const Magnum::Vector3* fallbackPos=nullptr) const; private: const ColumnGridSet& columnGridSet_; diff --git a/src/esp/batched_sim/README.md b/src/esp/batched_sim/README.md new file mode 100644 index 0000000000..6da579f755 --- /dev/null +++ 
b/src/esp/batched_sim/README.md @@ -0,0 +1,106 @@ +# How? + +Build as usual. There should appear a `MagnumRendererConverter` binary and a +`MagnumRendererDemo` binary somewhere in the build dir. + +# Running the converter + +Expects the same data as the BPS conversion pipeline, except maybe extra +subdirectories. Patch the source if it misbehaves. + +```sh +./RelWithDebInfo/bin/MagnumRendererConverter path/to/input-data/ yay.gltf +``` + +Output is a `yay.gltf`, `yay.bin` and `yay.0.ktx2`, with 105 Replica scenes +and deduplicated meshes. Not Basis-compressed, because a 2048x2048x105 texture +needs lots of memory, that has to be done separately using for example the +`magnum-sceneconverter` utility (take it from the prebuilt tools): + +```sh +magnum-imageconverter -D3 --map -v -C BasisImageConverter \ + -c uastc,rdo_uastc,rdo_uastc_dict_size=1024,ktx2_zstd_supercompression_level=20,mip_gen,threads=0,y_flip=false \ + yay.0.ktx2 yay.0.basis.ktx2 +``` + +Takes about 25 minutes and 12 GB RAM on a 8-core laptop from 2018. Once done, +rename the output back to `yay.0.ktx2`. Now the input glTF is Basis-compressed. + +# Running the demo + +With the file you just generated: + +```sh +./RelWithDebInfo/bin/MagnumRendererDemo \ + --count "15 7" --size "128 192" --profile \ + yay.gltf +``` + +It could work, hopefully? Profiling info goes into the console. + +------------------------------------------------------------------------------- + +# Outdated info below, DO NOT READ + +# Runing the demo + +Use `--profile` to profile frame times to console. 
+ +```sh +cd data/bps_data/combined_Stage_v3_sc0_staging +../../../build-externalmagnum/esp/batched_sim/BatchRendering combined_Stage_v3_sc0_staging_trimesh.bps -I ../../../build-externalmagnum/esp/batched_sim/BpsImporter.so +``` + +```sh +cd data/bps_data/combined_Stage_v3_sc0_staging +../../../build-externalmagnum/esp/batched_sim/MagnumRendererDemo combined_Stage_v3_sc0_staging_trimesh.bps -I ../../../build-externalmagnum/esp/batched_sim/BpsImporter.so +``` + +# Info about imported data + +Use `--info` to display also info about images, textures and materials. Add +the following options to `-i` to show different variants: + +- `meshViews` to import one mesh with several views +- `textureArrays` to import one 2D array texture with several layers + +```sh +cd data/bps_data/combined_Stage_v3_sc0_staging +magnum-sceneconverter combined_Stage_v3_sc0_staging_trimesh.bps -I ../../../build-externalmagnum/esp/batched_sim/BpsImporter.so -i basisFormat=Astc4x4RGBA --map --info-scenes --info-meshes +``` + +# Opening the BPS data in magnum-player + +Pick different format if on a NV GPU (e.g. `Bc7RGBA`): Add `meshViews` to `-i` +to import one mesh with several views (which `magnum-player` doesn't currently +understand). 
+ +```sh +cd data/bps_data/combined_Stage_v3_sc0_staging +magnum-player combined_Stage_v3_sc0_staging_trimesh.bps -I ../../../build-externalmagnum/esp/batched_sim/BpsImporter.so -i basisFormat=Astc4x4RGBA --map +``` + +# Converting to a glTF + +```sh +magnum-sceneconverter -i basisFormat=Astc4x4RGBA,textureArrays,meshViews,instanceScene=false,textureArraysForceAllMaterialsTextured -I ~/Code/fair/batch/gala_kinematic/build-externalmagnum/esp/batched_sim/BpsImporter.so -C ~/Code/magnum-plugins/build/Debug/lib/magnum-d/sceneconverters/GltfSceneConverter.so ~/Code/fair/batch/gala_kinematic/data/bps_data/replicacad_composite/replicacad_composite.bps replicacad_composite.views.latest.gltf -c extensionRequired=MAGNUMX_mesh_views,extensionUsed=MAGNUMX_mesh_views +``` + +info: + +```sh +magnum-sceneconverter -i basisFormat=Astc4x4RGBA,textureArrays,meshViews,instanceScene=false,phongMaterialFallback=false,meshlets=false -I ~/Code/fair/batch/gala_kinematic/build-externalmagnum/esp/batched_sim/BpsImporter.so ~/Code/fair/batch/gala_kinematic/data/bps_data/replicacad_composite/replicacad_composite.bps replicacad_composite.views.latest.gltf --info -i phongMaterialFallback=false +``` + +info on the produced gltf + +```sh +magnum-sceneconverter -i phongMaterialFallback=false,ignoreRequiredExtensions,experimentalKhrTextureKtx -I ~/Code/magnum-plugins/build/Debug/lib/magnum-d/importers/GltfImporter.so replicacad_composite.views.latest.gltf --info -i phongMaterialFallback=false,ignoreRequiredExtensions,experimentalKhrTextureKtx +``` + +# Producing a glTF from scratch + +```sh +cd path/where/all/data/is +~/Code/fair/batch/gala_kinematic/build-externalmagnum/esp/batched_sim/MagnumRendererConverter . 
yay.gltf -C ~/Code/magnum-plugins/build/Debug/lib/magnum-d/sceneconverters/GltfSceneConverter.so +``` diff --git a/src/esp/batched_sim/configure.h.cmake b/src/esp/batched_sim/configure.h.cmake new file mode 100644 index 0000000000..16d112136a --- /dev/null +++ b/src/esp/batched_sim/configure.h.cmake @@ -0,0 +1,2 @@ +#cmakedefine MAGNUM_RENDERER +#cmakedefine BPSIMPORTER_PLUGIN_FILENAME "${BPSIMPORTER_PLUGIN_FILENAME}"_s diff --git a/src/esp/bindings/BatchedSimBindings.cpp b/src/esp/bindings/BatchedSimBindings.cpp index 7391031671..87ecab6dd0 100644 --- a/src/esp/bindings/BatchedSimBindings.cpp +++ b/src/esp/bindings/BatchedSimBindings.cpp @@ -19,7 +19,13 @@ namespace batched_sim { namespace { py::capsule getColorMemory(BatchedSimulator& bsim, const uint32_t groupIdx) { + #ifdef MAGNUM_RENDERER + CORRADE_ASSERT(groupIdx == 0, + "esp::batched_sim::getColorMemory(): what is groupIdx" << groupIdx << "for?", py::capsule()); + return py::capsule(bsim.getMagnumRenderer().cudaColorBufferDevicePointer()); + #else return py::capsule(bsim.getBpsRenderer().getColorPointer(groupIdx)); + #endif } py::capsule getDebugColorMemory(BatchedSimulator& bsim, const uint32_t groupIdx) { diff --git a/src/esp/geo/VoxelGrid.cpp b/src/esp/geo/VoxelGrid.cpp index 7a2e832f1d..f99c024781 100644 --- a/src/esp/geo/VoxelGrid.cpp +++ b/src/esp/geo/VoxelGrid.cpp @@ -161,7 +161,7 @@ void VoxelGrid::addVoxelToMeshPrimitives( Corrade::Containers::StridedArrayView1D cubeNormals = cubeData.attribute(Mn::Trade::MeshAttribute::Normal); Cr::Containers::ArrayView cubeIndices = - cubeData.indices(); + cubeData.indices().asContiguous(); for (std::size_t i = 0; i != cubeData.vertexCount(); ++i) { arrayAppend(vertexData, Cr::InPlaceInit, cubePositions[i] * m_voxelSize / 2 + mid, @@ -195,7 +195,7 @@ void VoxelGrid::addVectorToMeshPrimitives( Corrade::Containers::StridedArrayView1D coneNormals = coneData.attribute(Mn::Trade::MeshAttribute::Normal); Cr::Containers::ArrayView coneIndices = - coneData.indices(); + 
coneData.indices().asContiguous(); // Get rotation quaternion Mn::Rad angle = Mn::Math::angle(vec.normalized(), Mn::Vector3(0, 1, 0)); @@ -231,7 +231,7 @@ void VoxelGrid::addVectorToMeshPrimitives( cylinderNormals = cylinderData.attribute(Mn::Trade::MeshAttribute::Normal); Cr::Containers::ArrayView cylinderIndices = - cylinderData.indices(); + cylinderData.indices().asContiguous(); for (std::size_t i = 0; i != cylinderData.vertexCount(); ++i) { arrayAppend(vertexData, Cr::InPlaceInit, diff --git a/src/esp/sim/Simulator.cpp b/src/esp/sim/Simulator.cpp index 13a15d2f16..9f8bb3c3bb 100644 --- a/src/esp/sim/Simulator.cpp +++ b/src/esp/sim/Simulator.cpp @@ -168,10 +168,10 @@ void Simulator::reconfigure(const SimulatorConfiguration& cfg) { renderer_->acquireGlContext(); } else { - CORRADE_ASSERT( - !Magnum::GL::Context::hasCurrent(), - "Simulator::reconfigure() : Unexpected existing context when " - "createRenderer==false", ); +// CORRADE_ASSERT( +// !Magnum::GL::Context::hasCurrent(), +// "Simulator::reconfigure() : Unexpected existing context when " +// "createRenderer==false", ); } // (re) create scene instance diff --git a/src/tests/BatchedSimTest.cpp b/src/tests/BatchedSimTest.cpp index b458074813..521c2ccd7a 100644 --- a/src/tests/BatchedSimTest.cpp +++ b/src/tests/BatchedSimTest.cpp @@ -4,19 +4,28 @@ #include "esp/batched_sim/BatchedSimAssert.h" #include "esp/batched_sim/BatchedSimulator.h" + +#include + +#include + +#ifdef MAGNUM_RENDERER +#include +#include +#include +#else #include "esp/batched_sim/BpsSceneMapping.h" #include "esp/batched_sim/ColumnGrid.h" #include "esp/batched_sim/GlmUtils.h" #include "esp/core/logging.h" -#include -#include -#include #include +#include // FIXME -// #define STB_IMAGE_WRITE_IMPLEMENTATION +//#define STB_IMAGE_WRITE_IMPLEMENTATION #include +#endif // for key_press #include @@ -29,7 +38,19 @@ namespace Mn = Magnum; namespace esp { namespace batched_sim { +namespace Cr = Corrade; +namespace Mn = Magnum; + namespace { + +#ifdef 
MAGNUM_RENDERER +Mn::Image2D copyCudaBufferToImage(const void* devicePointer, Mn::PixelFormat pixelFormat, const Mn::Vector2i& size) { + Mn::Image2D out{Mn::PixelStorage{}.setAlignment(1), pixelFormat, size, Cr::Containers::Array{Cr::NoInit, std::size_t(size.product())*Mn::pixelSize(pixelFormat)}}; + + cudaMemcpy(out.data(), devicePointer, out.data().size(), cudaMemcpyDeviceToHost); + return out; +} +#else template static std::vector copyToHost(const T* dev_ptr, uint32_t width, @@ -80,6 +101,7 @@ void saveFrame(const char* fname, stbi_write_bmp(fname2, width, height, num_channels, buffer.data()); } } +#endif // Hacky way to get keypresses at the terminal. Linux only. // https://stackoverflow.com/a/67038432 @@ -236,10 +258,12 @@ int key_press() { // not working: ¹ (251), num lock (-144), caps lock (-20), } } +#ifndef MAGNUM_RENDERER float calcLerpFraction(float x, float src0, float src1) { BATCHED_SIM_ASSERT(src0 < src1); return (x - src0) / (src1 - src0); } +#endif } // namespace @@ -410,6 +434,7 @@ TEST_F(BatchedSimulatorTest, basic) { continue; } + #ifndef MAGNUM_RENDERER uint8_t* base_color_ptr = isDebug ? 
bsim.getDebugBpsRenderer().getColorPointer() : bsim.getBpsRenderer().getColorPointer(); @@ -438,6 +463,31 @@ TEST_F(BatchedSimulatorTest, basic) { // base_depth_ptr + b * out_dim.x * out_dim.y, out_dim.x, // out_dim.y, 1); } + #else + CORRADE_INTERNAL_ASSERT(!isDebug); // TODO + + Mn::Image2D color = copyCudaBufferToImage(bsim.getMagnumRenderer().cudaColorBufferDevicePointer(), bsim.getMagnumRenderer().framebufferColorFormat(), bsim.getMagnumRenderer().tileSize()*bsim.getMagnumRenderer().tileCount()); + Mn::Image2D depth = copyCudaBufferToImage(bsim.getMagnumRenderer().cudaDepthBufferDevicePointer(), bsim.getMagnumRenderer().framebufferDepthFormat(), bsim.getMagnumRenderer().tileSize()*bsim.getMagnumRenderer().tileCount()); + + Cr::PluginManager::Manager converterManager; + Cr::Containers::Pointer converter = converterManager.loadAndInstantiate("AnyImageConverter"); + + Cr::Containers::Optional> out = converter->convertToData(color); + CORRADE_INTERNAL_ASSERT(out); + if(doSaveAllFramesForVideo) + Cr::Utility::Path::write(Cr::Utility::format(isDebug ? "debug_frame{:.4}" : "frame{:.4}.png", frameIdx), *out); + Cr::Utility::Path::write(isDebug ? "latest_debug.png" : "latest.png", *out); + + /* Radiance HDR is used with bundled Magnum instead of EXR. 
It's awfully + low-precision but at least it doesn't require you to have the full + OpenEXR set up */ + #ifndef MAGNUM_BUILD_STATIC + CORRADE_INTERNAL_ASSERT(converter->convertToFile(depth, "out_depth.exr")); + #else + Mn::ImageView2D redButActuallyDepth{Mn::PixelFormat::R32F, depth.size(), depth.data()}; + CORRADE_INTERNAL_ASSERT(converter->convertToFile(redButActuallyDepth, "out_depth.hdr")); + #endif + #endif } int key = 0; diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 1534f6d318..5d713895a8 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -90,6 +90,13 @@ corrade_add_test(SensorTest SensorTest.cpp LIBRARIES sensor sim) target_include_directories(SensorTest PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) test(BatchedSimTest batched_sim) +if(MAGNUM_RENDERER) + target_include_directories( + BatchedSimTest PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ${CMAKE_CURRENT_LIST_DIR}/../esp/cuda_helpers + ) + target_link_libraries(BatchedSimTest ${CUDART_LIBRARY}) +endif() test(ColumnGridTest batched_sim) diff --git a/src/tests/configure.h.cmake b/src/tests/configure.h.cmake index 83b3d384ee..aecb7f8a85 100644 --- a/src/tests/configure.h.cmake +++ b/src/tests/configure.h.cmake @@ -7,3 +7,5 @@ #define DATA_DIR "${DATA_DIR}" #define FILE_THAT_EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/IOTest.cpp" + +#cmakedefine BPS_IMPORTER_PLUGIN_FILENAME "${BPS_IMPORTER_PLUGIN_FILENAME}" From 569ae09936beeb67debbd256293e4bebd2bd54f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 19 May 2022 20:31:00 +0200 Subject: [PATCH 45/53] Remove some sign flipping nonsense. Hopefully it now does what it should do, except that I'm still unsure WHY it would do that. 
--- src/deps/magnum-plugins | 2 +- src/esp/batched_sim/MagnumRenderer.cpp | 3 +-- src/esp/batched_sim/README.md | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/deps/magnum-plugins b/src/deps/magnum-plugins index ee1a20981a..87c9777e16 160000 --- a/src/deps/magnum-plugins +++ b/src/deps/magnum-plugins @@ -1 +1 @@ -Subproject commit ee1a20981ad5fd8670b98969d54c8ed47bd446db +Subproject commit 87c9777e1681ec041a47d66582f9753c1f5f2af3 diff --git a/src/esp/batched_sim/MagnumRenderer.cpp b/src/esp/batched_sim/MagnumRenderer.cpp index 8ff8263e6b..ac7bc9b59f 100644 --- a/src/esp/batched_sim/MagnumRenderer.cpp +++ b/src/esp/batched_sim/MagnumRenderer.cpp @@ -439,8 +439,7 @@ std::size_t MagnumRenderer::add(const Mn::UnsignedInt sceneId, const Cr::Contain arrayAppend(scene.draws, Cr::InPlaceInit) .setMaterialId(meshView.materialId); arrayAppend(scene.textureTransformations, Cr::InPlaceInit) - // TODO flip again once the input is fixed - .setLayer(104 - _state->textureLayers[meshView.materialId]); + .setLayer(_state->textureLayers[meshView.materialId]); arrayAppend(scene.drawCommands, Cr::InPlaceInit, meshView.indexOffsetInBytes, meshView.indexCount); } diff --git a/src/esp/batched_sim/README.md b/src/esp/batched_sim/README.md index 6da579f755..192425c1c8 100644 --- a/src/esp/batched_sim/README.md +++ b/src/esp/batched_sim/README.md @@ -18,8 +18,8 @@ needs lots of memory, that has to be done separately using for example the `magnum-sceneconverter` utility (take it from the prebuilt tools): ```sh -magnum-imageconverter -D3 --map -v -C BasisImageConverter \ - -c uastc,rdo_uastc,rdo_uastc_dict_size=1024,ktx2_zstd_supercompression_level=20,mip_gen,threads=0,y_flip=false \ +magnum-imageconverter -D3 --map -C BasisImageConverter \ + -c uastc,rdo_uastc,rdo_uastc_dict_size=1024,ktx2_zstd_supercompression_level=20,mip_gen,threads=0 \ yay.0.ktx2 yay.0.basis.ktx2 ``` From 7683fd1d30005a721d8637a6f86d0318283ea189 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 19 May 2022 21:40:23 +0200 Subject: [PATCH 46/53] Add more stuff in the converter; handle untextured objects. --- src/esp/batched_sim/CMakeLists.txt | 1 + .../batched_sim/MagnumRendererConverter.cpp | 209 +++++++++++------- 2 files changed, 132 insertions(+), 78 deletions(-) diff --git a/src/esp/batched_sim/CMakeLists.txt b/src/esp/batched_sim/CMakeLists.txt index 5b1580d95a..99f6ad7cd6 100644 --- a/src/esp/batched_sim/CMakeLists.txt +++ b/src/esp/batched_sim/CMakeLists.txt @@ -119,6 +119,7 @@ if(MAGNUM_RENDERER) if(MAGNUM_BUILD_STATIC) target_link_libraries(MagnumRendererConverter PRIVATE Magnum::AnySceneImporter + MagnumPlugins::AssimpImporter MagnumPlugins::StbImageImporter MagnumPlugins::KtxImageConverter MagnumPlugins::GltfSceneConverter diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp index d0260fd7ea..eeb33c22d4 100644 --- a/src/esp/batched_sim/MagnumRendererConverter.cpp +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include using namespace Magnum; +using namespace Math::Literals; using namespace Containers::Literals; /* std::hash specialization to be able to use Corrade::String in unordered_map */ @@ -65,6 +67,10 @@ int main(int argc, char** argv) { // TODO set up once GltfSceneConverter uses it directly } + /* Magnum's OBJ importer is ... well, not great. It'll get replaced eventually. 
*/ + if(importerManager.loadState("ObjImporter") != PluginManager::LoadState::NotFound) + importerManager.setPreferredPlugins("ObjImporter", {"AssimpImporter"}); + Containers::Pointer importer = importerManager.loadAndInstantiate("AnySceneImporter"); Containers::Pointer converter = converterManager.loadAndInstantiate(args.value("converter")); @@ -108,57 +114,13 @@ int main(int argc, char** argv) { Int meshMaterial; }; Containers::Array meshes; -// const auto import = [&](Containers::StringView filename) { -// importer->openFile(filename); -// CORRADE_INTERNAL_ASSERT(importer->meshCount() == 1); -// CORRADE_INTERNAL_ASSERT(importer->materialCount() == 1); -// -// Containers::Optional mesh = importer->mesh(0); -// Containers::Optional material = importer->material(0); -// -// /* If the material is textured, extract the image as well */ -// // TODO findAttributeId()! -// if(material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture)) { -// UnsignedInt& textureId = material->mutableAttribute(Trade::MaterialAttribute::BaseColorTexture); -// -// Containers::Optional texture = importer->texture(textureId); -// CORRADE_INTERNAL_ASSERT(texture && texture->type() == Trade::TextureType::Texture2D); -// -// /* Patch the material to use the zero texture but add a layer as well and -// make it just Flat */ -// textureId = 0; -// CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); -// Containers::Array attributes = material->releaseAttributeData(); -// arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size())); -// material = Trade::MaterialData{Trade::MaterialType::Flat, std::move(attributes)}; -// -// arrayAppend(inputImages, *importer->image2D(texture->image())); -// -// /* Otherwise just make it Flat */ -// } else { -// CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); -// material = Trade::MaterialData{Trade::MaterialType::Flat, material->releaseAttributeData()}; -// } -// -// /* Object setup */ -// Object o; -// o.id = 
objects.size(); -// o.mesh = 0; -// // TODO concatenate() should do this on its own -// o.meshIndexOffset = indexOffset; -// o.meshIndexCount = mesh->indexCount(); -// indexOffset += mesh->indexCount(); -// converter->setObjectName(inputMeshes.size(), Utility::Path::split(filename).second()); -// o.meshMaterial = *converter->add(*material); -// -// /* Mesh setup */ -// // TODO convert from a strip/... if not triangles -// arrayAppend(inputMeshes, *std::move(mesh)); -// -// arrayAppend(objects, o); -// }; - - const auto importMultiple = [&](Containers::StringView filename, std::unordered_map>* uniqueMeshes = nullptr) { + + const auto import = [&]( + Containers::StringView filename, + Containers::StringView name = {}, + Containers::Optional forceColor = {}, + std::unordered_map>* uniqueMeshes = nullptr + ) { CORRADE_INTERNAL_ASSERT_OUTPUT(importer->openFile(filename)); CORRADE_INTERNAL_ASSERT(importer->sceneCount() == 1); // TODO multi-scene?? @@ -170,7 +132,9 @@ int main(int argc, char** argv) { root.mapping = parents.size(); root.parent = -1; arrayAppend(parents, root); - converter->setObjectName(root.mapping, Utility::Path::splitExtension(Utility::Path::split(filename).second()).first()); + converter->setObjectName(root.mapping, + name ? 
name : + Utility::Path::splitExtension(Utility::Path::split(filename).second()).first()); /* Meshes are unfortunately named in a useless way, so override them with names from the objects referencing them instead */ @@ -237,9 +201,10 @@ int main(int argc, char** argv) { } if(!duplicate) { - if(uniqueMeshes) Debug{} << "New mesh" << meshName << "in" << Utility::Path::split(filename).second(); - - uniqueMeshes->insert({std::move(meshName), {indexOffset, mesh->indexCount()}}); + if(uniqueMeshes) { + Debug{} << "New mesh" << meshName << "in" << Utility::Path::split(filename).second(); + uniqueMeshes->insert({std::move(meshName), {indexOffset, mesh->indexCount()}}); + } m.meshIndexOffset = indexOffset; indexOffset += mesh->indexCount()*4; // TODO ints hardcoded @@ -253,34 +218,59 @@ int main(int argc, char** argv) { /* Otherwise parse it. If textured, extract the image as well. */ } else { - Debug{} << "New material for" << meshNames[transformationMeshMaterial.first()] << "in" << Utility::Path::split(filename).second(); - Containers::Optional material = importer->material(transformationMeshMaterial.second()); CORRADE_INTERNAL_ASSERT(material); + + /* Override the color if the attribute is already there. Otherwise a + new attribute gets added to the attributes array below */ + bool hasColorAttribute = false; + if(forceColor && material->hasAttribute(Trade::MaterialAttribute::BaseColor)) { + hasColorAttribute = true; + material->mutableAttribute(Trade::MaterialAttribute::BaseColor) = *forceColor; + } + + /* Not calling releaseAttributeData() yet either, as we need to ask + hasAttribute() first */ + Containers::Array attributes; // TODO findAttributeId()! 
if(material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture)) { + Debug{} << "New textured material for" << meshNames[transformationMeshMaterial.first()] << "in" << Utility::Path::split(filename).second(); + UnsignedInt& textureId = material->mutableAttribute(Trade::MaterialAttribute::BaseColorTexture); Containers::Optional texture = importer->texture(textureId); CORRADE_INTERNAL_ASSERT(texture && texture->type() == Trade::TextureType::Texture2D); - /* Patch the material to use the zero texture but add a layer as well - and make it just Flat */ + /* Patch the material to use the zero texture but add a layer as + well, referencing the just added image (plus one for the first + white layer) and make it just Flat */ textureId = 0; CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); - Containers::Array attributes = material->releaseAttributeData(); - arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size())); - material = Trade::MaterialData{Trade::MaterialType::Flat, std::move(attributes)}; + attributes = material->releaseAttributeData(); + arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size() + 1)); + + // TODO add texture scaling if the image is smaller than 2K arrayAppend(inputImages, *importer->image2D(texture->image())); - /* Otherwise just make it Flat */ + /* Otherwise make it reference the first (white) image layer and make + it just Flat */ } else { - // TODO make it reference a white texture?? 
- CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); - material = Trade::MaterialData{Trade::MaterialType::Flat, material->releaseAttributeData()}; + Debug{} << "New untextured material for" << meshNames[transformationMeshMaterial.first()] << "in" << Utility::Path::split(filename).second(); + + attributes = material->releaseAttributeData(); + arrayAppend(attributes, { + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTexture, 0u}, + Trade::MaterialAttributeData{"baseColorTextureLayer", 0u} + }); + } + + if(forceColor && !hasColorAttribute) { + arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColor, *forceColor); } + material = Trade::MaterialData{Trade::MaterialType::Flat, std::move(attributes)}; + importedMaterialIds[transformationMeshMaterial.second()] = m.meshMaterial = *converter->add(*material); } @@ -288,22 +278,67 @@ int main(int argc, char** argv) { } }; - /* Import the stuff. Well, some of it. */ - // TODO haha -// Containers::String debugModelsPath = Utility::Path::join(args.value("input"), "extra_source_data_v0/debug_models"); -// import(Utility::Path::join(debugModelsPath, "sphere_green_wireframe.glb")); -// import(Utility::Path::join(debugModelsPath, "sphere_orange_wireframe.glb")); -// import(Utility::Path::join(debugModelsPath, "cube_gray_shaded.glb")); -// import(Utility::Path::join(debugModelsPath, "cube_gray_shaded.glb")); // TODO - + /* Spot */ + Containers::String spotPath = Utility::Path::join(args.value("input"), "extra_source_data_v0/spot_arm_textured/spot_arm/spot_arm/meshes"); + for(auto&& i: std::initializer_list>{ + // TODO why not take the colored models instead of having to manually set + // the color? + // TODO or, rather, just parse the URDF already + {"arm0.link_el0.obj", 0x3f3f3f_rgbf}, + {"arm0.link_el1.obj", 0xffff00_rgbf}, + {"arm0.link_fngr.obj", 0x7f7f7f_rgbf}, + // TODO yeyeyeye ... 
bored + }) + import(Utility::Path::join(spotPath, i.first()), {}, i.second()); + + /* Debug models */ + // TODO generate these all from Primitives instead of doing crazy stuff with + // glTF files + Containers::String debugModelsPath = Utility::Path::join(args.value("input"), "extra_source_data_v0/debug_models"); + for(const char* filename: { + // TODO make the "wireframe" a reasonable wireframe mesh + "sphere_green_wireframe.glb", + "sphere_orange_wireframe.glb", + "sphere_blue_wireframe.glb", + "sphere_pink_wireframe.glb", + "cube_gray_shaded.glb", + "cube_green.glb", + "cube_blue.glb", + "cube_pink.glb", + "cube_green_wireframe.glb", + "cube_orange_wireframe.glb", + "cube_blue_wireframe.glb", + "cube_pink_wireframe.glb" + }) + import(Utility::Path::join(debugModelsPath, filename)); + + /* ReplicaCAD */ Containers::String replicaPath = Utility::Path::join(args.value("input"), "ReplicaCAD_baked_lighting_v1.5/stages_uncompressed"); std::unordered_map> uniqueReplicaMeshes; for(std::size_t i = 0; i <= 4; ++i) { for(std::size_t j = 0; j <= 20; ++j) { - importMultiple(Utility::Path::join(replicaPath, Utility::format("Baked_sc{}_staging_{:.2}.glb", i, j)), &uniqueReplicaMeshes); + import( + Utility::Path::join(replicaPath, Utility::format("Baked_sc{}_staging_{:.2}.glb", i, j)), + {}, {}, + &uniqueReplicaMeshes); } } + /* YCB */ + Containers::String ycbPath = Utility::Path::join(args.value("input"), "hab_ycb_v1.1/ycb/"); + for(const char* name: { + "024_bowl", + "003_cracker_box", + "010_potted_meat_can", + "002_master_chef_can", + "004_sugar_box", + "005_tomato_soup_can", + "009_gelatin_box", + "008_pudding_box", + "007_tuna_fish_can" + }) + import(Utility::Path::join({ycbPath, name, "google_16k/textured.obj"}), name); + /* Target layout for the mesh. 
So far just for flat rendering, no normals etc */ Trade::MeshData mesh{MeshPrimitive::Triangles, nullptr, { @@ -316,11 +351,29 @@ int main(int argc, char** argv) { MeshTools::concatenateInto(mesh, inputMeshReferences); CORRADE_INTERNAL_ASSERT(converter->add(mesh)); - /* A combined 3D image */ - Trade::ImageData3D image{PixelFormat::RGB8Unorm, {2048, 2048, Int(inputImages.size())}, Containers::Array{NoInit, 2048*2048*inputImages.size()*4}}; + /* A combined 3D image. First layer is fully white for input meshes that have + no textures. */ + Trade::ImageData3D image{PixelFormat::RGB8Unorm, + // TODO don't hardcode max image size (goes onto the same pile as shitty + // "atlasing") + {2048, 2048, Int(inputImages.size() + 1)}, + Containers::Array{NoInit, 2048*2048*(inputImages.size() + 1)*4}}; + /* Fill the first layer white */ + constexpr char white[]{'\xff'}; + Utility::copy( + Containers::StridedArrayView3D{white, {2048, 2048, 3}, {0, 0, 0}}, + image.mutablePixels()[0]); + /* Copy the other images */ for(std::size_t i = 0; i != inputImages.size(); ++i) { CORRADE_INTERNAL_ASSERT(inputImages[i].format() == PixelFormat::RGB8Unorm); - Utility::copy(inputImages[i].pixels(), image.mutablePixels()[i]); + Utility::copy(inputImages[i].pixels(), + // TODO did i mention the "atlas packing" is EXTREMELY SHITTY? + image.mutablePixels()[i + 1].prefix({ + // TODO have implicit conversion of Vector to StridedDimensions, FINALLY + std::size_t(inputImages[i].size().x()), + std::size_t(inputImages[i].size().y()), + std::size_t(inputImages[i].pixelSize()) + })); } /* Clear the original images array to relieve the memory pressure a bit -- From bddaf642f4be6cf73a072192716b398ea8fb65a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 19 May 2022 21:49:06 +0200 Subject: [PATCH 47/53] "Fix" a silly transformation hierarchy bug. Causes one extra empty draw to be submitted for each added object, which isn't exactly fast, but at least it's correct. 
Not great, not terrible. --- src/esp/batched_sim/MagnumRenderer.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/esp/batched_sim/MagnumRenderer.cpp b/src/esp/batched_sim/MagnumRenderer.cpp index ac7bc9b59f..2c4b244837 100644 --- a/src/esp/batched_sim/MagnumRenderer.cpp +++ b/src/esp/batched_sim/MagnumRenderer.cpp @@ -421,16 +421,26 @@ std::size_t MagnumRenderer::add(const Mn::UnsignedInt sceneId, const Cr::Contain CORRADE_ASSERT(found != _state->meshViewRangeForName.end(), "MagnumRenderer::add(): name" << name << "not found", {}); - /* Add the whole hierarchy under this name */ + /* Add a top-level object */ const std::size_t id = scene.transformations.size(); + // TODO this adds an empty draw, which is useless; do better (separate + // transforms from draws) + arrayAppend(scene.parents, -1); + arrayAppend(scene.transformations, transformation); + arrayAppend(scene.absoluteTransformations, Cr::InPlaceInit); + arrayAppend(scene.draws, Cr::InPlaceInit) + .setMaterialId(0); + arrayAppend(scene.textureTransformations, Cr::InPlaceInit) + .setLayer(0); + arrayAppend(scene.drawCommands, Cr::InPlaceInit, 0u, 0u); + + /* Add the whole hierarchy under this name */ for(std::size_t i = found->second.first(); i != found->second.second(); ++i) { const MeshView& meshView = _state->meshViews[i]; /* The following meshes are children of the first one, inheriting its transformation */ - arrayAppend(scene.parents, - i == found->second.first() ? -1 : -1); // TODO here should be id, but that doesn't work, why?? - arrayAppend(scene.transformations, - (i == found->second.first() ? 
transformation : Mn::Matrix4{})*meshView.transformation); + arrayAppend(scene.parents, id); + arrayAppend(scene.transformations, meshView.transformation); /* The actual absolute transformation will get filled each time draw() is called */ From 19d0488c559b8b59eaa50cbee479ef3c4de28b7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 19 May 2022 22:54:00 +0200 Subject: [PATCH 48/53] Handle Phong input materials correctly, texture-transform smaller images. Ahh finally found the core reason for all textures being Y-flipped for SOME REASON. Gotta fix that in glTF exporter, if I'd have proper tests from the start i would have discovered that weeks ago. Also a "proof" that it works in the demo. --- src/deps/magnum-plugins | 2 +- src/esp/batched_sim/MagnumRenderer.cpp | 20 ++++++--- .../batched_sim/MagnumRendererConverter.cpp | 44 ++++++++++++++----- src/esp/batched_sim/MagnumRendererDemo.cpp | 4 ++ 4 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/deps/magnum-plugins b/src/deps/magnum-plugins index 87c9777e16..c082c667d3 160000 --- a/src/deps/magnum-plugins +++ b/src/deps/magnum-plugins @@ -1 +1 @@ -Subproject commit 87c9777e1681ec041a47d66582f9753c1f5f2af3 +Subproject commit c082c667d31ee1b6dca14b8511f41c67c995cbfe diff --git a/src/esp/batched_sim/MagnumRenderer.cpp b/src/esp/batched_sim/MagnumRenderer.cpp index 2c4b244837..b0e223931f 100644 --- a/src/esp/batched_sim/MagnumRenderer.cpp +++ b/src/esp/batched_sim/MagnumRenderer.cpp @@ -121,6 +121,11 @@ struct Scene { Mn::GL::Buffer textureTransformationUniform; }; +struct TextureTransformation { + Mn::UnsignedInt layer; + Mn::Matrix3 transformation; +}; + } struct MagnumRenderer::State { @@ -137,7 +142,7 @@ struct MagnumRenderer::State { // TODO have the materials deduplicated, and then this should have a layer // for each mesh view instead // TODO also the scale + offset, once we have non-shitty atlasing - Cr::Containers::Array textureLayers; + Cr::Containers::Array 
textureTransformations; /* Pairs of mesh views (index byte offset and count), material IDs and initial transformations for draws. Used by add() to populate the draw @@ -274,7 +279,7 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr them to draws instead */ // TODO have a step that deduplicates materials and puts the layer to the // scene instead - _state->textureLayers = Cr::Containers::Array{Cr::DefaultInit, importer->materialCount()}; + _state->textureTransformations = Cr::Containers::Array{Cr::DefaultInit, importer->materialCount()}; { Cr::Containers::Array materialData{Cr::DefaultInit, importer->materialCount()}; for(std::size_t i = 0; i != materialData.size(); ++i) { @@ -288,8 +293,12 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr CORRADE_ASSERT(flatMaterial.texture() == 0, "MagnumRenderer::addFile(): expected material" << i << "to reference the only texture, got" << flatMaterial.texture(), ); - // TODO builtin attribute for this - _state->textureLayers[i] = flatMaterial.attribute("baseColorTextureLayer"_s); + _state->textureTransformations[i] = { + // TODO builtin attribute for this + flatMaterial.attribute("baseColorTextureLayer"_s), + // TODO fix gltf converter to flip texcoords (ffs!!) + flatMaterial.hasTextureTransformation() ? 
Mn::Matrix3::translation(Mn::Vector2::yAxis(1.0f))*Mn::Matrix3::scaling(Mn::Vector2::yScale(-1.0f))*flatMaterial.textureMatrix()*Mn::Matrix3::translation(Mn::Vector2::yAxis(1.0f))*Mn::Matrix3::scaling(Mn::Vector2::yScale(-1.0f)) : Mn::Matrix3{} + }; } // TODO immutable buffer storage @@ -449,7 +458,8 @@ std::size_t MagnumRenderer::add(const Mn::UnsignedInt sceneId, const Cr::Contain arrayAppend(scene.draws, Cr::InPlaceInit) .setMaterialId(meshView.materialId); arrayAppend(scene.textureTransformations, Cr::InPlaceInit) - .setLayer(_state->textureLayers[meshView.materialId]); + .setTextureMatrix(_state->textureTransformations[meshView.materialId].transformation) + .setLayer(_state->textureTransformations[meshView.materialId].layer); arrayAppend(scene.drawCommands, Cr::InPlaceInit, meshView.indexOffsetInBytes, meshView.indexCount); } diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp index eeb33c22d4..8cf67f4bbb 100644 --- a/src/esp/batched_sim/MagnumRendererConverter.cpp +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ constexpr Trade::SceneField SceneFieldMeshViewIndexOffset = Trade::sceneFieldCus constexpr Trade::SceneField SceneFieldMeshViewIndexCount = Trade::sceneFieldCustom(1); constexpr Trade::SceneField SceneFieldMeshViewMaterial = Trade::sceneFieldCustom(2); +constexpr Vector2i TextureAtlasSize{2048}; + int main(int argc, char** argv) { Utility::Arguments args; args.addArgument("input").setHelp("input", "input file prefix") @@ -224,19 +227,34 @@ int main(int argc, char** argv) { /* Override the color if the attribute is already there. 
Otherwise a new attribute gets added to the attributes array below */ bool hasColorAttribute = false; - if(forceColor && material->hasAttribute(Trade::MaterialAttribute::BaseColor)) { + if(forceColor) { + if(material->hasAttribute(Trade::MaterialAttribute::BaseColor)) { hasColorAttribute = true; material->mutableAttribute(Trade::MaterialAttribute::BaseColor) = *forceColor; + } + /* If it's a Phong material (an OBJ), make sure BaseColor is set + instead below */ + // TODO some compat for this? Assimp should have something, a similar + // feature got reverted in 5.1 + } else if(material->hasAttribute(Trade::MaterialAttribute::DiffuseColor)) { + forceColor = material->attribute(Trade::MaterialAttribute::DiffuseColor); } /* Not calling releaseAttributeData() yet either, as we need to ask hasAttribute() first */ Containers::Array attributes; // TODO findAttributeId()! - if(material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture)) { + if(material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture) || + material->hasAttribute(Trade::MaterialAttribute::DiffuseTexture)) { Debug{} << "New textured material for" << meshNames[transformationMeshMaterial.first()] << "in" << Utility::Path::split(filename).second(); - UnsignedInt& textureId = material->mutableAttribute(Trade::MaterialAttribute::BaseColorTexture); + const bool hasBaseColorTexture = material->hasAttribute(Trade::MaterialAttribute::BaseColorTexture); + + UnsignedInt& textureId = material->mutableAttribute( + hasBaseColorTexture ? 
+ Trade::MaterialAttribute::BaseColorTexture : + Trade::MaterialAttribute::DiffuseTexture + ); Containers::Optional texture = importer->texture(textureId); CORRADE_INTERNAL_ASSERT(texture && texture->type() == Trade::TextureType::Texture2D); @@ -247,11 +265,19 @@ int main(int argc, char** argv) { textureId = 0; CORRADE_INTERNAL_ASSERT(material->layerCount() == 1); attributes = material->releaseAttributeData(); + /* If there's DiffuseTexture, add a BaseColorTexture instead */ + if(!hasBaseColorTexture) + arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTexture, 0u); arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size() + 1)); - // TODO add texture scaling if the image is smaller than 2K + Containers::Optional image = importer->image2D(texture->image()); + CORRADE_INTERNAL_ASSERT((image->size() <= TextureAtlasSize).all()); + /* Add texture scaling if the image is smaller than 2K */ + if((image->size() < TextureAtlasSize).any()) + arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTextureMatrix, + Matrix3::scaling(Vector2(image->size())/Vector2(TextureAtlasSize))); - arrayAppend(inputImages, *importer->image2D(texture->image())); + arrayAppend(inputImages, *std::move(image)); /* Otherwise make it reference the first (white) image layer and make it just Flat */ @@ -354,14 +380,12 @@ int main(int argc, char** argv) { /* A combined 3D image. First layer is fully white for input meshes that have no textures. 
*/ Trade::ImageData3D image{PixelFormat::RGB8Unorm, - // TODO don't hardcode max image size (goes onto the same pile as shitty - // "atlasing") - {2048, 2048, Int(inputImages.size() + 1)}, - Containers::Array{NoInit, 2048*2048*(inputImages.size() + 1)*4}}; + {TextureAtlasSize, Int(inputImages.size() + 1)}, + Containers::Array{NoInit, TextureAtlasSize.product()*(inputImages.size() + 1)*4}}; /* Fill the first layer white */ constexpr char white[]{'\xff'}; Utility::copy( - Containers::StridedArrayView3D{white, {2048, 2048, 3}, {0, 0, 0}}, + Containers::StridedArrayView3D{white, {TextureAtlasSize.x(), TextureAtlasSize.y(), 3}, {0, 0, 0}}, image.mutablePixels()[0]); /* Copy the other images */ for(std::size_t i = 0; i != inputImages.size(); ++i) { diff --git a/src/esp/batched_sim/MagnumRendererDemo.cpp b/src/esp/batched_sim/MagnumRendererDemo.cpp index 9446d52072..9b6456366a 100644 --- a/src/esp/batched_sim/MagnumRendererDemo.cpp +++ b/src/esp/batched_sim/MagnumRendererDemo.cpp @@ -78,6 +78,10 @@ MagnumRendererDemo::MagnumRendererDemo(const Arguments& arguments): Mn::Platform _renderer->add(i, Cr::Utility::format("Baked_sc{}_staging_{:.2}", i/21, i%21)); } + _renderer->add(0, "sphere_green_wireframe"); + + _renderer->add(1, "007_tuna_fish_can", Mn::Matrix4::scaling(Mn::Vector3{50.0f})); + Mn::Debug{} << "Rendering" << tileCount.product() << tileSize << "tiles every frame"; if(args.isSet("profile")) { From 5c08b832daf5a2021b69253ed1b18a94ea8c9874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 16 Jun 2022 10:50:15 +0200 Subject: [PATCH 49/53] Adapt to texture layers and typed custom fields now being builtin. 
--- src/esp/batched_sim/MagnumRenderer.cpp | 43 +++++++------------ .../batched_sim/MagnumRendererConverter.cpp | 4 +- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/src/esp/batched_sim/MagnumRenderer.cpp b/src/esp/batched_sim/MagnumRenderer.cpp index b0e223931f..a771a7eb66 100644 --- a/src/esp/batched_sim/MagnumRenderer.cpp +++ b/src/esp/batched_sim/MagnumRenderer.cpp @@ -141,7 +141,6 @@ struct MagnumRenderer::State { draw list. */ // TODO have the materials deduplicated, and then this should have a layer // for each mesh view instead - // TODO also the scale + offset, once we have non-shitty atlasing Cr::Containers::Array textureTransformations; /* Pairs of mesh views (index byte offset and count), material IDs and @@ -229,6 +228,14 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr importerPlugin.contains("AnySceneImporter")) { importer->configuration().setValue("ignoreRequiredExtensions", true); importer->configuration().setValue("experimentalKhrTextureKtx", true); + + /* Desired imported types for custom glTF scene fields. 
If the group + doesn't exist (which is the case for AnySceneImporter), add it first */ + Cr::Utility::ConfigurationGroup* types = importer->configuration().group("customSceneFieldTypes"); + if(!types) types = importer->configuration().addGroup("customSceneFieldTypes"); + types->addValue("meshViewIndexOffset", "UnsignedInt"); + types->addValue("meshViewIndexCount", "UnsignedInt"); + types->addValue("meshViewMaterial", "Int"); } if(Cr::PluginManager::PluginMetadata* const metadata = manager.metadata("BasisImporter")) { @@ -294,8 +301,7 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr "MagnumRenderer::addFile(): expected material" << i << "to reference the only texture, got" << flatMaterial.texture(), ); _state->textureTransformations[i] = { - // TODO builtin attribute for this - flatMaterial.attribute("baseColorTextureLayer"_s), + flatMaterial.attribute(Mn::Trade::MaterialAttribute::BaseColorTextureLayer), // TODO fix gltf converter to flip texcoords (ffs!!) flatMaterial.hasTextureTransformation() ? 
Mn::Matrix3::translation(Mn::Vector2::yAxis(1.0f))*Mn::Matrix3::scaling(Mn::Vector2::yScale(-1.0f))*flatMaterial.textureMatrix()*Mn::Matrix3::translation(Mn::Vector2::yAxis(1.0f))*Mn::Matrix3::scaling(Mn::Vector2::yScale(-1.0f)) : Mn::Matrix3{} }; @@ -329,31 +335,12 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr /* SceneData and copy() will assert if the types or sizes don't match, so we don't have to */ _state->meshViews = Cr::Containers::Array{Cr::NoInit, scene->fieldSize(*meshViewIndexCountFieldId)}; - if(importerPlugin.contains("BpsImporter")) { - Cr::Utility::copy(scene->field(*meshViewIndexOffsetFieldId), - stridedArrayView(_state->meshViews).slice(&MeshView::indexOffsetInBytes)); - Cr::Utility::copy(scene->field(*meshViewIndexCountFieldId), - stridedArrayView(_state->meshViews).slice(&MeshView::indexCount)); - Cr::Utility::copy(scene->field(*meshViewMaterialFieldId), - stridedArrayView(_state->meshViews).slice(&MeshView::materialId)); - } else { - // TODO implement parsing ints in the importer - Mn::Math::castInto( - // TODO make stridedArrayView implicitly convertible to higher - // dimensions, like images - Cr::Containers::arrayCast<2, const Mn::Double>(scene->field(*meshViewIndexOffsetFieldId)), - Cr::Containers::arrayCast<2, Mn::UnsignedInt>(stridedArrayView(_state->meshViews).slice(&MeshView::indexOffsetInBytes))); - Mn::Math::castInto( - // TODO make stridedArrayView implicitly convertible to higher - // dimensions, like images - Cr::Containers::arrayCast<2, const Mn::Double>(scene->field(*meshViewIndexCountFieldId)), - Cr::Containers::arrayCast<2, Mn::UnsignedInt>(stridedArrayView(_state->meshViews).slice(&MeshView::indexCount))); - Mn::Math::castInto( - // TODO make stridedArrayView implicitly convertible to higher - // dimensions, like images - Cr::Containers::arrayCast<2, const Mn::Double>(scene->field(*meshViewMaterialFieldId)), - Cr::Containers::arrayCast<2, 
Mn::Int>(stridedArrayView(_state->meshViews).slice(&MeshView::materialId))); - } + Cr::Utility::copy(scene->field(*meshViewIndexOffsetFieldId), + stridedArrayView(_state->meshViews).slice(&MeshView::indexOffsetInBytes)); + Cr::Utility::copy(scene->field(*meshViewIndexCountFieldId), + stridedArrayView(_state->meshViews).slice(&MeshView::indexCount)); + Cr::Utility::copy(scene->field(*meshViewMaterialFieldId), + stridedArrayView(_state->meshViews).slice(&MeshView::materialId)); /* Transformations of all objects in the scene. Objects that don't have this field default to an indentity transform. */ Cr::Containers::Array transformations{scene->mappingBound()}; diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp index 8cf67f4bbb..7eacef4146 100644 --- a/src/esp/batched_sim/MagnumRendererConverter.cpp +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -268,7 +268,7 @@ int main(int argc, char** argv) { /* If there's DiffuseTexture, add a BaseColorTexture instead */ if(!hasBaseColorTexture) arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTexture, 0u); - arrayAppend(attributes, InPlaceInit, "baseColorTextureLayer", UnsignedInt(inputImages.size() + 1)); + arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTextureLayer, UnsignedInt(inputImages.size() + 1)); Containers::Optional image = importer->image2D(texture->image()); CORRADE_INTERNAL_ASSERT((image->size() <= TextureAtlasSize).all()); @@ -287,7 +287,7 @@ int main(int argc, char** argv) { attributes = material->releaseAttributeData(); arrayAppend(attributes, { Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTexture, 0u}, - Trade::MaterialAttributeData{"baseColorTextureLayer", 0u} + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTextureLayer, 0u} }); } From 5f34eef43d83f5d9f08be60c90f5677877a5cbcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 16 
Jun 2022 10:51:07 +0200 Subject: [PATCH 50/53] Better defaults for the demo. --- src/esp/batched_sim/MagnumRendererDemo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/esp/batched_sim/MagnumRendererDemo.cpp b/src/esp/batched_sim/MagnumRendererDemo.cpp index 9b6456366a..6f8b1ed502 100644 --- a/src/esp/batched_sim/MagnumRendererDemo.cpp +++ b/src/esp/batched_sim/MagnumRendererDemo.cpp @@ -39,9 +39,9 @@ MagnumRendererDemo::MagnumRendererDemo(const Arguments& arguments): Mn::Platform .setHelp("importer", "importer plugin to use instead of BpsImporter") .addBooleanOption("profile") .setHelp("profile", "profile frame times") - .addOption('S', "size", "128 128") + .addOption('S', "size", "128 192") .setHelp("size", "size of one rendered tile") - .addOption('C', "count", "16 12") + .addOption('C', "count", "15 7") .setHelp("count", "tile count") .addBooleanOption("no-textures") .setHelp("no-textures", "render without textures") From adf77bc733676a3317e099a3c099bfe4552e7e0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 16 Jun 2022 13:00:07 +0200 Subject: [PATCH 51/53] Use proper kind of asserts. --- src/esp/batched_sim/MagnumRendererConverter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp index 7eacef4146..529a752b13 100644 --- a/src/esp/batched_sim/MagnumRendererConverter.cpp +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -375,7 +375,7 @@ int main(int argc, char** argv) { for(const Trade::MeshData& inputMesh: inputMeshes) arrayAppend(inputMeshReferences, inputMesh); MeshTools::concatenateInto(mesh, inputMeshReferences); - CORRADE_INTERNAL_ASSERT(converter->add(mesh)); + CORRADE_INTERNAL_ASSERT_OUTPUT(converter->add(mesh)); /* A combined 3D image. First layer is fully white for input meshes that have no textures. 
*/ @@ -403,10 +403,10 @@ int main(int argc, char** argv) { /* Clear the original images array to relieve the memory pressure a bit -- Basis is HUNGRY */ inputImages = {}; - CORRADE_INTERNAL_ASSERT(converter->add(image)); + CORRADE_INTERNAL_ASSERT_OUTPUT(converter->add(image)); /* And a texture referencing the only image */ - CORRADE_INTERNAL_ASSERT(converter->add(Trade::TextureData{Trade::TextureType::Texture2DArray, + CORRADE_INTERNAL_ASSERT_OUTPUT(converter->add(Trade::TextureData{Trade::TextureType::Texture2DArray, SamplerFilter::Linear, SamplerFilter::Linear, SamplerMipmap::Linear, SamplerWrapping::Repeat, 0})); @@ -448,7 +448,7 @@ int main(int argc, char** argv) { outputMeshes.slice(&Mesh::mapping), outputMeshes.slice(&Mesh::meshMaterial)} }}; - CORRADE_INTERNAL_ASSERT(converter->add(scene)); + CORRADE_INTERNAL_ASSERT_OUTPUT(converter->add(scene)); return converter->endFile() ? 0 : 1; } From 47a8c278bf0f1373392b9419b1a96e69a751d74d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 16 Jun 2022 13:00:23 +0200 Subject: [PATCH 52/53] What's up with the formatting here. 
--- src/esp/batched_sim/MagnumRendererConverter.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp index 529a752b13..ed29b930e5 100644 --- a/src/esp/batched_sim/MagnumRendererConverter.cpp +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -435,16 +435,13 @@ int main(int argc, char** argv) { Trade::SceneFieldData{Trade::SceneField::Mesh, outputMeshes.slice(&Mesh::mapping), outputMeshes.slice(&Mesh::mesh)}, - Trade::SceneFieldData{ - SceneFieldMeshViewIndexOffset, + Trade::SceneFieldData{SceneFieldMeshViewIndexOffset, outputMeshes.slice(&Mesh::mapping), outputMeshes.slice(&Mesh::meshIndexOffset)}, - Trade::SceneFieldData{ - SceneFieldMeshViewIndexCount, + Trade::SceneFieldData{SceneFieldMeshViewIndexCount, outputMeshes.slice(&Mesh::mapping), outputMeshes.slice(&Mesh::meshIndexCount)}, - Trade::SceneFieldData{ - SceneFieldMeshViewMaterial, + Trade::SceneFieldData{SceneFieldMeshViewMaterial, outputMeshes.slice(&Mesh::mapping), outputMeshes.slice(&Mesh::meshMaterial)} }}; From 3d5b613973adb16dc11276eec7164b3383dc57e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 21 Jul 2022 18:19:07 +0200 Subject: [PATCH 53/53] WIP --- src/deps/bps3D | 2 +- src/deps/googletest | 2 +- src/esp/batched_sim/BatchedSimulator.cpp | 6 +- src/esp/batched_sim/BpsImporter.conf | 10 +- src/esp/batched_sim/CMakeLists.txt | 5 +- src/esp/batched_sim/MagnumRenderer.cpp | 83 ++---- src/esp/batched_sim/MagnumRenderer.h | 5 +- .../batched_sim/MagnumRendererConverter.cpp | 258 +++++++++++++++--- .../batched_sim/MagnumRendererStandalone.cpp | 68 ++++- .../batched_sim/MagnumRendererStandalone.h | 30 +- src/esp/batched_sim/README.md | 14 +- src/tests/CMakeLists.txt | 12 +- src/tests/configure.h.cmake | 4 + 13 files changed, 373 insertions(+), 126 deletions(-) diff --git a/src/deps/bps3D b/src/deps/bps3D index ce7e28f76f..e0e686e5e1 160000 
--- a/src/deps/bps3D +++ b/src/deps/bps3D @@ -1 +1 @@ -Subproject commit ce7e28f76f31f302f03903c09f99d899b575365e +Subproject commit e0e686e5e130551c2794f0a0a04039b1d9c66046 diff --git a/src/deps/googletest b/src/deps/googletest index 4679637f1c..00938b2b22 160000 --- a/src/deps/googletest +++ b/src/deps/googletest @@ -1 +1 @@ -Subproject commit 4679637f1c9d5a0728bdc347a531737fad0b1ca3 +Subproject commit 00938b2b228f3b70d3d9e51f29a1505bdad43f1e diff --git a/src/esp/batched_sim/BatchedSimulator.cpp b/src/esp/batched_sim/BatchedSimulator.cpp index da5a85cbb1..3920729ca4 100644 --- a/src/esp/batched_sim/BatchedSimulator.cpp +++ b/src/esp/batched_sim/BatchedSimulator.cpp @@ -1387,9 +1387,11 @@ BatchedSimulator::BatchedSimulator(const BatchedSimulatorConfig& config) { // .setFilename("combined_Stage_v3_sc0_staging_trimesh.bps"_s) .setTileSizeCount({config_.sensor0.width, config_.sensor0.height}, // TODO better way to specify this - {16, (config_.numEnvs + 15)/16}) + {16, (config_.numEnvs + 15)/16}), + MagnumRendererStandaloneConfiguration{} + .setCudaDevice(config_.gpuId) ); - // TODO GPU ID, if include depth/color + // TODO if include depth/color renderer_->addFile(config_.renderAssetCompositeFilepath); // #ifdef MAGNUM_RENDERER // /* Hardcode camera position + projection for all views (taken from diff --git a/src/esp/batched_sim/BpsImporter.conf b/src/esp/batched_sim/BpsImporter.conf index 0c381be542..68e6d94541 100644 --- a/src/esp/batched_sim/BpsImporter.conf +++ b/src/esp/batched_sim/BpsImporter.conf @@ -3,7 +3,7 @@ depends=BasisImporter [configuration] # If enabled, imports just one mesh with all vertex and index data together and # the scene containing mesh index offsets and count -meshViews=false +meshViews=true # If enabled together with meshViews, the mesh will have a second level with # MeshPrimitive::Meshlets, containing view info together with bounding spheres @@ -12,19 +12,19 @@ meshlets=true # If enabled, combines all 2D textures together into a single 2D 
array texture # with mip levels, throwing away levels above given size. -textureArrays=false +textureArrays=true textureArrayMaxLevelSize=128 # If enabled together with textureArrays, creates one additional all-white # array layer and makes all texture-less materials use it to avoid having a # dedicated second draw for texture-less objects -textureArraysForceAllMaterialsTextured=false +textureArraysForceAllMaterialsTextured=true # Include the instance scene in the output. If disabled, only the second scene # without transformation or parent info is included. -instanceScene=true +instanceScene=false # Same as the format option in BasisImporter -basisFormat= +basisFormat=Astc4x4RGBA # Provide basic Phong material attributes as a fallback to PBR phongMaterialFallback=true diff --git a/src/esp/batched_sim/CMakeLists.txt b/src/esp/batched_sim/CMakeLists.txt index 99f6ad7cd6..21d561dddf 100644 --- a/src/esp/batched_sim/CMakeLists.txt +++ b/src/esp/batched_sim/CMakeLists.txt @@ -14,7 +14,7 @@ endif() # BpsImporter plugin if(MAGNUM_RENDERER) find_package(Corrade REQUIRED PluginManager) - find_package(Magnum REQUIRED GL GlfwApplication SceneTools Trade MeshTools Shaders DebugTools WindowlessEglApplication) + find_package(Magnum REQUIRED GL GlfwApplication SceneTools Trade MeshTools Shaders TextureTools DebugTools WindowlessEglApplication) if(MAGNUM_BUILD_STATIC) find_package(MagnumPlugins REQUIRED BasisImporter) corrade_add_static_plugin(BpsImporter ";" BpsImporter.conf BpsImporter.cpp) @@ -115,7 +115,8 @@ if(MAGNUM_RENDERER) target_link_libraries(MagnumRendererConverter PRIVATE Magnum::SceneTools Magnum::Trade - Magnum::MeshTools) + Magnum::MeshTools + Magnum::TextureTools) if(MAGNUM_BUILD_STATIC) target_link_libraries(MagnumRendererConverter PRIVATE Magnum::AnySceneImporter diff --git a/src/esp/batched_sim/MagnumRenderer.cpp b/src/esp/batched_sim/MagnumRenderer.cpp index a771a7eb66..084d6ac47d 100644 --- a/src/esp/batched_sim/MagnumRenderer.cpp +++ 
b/src/esp/batched_sim/MagnumRenderer.cpp @@ -64,9 +64,8 @@ using namespace Cr::Containers::Literals; struct MagnumRendererConfiguration::State { MagnumRendererFlags flags; - Magnum::Vector2i tileSize{128, 128}; - Magnum::Vector2i tileCount{16, 12}; - Magnum::UnsignedInt textureArrayMaxLevelSize{128}; + Mn::Vector2i tileSize{128, 128}; + Mn::Vector2i tileCount{16, 12}; }; MagnumRendererConfiguration::MagnumRendererConfiguration(): state{Cr::InPlaceInit} {} @@ -76,15 +75,12 @@ MagnumRendererConfiguration& MagnumRendererConfiguration::setFlags(MagnumRendere state->flags = flags; return *this; } -MagnumRendererConfiguration& MagnumRendererConfiguration::setTileSizeCount(const Magnum::Vector2i& tileSize, const Magnum::Vector2i& tileCount) { + +MagnumRendererConfiguration& MagnumRendererConfiguration::setTileSizeCount(const Mn::Vector2i& tileSize, const Mn::Vector2i& tileCount) { state->tileSize = tileSize; state->tileCount = tileCount; return *this; } -MagnumRendererConfiguration& MagnumRendererConfiguration::setTextureArrayMaxLevelSize(Magnum::UnsignedInt size) { - state->textureArrayMaxLevelSize = size; - return *this; -} namespace { @@ -133,14 +129,12 @@ struct MagnumRenderer::State { Mn::Vector2i tileSize, tileCount; Mn::Shaders::PhongGL shader{Mn::NoCreate}; - /* Filled at the beginning */ + /* Filled upon addFile() */ Mn::GL::Texture2DArray texture{Mn::NoCreate}; Mn::GL::Mesh mesh{Mn::NoCreate}; Mn::GL::Buffer materialUniform; - /* Contains texture layer for each material. Used by add() to populate the - draw list. */ - // TODO have the materials deduplicated, and then this should have a layer - // for each mesh view instead + /* Contains texture transform and layer for each material. Used by add() to + populate the draw list. 
*/ Cr::Containers::Array textureTransformations; /* Pairs of mesh views (index byte offset and count), material IDs and @@ -158,7 +152,7 @@ struct MagnumRenderer::State { Cr::Containers::Array scenes; }; -MagnumRenderer::MagnumRenderer(Magnum::NoCreateT) {}; +MagnumRenderer::MagnumRenderer(Mn::NoCreateT) {} void MagnumRenderer::create(const MagnumRendererConfiguration& configurationWrapper) { #ifdef MAGNUM_BUILD_STATIC @@ -185,6 +179,10 @@ void MagnumRenderer::create(const MagnumRendererConfiguration& configurationWrap Mn::GL::Renderer::enable(Mn::GL::Renderer::Feature::DepthTest); } +void MagnumRenderer::destroy() { + _state = {}; +} + MagnumRenderer::~MagnumRenderer() = default; Mn::Vector2i MagnumRenderer::tileCount() const { @@ -217,15 +215,10 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr Cr::Containers::Pointer importer = manager.loadAndInstantiate(importerPlugin); CORRADE_INTERNAL_ASSERT(importer); - if(importerPlugin.contains("BpsImporter")) { - // TODO why dafuq is this not propagated from BasisImporter config?? - importer->configuration().setValue("basisFormat", "Astc4x4RGBA"); - importer->configuration().setValue("meshViews", true); - importer->configuration().setValue("instanceScene", false); - importer->configuration().setValue("textureArrays", true); - importer->configuration().setValue("textureArraysForceAllMaterialsTextured", true); - } else if(importerPlugin.contains("GltfImporter") || - importerPlugin.contains("AnySceneImporter")) { + /* Set up options for glTF import. We can also import any other files (such + as serialized magnum blobs or BPS files), assume these don't need any + custom setup. 
*/ + if(importerPlugin.contains("GltfImporter") || (importerPlugin.contains("AnySceneImporter") && (filename.contains(".gltf") || filename.contains(".glb")))) { importer->configuration().setValue("ignoreRequiredExtensions", true); importer->configuration().setValue("experimentalKhrTextureKtx", true); @@ -238,8 +231,10 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr types->addValue("meshViewMaterial", "Int"); } + /* Basis options. Don't want to bother with all platform variations right + now, so it's always ASTC, sorry. */ if(Cr::PluginManager::PluginMetadata* const metadata = manager.metadata("BasisImporter")) { - metadata->configuration().setValue("format", "Astc4x4RGBA"); // TODO + metadata->configuration().setValue("format", "Astc4x4RGBA"); } // TODO memory-map self-contained files (have a config option? do implicitly @@ -282,10 +277,8 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr "MagnumRenderer::addFile(): expected a file with exactly one mesh, got" << importer->meshCount(), ); _state->mesh = Mn::MeshTools::compile(*CORRADE_INTERNAL_ASSERT_EXPRESSION(importer->mesh(0))); - /* Immutable material data. Save layers to a temporary array to apply - them to draws instead */ - // TODO have a step that deduplicates materials and puts the layer to the - // scene instead + /* Immutable material data. Save texture transformations and layers to a + temporary array to apply them to draws instead */ _state->textureTransformations = Cr::Containers::Array{Cr::DefaultInit, importer->materialCount()}; { Cr::Containers::Array materialData{Cr::DefaultInit, importer->materialCount()}; @@ -302,8 +295,7 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr _state->textureTransformations[i] = { flatMaterial.attribute(Mn::Trade::MaterialAttribute::BaseColorTextureLayer), - // TODO fix gltf converter to flip texcoords (ffs!!) - flatMaterial.hasTextureTransformation() ? 
Mn::Matrix3::translation(Mn::Vector2::yAxis(1.0f))*Mn::Matrix3::scaling(Mn::Vector2::yScale(-1.0f))*flatMaterial.textureMatrix()*Mn::Matrix3::translation(Mn::Vector2::yAxis(1.0f))*Mn::Matrix3::scaling(Mn::Vector2::yScale(-1.0f)) : Mn::Matrix3{} + flatMaterial.hasTextureTransformation() ? flatMaterial.textureMatrix() : Mn::Matrix3{} }; } @@ -355,22 +347,6 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr _state->meshViews[i].transformation = transformations[meshViewMapping[i]]; } -// /* Populate name mapping. Assuming all three fields have it the same. */ -// for(std::size_t i = 0; i != mapping.size(); ++i) { -// Cr::Containers::String name = importer->objectName(mapping[i]); -// CORRADE_ASSERT(name, "MagnumRenderer::addFile(): node" << i << "has no name", ); -// // TODO this will get an actual range once we fetch the whole hierarchy -// // for each name -// // TODO fetch parents and transformations, order their mapping -// // depth-first (orderParentsDepthFirst()) so each top-level object has -// // its children next to itself, then populate / reshuffle the mesh views -// // to match this mapping (how!?), and ultimately fill -// // meshViewRangeForName only with names of top-level objects (with -// // parent = -1) and the child count (i.e., # of elements until another -// // -1 parent) -// _state->meshViewRangeForName.insert({name, {Mn::UnsignedInt(i), Mn::UnsignedInt(i + 1)}}); -// } - /* Templates are the root objects with their names. Their immediate children are the actual meshes. Assumes the order matches the order of the custom fields. 
*/ @@ -398,7 +374,8 @@ void MagnumRenderer::addFile(const Cr::Containers::StringView filename, const Cr Mn::Shaders::PhongGL::Flag::TextureArrays| Mn::Shaders::PhongGL::Flag::TextureTransformation; // TODO 1024 is 64K divided by 64 bytes needed for one draw uniform, have - // that fetched from actual GL limits instead + // that fetched from actual GL limits instead once I get to actually + // splitting draws by this limit _state->shader = Mn::Shaders::PhongGL{flags, 0, importer->materialCount(), 1024}; _state->shader .bindMaterialBuffer(_state->materialUniform); @@ -490,17 +467,19 @@ Cr::Containers::StridedArrayView1D MagnumRenderer::transformations( } void MagnumRenderer::draw(Mn::GL::AbstractFramebuffer& framebuffer) { + // TODO allow this (currently addFile() sets up shader limits) + CORRADE_ASSERT(_state->mesh.id(), + "MagnumRenderer::draw(): no file was added", ); + /* Calculate absolute transformations */ for(std::size_t sceneId = 0; sceneId != _state->scenes.size(); ++sceneId) { Scene& scene = _state->scenes[sceneId]; // TODO have a tool for this scene.absoluteTransformations[0].setTransformationMatrix(Mn::Matrix4{}); -// if(sceneId == 0) !Mn::Debug{} << stridedArrayView(scene.transformations).slice(&Mn::Matrix4::translation); for(std::size_t i = 0; i != scene.transformations.size(); ++i) scene.absoluteTransformations[i + 1].setTransformationMatrix( scene.absoluteTransformations[scene.parents[i] + 1].transformationMatrix *scene.transformations[i]); -// if(sceneId == 0) !Mn::Debug{} << stridedArrayView(scene.absoluteTransformations).slice(&Mn::Shaders::TransformationUniform3D::transformationMatrix).slice(&Mn::Matrix4::translation); } /* Upload projection and transformation uniforms, assuming they change every @@ -508,7 +487,6 @@ void MagnumRenderer::draw(Mn::GL::AbstractFramebuffer& framebuffer) { _state->projectionUniform.setData(_state->projections); for(std::size_t sceneId = 0; sceneId != _state->scenes.size(); ++sceneId) // TODO have this somehow in a 
single buffer instead - // TODO needs arrayInsert(), heh _state->scenes[sceneId].transformationUniform.setData(_state->scenes[sceneId].absoluteTransformations.exceptPrefix(1)); for(Mn::Int y = 0; y != _state->tileCount.y(); ++y) { @@ -517,11 +495,6 @@ void MagnumRenderer::draw(Mn::GL::AbstractFramebuffer& framebuffer) { const std::size_t scene = y*_state->tileCount.x() + x; -// !Debug{} << scene << _state->scenes[scene].textureTransformations - - // TODO bind the actual view - // TODO what is the above TODO about, actually?! - // TODO split by draw count limit _state->shader // TODO bind all buffers together with a multi API diff --git a/src/esp/batched_sim/MagnumRenderer.h b/src/esp/batched_sim/MagnumRenderer.h index c02d4dd658..b1656053b3 100644 --- a/src/esp/batched_sim/MagnumRenderer.h +++ b/src/esp/batched_sim/MagnumRenderer.h @@ -13,7 +13,7 @@ namespace esp { namespace batched_sim { enum class MagnumRendererFlag { - NoTextures = 1 << 0, + NoTextures = 1 << 0 // TODO memory-map }; typedef Corrade::Containers::EnumSet MagnumRendererFlags; @@ -25,8 +25,6 @@ struct MagnumRendererConfiguration { MagnumRendererConfiguration& setFlags(MagnumRendererFlags flags); MagnumRendererConfiguration& setTileSizeCount(const Magnum::Vector2i& tileSize, const Magnum::Vector2i& tileCount); - // TODO drop - MagnumRendererConfiguration& setTextureArrayMaxLevelSize(Magnum::UnsignedInt size); struct State; Corrade::Containers::Pointer state; @@ -68,6 +66,7 @@ class MagnumRenderer { /* used by MagnumRendererStandalone */ explicit MagnumRenderer(Magnum::NoCreateT); void create(const MagnumRendererConfiguration& configuration); + void destroy(); private: struct State; diff --git a/src/esp/batched_sim/MagnumRendererConverter.cpp b/src/esp/batched_sim/MagnumRendererConverter.cpp index ed29b930e5..b3574410fd 100644 --- a/src/esp/batched_sim/MagnumRendererConverter.cpp +++ b/src/esp/batched_sim/MagnumRendererConverter.cpp @@ -16,7 +16,9 @@ #include #include #include +#include #include 
+#include #include #include #include @@ -69,11 +71,25 @@ int main(int argc, char** argv) { if(PluginManager::PluginMetadata* m = imageConverterManager.metadata("BasisImageConverter")) { // TODO set up once GltfSceneConverter uses it directly } + if(PluginManager::PluginMetadata* m = converterManager.metadata("GltfSceneConverter")) { + // TODO fix the damn texcoord orientation in the converter already + m->configuration().setValue("orientation", "rdo"); + } - /* Magnum's OBJ importer is ... well, not great. It'll get replaced eventually. */ + /* Magnum's OBJ importer is ... well, not great. It'll get replaced + eventually. */ if(importerManager.loadState("ObjImporter") != PluginManager::LoadState::NotFound) importerManager.setPreferredPlugins("ObjImporter", {"AssimpImporter"}); + /* Use StbImageImporter because for it we can override channel count */ + { + PluginManager::PluginMetadata* m = importerManager.metadata("StbImageImporter"); + CORRADE_INTERNAL_ASSERT(m); + m->configuration().setValue("forceChannelCount", 3); + importerManager.setPreferredPlugins("PngImporter", {"StbImageImporter"}); + importerManager.setPreferredPlugins("JpegImporter", {"StbImageImporter"}); + } + Containers::Pointer importer = importerManager.loadAndInstantiate("AnySceneImporter"); Containers::Pointer converter = converterManager.loadAndInstantiate(args.value("converter")); @@ -87,10 +103,23 @@ int main(int argc, char** argv) { converter->setSceneFieldName(SceneFieldMeshViewIndexCount, "meshViewIndexCount"); converter->setSceneFieldName(SceneFieldMeshViewMaterial, "meshViewMaterial"); - /* Materials are put into the file directly, meshes and textures get - collected and then joined. 
*/ + /* Meshes and textures get collected, then joined / packed, then added to the + converter */ Containers::Array inputMeshes; Containers::Array inputImages; + + /* Reserve the first image for texture-less objects -- a single white pixel */ + constexpr Color3ub WhitePixel[]{0xffffff_rgb}; + arrayAppend(inputImages, Trade::ImageData2D{ + PixelStorage{}.setAlignment(1), + PixelFormat::RGB8Unorm, {1, 1}, + Trade::DataFlags{}, WhitePixel}); + + /* As textures get packed into an atlas, the materials will need to be + updated with final layer IDs and offsets. Store them temporarily in an + array, using the imported image index and zero offset as placeholders. */ + Containers::Array inputMaterials; + UnsignedInt indexOffset = 0; /* Parent, present for all objects */ @@ -118,6 +147,46 @@ int main(int argc, char** argv) { }; Containers::Array meshes; + const auto importSingleMesh = [&]( + Containers::StringView filename + ) { + CORRADE_INTERNAL_ASSERT_OUTPUT(importer->openFile(filename)); + CORRADE_INTERNAL_ASSERT(importer->meshCount() == 1); + + Parent root; + root.mapping = parents.size(); + root.parent = -1; + + arrayAppend(parents, root); + converter->setObjectName(root.mapping, + Utility::Path::splitExtension(Utility::Path::split(filename).second()).first()); + + Containers::Optional mesh = importer->mesh(0); + CORRADE_INTERNAL_ASSERT(mesh && mesh->primitive() == MeshPrimitive::Triangles); + /* Assumin STL files, which are not indexed and thus with many duplicate + vertices. Create an index buffer by deduplicating them. */ + // TODO possibly useful to filter and recreate normals also? 
+ mesh = MeshTools::removeDuplicates(*mesh); + + Mesh m; + m.mapping = root.mapping; + m.mesh = 0; + m.meshIndexCount = mesh->indexCount(); + m.meshMaterial = inputMaterials.size(); + m.meshIndexOffset = indexOffset; + indexOffset += mesh->indexCount()*4; // TODO ints hardcoded + + /* Add an empty white material */ + arrayAppend(inputMaterials, Trade::MaterialData{Trade::MaterialType::Flat, { + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTexture, 0u}, + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTextureLayer, 0u}, + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTextureMatrix, + Matrix3::scaling(Vector2{1}/Vector2(TextureAtlasSize))} + }}); + + arrayAppend(meshes, m); + }; + const auto import = [&]( Containers::StringView filename, Containers::StringView name = {}, @@ -141,6 +210,7 @@ int main(int argc, char** argv) { /* Meshes are unfortunately named in a useless way, so override them with names from the objects referencing them instead */ + // TODO this is replica-specific, make a dedicated function Containers::Array meshNames{importer->meshCount()}; for(Containers::Pair> objectMeshMaterial: scene->meshesMaterialsAsArray()) { const UnsignedInt meshId = objectMeshMaterial.second().first(); @@ -162,6 +232,16 @@ int main(int argc, char** argv) { top-level object. */ for(Containers::Triple transformationMeshMaterial: SceneTools::flattenMeshHierarchy3D(*scene)) { Containers::Optional mesh = importer->mesh(transformationMeshMaterial.first()); + Containers::String meshName = meshNames[transformationMeshMaterial.first()]; + + /* Skip non-triangle meshes */ + if(mesh->primitive() != MeshPrimitive::Triangles && + mesh->primitive() != MeshPrimitive::TriangleFan && + mesh->primitive() != MeshPrimitive::TriangleStrip) { + Warning{} << "Mesh" << meshName << "in" << Utility::Path::split(filename).second() << "is" << mesh->primitive() << Debug::nospace << ", skipping"; + continue; + } + // TODO convert from a strip/... 
if not triangles CORRADE_INTERNAL_ASSERT(mesh && mesh->isIndexed() && mesh->primitive() == MeshPrimitive::Triangles); @@ -189,7 +269,6 @@ int main(int argc, char** argv) { // TODO avoid string copies by making the map over StringViews? but then // it'd have to be changed again once we don't have the extra meshNames // array - Containers::String meshName = meshNames[transformationMeshMaterial.first()]; bool duplicate = false; if(uniqueMeshes) { std::unordered_map>::iterator found = uniqueMeshes->find(meshName); @@ -268,9 +347,10 @@ int main(int argc, char** argv) { /* If there's DiffuseTexture, add a BaseColorTexture instead */ if(!hasBaseColorTexture) arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTexture, 0u); - arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTextureLayer, UnsignedInt(inputImages.size() + 1)); + arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColorTextureLayer, UnsignedInt(inputImages.size())); Containers::Optional image = importer->image2D(texture->image()); + // TODO detection of single-color images here CORRADE_INTERNAL_ASSERT((image->size() <= TextureAtlasSize).all()); /* Add texture scaling if the image is smaller than 2K */ if((image->size() < TextureAtlasSize).any()) @@ -279,15 +359,17 @@ int main(int argc, char** argv) { arrayAppend(inputImages, *std::move(image)); - /* Otherwise make it reference the first (white) image layer and make - it just Flat */ + /* Otherwise make it reference the first image, which is a 1x1 white + pixel */ } else { Debug{} << "New untextured material for" << meshNames[transformationMeshMaterial.first()] << "in" << Utility::Path::split(filename).second(); attributes = material->releaseAttributeData(); arrayAppend(attributes, { Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTexture, 0u}, - Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTextureLayer, 0u} + 
Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTextureLayer, 0u}, + Trade::MaterialAttributeData{Trade::MaterialAttribute::BaseColorTextureMatrix, + Matrix3::scaling(Vector2{1}/Vector2(TextureAtlasSize))} }); } @@ -295,9 +377,11 @@ int main(int argc, char** argv) { arrayAppend(attributes, InPlaceInit, Trade::MaterialAttribute::BaseColor, *forceColor); } + /* Make it just Flat */ material = Trade::MaterialData{Trade::MaterialType::Flat, std::move(attributes)}; - importedMaterialIds[transformationMeshMaterial.second()] = m.meshMaterial = *converter->add(*material); + importedMaterialIds[transformationMeshMaterial.second()] = m.meshMaterial = inputMaterials.size(); + arrayAppend(inputMaterials, *std::move(material)); } arrayAppend(meshes, m); @@ -313,10 +397,85 @@ int main(int argc, char** argv) { {"arm0.link_el0.obj", 0x3f3f3f_rgbf}, {"arm0.link_el1.obj", 0xffff00_rgbf}, {"arm0.link_fngr.obj", 0x7f7f7f_rgbf}, - // TODO yeyeyeye ... bored + {"arm0.link_hr0.obj", 0xffff00_rgbf}, + {"arm0.link_sh0.obj", 0x3f3f3f_rgbf}, + {"arm0.link_sh1.obj", 0x7f7f7f_rgbf}, + {"arm0.link_wr0.obj", 0x3f3f3f_rgbf}, + {"arm0.link_wr1.obj", 0xffff00_rgbf}, + {"base.obj", 0x4cc6ff_rgbf}, + {"fl.hip.obj", 0x3f3f3f_rgbf}, + {"fl.lleg.obj", 0x3f3f3f_rgbf}, + {"fl.uleg.obj", 0x4cc6ff_rgbf}, + {"fr.hip.obj", 0x3f3f3f_rgbf}, + {"fr.lleg.obj", 0x3f3f3f_rgbf}, + {"fr.uleg.obj", 0x4cc6ff_rgbf}, + {"hl.hip.obj", 0x3f3f3f_rgbf}, + {"hl.lleg.obj", 0x3f3f3f_rgbf}, + {"hl.uleg.obj", 0x4cc6ff_rgbf}, + {"hr.hip.obj", 0x3f3f3f_rgbf}, + {"hr.lleg.obj", 0x3f3f3f_rgbf}, + {"hr.uleg.obj", 0x4cc6ff_rgbf} }) import(Utility::Path::join(spotPath, i.first()), {}, i.second()); + /* ReplicaCAD articulated objects */ + // TODO this needs the images deduplicated ... some content-addressing + // file callback? 
+ Containers::String replicaArticulatedObjectsPath = Utility::Path::join(args.value("input"), "ReplicaCAD_baked_lighting_v1.5/urdf_uncompressed"); + for(const char* filename: { + "fridge/body_brighter2.glb", // TODO "fixed" model instead + "fridge/bottom_door_brighter2.glb", // TODO "fixed" + "fridge/top_door_brighter2.glb", // TODO "fixed" + "kitchen_counter/kitchen_counter.glb", + "kitchen_counter/drawer1.glb", + "kitchen_counter/drawer2.glb", + "kitchen_counter/drawer3.glb", + "kitchen_counter/drawer4.glb", + "kitchen_cupboards/kitchencupboard_base.glb", + "kitchen_cupboards/kitchencupboard_doorWhole_L.glb", + "kitchen_cupboards/kitchencupboard_doorWhole_R.glb", + "kitchen_cupboards/kitchencupboard_doorWindow_L.glb", + "kitchen_cupboards/kitchencupboard_doorWindow_R.glb", + "doors/door2.glb", + "cabinet/cabinet.glb", + "cabinet/door.glb", + "chest_of_drawers/chestOfDrawers_base.glb", // TODO "fixed" + "chest_of_drawers/chestOfDrawers_DrawerBot.glb", // TODO "fixed" + "chest_of_drawers/chestOfDrawers_DrawerMid.glb", // TODO "fixed" + "chest_of_drawers/chestOfDrawers_DrawerTop.glb" // TODO "fixed" + }) + import(Utility::Path::join(replicaArticulatedObjectsPath, filename)); + + /* Fetch */ + // TODO there was some "preprocessed_fetch_meshes" glb file instead + Containers::String fetchPath = Utility::Path::join(args.value("input"), "hab_fetch_v1.0/meshes"); + for(const char* filename: { +// "base_link.dae", // TODO it's a 4K texture, FFS + "elbow_flex_link.dae", + "forearm_roll_link.dae", + "gripper_link.dae", + "head_pan_link.dae", + "head_tilt_link.dae", + "shoulder_lift_link.dae", + "shoulder_pan_link.dae", + "torso_fixed_link.dae", + "torso_lift_link.dae", + "upperarm_roll_link.dae", + "wrist_flex_link.dae", + "wrist_roll_link.dae" + }) + import(Utility::Path::join(fetchPath, filename)); + for(const char* filename: { + "bellows_link.STL", + "estop_link.STL", // TODO or .dae? 
+ "l_wheel_link.STL", + "l_gripper_finger_link_opt.stl", + "laser_link.STL", + "r_gripper_finger_link_opt.stl", + "r_wheel_link.STL", + }) + importSingleMesh(Utility::Path::join(fetchPath, filename)); + /* Debug models */ // TODO generate these all from Primitives instead of doing crazy stuff with // glTF files @@ -338,18 +497,6 @@ int main(int argc, char** argv) { }) import(Utility::Path::join(debugModelsPath, filename)); - /* ReplicaCAD */ - Containers::String replicaPath = Utility::Path::join(args.value("input"), "ReplicaCAD_baked_lighting_v1.5/stages_uncompressed"); - std::unordered_map> uniqueReplicaMeshes; - for(std::size_t i = 0; i <= 4; ++i) { - for(std::size_t j = 0; j <= 20; ++j) { - import( - Utility::Path::join(replicaPath, Utility::format("Baked_sc{}_staging_{:.2}.glb", i, j)), - {}, {}, - &uniqueReplicaMeshes); - } - } - /* YCB */ Containers::String ycbPath = Utility::Path::join(args.value("input"), "hab_ycb_v1.1/ycb/"); for(const char* name: { @@ -365,6 +512,18 @@ int main(int argc, char** argv) { }) import(Utility::Path::join({ycbPath, name, "google_16k/textured.obj"}), name); + /* ReplicaCAD */ + Containers::String replicaPath = Utility::Path::join(args.value("input"), "ReplicaCAD_baked_lighting_v1.5/stages_uncompressed"); + std::unordered_map> uniqueReplicaMeshes; + for(std::size_t i = 0; i <= 4; ++i) { + for(std::size_t j = 0; j <= 20; ++j) { + import( + Utility::Path::join(replicaPath, Utility::format("Baked_sc{}_staging_{:.2}.glb", i, j)), + {}, {}, + &uniqueReplicaMeshes); + } + } + /* Target layout for the mesh. 
So far just for flat rendering, no normals etc */ Trade::MeshData mesh{MeshPrimitive::Triangles, nullptr, { @@ -375,24 +534,53 @@ int main(int argc, char** argv) { for(const Trade::MeshData& inputMesh: inputMeshes) arrayAppend(inputMeshReferences, inputMesh); MeshTools::concatenateInto(mesh, inputMeshReferences); + for(Vector2& i: mesh.mutableAttribute(Trade::MeshAttribute::TextureCoordinates)) { + // TODO remmove this once GltfSceneConverter does that itself + i.y() = 1.0f - i.y(); + } CORRADE_INTERNAL_ASSERT_OUTPUT(converter->add(mesh)); - /* A combined 3D image. First layer is fully white for input meshes that have - no textures. */ + /* Pack input images into an atlas */ + Containers::Pair> layerCountOffsets = + TextureTools::atlasArrayPowerOfTwo(TextureAtlasSize, Containers::StridedArrayView1D(inputImages).slice(&Trade::ImageData2D::size)); + + /* Update layer and offset info in the materials, add them to the converter */ + for(Trade::MaterialData& inputMaterial: inputMaterials) { + UnsignedInt& layer = inputMaterial.mutableAttribute(Trade::MaterialAttribute::BaseColorTextureLayer); + const UnsignedInt imageId = layer; + + layer = layerCountOffsets.second()[imageId].z(); + + /* If the material has a texture matrix (textures that are same as atlas + layer size don't have it), update the offset there */ + // TODO findMaterial()!! 
+ if(inputMaterial.hasAttribute(Trade::MaterialAttribute::BaseColorTextureMatrix)) { + Matrix3& matrix = inputMaterial.mutableAttribute(Trade::MaterialAttribute::BaseColorTextureMatrix); + matrix = Matrix3::translation(Vector2{layerCountOffsets.second()[imageId].xy()}/Vector2{TextureAtlasSize})*matrix; + } + + CORRADE_INTERNAL_ASSERT_OUTPUT(converter->add(inputMaterial)); + } + + /* A combined 3D image */ Trade::ImageData3D image{PixelFormat::RGB8Unorm, - {TextureAtlasSize, Int(inputImages.size() + 1)}, - Containers::Array{NoInit, TextureAtlasSize.product()*(inputImages.size() + 1)*4}}; - /* Fill the first layer white */ - constexpr char white[]{'\xff'}; - Utility::copy( - Containers::StridedArrayView3D{white, {TextureAtlasSize.x(), TextureAtlasSize.y(), 3}, {0, 0, 0}}, - image.mutablePixels()[0]); - /* Copy the other images */ + {TextureAtlasSize, layerCountOffsets.first()}, + Containers::Array{NoInit, std::size_t(TextureAtlasSize.product()*layerCountOffsets.first()*3)}}; + /* Copy the images to their respective locations, calculate waste ratio + during the process */ + std::size_t inputImageArea = 0; for(std::size_t i = 0; i != inputImages.size(); ++i) { + inputImageArea += inputImages[i].size().product(); + // TODO handle RGBA (drop alpha), or convert everything to RGBA instead CORRADE_INTERNAL_ASSERT(inputImages[i].format() == PixelFormat::RGB8Unorm); Utility::copy(inputImages[i].pixels(), - // TODO did i mention the "atlas packing" is EXTREMELY SHITTY? 
- image.mutablePixels()[i + 1].prefix({ + /** @todo sliceCount(), finally */ + image.mutablePixels()[layerCountOffsets.second()[i].z()].exceptPrefix({ + // TODO have implicit conversion of Vector to StridedDimensions, FINALLY + std::size_t(layerCountOffsets.second()[i].x()), + std::size_t(layerCountOffsets.second()[i].y()), + 0 + }).prefix({ // TODO have implicit conversion of Vector to StridedDimensions, FINALLY std::size_t(inputImages[i].size().x()), std::size_t(inputImages[i].size().y()), @@ -400,6 +588,8 @@ int main(int argc, char** argv) { })); } + Debug{} << inputImages.size() << "images packed to" << layerCountOffsets.first() << "layers," << Utility::format("{:.2f}", 100.0f - 100.0f*inputImageArea/(TextureAtlasSize.product()*layerCountOffsets.first())) << Debug::nospace << "% area wasted"; + /* Clear the original images array to relieve the memory pressure a bit -- Basis is HUNGRY */ inputImages = {}; diff --git a/src/esp/batched_sim/MagnumRendererStandalone.cpp b/src/esp/batched_sim/MagnumRendererStandalone.cpp index 806d66c8fe..6875a8dfe6 100644 --- a/src/esp/batched_sim/MagnumRendererStandalone.cpp +++ b/src/esp/batched_sim/MagnumRendererStandalone.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -23,10 +24,28 @@ namespace Mn = Magnum; namespace esp { namespace batched_sim { +struct MagnumRendererStandaloneConfiguration::State { + /* Not picking any CUDA device by default */ + Magnum::UnsignedInt cudaDevice = ~Magnum::UnsignedInt{}; + MagnumRendererStandaloneFlags flags; +}; + +MagnumRendererStandaloneConfiguration::MagnumRendererStandaloneConfiguration(): state{Cr::InPlaceInit} {} +MagnumRendererStandaloneConfiguration::~MagnumRendererStandaloneConfiguration() = default; + +MagnumRendererStandaloneConfiguration& MagnumRendererStandaloneConfiguration::setCudaDevice(Magnum::UnsignedInt id) { + state->cudaDevice = id; + return *this; +} + +MagnumRendererStandaloneConfiguration& 
MagnumRendererStandaloneConfiguration::setFlags(MagnumRendererStandaloneFlags flags) { + state->flags = flags; + return *this; +} + struct MagnumRendererStandalone::State { Mn::Platform::WindowlessGLContext context; Mn::Platform::GLContext magnumContext{Mn::NoCreate}; - Cr::Containers::Optional renderer; Mn::GL::Renderbuffer color{Mn::NoCreate}, depth{Mn::NoCreate}; Mn::GL::Framebuffer framebuffer{Mn::NoCreate}; Mn::GL::BufferImage2D colorBuffer{Mn::NoCreate}; @@ -34,10 +53,14 @@ struct MagnumRendererStandalone::State { cudaGraphicsResource* cudaColorBuffer{}; cudaGraphicsResource* cudaDepthBuffer{}; - State(): context{Mn::Platform::WindowlessGLContext::Configuration{} - .setCudaDevice(0)} { + State(const MagnumRendererStandaloneConfiguration& configuration): context{Mn::Platform::WindowlessGLContext::Configuration{} + .setCudaDevice(configuration.state->cudaDevice) + .addFlags(configuration.state->flags & MagnumRendererStandaloneFlag::QuietLog ? Mn::Platform::WindowlessGLContext::Configuration::Flag::QuietLog : Mn::Platform::WindowlessGLContext::Configuration::Flags{}) + } { context.makeCurrent(); - magnumContext.create(); + magnumContext.create(Mn::GL::Context::Configuration{} + .addFlags(configuration.state->flags & MagnumRendererStandaloneFlag::QuietLog ? 
Mn::GL::Context::Configuration::Flag::QuietLog : Mn::GL::Context::Configuration::Flags{}) + ); color = Mn::GL::Renderbuffer{}; depth = Mn::GL::Renderbuffer{}; } @@ -51,40 +74,55 @@ struct MagnumRendererStandalone::State { } }; -MagnumRendererStandalone::MagnumRendererStandalone(const MagnumRendererConfiguration& configuration): MagnumRenderer{Mn::NoCreate}, _state{Cr::InPlaceInit} { +MagnumRendererStandalone::MagnumRendererStandalone(const MagnumRendererConfiguration& configuration, const MagnumRendererStandaloneConfiguration& standaloneConfiguration): MagnumRenderer{Mn::NoCreate}, _state{Cr::InPlaceInit, standaloneConfiguration} { /* Create the renderer only once the GL context is ready */ create(configuration); - const Mn::Vector2i size = _state->renderer->tileSize()*_state->renderer->tileCount(); + const Mn::Vector2i size = tileSize()*tileCount(); _state->color.setStorage(Mn::GL::RenderbufferFormat::RGBA8, size); _state->depth.setStorage(Mn::GL::RenderbufferFormat::DepthComponent32F, size); _state->framebuffer = Mn::GL::Framebuffer{Mn::Range2Di{{}, size}}; _state->framebuffer .attachRenderbuffer(Mn::GL::Framebuffer::ColorAttachment{0}, _state->color) - // TODO just GL::Framebuffer::DepthAttachment, this is shit .attachRenderbuffer(Mn::GL::Framebuffer::BufferAttachment::Depth, _state->depth); /* Defer the buffer initialization to the point when it's actually read into */ - _state->colorBuffer = Mn::GL::BufferImage2D{framebufferColorFormat()}; - _state->depthBuffer = Mn::GL::BufferImage2D{framebufferDepthFormat()}; + _state->colorBuffer = Mn::GL::BufferImage2D{colorFramebufferFormat()}; + _state->depthBuffer = Mn::GL::BufferImage2D{depthFramebufferFormat()}; } -MagnumRendererStandalone::~MagnumRendererStandalone() = default; +MagnumRendererStandalone::~MagnumRendererStandalone() { + /* Yup, shitty, but as we hold the GL context we can't let any GL resources + to be destructed after our destructor. Better ideas? 
*/ + MagnumRenderer::destroy(); +} -Mn::PixelFormat MagnumRendererStandalone::framebufferColorFormat() const { +Mn::PixelFormat MagnumRendererStandalone::colorFramebufferFormat() const { return Mn::PixelFormat::RGBA8Unorm; } -Mn::PixelFormat MagnumRendererStandalone::framebufferDepthFormat() const { +Mn::PixelFormat MagnumRendererStandalone::depthFramebufferFormat() const { return Mn::PixelFormat::Depth32F; } void MagnumRendererStandalone::draw() { _state->framebuffer.clear(Mn::GL::FramebufferClear::Color|Mn::GL::FramebufferClear::Depth); - _state->renderer->draw(_state->framebuffer); + MagnumRenderer::draw(_state->framebuffer); +} + +Mn::Image2D MagnumRendererStandalone::colorImage() { + /* Not using _state->framebuffer.viewport() as it's left pointing to whatever + tile was rendered last */ + return _state->framebuffer.read({{}, tileCount()*tileSize()}, colorFramebufferFormat()); +} + +Mn::Image2D MagnumRendererStandalone::depthImage() { + /* Not using _state->framebuffer.viewport() as it's left pointing to whatever + tile was rendered last */ + return _state->framebuffer.read({{}, tileCount()*tileSize()}, depthFramebufferFormat()); } -const void* MagnumRendererStandalone::cudaColorBufferDevicePointer() { +const void* MagnumRendererStandalone::colorCudaBufferDevicePointer() { /* If the CUDA buffer exists already, it's mapped from the previous call. Unmap it first so we can read into it from GL. */ if(_state->cudaColorBuffer) @@ -109,7 +147,7 @@ const void* MagnumRendererStandalone::cudaColorBufferDevicePointer() { return pointer; } -const void* MagnumRendererStandalone::cudaDepthBufferDevicePointer() { +const void* MagnumRendererStandalone::depthCudaBufferDevicePointer() { /* If the CUDA buffer exists already, it's mapped from the previous call. Unmap it first so we can read into it from GL. 
*/ if(_state->cudaDepthBuffer) diff --git a/src/esp/batched_sim/MagnumRendererStandalone.h b/src/esp/batched_sim/MagnumRendererStandalone.h index b80acc2cb7..2991de0e23 100644 --- a/src/esp/batched_sim/MagnumRendererStandalone.h +++ b/src/esp/batched_sim/MagnumRendererStandalone.h @@ -9,18 +9,38 @@ namespace esp { namespace batched_sim { +enum class MagnumRendererStandaloneFlag { + QuietLog = 1 << 0 +}; +typedef Corrade::Containers::EnumSet MagnumRendererStandaloneFlags; +CORRADE_ENUMSET_OPERATORS(MagnumRendererStandaloneFlags) + +struct MagnumRendererStandaloneConfiguration { + explicit MagnumRendererStandaloneConfiguration(); + ~MagnumRendererStandaloneConfiguration(); + + MagnumRendererStandaloneConfiguration& setCudaDevice(Magnum::UnsignedInt id); + MagnumRendererStandaloneConfiguration& setFlags(MagnumRendererStandaloneFlags flags); + + struct State; + Corrade::Containers::Pointer state; +}; + class MagnumRendererStandalone: public MagnumRenderer { public: - explicit MagnumRendererStandalone(const MagnumRendererConfiguration& configuration); + explicit MagnumRendererStandalone(const MagnumRendererConfiguration& configuration, const MagnumRendererStandaloneConfiguration& standaloneConfiguration); ~MagnumRendererStandalone(); - Magnum::PixelFormat framebufferColorFormat() const; - Magnum::PixelFormat framebufferDepthFormat() const; + Magnum::PixelFormat colorFramebufferFormat() const; + Magnum::PixelFormat depthFramebufferFormat() const; void draw(); - const void* cudaColorBufferDevicePointer(); - const void* cudaDepthBufferDevicePointer(); + Magnum::Image2D colorImage(); + Magnum::Image2D depthImage(); + + const void* colorCudaBufferDevicePointer(); + const void* depthCudaBufferDevicePointer(); private: struct State; diff --git a/src/esp/batched_sim/README.md b/src/esp/batched_sim/README.md index 192425c1c8..b1b4ab644d 100644 --- a/src/esp/batched_sim/README.md +++ b/src/esp/batched_sim/README.md @@ -23,8 +23,18 @@ magnum-imageconverter -D3 --map -C 
BasisImageConverter \ yay.0.ktx2 yay.0.basis.ktx2 ``` -Takes about 25 minutes and 12 GB RAM on a 8-core laptop from 2018. Once done, -rename the output back to `yay.0.ktx2`. Now the input glTF is Basis-compressed. +Takes about 25 minutes and 12 GB RAM on a 8-core laptop from 2018. That might +be extremely unbearable for quick iterations, so alternatively you can encode +directly to DXT1 which will take about 8 seconds and 1.7 GB RAM: + +```sh +magnum-imageconverter -D3 --map -C StbDxtImageConverter \ + -c highQuality \ + yay.0.ktx2 yay.0.dxt.ktx2 +``` + +Once done, rename the output back to `yay.0.ktx2`. Now the input glTF is +Basis- or DXT-compressed. # Running the demo diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 5d713895a8..253cd9d944 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,5 +1,5 @@ find_package(Magnum REQUIRED DebugTools AnyImageConverter) -find_package(MagnumPlugins REQUIRED StbImageImporter) +find_package(MagnumPlugins REQUIRED StbImageImporter OPTIONAL_COMPONENTS GltfSceneConverter) # needed for BatchedSimulator set(CMAKE_CXX_STANDARD 17) @@ -15,6 +15,12 @@ macro(TEST TEST_NAME) add_test(${TEST_NAME} ${TEST_NAME}) endmacro(TEST) +# If the GltfSceneConverter is present, the new Magnum SceneConverter APIs are +# as well. 
Which enables a data-generating test case in MagnumRendererTest +if(MagnumPlugins_GltfSceneConverter_FOUND) + set(HAS_MAGNUM_GLTFSCENECONVERTER ON) +endif() + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/configure.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/configure.h ) @@ -59,6 +65,10 @@ target_include_directories(PhysicsTest PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) test(GfxReplayTest assets gfx sim) target_include_directories(GfxReplayTest PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +corrade_add_test(MagnumRendererTest MagnumRendererTest.cpp + LIBRARIES batched_sim Magnum::DebugTools) +target_include_directories(MagnumRendererTest PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + test(ResourceManagerTest assets) target_include_directories(ResourceManagerTest PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/tests/configure.h.cmake b/src/tests/configure.h.cmake index aecb7f8a85..4219e03205 100644 --- a/src/tests/configure.h.cmake +++ b/src/tests/configure.h.cmake @@ -9,3 +9,7 @@ #define FILE_THAT_EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/IOTest.cpp" #cmakedefine BPS_IMPORTER_PLUGIN_FILENAME "${BPS_IMPORTER_PLUGIN_FILENAME}" + +#define MAGNUMRENDERERTEST_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}" + +#cmakedefine HAS_MAGNUM_GLTFSCENECONVERTER