From ac8238c80041dbf9723d1045c7e03ab43bccaf7c Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 5 Sep 2025 11:04:19 -0700 Subject: [PATCH 1/6] Improvements to integer typing and overflow protection. - Use the sanisizer library to avoid integer overflow, mostly during vector allocations but also in some additions and casts from float. - Slap const'ness on as many variables as possible, for safety. --- CMakeLists.txt | 3 +- cmake/Config.cmake.in | 1 + extern/CMakeLists.txt | 7 + include/umappp/Status.hpp | 14 +- include/umappp/combine_neighbor_sets.hpp | 13 +- include/umappp/find_ab.hpp | 36 ++--- include/umappp/initialize.hpp | 57 ++++---- include/umappp/neighbor_similarities.hpp | 46 ++++--- include/umappp/optimize_layout.hpp | 164 +++++++++++------------ include/umappp/parallelize.hpp | 2 +- include/umappp/spectral_init.hpp | 47 ++++--- 11 files changed, 201 insertions(+), 189 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 77149c5..eda873c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,10 +25,11 @@ else() find_package(ltla_aarand CONFIG REQUIRED) find_package(ltla_irlba CONFIG REQUIRED) find_package(ltla_subpar 0.3.1 CONFIG REQUIRED) + find_package(ltla_sanisizer 0.1.3 CONFIG REQUIRED) find_package(knncolle_knncolle 2.3.0 CONFIG REQUIRED) endif() -target_link_libraries(umappp INTERFACE ltla::aarand ltla::irlba ltla::subpar knncolle::knncolle) +target_link_libraries(umappp INTERFACE ltla::aarand ltla::irlba ltla::subpar ltla::sanisizer knncolle::knncolle) # Tests if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 4df2b2f..9772c32 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -3,6 +3,7 @@ include(CMakeFindDependencyMacro) find_dependency(ltla_aarand CONFIG REQUIRED) find_dependency(ltla_subpar 0.3.1 CONFIG REQUIRED) +find_dependency(ltla_sanisizer 0.1.3 CONFIG REQUIRED) find_dependency(ltla_irlba CONFIG REQUIRED) find_dependency(knncolle_knncolle 2.3.0 CONFIG REQUIRED) diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index 5ccd313..890dca7 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -12,6 +12,12 @@ FetchContent_Declare( GIT_TAG master # ^0.3.1 ) +FetchContent_Declare( + sanisizer + GIT_REPOSITORY https://github.com/LTLA/sanisizer + GIT_TAG master # ^0.1.3 +) + FetchContent_Declare( knncolle GIT_REPOSITORY https://github.com/knncolle/knncolle @@ -26,5 +32,6 @@ FetchContent_Declare( FetchContent_MakeAvailable(aarand) FetchContent_MakeAvailable(subpar) +FetchContent_MakeAvailable(sanisizer) FetchContent_MakeAvailable(knncolle) FetchContent_MakeAvailable(irlba) diff --git a/include/umappp/Status.hpp b/include/umappp/Status.hpp index d503c67..5db8485 100644 --- a/include/umappp/Status.hpp +++ b/include/umappp/Status.hpp @@ -1,12 +1,14 @@ #ifndef UMAPPP_STATUS_HPP #define UMAPPP_STATUS_HPP -#include "Options.hpp" -#include "optimize_layout.hpp" - #include #include +#include "sanisizer/sanisizer.hpp" + +#include "Options.hpp" +#include "optimize_layout.hpp" + /** * @file Status.hpp * @brief Status of the UMAP algorithm. 
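To make the overflow-protection changes easier to follow, here is a small standalone sketch of the sanisizer idioms this patch leans on. The exact template signatures are assumptions inferred from how the calls are used in these diffs (create<Container>(n) validating the length against the container's size type, product<Size>/product_unsafe<Size> as checked/unchecked multiplication), not a substitute for the library's documentation.

#include <cstddef>
#include <vector>
#include "sanisizer/sanisizer.hpp"

// Sketch only: allocate a flattened num_obs-by-num_dim embedding and index into it
// without risking wraparound in the intermediate arithmetic.
std::vector<double> sketch_allocate_embedding(const int num_obs, const std::size_t num_dim) {
    // Checked product + checked allocation: throws instead of silently wrapping around.
    return sanisizer::create<std::vector<double> >(sanisizer::product<std::size_t>(num_obs, num_dim));
}

double* sketch_observation_start(std::vector<double>& embedding, const int i, const std::size_t num_dim) {
    // Unchecked product, acceptable because the full-sized product above already passed.
    return embedding.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim);
}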
@@ -27,7 +29,7 @@ class Status { /** * @cond */ - Status(internal::EpochData epochs, Options options, std::size_t num_dim, Float_* embedding) : + Status(internal::EpochData epochs, Options options, const std::size_t num_dim, Float_* const embedding) : my_epochs(std::move(epochs)), my_options(std::move(options)), my_engine(my_options.seed), @@ -84,9 +86,9 @@ class Status { * The contents of the new array in `ptr` should be the same as the array that it replaces, as `run()` will continue the iteration from the coordinates inside the array. * This is enforced by default when `copy = true`, but if the supplied `ptr` already contains a copy, the caller may set `copy = false` to avoid extra work */ - void set_embedding(Float_* ptr, bool copy = true) { + void set_embedding(Float_* const ptr, const bool copy = true) { if (copy) { - std::size_t n = num_dimensions() * static_cast(num_observations()); // cast to avoid overflow. + const std::size_t n = sanisizer::product_unsafe(num_dimensions(), num_observations()); std::copy_n(my_embedding, n, ptr); } my_embedding = ptr; diff --git a/include/umappp/combine_neighbor_sets.hpp b/include/umappp/combine_neighbor_sets.hpp index 42e0430..7d3ade3 100644 --- a/include/umappp/combine_neighbor_sets.hpp +++ b/include/umappp/combine_neighbor_sets.hpp @@ -5,6 +5,8 @@ #include #include +#include "sanisizer/sanisizer.hpp" + #include "NeighborList.hpp" namespace umappp { @@ -12,9 +14,10 @@ namespace umappp { namespace internal { template -void combine_neighbor_sets(NeighborList& x, Float_ mix_ratio) { - Index_ num_obs = x.size(); - std::vector last(num_obs), original(num_obs); +void combine_neighbor_sets(NeighborList& x, const Float_ mix_ratio) { + const Index_ num_obs = x.size(); // assume that Index_ is large enough to store the number of observations. + auto last = sanisizer::create >(num_obs); + auto original = sanisizer::create >(num_obs); for (Index_ i = 0; i < num_obs; ++i) { auto& current = x[i]; @@ -22,7 +25,7 @@ void combine_neighbor_sets(NeighborList& x, Float_ mix_ratio) { original[i] = x[i].size(); } - for (Index_ i = 0; i < num_obs; ++i ){ + for (Index_ i = 0; i < num_obs; ++i) { auto& current = x[i]; // Looping through the neighbors and searching for self in each @@ -42,7 +45,7 @@ void combine_neighbor_sets(NeighborList& x, Float_ mix_ratio) { // previous iteration of the outermost loop where i and y.first // swap values. So we skip this to avoid adding it twice. if (i < y.first) { - Float_ product = y.second * target[curlast].second; + const Float_ product = y.second * target[curlast].second; Float_ prob_final; if (mix_ratio == 1) { diff --git a/include/umappp/find_ab.hpp b/include/umappp/find_ab.hpp index 7ae75d1..c8ade26 100644 --- a/include/umappp/find_ab.hpp +++ b/include/umappp/find_ab.hpp @@ -4,6 +4,8 @@ #include #include +#include "sanisizer/sanisizer.hpp" + namespace umappp { namespace internal { @@ -42,13 +44,15 @@ namespace internal { */ template -std::pair find_ab(Float_ spread, Float_ min_dist) { - constexpr Float_ grid = 300; +std::pair find_ab(const Float_ spread, const Float_ min_dist) { + constexpr std::size_t grid = 300; + auto grid_x = sanisizer::create >(grid); + auto grid_y = sanisizer::create >(grid); + auto log_x = sanisizer::create >(grid); // Compute the x and y coordinates of the expected distance curve. 
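For orientation, these are the two curves that find_ab() matches against each other, restated outside the template machinery; the optimizer itself (the derivative accumulation further down) is only summarized here as a Gauss-Newton-style least-squares update.

#include <cmath>

// Reference curve sampled into grid_y[]: flat at 1 up to min_dist, then exponential decay.
inline double reference_curve(const double x, const double spread, const double min_dist) {
    return x <= min_dist ? 1.0 : std::exp(-(x - min_dist) / spread);
}

// Smooth approximation fitted by find_ab(): the same family as fit_y[] = 1 / (1 + A * x^(2B)).
inline double fitted_curve(const double x, const double a, const double b) {
    return 1.0 / (1.0 + a * std::pow(x, 2.0 * b));
}

// find_ab() chooses (a, b) to minimize the sum of squared differences between these two
// curves over 300 grid points on (0, 3 * spread], starting from a closed-form guess at x_half.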
- std::vector grid_x(grid), grid_y(grid), log_x(grid); const Float_ delta = spread * 3 / grid; - for (int g = 0; g < grid; ++g) { + for (std::size_t g = 0; g < grid; ++g) { grid_x[g] = (g + 1) * delta; // +1 to avoid meaningless least squares result at x = 0, where both curves have y = 1 (and also the derivative w.r.t. b is not defined). log_x[g] = std::log(grid_x[g]); grid_y[g] = (grid_x[g] <= min_dist ? 1 : std::exp(- (grid_x[g] - min_dist) / spread)); @@ -59,25 +63,23 @@ std::pair find_ab(Float_ spread, Float_ min_dist) { // We use 'limit = 0.5' because that's where most interesting stuff // happens, given that the curve is bounded between 0 and 1 on the y-axis. constexpr Float_ limit = 0.5; - Float_ x_half = std::log(limit) * -spread + min_dist; // guaranteed > 0, as log(limit) is negative. - Float_ d_half = limit / -spread; // first derivative at x_half. + const Float_ x_half = std::log(limit) * -spread + min_dist; // guaranteed > 0, as log(limit) is negative. + const Float_ d_half = limit / -spread; // first derivative at x_half. Float_ b = - d_half * x_half / (1 / limit - 1) / (2 * limit * limit); Float_ a = (1 / limit - 1) / std::pow(x_half, 2 * b); - std::vector fit_y(grid), xpow(grid), grid_resid(grid); - auto compute_ss = [&](Float_ A, Float_ B) -> Float_ { - for (int g = 0; g < grid; ++g) { + auto fit_y = sanisizer::create >(grid); + auto xpow = sanisizer::create >(grid); + auto grid_resid = sanisizer::create >(grid); + + auto compute_ss = [&](const Float_ A, const Float_ B) -> Float_ { + Float_ ss = 0; + for (std::size_t g = 0; g < grid; ++g) { xpow[g] = std::pow(grid_x[g], 2 * B); fit_y[g] = 1 / (1 + A * xpow[g]); grid_resid[g] = grid_y[g] - fit_y[g]; + ss += grid_resid[g] * grid_resid[g]; } - - Float_ ss = 0; - for (int g = 0; g < grid; ++g) { - auto r = grid_resid[g]; - ss += r * r; - } - return ss; }; Float_ ss = compute_ss(a, b); @@ -98,7 +100,7 @@ std::pair find_ab(Float_ spread, Float_ min_dist) { */ Float_ da2 = 0, db2 = 0, dadb = 0, da_resid = 0, db_resid = 0; - for (int g = 0; g < grid; ++g) { + for (std::size_t g = 0; g < grid; ++g) { const Float_ x2b = xpow[g]; // set by the last compute_ss() call. const Float_ oy = fit_y[g]; // ditto const Float_ resid = grid_resid[g]; // ditto diff --git a/include/umappp/initialize.hpp b/include/umappp/initialize.hpp index fdb1c0a..15a600f 100644 --- a/include/umappp/initialize.hpp +++ b/include/umappp/initialize.hpp @@ -26,26 +26,25 @@ namespace umappp { namespace internal { template -int choose_num_epochs(int num_epochs, Index_ size) { - if (num_epochs < 0) { - // Choosing the number of epochs. We use a simple formula to decrease - // the number of epochs with increasing size, with the aim being that - // the 'extra work' beyond the minimal 200 epochs should be the same - // regardless of the numbe of observations. Given one calculation per - // observation per epoch, this amounts to 300 * 10000 calculations at - // the lower bound, so we simply choose a number of epochs that - // equalizes the number of calculations for any number of observations. - if (num_epochs < 0) { - constexpr Index_ limit = 10000; - const int minimal = 200, maximal = 300; - if (size <= limit) { - num_epochs = minimal + maximal; - } else { - num_epochs = minimal + static_cast(std::ceil(maximal * static_cast(limit) / static_cast(size))); - } - } +int choose_num_epochs(const int num_epochs, const Index_ size) { + if (num_epochs >= 0) { + return num_epochs; + } + + // Choosing the number of epochs. 
We use a simple formula to decrease + // the number of epochs with increasing size, with the aim being that + // the 'extra work' beyond the minimal 200 epochs should be the same + // regardless of the number of observations. Given one calculation per + // observation per epoch, this amounts to 300 * 10000 calculations at + // the lower bound, so we simply choose a number of epochs that + // equalizes the number of calculations for any number of observations. + constexpr Index_ limit = 10000; + const int minimal = 200, maximal = 300; + if (size <= limit) { + return minimal + maximal; + } else { + return minimal + static_cast(std::ceil(maximal * static_cast(limit) / static_cast(size))); } - return num_epochs; } } @@ -72,7 +71,7 @@ int choose_num_epochs(int num_epochs, Index_ size) { * Further calls to `Status::run()` will update the embeddings in `embedding`. */ template -Status initialize(NeighborList x, std::size_t num_dim, Float_* embedding, Options options) { +Status initialize(NeighborList x, const std::size_t num_dim, Float_* const embedding, Options options) { internal::NeighborSimilaritiesOptions nsopt; nsopt.local_connectivity = options.local_connectivity; nsopt.bandwidth = options.bandwidth; @@ -83,7 +82,7 @@ Status initialize(NeighborList x, std::size_t nu // Choosing the manner of initialization. if (options.initialize == InitializeMethod::SPECTRAL || options.initialize == InitializeMethod::SPECTRAL_ONLY) { - bool attempt = internal::spectral_init(x, num_dim, embedding, options.num_threads); + const bool attempt = internal::spectral_init(x, num_dim, embedding, options.num_threads); if (!attempt && options.initialize == InitializeMethod::SPECTRAL) { internal::random_init(x.size(), num_dim, embedding); } @@ -93,7 +92,7 @@ Status initialize(NeighborList x, std::size_t nu // Finding a good a/b pair. if (options.a <= 0 || options.b <= 0) { - auto found = internal::find_ab(options.spread, options.min_dist); + const auto found = internal::find_ab(options.spread, options.min_dist); options.a = found.first; options.b = found.second; } @@ -127,7 +126,7 @@ Status initialize(NeighborList x, std::size_t nu * Further calls to `Status::run()` will update the embeddings in `embedding`. 
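As a concrete reading of the epoch heuristic in internal::choose_num_epochs() above, the same formula with simplified types and a few example inputs:

#include <cmath>

inline int sketch_choose_num_epochs(const double num_obs) {
    constexpr double limit = 10000;
    constexpr int minimal = 200, maximal = 300;
    if (num_obs <= limit) {
        return minimal + maximal;
    }
    // Work beyond the 200-epoch floor stays near maximal * limit edge updates in total.
    return minimal + static_cast<int>(std::ceil(maximal * limit / num_obs));
}

// sketch_choose_num_epochs(5000)   == 500
// sketch_choose_num_epochs(20000)  == 350
// sketch_choose_num_epochs(100000) == 230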
*/ template -Status initialize(const knncolle::Prebuilt& prebuilt, std::size_t num_dim, Float_* embedding, Options options) { +Status initialize(const knncolle::Prebuilt& prebuilt, const std::size_t num_dim, Float_* const embedding, Options options) { auto output = knncolle::find_nearest_neighbors(prebuilt, options.num_neighbors, options.num_threads); return initialize(std::move(output), num_dim, embedding, std::move(options)); } @@ -156,15 +155,15 @@ Status initialize(const knncolle::Prebuilt > Status initialize( - std::size_t data_dim, - std::size_t num_obs, - const Float_* data, + const std::size_t data_dim, + const Index_ num_obs, + const Float_* const data, const knncolle::Builder& builder, - std::size_t num_dim, - Float_* embedding, + const std::size_t num_dim, + Float_* const embedding, Options options) { - auto prebuilt = builder.build_unique(knncolle::SimpleMatrix(data_dim, num_obs, data)); + const auto prebuilt = builder.build_unique(knncolle::SimpleMatrix(data_dim, num_obs, data)); return initialize(*prebuilt, num_dim, embedding, std::move(options)); } diff --git a/include/umappp/neighbor_similarities.hpp b/include/umappp/neighbor_similarities.hpp index 556dc12..6af4adc 100644 --- a/include/umappp/neighbor_similarities.hpp +++ b/include/umappp/neighbor_similarities.hpp @@ -7,6 +7,8 @@ #include #include +#include "sanisizer/sanisizer.hpp" + #include "NeighborList.hpp" #include "parallelize.hpp" @@ -51,22 +53,25 @@ false #endif , typename Index_, typename Float_> void neighbor_similarities(NeighborList& x, const NeighborSimilaritiesOptions& options) { - Index_ npoints = x.size(); - const int raw_connect_index = std::floor(options.local_connectivity); + // 'raw_connect_index' is the 1-based index of the first non-identical neighbor that is assumed to always be connected. + // This can also be fractional in which case the threshold distance is defined by interpolation. + const Index_ raw_connect_index = sanisizer::from_float(options.local_connectivity); const Float_ interpolation = options.local_connectivity - raw_connect_index; - parallelize(options.num_threads, npoints, [&](int, Index_ start, Index_ length) -> void { + const Index_ npoints = x.size(); + parallelize(options.num_threads, npoints, [&](const int, const Index_ start, const Index_ length) -> void { std::vector active_delta; for (Index_ i = start, end = start + length; i < end; ++i) { auto& all_neighbors = x[i]; - const int num_neighbors = all_neighbors.size(); + const Index_ num_neighbors = all_neighbors.size(); if (num_neighbors == 0) { continue; } - // Define rho as the distance to the nearest (non-identical) neighbor at 'local_connectivity', possibly with interpolation. - int num_zero = 0; + // Define 'rho' as the distance to the 'raw_connect_index'-th non-identical neighbor. + // In other words, the actual index in the array is 'num_zero + raw_connect_index - 1' (bacause it's 1-based). + Index_ num_zero = 0; for (const auto& f : all_neighbors) { if (f.second) { break; @@ -74,28 +79,25 @@ void neighbor_similarities(NeighborList& x, const NeighborSimila ++num_zero; } - const int connect_index = num_zero + raw_connect_index; - if (num_neighbors <= connect_index) { - // When this happens, 'rho' is just theoretically set to the - // maximum distance. In such cases, the weights are always just - // set to 1 in the remaining code, because no distance can be - // greater than 'rho'. If that's the case, we might as well - // save some time and compute it here. 
- for (int k = 0; k < num_neighbors; ++k) { + if (sanisizer::is_less_than_or_equal(num_neighbors - num_zero, raw_connect_index)) { + // When this happens, we set 'rho' to the maximum distance, because we can't define it within range. + // In such cases, the weights are always just set to 1 in the remaining code, because no distance can be + // greater than 'rho'. If that's the case, we might as well save some time and compute it here. + for (Index_ k = 0; k < num_neighbors; ++k) { all_neighbors[k].second = 1; } continue; } - - const Float_ lower = (connect_index > 0 ? all_neighbors[connect_index - 1].second : 0); // 'local_connectivity' (and thus 'connect_index') is 1-based, hence the -1. + const Index_ connect_index = num_zero + raw_connect_index; // guaranteed to fit in an Index_, as this should be less than 'num_neighbors'. + const Float_ lower = (connect_index > 0 ? all_neighbors[connect_index - 1].second : static_cast(0)); // 'connect_index' is 1-based, hence the subtraction. const Float_ upper = all_neighbors[connect_index].second; const Float_ rho = lower + interpolation * (upper - lower); // Pre-computing the difference between each distance and rho to reduce work in the inner iterations. active_delta.clear(); Float_ num_le_rho = num_zero; - for (int k = num_zero; k < num_neighbors; ++k) { - auto curdist = all_neighbors[k].second; + for (Index_ k = num_zero; k < num_neighbors; ++k) { + const auto curdist = all_neighbors[k].second; if (curdist > rho) { active_delta.push_back(curdist - rho); } else { @@ -105,7 +107,7 @@ void neighbor_similarities(NeighborList& x, const NeighborSimila if (active_delta.empty()) { // Same early-return logic as above. - for (int k = 0; k < num_neighbors; ++k) { + for (Index_ k = 0; k < num_neighbors; ++k) { all_neighbors[k].second = 1; } continue; @@ -135,8 +137,8 @@ void neighbor_similarities(NeighborList& x, const NeighborSimila // to the bounded nature of the Newton calculation and the // underflow-safe nature of the binary search. 
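The surrounding loop is easier to read with the calibration problem stated on its own: given the clipped distances delta[k] = max(0, d_k - rho), find sigma so that the total membership sum hits a fixed target (log2 of the neighbor count in the original UMAP formulation; the exact target and bandwidth handling here are the library's and are not shown in this hunk). A plain bisection is enough to illustrate the idea that the Newton step accelerates.

#include <cmath>
#include <vector>

inline double sketch_find_sigma(const std::vector<double>& delta, const double target) {
    double lo = 0, hi = 1e9;
    for (int it = 0; it < 64; ++it) {
        const double sigma = (lo + hi) / 2;
        double observed = 0;
        for (const auto d : delta) {
            observed += std::exp(-d / sigma); // entries with d == 0 contribute exactly 1.
        }
        (observed > target ? hi : lo) = sigma; // the sum increases monotonically with sigma.
    }
    return (lo + hi) / 2;
}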
const Float_ invsigma = 1 / sigma, invsigma2 = invsigma * invsigma; - for (auto d : active_delta) { - Float_ current = std::exp(- d * invsigma); + for (const auto d : active_delta) { + const Float_ current = std::exp(- d * invsigma); observed += current; deriv += d * current * invsigma2; } @@ -192,7 +194,7 @@ void neighbor_similarities(NeighborList& x, const NeighborSimila sigma = std::max(options.min_k_dist_scale * mean_dist, sigma); const Float_ invsigma = 1 / sigma; - for (int k = 0; k < num_neighbors; ++k) { + for (Index_ k = 0; k < num_neighbors; ++k) { Float_& dist = all_neighbors[k].second; if (dist > rho) { dist = std::exp(-(dist - rho) * invsigma); diff --git a/include/umappp/optimize_layout.hpp b/include/umappp/optimize_layout.hpp index 10c777c..6016eb0 100644 --- a/include/umappp/optimize_layout.hpp +++ b/include/umappp/optimize_layout.hpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION #include @@ -17,16 +17,23 @@ #include #endif -#include "NeighborList.hpp" #include "aarand/aarand.hpp" +#include "sanisizer/sanisizer.hpp" + +#include "NeighborList.hpp" namespace umappp { namespace internal { +template +std::remove_cv_t > I(const Input_ x) { + return x; +} + template struct EpochData { - EpochData(Index_ nobs) : head(nobs) {} + EpochData(const Index_ nobs) : head(sanisizer::sum(nobs, 1)) {} int total_epochs; int current_epoch = 0; @@ -41,33 +48,32 @@ struct EpochData { }; template -EpochData similarities_to_epochs(const NeighborList& p, int num_epochs, Float_ negative_sample_rate) { +EpochData similarities_to_epochs(const NeighborList& p, const int num_epochs, const Float_ negative_sample_rate) { Float_ maxed = 0; std::size_t count = 0; for (const auto& x : p) { - count += x.size(); + count = sanisizer::sum(count, x.size()); for (const auto& y : x) { maxed = std::max(maxed, y.second); } } - EpochData output(p.size()); + const Index_ num_obs = p.size(); // Index_ should be able to hold the number of observations. + EpochData output(num_obs); output.total_epochs = num_epochs; output.tail.reserve(count); output.epochs_per_sample.reserve(count); const Float_ limit = maxed / num_epochs; - std::size_t last = 0; - for (Index_ i = 0, num_obs = p.size(); i < num_obs; ++i) { + for (Index_ i = 0; i < num_obs; ++i) { const auto& x = p[i]; for (const auto& y : x) { if (y.second >= limit) { output.tail.push_back(y.first); output.epochs_per_sample.push_back(maxed / y.second); - ++last; } } - output.head[i] = last; + output.head[i + 1] = output.tail.size(); } // Filling in some epoch-related running statistics. 
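The loop above, together with the head(nobs + 1) constructor change and the later head.size() - 1 fixes in patches 3, 5 and 6, turns 'head'/'tail' into a standard CSR-style layout. A small sketch of how the retained edges are then traversed, with the types simplified to concrete ones:

#include <cstddef>
#include <vector>

// head holds nobs + 1 cumulative offsets into tail: head[0] == 0, head.back() == tail.size(),
// and the edges of observation i are tail[head[i]] .. tail[head[i + 1] - 1].
inline void sketch_visit_edges(const std::vector<std::size_t>& head, const std::vector<int>& tail) {
    const std::size_t num_obs = head.size() - 1;
    for (std::size_t i = 0; i < num_obs; ++i) {
        for (std::size_t j = head[i]; j < head[i + 1]; ++j) {
            const int neighbor = tail[j]; // this edge survived the epochs_per_sample filter
            (void) neighbor;              // placeholder for the attractive/repulsive updates
        }
    }
}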
@@ -78,11 +84,14 @@ EpochData similarities_to_epochs(const NeighborList(static_cast(num_epochs) * negative_sample_rate); + return output; } template -Float_ quick_squared_distance(const Float_* left, const Float_* right, std::size_t num_dim) { +Float_ quick_squared_distance(const Float_* const left, const Float_* const right, const std::size_t num_dim) { Float_ dist2 = 0; for (std::size_t d = 0; d < num_dim; ++d) { Float_ delta = (left[d] - right[d]); @@ -93,27 +102,24 @@ Float_ quick_squared_distance(const Float_* left, const Float_* right, std::size } template -Float_ clamp(Float_ input) { +Float_ clamp(const Float_ input) { constexpr Float_ min_gradient = -4; constexpr Float_ max_gradient = 4; return std::min(std::max(input, min_gradient), max_gradient); } template -unsigned long long compute_num_neg_samples(std::size_t j, Float_ epoch, const EpochData& setup) { +int compute_num_neg_samples(const std::size_t j, const Float_ epoch, const EpochData& setup) { // Remember that 'epochs_per_negative_sample' is defined as 'epochs_per_sample[j] / negative_sample_rate'. // We just use it inline below rather than defining a new variable and suffering floating-point round-off. Float_ num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) * setup.negative_sample_rate / setup.epochs_per_sample[j]; // i.e., 1/epochs_per_negative_sample. - // Avoiding problems with overflow. We return an unsigned long long to guarantee at least 64 bits, - // which should be more than enough to hold whatever num_neg_samples is. - constexpr auto max_value = std::numeric_limits::max(); - if (num_neg_samples <= static_cast(max_value)) { - return num_neg_samples; - } else { - return max_value; - } + // Maximum value of 'num_neg_samples' should be 'num_epochs * negative_sample_rate', because: + // - '(epoch - setup.epoch_of_next_negative_sample[j])' has a maximum value of 'num_epochs'. + // - 'setup.epochs_per_sample' has a minimum value of 1, when 'y.second == maxed'. + // So we just have to check that the cast is safe for the maximum value in initialize(). + return num_neg_samples; } /***************************************************** @@ -133,50 +139,47 @@ void optimize_layout( int epoch_limit ) { auto& n = setup.current_epoch; - auto num_epochs = setup.total_epochs; - auto limit_epochs = num_epochs; - if (epoch_limit> 0) { - limit_epochs = std::min(epoch_limit, num_epochs); - } + const auto num_epochs = setup.total_epochs; + const auto limit_epochs = (epoch_limit > 0 ? std::min(epoch_limit, num_epochs) : num_epochs); for (; n < limit_epochs; ++n) { const Float_ epoch = n; const Float_ alpha = initial_alpha * (1.0 - epoch / num_epochs); - Index_ num_obs = setup.head.size(); + const Index_ num_obs = setup.head.size(); for (Index_ i = 0; i < num_obs; ++i) { - std::size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i]; - Float_* left = embedding + static_cast(i) * num_dim; // cast to size_t to avoid overflow. + const auto start = setup.head[i], end = setup.head[i + 1]; + const auto left = embedding + sanisizer::product_unsafe(i, num_dim); - for (std::size_t j = start; j < end; ++j) { + for (auto j = start; j < end; ++j) { if (setup.epoch_of_next_sample[j] > epoch) { continue; } { - Float_* right = embedding + static_cast(setup.tail[j]) * num_dim; // again, casting to avoid overflow. 
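The bound argued in the comment above can be checked once, up front, in ordinary C++; per that comment, the patch performs the equivalent validation with sanisizer inside initialize(), so the following is only a restatement of the reasoning.

#include <limits>
#include <stdexcept>

inline void sketch_check_neg_sample_cap(const int num_epochs, const double negative_sample_rate) {
    // Worst case for compute_num_neg_samples(): every epoch has elapsed and epochs_per_sample[j] == 1.
    const double worst_case = static_cast<double>(num_epochs) * negative_sample_rate;
    if (worst_case > static_cast<double>(std::numeric_limits<int>::max())) {
        throw std::runtime_error("negative sample count could overflow an int");
    }
}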
- Float_ dist2 = quick_squared_distance(left, right, num_dim); + const auto right = embedding + sanisizer::product_unsafe(setup.tail[j], num_dim); + const Float_ dist2 = quick_squared_distance(left, right, num_dim); const Float_ pd2b = std::pow(dist2, b); const Float_ grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0)); for (std::size_t d = 0; d < num_dim; ++d) { auto& l = left[d]; auto& r = right[d]; - Float_ gradient = alpha * clamp(grad_coef * (l - r)); + const Float_ gradient = alpha * clamp(grad_coef * (l - r)); l += gradient; r -= gradient; } } - auto num_neg_samples = compute_num_neg_samples(j, epoch, setup); - for (decltype(num_neg_samples) p = 0; p < num_neg_samples; ++p) { - auto sampled = aarand::discrete_uniform(rng, num_obs); + const auto num_neg_samples = compute_num_neg_samples(j, epoch, setup); + for (decltype(I(num_neg_samples)) p = 0; p < num_neg_samples; ++p) { + const auto sampled = aarand::discrete_uniform(rng, num_obs); if (sampled == i) { continue; } - const Float_* right = embedding + static_cast(sampled) * num_dim; // again, casting to avoid overflow. - Float_ dist2 = quick_squared_distance(left, right, num_dim); + const auto right = embedding + sanisizer::product_unsafe(sampled, num_dim); + const Float_ dist2 = quick_squared_distance(left, right, num_dim); const Float_ grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0)); for (std::size_t d = 0; d < num_dim; ++d) { @@ -202,13 +205,14 @@ void optimize_layout( *****************************************************/ #ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION -constexpr unsigned long long skip_ns_sentinel = static_cast(-1); +constexpr int skip_ns_sentinel = -1; template struct BusyWaiterInput { std::vector negative_sample_selections; - std::vector negative_sample_count; + std::vector negative_sample_count; Index_ observation; + std::size_t tail_offset; Float_ alpha; }; @@ -229,45 +233,42 @@ void optimize_single_observation(const BusyWaiterInput& input, B // We don't bother doing this for the neighbors, though, as it's // tedious to make sure that the modified values are available during negative sampling. // (This isn't a problem for the self, as the self cannot be its own negative sample.) - { - const Float_* left = state.embedding + static_cast(input.observation) * state.num_dim; // cast to avoid overflow. - std::copy_n(left, state.num_dim, state.self_modified.data()); - } + const auto left = state.embedding + sanisizer::product_unsafe(input.observation, state.num_dim); + std::copy_n(left, state.num_dim, state.self_modified.data()); - unsigned long long position = 0; - auto nsIt = input.negative_sample_count.begin(); - std::size_t start = (input.observation == 0 ? 0 : state.setup->head[input.observation-1]), end = state.setup->head[input.observation]; - - for (std::size_t j = start; j < end; ++j, ++nsIt) { - auto number = *nsIt; + decltype(I(input.negative_sample_selections.size())) position = 0; + const auto num_neighbors = input.negative_sample_count.size(); + for (decltype(I(num_neighbors)) n = 0; n < num_neighbors; ++n) { + const auto number = input.negative_sample_count[n]; if (number == skip_ns_sentinel) { continue; } { - Float_* left = state.self_modified.data(); - Float_* right = state.embedding + static_cast(state.setup->tail[j]) * state.num_dim; // cast to avoid overflow. 
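Both branches of the optimizer reduce to the same two scalar coefficients, restated here outside the templates; each coordinate difference is multiplied by the coefficient, clamped to [-4, 4], and scaled by the learning rate alpha.

#include <cmath>

// Attractive term for an observed edge at squared distance dist2.
inline double attractive_coef(const double dist2, const double a, const double b) {
    const double pd2b = std::pow(dist2, b);
    return (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
}

// Repulsive term for a negatively sampled pair; gamma weights the repulsion.
inline double repulsive_coef(const double dist2, const double a, const double b, const double gamma) {
    return 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
}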
+ const auto left = state.self_modified.data(); + const auto j = sanisizer::sum_unsafe(n, input.tail_offset); + const auto right = state.embedding + sanisizer::product_unsafe(state.setup->tail[j], state.num_dim); - Float_ dist2 = quick_squared_distance(left, right, state.num_dim); + const Float_ dist2 = quick_squared_distance(left, right, state.num_dim); const Float_ pd2b = std::pow(dist2, state.b); const Float_ grad_coef = (-2 * state.a * state.b * pd2b) / (dist2 * (state.a * pd2b + 1.0)); for (std::size_t d = 0; d < state.num_dim; ++d) { auto& l = left[d]; auto& r = right[d]; - Float_ gradient = input.alpha * clamp(grad_coef * (l - r)); + const Float_ gradient = input.alpha * clamp(grad_coef * (l - r)); l += gradient; r -= gradient; } } - auto original = position; + auto s = position; position += number; - for (decltype(position) s = original; s < position; ++s) { - Float_* left = state.self_modified.data(); - const Float_* right = state.embedding + static_cast(input.negative_sample_selections[s]) * state.num_dim; // cast to avoid overflow. + for (; s < position; ++s) { + const auto left = state.self_modified.data(); + const auto right = state.embedding + sanisizer::product_unsafe(input.negative_sample_selections[s], state.num_dim); - Float_ dist2 = quick_squared_distance(left, right, state.num_dim); + const Float_ dist2 = quick_squared_distance(left, right, state.num_dim); const Float_ grad_coef = 2 * state.gamma * state.b / ((0.001 + dist2) * (state.a * std::pow(dist2, state.b) + 1.0)); for (std::size_t d = 0; d < state.num_dim; ++d) { @@ -277,10 +278,7 @@ void optimize_single_observation(const BusyWaiterInput& input, B } // Copying it back to the embedding once we're done. - { - std::size_t offset = static_cast(input.observation) * state.num_dim; // cast to avoid overflow. - std::copy(state.self_modified.begin(), state.self_modified.end(), state.embedding + offset); - } + std::copy(state.self_modified.begin(), state.self_modified.end(), left); } template @@ -362,24 +360,21 @@ class BusyWaiterThread { template void optimize_layout_parallel( - std::size_t num_dim, - Float_* embedding, + const std::size_t num_dim, + Float_* const embedding, EpochData& setup, - Float_ a, - Float_ b, - Float_ gamma, - Float_ initial_alpha, + const Float_ a, + const Float_ b, + const Float_ gamma, + const Float_ initial_alpha, Rng_& rng, - int epoch_limit, - int nthreads + const int epoch_limit, + const int nthreads ) { #ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION auto& n = setup.current_epoch; - auto num_epochs = setup.total_epochs; - auto limit_epochs = num_epochs; - if (epoch_limit> 0) { - limit_epochs = std::min(epoch_limit, num_epochs); - } + const auto num_epochs = setup.total_epochs; + const auto limit_epochs = (epoch_limit > 0 ? std::min(epoch_limit, num_epochs) : num_epochs); BusyWaiterState state; state.num_dim = num_dim; @@ -400,7 +395,7 @@ void optimize_layout_parallel( pool.emplace_back(state); } - std::vector > raw_inputs(nthreads); + auto raw_inputs = sanisizer::create > >(nthreads); BusyWaiterInput* main_input = &(raw_inputs.back()); std::vector*> pool_inputs; pool_inputs.reserve(nthreads - 1); @@ -456,15 +451,16 @@ void optimize_layout_parallel( ttype = WRITE; } - const std::size_t start = (i == 0 ? 
0 : setup.head[i-1]), end = setup.head[i]; - for (std::size_t j = start; j < end; ++j) { + const auto start = setup.head[i], end = setup.head[i + 1]; + input.tail_offset = start; + for (auto j = start; j < end; ++j) { if (setup.epoch_of_next_sample[j] > epoch) { ns_count.push_back(skip_ns_sentinel); continue; } { - auto neighbor = setup.tail[j]; + const auto neighbor = setup.tail[j]; auto& touched = last_touched_iteration[neighbor]; auto& ttype = touch_type[neighbor]; // if (PRINT) { std::cout << "\tNEIGHBOR: " << neighbor << ": " << touched << " (" << ttype << ")" << std::endl; } @@ -478,10 +474,10 @@ void optimize_layout_parallel( ttype = WRITE; } - auto prior_size = ns_selections.size(); - auto num_neg_samples = compute_num_neg_samples(j, epoch, setup); - for (decltype(num_neg_samples) p = 0; p < num_neg_samples; ++p) { - Index_ sampled = aarand::discrete_uniform(rng, num_obs); + const auto prior_size = ns_selections.size(); + const auto num_neg_samples = compute_num_neg_samples(j, epoch, setup); + for (decltype(I(num_neg_samples)) p = 0; p < num_neg_samples; ++p) { + const Index_ sampled = aarand::discrete_uniform(rng, num_obs); if (sampled == i) { continue; } diff --git a/include/umappp/parallelize.hpp b/include/umappp/parallelize.hpp index a164f43..db62000 100644 --- a/include/umappp/parallelize.hpp +++ b/include/umappp/parallelize.hpp @@ -26,7 +26,7 @@ namespace umappp { * Any user-defined macro should accept the same arguments as `subpar::parallelize_range()`. */ template -void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range) { +void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range) { #ifndef UMAPPP_CUSTOM_PARALLEL // Don't make this nothrow_ = true, there's too many allocations and the // derived methods for the nearest neighbors search could do anything... diff --git a/include/umappp/spectral_init.hpp b/include/umappp/spectral_init.hpp index ab990be..f9701bd 100644 --- a/include/umappp/spectral_init.hpp +++ b/include/umappp/spectral_init.hpp @@ -1,16 +1,17 @@ #ifndef UMAPPP_SPECTRAL_INIT_HPP #define UMAPPP_SPECTRAL_INIT_HPP -#include "irlba/irlba.hpp" -#include "irlba/parallel.hpp" - #include #include #include #include -#include "NeighborList.hpp" #include "aarand/aarand.hpp" +#include "irlba/irlba.hpp" +#include "irlba/parallel.hpp" +#include "sanisizer/sanisizer.hpp" + +#include "NeighborList.hpp" namespace umappp { @@ -22,20 +23,18 @@ namespace internal { * It is assumed that 'edges' has already been symmetrized. */ template -bool normalized_laplacian(const NeighborList& edges, std::size_t num_dim, Float_* Y, int nthreads) { - Index_ nobs = edges.size(); - std::vector sums(nobs); // we deliberately use double-precision to avoid difficult problems from overflow/underflow inside IRLBA. - std::vector pointers; - pointers.reserve(nobs + 1); - pointers.push_back(0); +bool normalized_laplacian(const NeighborList& edges, const std::size_t num_dim, Float_* const Y, const int nthreads) { + const Index_ nobs = edges.size(); + auto sums = sanisizer::create >(nobs); // we deliberately use double-precision to avoid difficult problems from overflow/underflow inside IRLBA. + std::vector pointers(sanisizer::sum::size_type>(nobs, 1)); std::size_t reservable = 0; for (Index_ c = 0; c < nobs; ++c) { const auto& current = edges[c]; - // +1 for self, assuming that no entry of 'current' is equal to 'c'. 
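For context on what normalized_laplacian() assembles in the remainder of this hunk: writing W for the symmetrized edge weights and D for the diagonal matrix of total edge weights, and assuming the elided code stores sqrt(D_ii) in sums[] so that each off-diagonal entry becomes W_ij / sqrt(D_ii D_jj), the matrix handed to IRLBA is

    L_sym = I - D^{-1/2} W D^{-1/2},    M = 2I - L_sym = I + D^{-1/2} W D^{-1/2},

which is what the "* (-1) ... + 2" transform below encodes. Because the eigenvalues of L_sym lie in [0, 2], M is positive semi-definite and its largest eigenvectors are exactly the smallest eigenvectors of L_sym; computing num_dim + 1 of them and keeping rightCols(num_dim) drops the trivial constant eigenvector.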
- reservable += current.size() + 1; - pointers.push_back(reservable); + reservable = sanisizer::sum(reservable, current.size()); + reservable = sanisizer::sum(reservable, 1); // +1 for self, assuming that no entry of 'current' is equal to 'c'. + pointers[c + 1] = reservable; double& sum = sums[c]; for (const auto& f : current) { @@ -54,9 +53,10 @@ bool normalized_laplacian(const NeighborList& edges, std::size_t for (Index_ c = 0; c < nobs; ++c) { const auto& current = edges[c]; - auto cIt = current.begin(), last = current.end(); + auto cIt = current.begin(); + const auto cLast = current.end(); - for (; cIt != last && cIt->first < c; ++cIt) { + for (; cIt != cLast && cIt->first < c; ++cIt) { indices.push_back(cIt->first); values.push_back(- static_cast(cIt->second) / sums[cIt->first] / sums[c] /* TRANSFORM */ * (-1) ); } @@ -65,7 +65,7 @@ bool normalized_laplacian(const NeighborList& edges, std::size_t indices.push_back(c); values.push_back(1 /* TRANSFORM */ * (-1) + 2); - for (; cIt != current.end(); ++cIt) { + for (; cIt != cLast; ++cIt) { indices.push_back(cIt->first); values.push_back(- static_cast(cIt->second) / sums[cIt->first] / sums[c] /* TRANSFORM */ * (-1) ); } @@ -96,7 +96,7 @@ bool normalized_laplacian(const NeighborList& edges, std::size_t * see LTLA/umappp#4 for the discussion. */ - irlba::ParallelSparseMatrix< + const irlba::ParallelSparseMatrix< decltype(values), decltype(indices), decltype(pointers) @@ -104,9 +104,8 @@ bool normalized_laplacian(const NeighborList& edges, std::size_t > mat(nobs, nobs, std::move(values), std::move(indices), std::move(pointers), /* column_major = */ true, nthreads); irlba::EigenThreadScope tscope(nthreads); - irlba::Options opt; - auto actual = irlba::compute(mat, num_dim + 1, opt); - auto ev = actual.U.rightCols(num_dim); + const auto actual = irlba::compute(mat, num_dim + 1, irlba::Options{}); + const auto ev = actual.U.rightCols(num_dim); // Getting the maximum value; this is assumed to be non-zero, // otherwise this entire thing is futile. @@ -114,8 +113,8 @@ bool normalized_laplacian(const NeighborList& edges, std::size_t const double expansion = (max_val > 0 ? 10 / max_val : 1); for (Index_ c = 0; c < nobs; ++c) { - for (std::size_t d = 0; d < num_dim; ++d, ++Y) { - *Y = ev.coeff(c, d) * expansion; // TODO: put back the jitter step. + for (std::size_t d = 0; d < num_dim; ++d) { + Y[sanisizer::nd_offset(d, num_dim, c)] = ev.coeff(c, d) * expansion; // TODO: put back the jitter step. } } return true; @@ -150,7 +149,7 @@ bool has_multiple_components(const NeighborList& edges) { } template -bool spectral_init(const NeighborList& edges, std::size_t num_dim, Float_* vals, int nthreads) { +bool spectral_init(const NeighborList& edges, const std::size_t num_dim, Float_* const vals, const int nthreads) { if (!has_multiple_components(edges)) { if (normalized_laplacian(edges, num_dim, vals, nthreads)) { return true; @@ -160,7 +159,7 @@ bool spectral_init(const NeighborList& edges, std::size_t num_di } template -void random_init(Index_ num_obs, std::size_t num_dim, Float_ * vals) { +void random_init(const Index_ num_obs, const std::size_t num_dim, Float_ * const vals) { std::size_t num = static_cast(num_obs) * num_dim; // cast to avoid overflow. std::mt19937_64 rng(num); // for a bit of deterministic variety. for (std::size_t i = 0; i < num; ++i) { From 01d2ff6bc3b91cbb875e11842097474573c6bc61 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 5 Sep 2025 11:22:22 -0700 Subject: [PATCH 2/6] Minor fixes. 
--- include/umappp/optimize_layout.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/umappp/optimize_layout.hpp b/include/umappp/optimize_layout.hpp index 6016eb0..015b5b1 100644 --- a/include/umappp/optimize_layout.hpp +++ b/include/umappp/optimize_layout.hpp @@ -112,7 +112,7 @@ template int compute_num_neg_samples(const std::size_t j, const Float_ epoch, const EpochData& setup) { // Remember that 'epochs_per_negative_sample' is defined as 'epochs_per_sample[j] / negative_sample_rate'. // We just use it inline below rather than defining a new variable and suffering floating-point round-off. - Float_ num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) * + const Float_ num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) * setup.negative_sample_rate / setup.epochs_per_sample[j]; // i.e., 1/epochs_per_negative_sample. // Maximum value of 'num_neg_samples' should be 'num_epochs * negative_sample_rate', because: @@ -233,8 +233,8 @@ void optimize_single_observation(const BusyWaiterInput& input, B // We don't bother doing this for the neighbors, though, as it's // tedious to make sure that the modified values are available during negative sampling. // (This isn't a problem for the self, as the self cannot be its own negative sample.) - const auto left = state.embedding + sanisizer::product_unsafe(input.observation, state.num_dim); - std::copy_n(left, state.num_dim, state.self_modified.data()); + const auto source = state.embedding + sanisizer::product_unsafe(input.observation, state.num_dim); + std::copy_n(source, state.num_dim, state.self_modified.data()); decltype(I(input.negative_sample_selections.size())) position = 0; const auto num_neighbors = input.negative_sample_count.size(); @@ -278,7 +278,7 @@ void optimize_single_observation(const BusyWaiterInput& input, B } // Copying it back to the embedding once we're done. - std::copy(state.self_modified.begin(), state.self_modified.end(), left); + std::copy(state.self_modified.begin(), state.self_modified.end(), source); } template From 4ec6da307af876cd321ad3464fa272889aa99eb9 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 5 Sep 2025 11:29:18 -0700 Subject: [PATCH 3/6] Minor fix for the head. 
--- include/umappp/optimize_layout.hpp | 4 ++-- tests/src/optimize_layout.cpp | 10 +--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/include/umappp/optimize_layout.hpp b/include/umappp/optimize_layout.hpp index 015b5b1..7a44d0c 100644 --- a/include/umappp/optimize_layout.hpp +++ b/include/umappp/optimize_layout.hpp @@ -146,7 +146,7 @@ void optimize_layout( const Float_ epoch = n; const Float_ alpha = initial_alpha * (1.0 - epoch / num_epochs); - const Index_ num_obs = setup.head.size(); + const Index_ num_obs = setup.head.size() - 1; for (Index_ i = 0; i < num_obs; ++i) { const auto start = setup.head[i], end = setup.head[i + 1]; const auto left = embedding + sanisizer::product_unsafe(i, num_dim); @@ -403,7 +403,7 @@ void optimize_layout_parallel( pool_inputs.push_back(&(raw_inputs[t])); } - const Index_ num_obs = setup.head.size(); + const Index_ num_obs = setup.head.size() - 1; std::vector last_touched_iteration(num_obs); std::vector touch_type(num_obs); diff --git a/tests/src/optimize_layout.cpp b/tests/src/optimize_layout.cpp index ca60d4a..d56769c 100644 --- a/tests/src/optimize_layout.cpp +++ b/tests/src/optimize_layout.cpp @@ -135,17 +135,9 @@ INSTANTIATE_TEST_SUITE_P( TEST(OptimizeLayout, ChooseNumNegSamplesFailsafe) { umappp::internal::EpochData setup(1); - setup.epochs_per_sample.push_back(0); + setup.epochs_per_sample.push_back(1); setup.epoch_of_next_negative_sample.push_back(0); setup.negative_sample_rate = 5; - - auto num_neg = umappp::internal::compute_num_neg_samples(0, 1.0, setup); // division by zero for epochs_per_sample. - EXPECT_EQ(num_neg, std::numeric_limits::max()); - - num_neg = umappp::internal::compute_num_neg_samples(0, 0.0, setup); // still works if the initial calculation yields an NaN. - EXPECT_EQ(num_neg, std::numeric_limits::max()); - - setup.epochs_per_sample.back() = 1; num_neg = umappp::internal::compute_num_neg_samples(0, 1.0, setup); // as a control, checking that we don't hit the cap. EXPECT_EQ(num_neg, 5); } From b68cd318eae1b2bda7808c052871a35ce6a15809 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 5 Sep 2025 14:56:06 -0700 Subject: [PATCH 4/6] Fixes for the tests. --- tests/R/package/src/Makevars | 3 +-- tests/src/optimize_layout.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/R/package/src/Makevars b/tests/R/package/src/Makevars index a718fed..729a428 100644 --- a/tests/R/package/src/Makevars +++ b/tests/R/package/src/Makevars @@ -3,8 +3,7 @@ # are available. BUILD_DIR=../../../../build/_deps PKG_CPPFLAGS=-I${BUILD_DIR}/aarand-src/include \ - -I${BUILD_DIR}/kmeans-src/include \ - -I${BUILD_DIR}/powerit-src/include \ + -I${BUILD_DIR}/sanisizer-src/include \ -I${BUILD_DIR}/knncolle-src/include \ -I${BUILD_DIR}/irlba-src/include \ -I${BUILD_DIR}/subpar-src/include \ diff --git a/tests/src/optimize_layout.cpp b/tests/src/optimize_layout.cpp index d56769c..71c75f2 100644 --- a/tests/src/optimize_layout.cpp +++ b/tests/src/optimize_layout.cpp @@ -138,6 +138,6 @@ TEST(OptimizeLayout, ChooseNumNegSamplesFailsafe) { setup.epochs_per_sample.push_back(1); setup.epoch_of_next_negative_sample.push_back(0); setup.negative_sample_rate = 5; - num_neg = umappp::internal::compute_num_neg_samples(0, 1.0, setup); // as a control, checking that we don't hit the cap. + auto num_neg = umappp::internal::compute_num_neg_samples(0, 1.0, setup); // as a control, checking that we don't hit the cap. 
EXPECT_EQ(num_neg, 5); } From 8594282486be28068ba0df5897236197198b1401 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 5 Sep 2025 15:13:09 -0700 Subject: [PATCH 5/6] yet another attempt to get this to work. --- tests/src/optimize_layout.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/optimize_layout.cpp b/tests/src/optimize_layout.cpp index 71c75f2..d7f3d27 100644 --- a/tests/src/optimize_layout.cpp +++ b/tests/src/optimize_layout.cpp @@ -59,7 +59,7 @@ TEST_P(OptimizeTest, Epochs) { stored[0][0].second = 1e-8; // check for correct removal. auto epoch = umappp::internal::similarities_to_epochs(stored, 500, 5.0); - EXPECT_EQ(epoch.head.size(), nobs); + EXPECT_EQ(epoch.head.size(), nobs + 1); EXPECT_EQ(epoch.tail.size(), epoch.epochs_per_sample.size()); EXPECT_EQ(epoch.tail.size(), epoch.head.back()); From 5414cf3cf52ddfd59e83453ecaf01c22ecc9675e Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 5 Sep 2025 15:32:20 -0700 Subject: [PATCH 6/6] Yet more fixes. --- include/umappp/Status.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/umappp/Status.hpp b/include/umappp/Status.hpp index 5db8485..eca3344 100644 --- a/include/umappp/Status.hpp +++ b/include/umappp/Status.hpp @@ -113,7 +113,7 @@ class Status { * @return The number of observations in the dataset. */ Index_ num_observations() const { - return my_epochs.head.size(); + return my_epochs.head.size() - 1; } public:
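Finally, a minimal end-to-end sketch of how the patched entry points are meant to be driven. The neighbor-list element type, the include path, and the zero-argument Status::run() call are assumptions based on the headers touched in this series rather than guarantees of the public API.

#include <cstddef>
#include <vector>
#include "umappp/initialize.hpp"

// Assumed: umappp::NeighborList<Index_, Float_> is a per-observation vector of
// (neighbor index, distance) pairs, and Status::run() iterates to the configured
// number of epochs when called without an epoch limit.
std::vector<double> sketch_embed(const umappp::NeighborList<int, double>& neighbors) {
    const std::size_t num_dim = 2;
    std::vector<double> embedding(num_dim * neighbors.size());

    umappp::Options options;
    options.num_threads = 4;

    auto status = umappp::initialize(neighbors, num_dim, embedding.data(), options);
    status.run(); // coordinates for observation i start at embedding[i * num_dim]
    return embedding;
}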