KittyCAD · adamchalmers · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/kcl-ezpz/src/lib.rs b/kcl-ezpz/src/lib.rs
@@ -298,7 +298,7 @@ fn solve_inner<A: Analysis>(
     };
 
     let mut unsatisfied: Vec<usize> = Vec::new();
-    let outcome = model.solve_gauss_newton(&mut values, config);
+    let outcome = model.solve_gauss_newton(&mut values);
     warnings.extend(model.warnings.lock().unwrap().drain(..));
     let success = match outcome {
         Ok(o) => o,

diff --git a/kcl-ezpz/src/solver.rs b/kcl-ezpz/src/solver.rs
@@ -67,7 +67,7 @@ impl Layout {
         var as usize
     }
 
-    pub fn num_rows(&self) -> usize {
+    pub const fn num_rows(&self) -> usize {
         self.total_num_residuals
     }
 }
@@ -88,6 +88,7 @@ struct Jc {
 /// The problem to actually solve.
 /// Note that the initial values of each variable are required for Tikhonov regularization.
 pub(crate) struct Model<'c> {
+    config: Config,
     layout: Layout,
     jc: Jc,
     constraints: &'c [ConstraintEntry<'c>],
@@ -213,6 +214,7 @@ impl<'c> Model<'c> {
 
         // All done.
         Ok(Self {
+            config,
             warnings: Default::default(),
             layout,
             jc,

diff --git a/kcl-ezpz/src/solver/newton.rs b/kcl-ezpz/src/solver/newton.rs
@@ -1,10 +1,11 @@
 use faer::{
-    ColRef,
+    ColRef, get_global_parallelism,
+    matrix_free::eigen::{PartialEigenParams, partial_eigen_scratch},
     prelude::Solve,
     sparse::{SparseColMatRef, linalg::solvers::Lu},
 };
 
-use crate::{Config, NonLinearSystemError};
+use crate::NonLinearSystemError;
 
 use super::Model;
 
@@ -18,13 +19,12 @@ impl Model<'_> {
     pub fn solve_gauss_newton(
         &mut self,
         current_values: &mut [f64],
-        config: Config,
     ) -> Result<SuccessfulSolve, NonLinearSystemError> {
         let m = self.layout.total_num_residuals;
 
         let mut global_residual = vec![0.0; m];
 
-        for this_iteration in 0..config.max_iterations {
+        for this_iteration in 0..self.config.max_iterations {
             // Assemble global residual and Jacobian
             // Re-evaluate the global residual.
             self.residual(current_values, &mut global_residual);
@@ -38,7 +38,7 @@ impl Model<'_> {
                 .map(|x| x.abs())
                 .reduce(f64::max)
                 .ok_or(NonLinearSystemError::EmptySystemNotAllowed)?;
-            if largest_absolute_elem <= config.convergence_tolerance {
+            if largest_absolute_elem <= self.config.convergence_tolerance {
                 return Ok(SuccessfulSolve {
                     iterations: this_iteration,
                 });
@@ -74,7 +74,8 @@ impl Model<'_> {
                 .for_each(|(curr_val, d)| {
                     *curr_val += d;
                 });
-            let step_threshold = config.step_tolerance * (current_inf_norm + config.step_tolerance);
+            let step_threshold =
+                self.config.step_tolerance * (current_inf_norm + self.config.step_tolerance);
 
             // Convergence check: if `d` is small enough,
             // then the system is at a local minimum. It might be inconsistent, and therefore
@@ -91,35 +92,98 @@ impl Model<'_> {
 
     pub fn is_underconstrained(&self) -> Result<bool, NonLinearSystemError> {
         // First step is to compute the SVD.
-        // Faer doesn't have a sparse SVD algorithm, so let's convert it to a dense matrix.
-        // This step is SLOW.
-        // Faer maintainer said she has a sparse SVD algorithm she hasn't published yet,
-        // so hopefully she will publish it soon and this slow step won't be necessary.
-        let j_sparse = SparseColMatRef::new(self.jc.sym.as_ref(), &self.jc.vals);
-        let j_dense = j_sparse.to_dense();
-        debug_assert_eq!(
-            self.layout.num_variables,
-            j_dense.ncols(),
-            "Jacobian was malformed, Adam messed something up here."
-        );
-        let svd = j_dense.svd().map_err(NonLinearSystemError::FaerSvd)?;
-        let sigma_diags = svd.S();
-
-        // These are the 'singular values'.
-        let sigma_col = sigma_diags.column_vector();
+        // If the problem is very very small, just use dense SVD.
+        let n = self.layout.num_variables;
+        let m = self.layout.num_rows();
+        let k = 40;
+        // This is the number of singular values in the SVD.
+        let min_mn = usize::min(n, m);
+        assert!(k < min_mn, "faer's sparse SVD algorithm has a bound for k");
+        eprintln!("Jacobian is {m}x{n}, k = {k}");
+
+        let singular_values = if k <= 32 {
+            eprintln!("Matrix is small, using dense SVD");
+            // Small, use sparse SVD
+            let j_sparse = SparseColMatRef::new(self.jc.sym.as_ref(), &self.jc.vals);
+            let j_dense = j_sparse.to_dense();
+            debug_assert_eq!(
+                self.layout.num_variables,
+                j_dense.ncols(),
+                "Jacobian was malformed, Adam messed something up here."
+            );
+            let svd = j_dense.svd().map_err(NonLinearSystemError::FaerSvd)?;
+            svd.S().column_vector().iter().copied().collect()
+        } else {
+            eprintln!("Matrix is big, using sparse SVD");
+            // Large, use dense SVD
+            let j = SparseColMatRef::new(self.jc.sym.as_ref(), &self.jc.vals);
+            // SVD requires a matrix `a` with dimension N by N.
+            // Because `j` is rectangular MxN, we use
+            // A = JᵀJ + λI, as we do in the Newton-Gauss solver loop.
+            // This is square and with the right dimension.
+            let jtj = j.transpose().to_col_major()? * j;
+            let a = jtj + &self.lambda_i;
+
+            // Allocate scratch space for Faer with `u` and `v`.
+            let mut u = faer::Mat::zeros(n, k);
+            let mut v = faer::Mat::zeros(n, k);
+            // Faer will write the singular values into this.
+            let mut singular_values = vec![0.0; k];
+
+            // Make a unit basis vector, with length n, it should be normalized (unit).
+            // It's trivially normalized if we set all entries to 0 and the first one to 1.
+            let mut v0_buf = vec![0.0_f64; n];
+            v0_buf[0] = 1.0;
+            let v0 = faer::ColRef::from_slice(&v0_buf);
+
+            // Tune subspace dims to stay below min(m, n).
+            let min_dim = usize::min(min_mn.saturating_sub(1), usize::max(2, k));
+            let params = PartialEigenParams {
+                max_restarts: 10,
+                min_dim,
+                max_dim: usize::min(min_mn.saturating_sub(1), usize::max(min_dim * 2, k)),
+                ..Default::default()
+            };
+
+            // Allocate scratch space for the algorithm.
+            let par = get_global_parallelism();
+            let estimated_memory_needed =
+                partial_eigen_scratch(&a, params.max_dim * 10, par, params);
+            let mut memory = faer::dyn_stack::MemBuffer::new(estimated_memory_needed);
+            let scratch_space = faer::dyn_stack::MemStack::new(&mut memory);
+
+            let _svd_info = faer::matrix_free::eigen::partial_svd(
+                // Output matrix for U (n by k), the left_singular_rows
+                u.as_mut(),
+                // Output matrix for V (n by k), the right_singular_rows
+                v.as_mut(),
+                // Sigma, i.e. the K largest singular values.
+                &mut singular_values,
+                // square operator, n by n
+                &a,
+                // length n start vector (normalized)
+                v0,
+                // convergence threshold for residuals
+                self.config.convergence_tolerance,
+                par,
+                scratch_space,
+                params,
+            );
+            singular_values
+        };
 
         // The system is underconstrained if there's too many singular values
         // close to 0. How close to 0? The tolerance should be derived from
         // the largest singular value.
-        let largest_singular_value = sigma_col
+        let largest_singular_value = singular_values
             .iter()
             .copied()
             .reduce(f64::max)
             .ok_or(NonLinearSystemError::EmptySystemNotAllowed)?;
         let tolerance = 1e-8 * largest_singular_value;
 
-        let rank = sigma_col.iter().filter(|&&s| s > tolerance).count();
-        let degrees_of_freedom = self.layout.num_variables - rank;
+        let rank = singular_values.iter().filter(|&&s| s > tolerance).count();
+        let degrees_of_freedom = n - rank;
         Ok(degrees_of_freedom > 0)
     }
 }
diff --git a/kcl-ezpz/src/tests.rs b/kcl-ezpz/src/tests.rs
@@ -138,12 +138,12 @@ fn coincident() {
     assert_points_eq(solved.get_point("q").unwrap(), Point { x: 3.0, y: 3.0 });
 }
 
-// #[test]
-// fn massive() {
-//     let solved = run("massive_parallel_system");
-//     assert!(solved.is_satisfied());
-//     assert!(!solved.analysis.is_underconstrained);
-// }
+#[test]
+fn massive() {
+    let solved = run("massive_parallel_system");
+    assert!(solved.is_satisfied());
+    assert!(!solved.analysis.is_underconstrained);
+}
 
 #[test]
 fn symmetric() {