From 3a986ba9aa07a47a4518b4a20c0a126774ae6da6 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 7 Feb 2026 15:40:17 -0600
Subject: [PATCH 1/3] Add `matrixscale` for general matrices

---
 src/ScaleInvariantAnalysis.jl | 42 +++++++++++++++++++++++++++++++++--
 src/utils.jl                  | 26 +++++++++++++++++++++-
 test/runtests.jl              |  2 ++
 3 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/src/ScaleInvariantAnalysis.jl b/src/ScaleInvariantAnalysis.jl
index dbd8735..006b58d 100644
--- a/src/ScaleInvariantAnalysis.jl
+++ b/src/ScaleInvariantAnalysis.jl
@@ -3,7 +3,7 @@ module ScaleInvariantAnalysis
 using LinearAlgebra
 using SparseArrays
 
-export condscale, divmag, dotabs, symscale
+export condscale, divmag, dotabs, matrixscale, symscale
 
 include("utils.jl")
 
@@ -51,9 +51,47 @@ function symscale(A::AbstractMatrix; exact::Bool=false)
         offset = sum(sumlogA) / (2 * sum(nz))
         return exp.(sumlogA ./ nz .- offset)
     end
-    return exp.(cholesky(Diagonal(nz) + (!iszero).(A)) \ sumlogA)
+    return exp.(cholesky(Diagonal(nz) + isnz(A)) \ sumlogA)
 end
 
+"""
+    a1, a2 = matrixscale(A; exact=false)
+
+Given a matrix `A`, return vectors `a1` and `a2` representing the "scale of each
+axis," so that `|A[i,j]| ~ a1[i] * a2[j]` for all `i, j`. `a1[i]` and `a2[j]`
+are nonnegative, and are zero only if `A[i, j] = 0` for all `j` or all `i`,
+respectively.
+
+With `exact=true`, `a1` and `a2` solve the optimization problem
+
+    min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a1[i] * a2[j])))²
+    s.t. n * ∑_i log(a1[i]) = m * ∑_j log(a2[j])
+
+where `m, n = size(A)`. These vectors are covariant under changes of scale but
+not general linear transformations.
+
+With `exact=false`, the pattern of nonzeros in `A` is approximated as `u * v'`,
+where `sum(u) * v[j]` is the number of nonzeros in column `j` and `sum(v) *
+u[i]` is the number of nonzeros in row `i`. This results in an `O(m*n)` rather
+than `O((m+n)^3)` algorithm.
+"""
+function matrixscale(A::AbstractMatrix; exact::Bool=false)
+    Base.require_one_based_indexing(A)
+    ax1, ax2 = axes(A, 1), axes(A, 2)
+    (sumlogA1, nz1), (sumlogA2, nz2) = _matrixscale(A, ax1, ax2)
+    m, n = length(ax1), length(ax2)
+    p = [fill(n, m); fill(-m, n)]   # used to enforce the constraint
+    # if !exact || (all(==(n), nz1) && all(==(m), nz2))
+    #     # Sherman-Morrison formula for efficiency
+    #     offset = sum(sumlogA) / (2 * sum(nz))
+    #     return exp.(sumlogA ./ nz .- offset)
+    # end
+    Anz = isnz(A)
+    a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(Anz) + p * p') \ vcat(sumlogA1, sumlogA2))
+    return a12[begin:begin+m-1], a12[m+begin:end]
+end
+
+
 ratio_nz(n, d) = iszero(d) ? zero(n) / oneunit(d) : n / d
 
 """
diff --git a/src/utils.jl b/src/utils.jl
index 6622350..5db1e5e 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -15,4 +15,28 @@ function _symscale(A, ax)
     return sumlogA, nz
 end
 
-# TODO: implement _symscale for SparseMatrixCSC
+function _matrixscale(A, ax1, ax2)
+    sumlogA1, nz1 = fill!(similar(A, Float64, ax1), 0), fill!(similar(A, Int, ax1), 0)
+    sumlogA2, nz2 = fill!(similar(A, Float64, ax2), 0), fill!(similar(A, Int, ax2), 0)
+    for j in ax2
+        for i in ax1
+            Aij = abs(A[i, j])
+            iszero(Aij) && continue
+            logAij = log(Aij)
+            sumlogA1[i] += logAij
+            nz1[i] += 1
+            sumlogA2[j] += logAij
+            nz2[j] += 1
+        end
+    end
+    return (sumlogA1, nz1), (sumlogA2, nz2)
+end
+
+isnz(A) = .!iszero.(A)
+
+function odblocks(Anz::AbstractMatrix{T}) where T
+    m, n = size(Anz)
+    return [zeros(T, m, m) Anz; Anz' zeros(T, n, n)]
+end
+
+# TODO: implementations for SparseMatrixCSC
diff --git a/test/runtests.jl b/test/runtests.jl
index 0cb68e1..d68c188 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -20,6 +20,8 @@ end
     @test symscale([1.0 -0.2; -0.2 0]; exact=true) ≈ [1, 0.2]
     @test symscale([1.0 0; 0 2]; exact=true) ≈ [1, sqrt(2)]
     test_scaleinv(A -> symscale(A; exact=true), [2.0 1.0; 1.0 3.0], 1)
+    u, v = matrixscale([2.0 1.0; 1.0 3.0]; exact=true)
+    @test u ≈ v ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true)
 
     @test condscale([1 0; 0 1e-8]) ≈ 1
     A = [1.0 -0.2; -0.2 0]

From 2f01ec8c3041cbc910fafc133758ee42f832e17a Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 8 Feb 2026 01:57:27 -0600
Subject: [PATCH 2/3] wip

---
 src/ScaleInvariantAnalysis.jl | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/ScaleInvariantAnalysis.jl b/src/ScaleInvariantAnalysis.jl
index 006b58d..64f9593 100644
--- a/src/ScaleInvariantAnalysis.jl
+++ b/src/ScaleInvariantAnalysis.jl
@@ -67,8 +67,8 @@ With `exact=true`, `a1` and `a2` solve the optimization problem
     min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a1[i] * a2[j])))²
     s.t. n * ∑_i log(a1[i]) = m * ∑_j log(a2[j])
 
-where `m, n = size(A)`. These vectors are covariant under changes of scale but
-not general linear transformations.
+where `m, n = size(A)`. Up to multiplication by a scalar, these vectors are
+covariant under changes of scale but not general linear transformations.
 
 With `exact=false`, the pattern of nonzeros in `A` is approximated as `u * v'`,
 where `sum(u) * v[j]` is the number of nonzeros in column `j` and `sum(v) *
@@ -80,14 +80,16 @@ function matrixscale(A::AbstractMatrix; exact::Bool=false)
     ax1, ax2 = axes(A, 1), axes(A, 2)
     (sumlogA1, nz1), (sumlogA2, nz2) = _matrixscale(A, ax1, ax2)
     m, n = length(ax1), length(ax2)
+    if !exact || (all(==(n), nz1) && all(==(m), nz2))
+        z = sum(nz1)
+        u, v = nz1 / sqrt(z), nz2 / sqrt(z)
+        uᵀ1, vᵀ1 = sum(u), sum(v)
+        s′, t′ = sumlogA1 ./ u, sumlogA2 ./ v
+        αoffset, βoffset = s′ ./ vᵀ1, t′ ./ uᵀ1
+    end
     p = [fill(n, m); fill(-m, n)]   # used to enforce the constraint
-    # if !exact || (all(==(n), nz1) && all(==(m), nz2))
-    #     # Sherman-Morrison formula for efficiency
-    #     offset = sum(sumlogA) / (2 * sum(nz))
-    #     return exp.(sumlogA ./ nz .- offset)
-    # end
-    Anz = isnz(A)
-    a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(Anz) + p * p') \ vcat(sumlogA1, sumlogA2))
+    W = isnz(A)
+    a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(W) + p * p') \ vcat(sumlogA1, sumlogA2))
     return a12[begin:begin+m-1], a12[m+begin:end]
 end
 

From 050c7be9635ced6d01ac863d6aca907c3af4cba2 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 8 Feb 2026 03:46:00 -0600
Subject: [PATCH 3/3] finish

---
 src/ScaleInvariantAnalysis.jl | 42 +++++++++++++++++------------------
 test/runtests.jl              | 34 ++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/src/ScaleInvariantAnalysis.jl b/src/ScaleInvariantAnalysis.jl
index 64f9593..c272407 100644
--- a/src/ScaleInvariantAnalysis.jl
+++ b/src/ScaleInvariantAnalysis.jl
@@ -55,41 +55,41 @@ function symscale(A::AbstractMatrix; exact::Bool=false)
 end
 
 """
-    a1, a2 = matrixscale(A; exact=false)
+    a, b = matrixscale(A; exact=false)
 
-Given a matrix `A`, return vectors `a1` and `a2` representing the "scale of each
-axis," so that `|A[i,j]| ~ a1[i] * a2[j]` for all `i, j`. `a1[i]` and `a2[j]`
-are nonnegative, and are zero only if `A[i, j] = 0` for all `j` or all `i`,
+Given a matrix `A`, return vectors `a` and `b` representing the "scale of each
+axis," so that `|A[i,j]| ~ a[i] * b[j]` for all `i, j`. `a[i]` and `b[j]` are
+nonnegative, and are zero only if `A[i, j] = 0` for all `j` or all `i`,
 respectively.
 
-With `exact=true`, `a1` and `a2` solve the optimization problem
+With `exact=true`, `a` and `b` solve the optimization problem
 
-    min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a1[i] * a2[j])))²
-    s.t. n * ∑_i log(a1[i]) = m * ∑_j log(a2[j])
+    min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a[i] * b[j])))²
+    s.t. ∑_i nA[i] * log(a[i]) = ∑_j mA[j] * log(b[j])
 
-where `m, n = size(A)`. Up to multiplication by a scalar, these vectors are
-covariant under changes of scale but not general linear transformations.
+where `nA` and `mA` are the number of nonzeros in each row and column,
+respectively. Up to multiplication by a scalar, these vectors are covariant
+under changes of scale but not general linear transformations.
 
 With `exact=false`, the pattern of nonzeros in `A` is approximated as `u * v'`,
-where `sum(u) * v[j]` is the number of nonzeros in column `j` and `sum(v) *
-u[i]` is the number of nonzeros in row `i`. This results in an `O(m*n)` rather
-than `O((m+n)^3)` algorithm.
+where `sum(u) * v[j] = mA[j]` and `sum(v) * u[i] = nA[i]`. This results in an
+`O(m*n)` rather than `O((m+n)^3)` algorithm.
 """
 function matrixscale(A::AbstractMatrix; exact::Bool=false)
     Base.require_one_based_indexing(A)
     ax1, ax2 = axes(A, 1), axes(A, 2)
-    (sumlogA1, nz1), (sumlogA2, nz2) = _matrixscale(A, ax1, ax2)
+    (s, ns), (t, mt) = _matrixscale(A, ax1, ax2)
     m, n = length(ax1), length(ax2)
-    if !exact || (all(==(n), nz1) && all(==(m), nz2))
-        z = sum(nz1)
-        u, v = nz1 / sqrt(z), nz2 / sqrt(z)
-        uᵀ1, vᵀ1 = sum(u), sum(v)
-        s′, t′ = sumlogA1 ./ u, sumlogA2 ./ v
-        αoffset, βoffset = s′ ./ vᵀ1, t′ ./ uᵀ1
+    if !exact || (all(==(n), ns) && all(==(m), mt))
+        z = sum(ns)
+        @assert sum(mt) == z "Inconsistent nonzero counts in rows and columns"
+        a = exp.(s ./ ns .- sum(s) / (2z))
+        b = exp.(t ./ mt .- sum(t) / (2z))
+        return a, b
     end
-    p = [fill(n, m); fill(-m, n)]   # used to enforce the constraint
+    p = vcat(ns, -mt)
     W = isnz(A)
-    a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(W) + p * p') \ vcat(sumlogA1, sumlogA2))
+    a12 = exp.(cholesky(Diagonal(vcat(ns, mt)) + odblocks(W) + p * p') \ vcat(s, t))
     return a12[begin:begin+m-1], a12[m+begin:end]
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index d68c188..7f0d594 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,13 +15,43 @@ function test_scaleinv(f, A::AbstractMatrix, p::Int; iter=10, rtol=sqrt(eps(floa
     @test npass ≥ iter - 1
 end
 
+function test_sumlog(A, a, b; rtol=1e-6)
+    α, β = log.(a), log.(b)
+    Aref = sum(abs(log(abs(A[i, j]))) for i in axes(A, 1), j in axes(A, 2) if A[i, j] != 0)
+    for j in axes(A, 2)
+        s = 0.0
+        for i in axes(A, 1)
+            if A[i, j] != 0
+                s += log(abs(A[i, j])) - α[i] - β[j]
+            end
+        end
+        @test abs(s) ≤ rtol * Aref
+    end
+    for i in axes(A, 1)
+        s = 0.0
+        for j in axes(A, 2)
+            if A[i, j] != 0
+                s += log(abs(A[i, j])) - α[i] - β[j]
+            end
+        end
+        @test abs(s) ≤ rtol * Aref
+    end
+end
+
 @testset "ScaleInvariantAnalysis.jl" begin
     @test symscale([2.0 1.0; 1.0 3.0]) ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true) ≈ exp.([3 1; 1 3] \ [log(2.0); log(3.0)])
     @test symscale([1.0 -0.2; -0.2 0]; exact=true) ≈ [1, 0.2]
     @test symscale([1.0 0; 0 2]; exact=true) ≈ [1, sqrt(2)]
     test_scaleinv(A -> symscale(A; exact=true), [2.0 1.0; 1.0 3.0], 1)
-    u, v = matrixscale([2.0 1.0; 1.0 3.0]; exact=true)
-    @test u ≈ v ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true)
+    a, b = matrixscale([2.0 1.0; 1.0 3.0]; exact=true)
+    @test a ≈ b ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true)
+    a′, b′ = matrixscale([2.0 1.0; 1.0 3.0])
+    @test a′ ≈ a && b′ ≈ b
+    A = [0.0 1.0; -2.0 0.0]
+    a, b = matrixscale(A; exact=true)
+    test_sumlog(A, a, b)
+    a′, b′ = matrixscale(A)
+    @test sum(log, a) ≈ sum(log, b) ≈ sum(log, a′) ≈ sum(log, b′)
 
     @test condscale([1 0; 0 1e-8]) ≈ 1
     A = [1.0 -0.2; -0.2 0]