From 3a986ba9aa07a47a4518b4a20c0a126774ae6da6 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 7 Feb 2026 15:40:17 -0600 Subject: [PATCH 1/3] Add `matrixscale` for general matrices --- src/ScaleInvariantAnalysis.jl | 42 +++++++++++++++++++++++++++++++++-- src/utils.jl | 26 +++++++++++++++++++++- test/runtests.jl | 2 ++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/ScaleInvariantAnalysis.jl b/src/ScaleInvariantAnalysis.jl index dbd8735..006b58d 100644 --- a/src/ScaleInvariantAnalysis.jl +++ b/src/ScaleInvariantAnalysis.jl @@ -3,7 +3,7 @@ module ScaleInvariantAnalysis using LinearAlgebra using SparseArrays -export condscale, divmag, dotabs, symscale +export condscale, divmag, dotabs, matrixscale, symscale include("utils.jl") @@ -51,9 +51,47 @@ function symscale(A::AbstractMatrix; exact::Bool=false) offset = sum(sumlogA) / (2 * sum(nz)) return exp.(sumlogA ./ nz .- offset) end - return exp.(cholesky(Diagonal(nz) + (!iszero).(A)) \ sumlogA) + return exp.(cholesky(Diagonal(nz) + isnz(A)) \ sumlogA) end +""" + a1, a2 = matrixscale(A; exact=false) + +Given a matrix `A`, return vectors `a1` and `a2` representing the "scale of each +axis," so that `|A[i,j]| ~ a1[i] * a2[j]` for all `i, j`. `a1[i]` and `a2[j]` +are nonnegative, and are zero only if `A[i, j] = 0` for all `j` or all `i`, +respectively. + +With `exact=true`, `a1` and `a2` solve the optimization problem + + min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a1[i] * a2[j])))² + s.t. n * ∑_i log(a1[i]) = m * ∑_j log(a2[j]) + +where `m, n = size(A)`. These vectors are covariant under changes of scale but +not general linear transformations. + +With `exact=false`, the pattern of nonzeros in `A` is approximated as `u * v'`, +where `sum(u) * v[j]` is the number of nonzeros in column `j` and `sum(v) * +u[i]` is the number of nonzeros in row `i`. This results in an `O(m*n)` rather +than `O((m+n)^3)` algorithm. +""" +function matrixscale(A::AbstractMatrix; exact::Bool=false) + Base.require_one_based_indexing(A) + ax1, ax2 = axes(A, 1), axes(A, 2) + (sumlogA1, nz1), (sumlogA2, nz2) = _matrixscale(A, ax1, ax2) + m, n = length(ax1), length(ax2) + p = [fill(n, m); fill(-m, n)] # used to enforce the constraint + # if !exact || (all(==(n), nz1) && all(==(m), nz2)) + # # Sherman-Morrison formula for efficiency + # offset = sum(sumlogA) / (2 * sum(nz)) + # return exp.(sumlogA ./ nz .- offset) + # end + Anz = isnz(A) + a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(Anz) + p * p') \ vcat(sumlogA1, sumlogA2)) + return a12[begin:begin+m-1], a12[m+begin:end] +end + + ratio_nz(n, d) = iszero(d) ? zero(n) / oneunit(d) : n / d """ diff --git a/src/utils.jl b/src/utils.jl index 6622350..5db1e5e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -15,4 +15,28 @@ function _symscale(A, ax) return sumlogA, nz end -# TODO: implement _symscale for SparseMatrixCSC +function _matrixscale(A, ax1, ax2) + sumlogA1, nz1 = fill!(similar(A, Float64, ax1), 0), fill!(similar(A, Int, ax1), 0) + sumlogA2, nz2 = fill!(similar(A, Float64, ax2), 0), fill!(similar(A, Int, ax2), 0) + for j in ax2 + for i in ax1 + Aij = abs(A[i, j]) + iszero(Aij) && continue + logAij = log(Aij) + sumlogA1[i] += logAij + nz1[i] += 1 + sumlogA2[j] += logAij + nz2[j] += 1 + end + end + return (sumlogA1, nz1), (sumlogA2, nz2) +end + +isnz(A) = .!iszero.(A) + +function odblocks(Anz::AbstractMatrix{T}) where T + m, n = size(Anz) + return [zeros(T, m, m) Anz; Anz' zeros(T, n, n)] +end + +# TODO: implementations for SparseMatrixCSC diff --git a/test/runtests.jl b/test/runtests.jl index 0cb68e1..d68c188 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,6 +20,8 @@ end @test symscale([1.0 -0.2; -0.2 0]; exact=true) ≈ [1, 0.2] @test symscale([1.0 0; 0 2]; exact=true) ≈ [1, sqrt(2)] test_scaleinv(A -> symscale(A; exact=true), [2.0 1.0; 1.0 3.0], 1) + u, v = matrixscale([2.0 1.0; 1.0 3.0]; exact=true) + @test u ≈ v ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true) @test condscale([1 0; 0 1e-8]) ≈ 1 A = [1.0 -0.2; -0.2 0] From 2f01ec8c3041cbc910fafc133758ee42f832e17a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 8 Feb 2026 01:57:27 -0600 Subject: [PATCH 2/3] wip --- src/ScaleInvariantAnalysis.jl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ScaleInvariantAnalysis.jl b/src/ScaleInvariantAnalysis.jl index 006b58d..64f9593 100644 --- a/src/ScaleInvariantAnalysis.jl +++ b/src/ScaleInvariantAnalysis.jl @@ -67,8 +67,8 @@ With `exact=true`, `a1` and `a2` solve the optimization problem min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a1[i] * a2[j])))² s.t. n * ∑_i log(a1[i]) = m * ∑_j log(a2[j]) -where `m, n = size(A)`. These vectors are covariant under changes of scale but -not general linear transformations. +where `m, n = size(A)`. Up to multiplication by a scalar, these vectors are +covariant under changes of scale but not general linear transformations. With `exact=false`, the pattern of nonzeros in `A` is approximated as `u * v'`, where `sum(u) * v[j]` is the number of nonzeros in column `j` and `sum(v) * @@ -80,14 +80,16 @@ function matrixscale(A::AbstractMatrix; exact::Bool=false) ax1, ax2 = axes(A, 1), axes(A, 2) (sumlogA1, nz1), (sumlogA2, nz2) = _matrixscale(A, ax1, ax2) m, n = length(ax1), length(ax2) + if !exact || (all(==(n), nz1) && all(==(m), nz2)) + z = sum(nz1) + u, v = nz1 / sqrt(z), nz2 / sqrt(z) + uᵀ1, vᵀ1 = sum(u), sum(v) + s′, t′ = sumlogA1 ./ u, sumlogA2 ./ v + αoffset, βoffset = s′ ./ vᵀ1, t′ ./ uᵀ1 + end p = [fill(n, m); fill(-m, n)] # used to enforce the constraint - # if !exact || (all(==(n), nz1) && all(==(m), nz2)) - # # Sherman-Morrison formula for efficiency - # offset = sum(sumlogA) / (2 * sum(nz)) - # return exp.(sumlogA ./ nz .- offset) - # end - Anz = isnz(A) - a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(Anz) + p * p') \ vcat(sumlogA1, sumlogA2)) + W = isnz(A) + a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(W) + p * p') \ vcat(sumlogA1, sumlogA2)) return a12[begin:begin+m-1], a12[m+begin:end] end From 050c7be9635ced6d01ac863d6aca907c3af4cba2 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 8 Feb 2026 03:46:00 -0600 Subject: [PATCH 3/3] finish --- src/ScaleInvariantAnalysis.jl | 42 +++++++++++++++++------------------ test/runtests.jl | 34 ++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/ScaleInvariantAnalysis.jl b/src/ScaleInvariantAnalysis.jl index 64f9593..c272407 100644 --- a/src/ScaleInvariantAnalysis.jl +++ b/src/ScaleInvariantAnalysis.jl @@ -55,41 +55,41 @@ function symscale(A::AbstractMatrix; exact::Bool=false) end """ - a1, a2 = matrixscale(A; exact=false) + a, b = matrixscale(A; exact=false) -Given a matrix `A`, return vectors `a1` and `a2` representing the "scale of each -axis," so that `|A[i,j]| ~ a1[i] * a2[j]` for all `i, j`. `a1[i]` and `a2[j]` -are nonnegative, and are zero only if `A[i, j] = 0` for all `j` or all `i`, +Given a matrix `A`, return vectors `a` and `b` representing the "scale of each +axis," so that `|A[i,j]| ~ a[i] * b[j]` for all `i, j`. `a[i]` and `b[j]` are +nonnegative, and are zero only if `A[i, j] = 0` for all `j` or all `i`, respectively. -With `exact=true`, `a1` and `a2` solve the optimization problem +With `exact=true`, `a` and `b` solve the optimization problem - min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a1[i] * a2[j])))² - s.t. n * ∑_i log(a1[i]) = m * ∑_j log(a2[j]) + min ∑_{i,j : A[i,j] ≠ 0} (log(|A[i,j]| / (a[i] * b[j])))² + s.t. ∑_i nA[i] * log(a[i]) = ∑_j mA[j] * log(b[j]) -where `m, n = size(A)`. Up to multiplication by a scalar, these vectors are -covariant under changes of scale but not general linear transformations. +where `nA` and `mA` are the number of nonzeros in each row and column, +respectively. Up to multiplication by a scalar, these vectors are covariant +under changes of scale but not general linear transformations. With `exact=false`, the pattern of nonzeros in `A` is approximated as `u * v'`, -where `sum(u) * v[j]` is the number of nonzeros in column `j` and `sum(v) * -u[i]` is the number of nonzeros in row `i`. This results in an `O(m*n)` rather -than `O((m+n)^3)` algorithm. +where `sum(u) * v[j] = mA[j]` and `sum(v) * u[i] = nA[i]`. This results in an +`O(m*n)` rather than `O((m+n)^3)` algorithm. """ function matrixscale(A::AbstractMatrix; exact::Bool=false) Base.require_one_based_indexing(A) ax1, ax2 = axes(A, 1), axes(A, 2) - (sumlogA1, nz1), (sumlogA2, nz2) = _matrixscale(A, ax1, ax2) + (s, ns), (t, mt) = _matrixscale(A, ax1, ax2) m, n = length(ax1), length(ax2) - if !exact || (all(==(n), nz1) && all(==(m), nz2)) - z = sum(nz1) - u, v = nz1 / sqrt(z), nz2 / sqrt(z) - uᵀ1, vᵀ1 = sum(u), sum(v) - s′, t′ = sumlogA1 ./ u, sumlogA2 ./ v - αoffset, βoffset = s′ ./ vᵀ1, t′ ./ uᵀ1 + if !exact || (all(==(n), ns) && all(==(m), mt)) + z = sum(ns) + @assert sum(mt) == z "Inconsistent nonzero counts in rows and columns" + a = exp.(s ./ ns .- sum(s) / (2z)) + b = exp.(t ./ mt .- sum(t) / (2z)) + return a, b end - p = [fill(n, m); fill(-m, n)] # used to enforce the constraint + p = vcat(ns, -mt) W = isnz(A) - a12 = exp.(cholesky(Diagonal(vcat(nz1, nz2)) + odblocks(W) + p * p') \ vcat(sumlogA1, sumlogA2)) + a12 = exp.(cholesky(Diagonal(vcat(ns, mt)) + odblocks(W) + p * p') \ vcat(s, t)) return a12[begin:begin+m-1], a12[m+begin:end] end diff --git a/test/runtests.jl b/test/runtests.jl index d68c188..7f0d594 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,13 +15,43 @@ function test_scaleinv(f, A::AbstractMatrix, p::Int; iter=10, rtol=sqrt(eps(floa @test npass ≥ iter - 1 end +function test_sumlog(A, a, b; rtol=1e-6) + α, β = log.(a), log.(b) + Aref = sum(abs(log(abs(A[i, j]))) for i in axes(A, 1), j in axes(A, 2) if A[i, j] != 0) + for j in axes(A, 2) + s = 0.0 + for i in axes(A, 1) + if A[i, j] != 0 + s += log(abs(A[i, j])) - α[i] - β[j] + end + end + @test abs(s) ≤ rtol * Aref + end + for i in axes(A, 1) + s = 0.0 + for j in axes(A, 2) + if A[i, j] != 0 + s += log(abs(A[i, j])) - α[i] - β[j] + end + end + @test abs(s) ≤ rtol * Aref + end +end + @testset "ScaleInvariantAnalysis.jl" begin @test symscale([2.0 1.0; 1.0 3.0]) ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true) ≈ exp.([3 1; 1 3] \ [log(2.0); log(3.0)]) @test symscale([1.0 -0.2; -0.2 0]; exact=true) ≈ [1, 0.2] @test symscale([1.0 0; 0 2]; exact=true) ≈ [1, sqrt(2)] test_scaleinv(A -> symscale(A; exact=true), [2.0 1.0; 1.0 3.0], 1) - u, v = matrixscale([2.0 1.0; 1.0 3.0]; exact=true) - @test u ≈ v ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true) + a, b = matrixscale([2.0 1.0; 1.0 3.0]; exact=true) + @test a ≈ b ≈ symscale([2.0 1.0; 1.0 3.0]; exact=true) + a′, b′ = matrixscale([2.0 1.0; 1.0 3.0]) + @test a′ ≈ a && b′ ≈ b + A = [0.0 1.0; -2.0 0.0] + a, b = matrixscale(A; exact=true) + test_sumlog(A, a, b) + a′, b′ = matrixscale(A) + @test sum(log, a) ≈ sum(log, b) ≈ sum(log, a′) ≈ sum(log, b′) @test condscale([1 0; 0 1e-8]) ≈ 1 A = [1.0 -0.2; -0.2 0]