Make DFSane non-allocating as well

avik-pal · avik-pal · commit 59b0d9665b69 · 2024-01-13T02:36:02.000-05:00
diff --git a/src/nlsolve/dfsane.jl b/src/nlsolve/dfsane.jl
@@ -1,6 +1,6 @@
 """
     SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
-        M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+        M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
         nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 ./ k^2)
 
 A low-overhead implementation of the df-sane method for solving large-scale nonlinear
@@ -42,21 +42,26 @@ see the paper [1].
 information for solving large-scale nonlinear systems of equations, Mathematics of
 Computation, 75, 1429-1448.
 """
-@kwdef @concrete struct SimpleDFSane <: AbstractSimpleNonlinearSolveAlgorithm
-    σ_min = 1e-10
-    σ_max = 1e10
-    σ_1 = 1.0
-    M::Int = 10
-    γ = 1e-4
-    τ_min = 0.1
-    τ_max = 0.5
-    nexp::Int = 2
-    η_strategy = (f_1, k, x, F) -> f_1 ./ k^2
+@concrete struct SimpleDFSane{M} <: AbstractSimpleNonlinearSolveAlgorithm
+    σ_min
+    σ_max
+    σ_1
+    γ
+    τ_min
+    τ_max
+    nexp::Int
+    η_strategy
 end
 
-function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
+function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+        M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+        nexp::Int = 2, η_strategy::F = (f_1, k, x, F) -> f_1 ./ k^2) where {F}
+    return SimpleDFSane{SciMLBase._unwrap_val(M)}(σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp, η_strategy)
+end
+
+function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane{M}, args...;
         abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
-        termination_condition = nothing, kwargs...)
+        termination_condition = nothing, kwargs...) where {M}
     x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
     T = eltype(x)
@@ -65,7 +70,7 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
     σ_max = T(alg.σ_max)
     σ_k = T(alg.σ_1)
 
-    (; M, nexp, η_strategy) = alg
+    (; nexp, η_strategy) = alg
     γ = T(alg.γ)
     τ_min = T(alg.τ_min)
     τ_max = T(alg.τ_max)
@@ -77,7 +82,11 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
     α_1 = one(T)
     f_1 = fx_norm
 
-    history_f_k = fill(fx_norm, M)
+    history_f_k = if x isa SArray
+        ones(SVector{M, T}) * fx_norm
+    else
+        fill(fx_norm, M)
+    end
 
     # Generate the cache
     @bb x_cache = similar(x)
@@ -143,7 +152,11 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
         fx_norm = fx_norm_new
 
         # Store function value
-        history_f_k[mod1(k, M)] = fx_norm_new
+        if history_f_k isa SVector
+            history_f_k = Base.setindex(history_f_k, fx_norm_new, mod1(k, M))
+        else
+            history_f_k[mod1(k, M)] = fx_norm_new
+        end
         k += 1
     end
 
diff --git a/src/nlsolve/lbroyden.jl b/src/nlsolve/lbroyden.jl
@@ -107,7 +107,7 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
     fx = _get_fx(prob, x)
     threshold = __get_threshold(alg)
 
-    U, Vᵀ = __init_low_rank_jacobian(x, fx, threshold)
+    U, Vᵀ = __init_low_rank_jacobian(vec(x), vec(fx), threshold)
 
     abstol = DiffEqBase._get_tolerance(abstol, eltype(x))
 
@@ -230,8 +230,8 @@ function __mapdot(x::SVector{S1}, Y::SVector{S2, <:SVector{S1}}) where {S1, S2}
 end
 @generated function __mapTdot(x::SVector{S1}, Y::SVector{S1, <:SVector{S2}}) where {S1, S2}
     calls = []
-    syms = [gensym("m$(i)") for i in 1:length(Y)]
-    for i in 1:length(Y)
+    syms = [gensym("m$(i)") for i in 1:S1]
+    for i in 1:S1
         push!(calls, :($(syms[i]) = x[$(i)] .* Y[$i]))
     end
     push!(calls, :(return .+($(syms...))))
@@ -259,18 +259,21 @@ function __init_low_rank_jacobian(u::StaticArray{S1, T1}, fu::StaticArray{S2, T2
     U = MArray{Tuple{prod(fuSize), threshold}, T}(undef)
     return U, Vᵀ
 end
-@generated function __init_low_rank_jacobian(u::SArray{S1, T1}, fu::SArray{S2, T2},
-        ::Val{threshold}) where {S1, S2, T1, T2, threshold}
+
+@generated function __init_low_rank_jacobian(u::SVector{Lu, T1}, fu::SVector{Lfu, T2},
+        ::Val{threshold}) where {Lu, Lfu, T1, T2, threshold}
     T = promote_type(T1, T2)
-    Lfu, Lu = prod(Size(fu)), prod(Size(u))
-    inner_inits_Vᵀ = [zeros(SVector{Lu, T}) for i in 1:threshold]
-    inner_inits_U = [zeros(SVector{Lfu, T}) for i in 1:threshold]
+    # `u` and `fu` are already flat `SVector`s, so `Lu` and `Lfu` come straight from the
+    # type parameters; callers holding an N-d `SArray` must `vec` it first.
+    inner_inits_Vᵀ = [:(zeros(SVector{$Lu, $T})) for i in 1:threshold]
+    inner_inits_U = [:(zeros(SVector{$Lfu, $T})) for i in 1:threshold]
     return quote
         Vᵀ = SVector($(inner_inits_Vᵀ...))
         U = SVector($(inner_inits_U...))
         return U, Vᵀ
     end
 end
+
 function __init_low_rank_jacobian(u, fu, ::Val{threshold}) where {threshold}
     Vᵀ = similar(u, threshold, length(u))
     U = similar(u, length(fu), threshold)
diff --git a/test/basictests.jl b/test/basictests.jl
@@ -164,7 +164,7 @@ end
 ## SimpleDFSane needs to allocate a history vector
 @testset "Allocation Checks: $(_nameof(alg))" for alg in (SimpleNewtonRaphson(),
     SimpleHalley(), SimpleBroyden(), SimpleKlement(), SimpleLimitedMemoryBroyden(),
-    SimpleTrustRegion())
+    SimpleTrustRegion(), SimpleDFSane())
     @check_allocs nlsolve(prob, alg) = SciMLBase.solve(prob, alg; abstol = 1e-9)
 
     nlprob_scalar = NonlinearProblem{false}(quadratic_f, 1.0, 2.0)
@@ -175,7 +175,8 @@ end
         @test true
     catch e
         @error e
-        @test false
+        # DFSane's history buffer is a `Vector` (allocates) unless `u0` is an `SArray`
+        @test false broken=(alg isa SimpleDFSane)
     end
 
     # ForwardDiff allocates for hessian since we don't propagate the chunksize