Comparison of non-linear least squares optimizers in Julia, using Rosenbrock-like functions of various dimensions
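For reference (a reconstruction from the residual definitions in the code below, not stated by the author), the two problems minimize, up to a solver-dependent factor of 1/2, starting from x = (-1.2, 1, -1.2, 1, ...):

sparse: f(x) = \sum_{k=1}^{n-1} [10 (x_{k+1} - x_k^2)]^2 + \sum_{k=1}^{n} (x_k - 1)^2

dense:  f(x) = \sum_{1 \le j < k \le n} (x_k - x_j^2)^2 + \sum_{k=1}^{n} [1.2 (x_k - 1)]^2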
# Compare non-linear least squares solvers on the multi-dimensional Rosenbrock function
startpoint(n) = Float64[i % 2 == 0 ? 1.0 : -1.2 for i = 1:n]
const diagonal_weight_dense = 1.2
const offdiagonal_weight_dense = 1.0
const diagonal_weight_sparse = 1.0
const offdiagonal_weight_sparse = 10.0
################################################################################
# NLLS formulation
using NLLSsolver, Static, StaticArrays
struct RosenbrockDiag <: NLLSsolver.AbstractResidual
    weight::Float64
    varind::Int
end
Base.eltype(::RosenbrockDiag) = Float64
NLLSsolver.ndeps(::RosenbrockDiag) = static(1) # Residual depends on 1 variable
NLLSsolver.nres(::RosenbrockDiag) = static(1) # Residual has length 1
NLLSsolver.varindices(res::RosenbrockDiag) = res.varind
NLLSsolver.getvars(res::RosenbrockDiag, vars::Vector) = (vars[res.varind]::Float64, )
NLLSsolver.computeresidual(res::RosenbrockDiag, x) = res.weight * (x - 1)
struct RosenbrockOffDiag <: NLLSsolver.AbstractResidual
    weight::Float64
    varind::SVector{2, Int}
end
RosenbrockOffDiag(w, j, k) = RosenbrockOffDiag(w, SVector(j, k))
Base.eltype(::RosenbrockOffDiag) = Float64
NLLSsolver.ndeps(::RosenbrockOffDiag) = static(2) # Residual depends on 2 variables
NLLSsolver.nres(::RosenbrockOffDiag) = static(1) # Residual has length 1
NLLSsolver.varindices(res::RosenbrockOffDiag) = res.varind
NLLSsolver.getvars(res::RosenbrockOffDiag, vars::Vector) = (vars[res.varind[1]]::Float64, vars[res.varind[2]]::Float64)
NLLSsolver.computeresidual(res::RosenbrockOffDiag, x, y) = res.weight * (y - x ^ 2)
function nllssolver_sparse(n::Int)
    costs = NLLSsolver.CostStruct{Union{RosenbrockDiag, RosenbrockOffDiag}}()
    costs.data[RosenbrockDiag] = RosenbrockDiag[RosenbrockDiag(diagonal_weight_sparse, k) for k = 1:n]
    costs.data[RosenbrockOffDiag] = RosenbrockOffDiag[RosenbrockOffDiag(offdiagonal_weight_sparse, k, k+1) for k = 1:n-1]
    return NLLSProblem(startpoint(n), costs)
end
function nllssolver_dense(n::Int)
    costs = NLLSsolver.CostStruct{Union{RosenbrockDiag, RosenbrockOffDiag}}()
    costs.data[RosenbrockDiag] = RosenbrockDiag[RosenbrockDiag(diagonal_weight_dense, k) for k = 1:n]
    costs.data[RosenbrockOffDiag] = vcat(collect(Iterators.flatten([[RosenbrockOffDiag(offdiagonal_weight_dense, j, k) for k = j+1:n] for j = 1:n-1])))
    return NLLSProblem(startpoint(n), costs)
end
function optimize(model::NLLSProblem)
    result = NLLSsolver.optimize!(model, NLLSOptions(maxiters = 30000))
    return result.niterations, result.startcost, result.bestcost
end
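# Minimal usage sketch (added illustration, not part of the original benchmark):
# iters, startcost, bestcost = optimize(nllssolver_sparse(8))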
################################################################################
################################################################################
# JuMP formulation
using JuMP, Ipopt
import MathOptInterface as MOI
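# Note (added explanation): the least-squares problems are posed to Ipopt via auxiliary
# `residuals` variables tied to `x` by equality constraints, with the objective
# sum(residuals .^ 2); Ipopt then solves the result as a general NLP.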
function ipopt_sparse(n::Int)
    model = Model(Ipopt.Optimizer)
    x0 = startpoint(n)
    @variable(model, x[i in 1:n], start = x0[i])
    @variable(model, residuals[1:2*n-1])
    @constraints(model, begin
        [k in 1:(n - 1)], residuals[k] == offdiagonal_weight_sparse * (x[k + 1] - x[k]^2)
        [k in 1:n], residuals[n-1+k] == diagonal_weight_sparse * (x[k] - 1)
    end)
    @objective(model, Min, sum(residuals .^ 2))
    set_silent(model)
    return model
end
function ipopt_dense(n::Int)
    model = Model(Ipopt.Optimizer)
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    len = n + ((n - 1) * n) >> 1
    @variable(model, residuals[1:len])
    i = 0
    for j in 1:n-1
        for k in j+1:n
            i += 1
            @constraint(model, residuals[i] == offdiagonal_weight_dense * (x[k] - x[j]^2))
        end
    end
    @constraint(model, [k in 1:n], residuals[i+k] == diagonal_weight_dense * (x[k] - 1))
    @objective(model, Min, sum(residuals .^ 2))
    set_silent(model)
    return model
end
function optimize(model::JuMP.Model)
    JuMP.optimize!(model)
    return MOI.get(model, MOI.BarrierIterations()), NaN64, objective_value(model)
end
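# Minimal usage sketch (added illustration): the iteration count reported here is Ipopt's
# barrier iteration count, and the start cost is not available (hence NaN64).
# iters, _, cost = optimize(ipopt_sparse(8))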
################################################################################
################################################################################
# JSO formulation
using JSOSolvers, NLPModels, NLPModelsJuMP
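# Note (added explanation): each JSO problem is built as a JuMP model of the residuals,
# wrapped in a MathOptNLSModel, and bundled with the chosen solver (trunk or tron) so
# that the generic `optimize` below can dispatch on JSOModel.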
struct JSOModel{T}
    model::MathOptNLSModel
    solver::T
end
function jso_sparse(n::Int, solver)
    model = Model()
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    @NLexpression(model, FA[k = 1:(n - 1)], offdiagonal_weight_sparse * (x[k + 1] - x[k]^2))
    @expression(model, FB[k = 1:n], diagonal_weight_sparse * (x[k] - 1))
    return JSOModel(MathOptNLSModel(model, [FA; FB]), solver)
end
function jso_dense(n::Int, solver)
    model = Model()
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    FA = Any[]
    for j = 1:n-1
        for k = j+1:n
            push!(FA, @NLexpression(model, offdiagonal_weight_dense * (x[k] - x[j]^2)))
        end
    end
    @expression(model, FB[k = 1:n], diagonal_weight_dense * (x[k] - 1))
    return JSOModel(MathOptNLSModel(model, [FA; FB]), solver)
end
function optimize(model::JSOModel)
    result = model.solver(model.model)
    return result.iter, NaN64, result.objective
end
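# Minimal usage sketch (added illustration, not part of the original benchmark):
# iters, _, cost = optimize(jso_sparse(8, trunk))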
################################################################################
################################################################################
# LeastSquaresOptim formulation
import LeastSquaresOptim, SparseArrays
function rosenbrock_sparse!(out, x)
    @inbounds for k = 1:length(x)-1
        out[2*k-1] = diagonal_weight_sparse * (x[k] - 1)
        out[2*k] = offdiagonal_weight_sparse * (x[k + 1] - x[k]^2)
    end
    out[end] = diagonal_weight_sparse * (x[end] - 1)
end
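# Note (added explanation): rosenbrockJ! below writes the analytic Jacobian values of the
# residuals above directly into the nonzeros of the sparse matrix built in lso_sparse;
# the indexing relies on that pattern's column-major (CSC) storage, so the two must stay in sync.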
function rosenbrockJ!(out, x)
    @inbounds for k = 1:length(x)-1
        out.nzval[3*k-2] = diagonal_weight_sparse
        out.nzval[3*k-1] = -2 * offdiagonal_weight_sparse * x[k]
        out.nzval[3*k] = offdiagonal_weight_sparse
    end
    out.nzval[end] = diagonal_weight_sparse
end
function rosenbrock_dense!(out, x)
    n = length(x)
    @inbounds for k = 1:n
        out[k] = diagonal_weight_dense * (x[k] - 1)
    end
    i = n + 1
    @inbounds for j = 1:n-1
        xj2 = x[j] ^ 2
        for k = j+1:n
            out[i] = offdiagonal_weight_dense * (x[k] - xj2)
            i += 1
        end
    end
end
function lso_sparse(n::Int)
    len = 2 * n - 1
    J = SparseArrays.sparse(vcat(collect(Iterators.flatten([(2*k-1, 2*k, 2*k) for k in 1:(n-1)])), len), vcat(collect(Iterators.flatten([(k, k, k+1) for k in 1:(n-1)])), n), zeros(3*n-2), len, n)
    return LeastSquaresOptim.LeastSquaresProblem(; x=startpoint(n), f! = rosenbrock_sparse!, g! = rosenbrockJ!, J=J, output_length=len)
end
lso_dense(n::Int) = LeastSquaresOptim.LeastSquaresProblem(; x=startpoint(n), f! = rosenbrock_dense!, output_length=n+((n-1)*n)>>1, autodiff=:forward)
function optimize(model::LeastSquaresOptim.LeastSquaresProblem)
    result = LeastSquaresOptim.optimize!(model, LeastSquaresOptim.LevenbergMarquardt(); iterations=30000)
    return result.iterations, NaN64, result.ssr * 0.5
end
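# Minimal usage sketch (added illustration, not part of the original benchmark):
# iters, _, cost = optimize(lso_sparse(64))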
################################################################################
################################################################################
# Run the test and display results
using Plots, Printf
tickformatter(x) = @sprintf("%g", x)
function runtest(name, sizes, solvers)
    result = Matrix{Float64}(undef, (4, length(sizes)))
    p = plot()
    # For each optimizer
    for (label, constructor) in solvers
        # First run the optimization to compile everything
        optimize(constructor(sizes[1]))
        optimize(constructor(sizes[end])) # Do largest and smallest in case there are dense and sparse code paths
        # Go over each problem, recording the time, iterations and start and end cost
        for (i, n) in enumerate(sizes)
            # Construct the problem
            model = constructor(n)
            # Optimize
            result[1,i] = @elapsed res = optimize(model)
            result[2,i] = res[1]
            result[3,i] = res[2]
            result[4,i] = res[3]
            if res[3] > 1.e-10
                cost = res[3]
                println("$label on size $n failed to reach the minimum (final cost $cost)")
                result[1,i] = NaN64
            end
        end
        # Plot the graphs
        plot!(p, vec(sizes), vec(result[1,:]), label=label)
    end
    yaxis!(p, minorgrid=true, formatter=tickformatter)
    xaxis!(p, minorgrid=true, formatter=tickformatter)
    plot!(p, legend=:topleft, yscale=:log10, xscale=:log2)
    title!(p, "Speed comparison: $name")
    xlabel!(p, "Problem size")
    ylabel!(p, "Optimization time (s)")
    display(p)
end
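# Note (added explanation): runs whose final cost exceeds 1e-10 are treated as failures
# above and their timings set to NaN, so they drop out of the timing curves.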
################################################################################
runtest("dense problem", [2 4 8 16 32 64], ["Ipopt" => ipopt_dense, "JSO trunk" => n->jso_dense(n, trunk), "JSO tron" => n->jso_dense(n, tron), "LeastSquaresOptim LM" => lso_dense, "NLLSsolver LM" => nllssolver_dense])
runtest("sparse problem", [16 46 128 362 1024 2896], ["Ipopt" => ipopt_sparse, "JSO trunk" => n->jso_sparse(n, trunk), "JSO tron" => n->jso_sparse(n, tron), "LeastSquaresOptim LM" => lso_sparse, "NLLSsolver LM" => nllssolver_sparse])
@pkofod The sparse problem is almost identical. The dense problem is different: it generates a fully dense Hessian. You can see the sparse cost definition on line 141 and the dense one on line 158.
Is it the second variant here? https://en.wikipedia.org/wiki/Rosenbrock_function