From fd6c03222edf3504c98fe18ea0fadabb2719d631 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 23 Aug 2025 14:54:43 -0400 Subject: [PATCH 1/4] option to omit inner-iteration gradients in CCSA --- src/algs/mma/ccsa_quadratic.c | 45 ++++++++++++++++++++++++++++++--- src/algs/mma/mma.c | 47 ++++++++++++++++++++++++++++++++--- src/algs/mma/mma.h | 2 ++ src/api/optimize.c | 10 ++++++-- 4 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/algs/mma/ccsa_quadratic.c b/src/algs/mma/ccsa_quadratic.c index f89fc78e..25e1c9e3 100644 --- a/src/algs/mma/ccsa_quadratic.c +++ b/src/algs/mma/ccsa_quadratic.c @@ -219,6 +219,7 @@ nlopt_result ccsa_quadratic_minimize( double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, + int inner_gradients, int always_improve, const double *sigma_init) { nlopt_result ret = NLOPT_SUCCESS; @@ -446,7 +447,7 @@ nlopt_result ccsa_quadratic_minimize( i, y[i], i, dd.gcval[i]); } - fcur = f(n, xcur, dfdx_cur, f_data); + fcur = f(n, xcur, inner_gradients ? dfdx_cur : NULL, f_data); ++ *(stop->nevals_p); ++inner_nevals; if (nlopt_stop_forced(stop)) { @@ -454,7 +455,8 @@ nlopt_result ccsa_quadratic_minimize( feasible_cur = 1; infeasibility_cur = 0; inner_done = dd.gval >= fcur; for (i = ifc = 0; ifc < mfc; ++ifc) { - nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n, + nlopt_eval_constraint(fcval_cur + i, + inner_gradients ? dfcdx_cur + i*n : NULL, fc + ifc, n, xcur); i += fc[ifc].m; if (nlopt_stop_forced(stop)) { @@ -474,10 +476,45 @@ nlopt_result ccsa_quadratic_minimize( inner_done = inner_done || (inner_maxeval > 0 && inner_nevals == inner_maxeval); - if ((fcur < *minf && (inner_done || feasible_cur || !feasible)) - || (!feasible && infeasibility_cur < infeasibility)) { + /* update the current point. 
If always_improve (the default), use an aggressive + strategy that always accepts an improvement even if !inner_done, + otherwise only update the current point if inner_done (= outer iteration), + more like the original Svanberg paper */ + if (always_improve ? + (fcur < *minf && (inner_done || feasible_cur || !feasible)) + || (!feasible && infeasibility_cur < infeasibility) : inner_done) { if (verbose && !feasible_cur) printf("CCSA - using infeasible point?\n"); + + if (!inner_gradients) { /* evaluate again, this time with gradients */ + fcur = f(n, xcur, dfdx_cur, f_data); + /* don't update *(stop->nevals_p) — hope user caches xcur + so that they don't actually recompute f */ + if (nlopt_stop_forced(stop)) { + ret = NLOPT_FORCED_STOP; goto done; } + feasible_cur = 1; infeasibility_cur = 0; + for (i = ifc = 0; ifc < mfc; ++ifc) { + nlopt_eval_constraint(fcval_cur + i, + dfcdx_cur + i*n, + fc + ifc, n, xcur); + i += fc[ifc].m; + if (nlopt_stop_forced(stop)) { + ret = NLOPT_FORCED_STOP; goto done; } + } + /* recompute feasible_cur etc in case the caller + has changed the objective function for an outer iteration, + but don't change inner_done in that case */ + for (i = ifc = 0; ifc < mfc; ++ifc) { + unsigned i0 = i, inext = i + fc[ifc].m; + for (; i < inext; ++i) { + feasible_cur = feasible_cur + && fcval_cur[i] <= fc[ifc].tol[i-i0]; + if (fcval_cur[i] > infeasibility_cur) + infeasibility_cur = fcval_cur[i]; + } + } + } + dd.fval = *minf = fcur; infeasibility = infeasibility_cur; memcpy(fcval, fcval_cur, sizeof(double)*m); diff --git a/src/algs/mma/mma.c b/src/algs/mma/mma.c index 3236099c..309066e3 100644 --- a/src/algs/mma/mma.c +++ b/src/algs/mma/mma.c @@ -149,6 +149,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, + int inner_gradients, int always_improve, const double *sigma_init) { nlopt_result ret = NLOPT_SUCCESS; @@ -292,7 +293,7 @@
nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, i, y[i], i, dd.gcval[i]); } - fcur = f(n, xcur, dfdx_cur, f_data); + fcur = f(n, xcur, inner_gradients ? dfdx_cur : NULL, f_data); ++ *(stop->nevals_p); ++inner_nevals; if (nlopt_stop_forced(stop)) { @@ -301,7 +302,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, new_infeasible_constraint = 0; inner_done = dd.gval >= fcur; for (i = ifc = 0; ifc < mfc; ++ifc) { - nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n, + nlopt_eval_constraint(fcval_cur + i, inner_gradients ? dfcdx_cur + i*n : NULL, fc + ifc, n, xcur); i += fc[ifc].m; if (nlopt_stop_forced(stop)) { @@ -325,10 +326,48 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, inner_done = inner_done || (inner_maxeval > 0 && inner_nevals == inner_maxeval); - if ((fcur < *minf && (inner_done || feasible_cur || !feasible)) - || (!feasible && infeasibility_cur < infeasibility)) { + /* update the current point. If always_improve (the default), use an aggressive + strategy that always accepts an improvement even if !inner_done, + otherwise only update the current point if inner_done (= outer iteration), + more like the original Svanberg paper */ + if (always_improve ? 
(fcur < *minf && (inner_done || feasible_cur || !feasible)) + || (!feasible && infeasibility_cur < infeasibility) : inner_done) { if (verbose && !feasible_cur) printf("MMA - using infeasible point?\n"); + + if (!inner_gradients) { /* evaluate again, this time with gradients */ + fcur = f(n, xcur, dfdx_cur, f_data); + /* don't update *(stop->nevals_p) — hope user caches xcur + so that they don't actually recompute f */ + if (nlopt_stop_forced(stop)) { + ret = NLOPT_FORCED_STOP; goto done; } + feasible_cur = 1; infeasibility_cur = 0; + new_infeasible_constraint = 0; + inner_done = dd.gval >= fcur; + for (i = ifc = 0; ifc < mfc; ++ifc) { + nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n, + fc + ifc, n, xcur); + i += fc[ifc].m; + if (nlopt_stop_forced(stop)) { + ret = NLOPT_FORCED_STOP; goto done; } + } + /* recompute feasible_cur etc in case the caller + has changed the objective function for an outer iteration, + but don't change inner_done in that case */ + for (i = ifc = 0; ifc < mfc; ++ifc) { + unsigned i0 = i, inext = i + fc[ifc].m; + for (; i < inext; ++i) + if (!nlopt_isnan(fcval_cur[i])) { + feasible_cur = feasible_cur + && (fcval_cur[i] <= fc[ifc].tol[i-i0]); + if (nlopt_isnan(fcval[i]) && fcval_cur[i] > 0) + new_infeasible_constraint = 1; + if (fcval_cur[i] > infeasibility_cur) + infeasibility_cur = fcval_cur[i]; + } + } + } + dd.fval = *minf = fcur; infeasibility = infeasibility_cur; memcpy(fcval, fcval_cur, sizeof(double)*m); diff --git a/src/algs/mma/mma.h b/src/algs/mma/mma.h index ca2131c4..48c36bca 100644 --- a/src/algs/mma/mma.h +++ b/src/algs/mma/mma.h @@ -41,6 +41,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, + int inner_gradients, int always_improve, const double *sigma_init); nlopt_result ccsa_quadratic_minimize( @@ -54,6 +55,7 @@ nlopt_result ccsa_quadratic_minimize( double *minf, nlopt_stopping *stop, 
nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, + int inner_gradients, int always_improve, const double *sigma_init); #ifdef __cplusplus diff --git a/src/api/optimize.c b/src/api/optimize.c index 4cc6583b..58938614 100644 --- a/src/api/optimize.c +++ b/src/api/optimize.c @@ -798,11 +798,17 @@ static nlopt_result nlopt_optimize_(nlopt_opt opt, double *x, double *minf) int inner_maxeval = (int)nlopt_get_param(opt, "inner_maxeval",0); int verbosity = (int)nlopt_get_param(opt, "verbosity",0); double rho_init = nlopt_get_param(opt, "rho_init",1.0); + int inner_gradients = (int)nlopt_get_param(opt, "inner_gradients",1); + int always_improve = (int)nlopt_get_param(opt, "always_improve",1); nlopt_opt dual_opt; nlopt_result ret; if (!(rho_init > 0) && !nlopt_isinf(rho_init)) RETURN_ERR(NLOPT_INVALID_ARGS, opt, "rho_init must be positive and finite"); + if (inner_gradients != 0 && inner_gradients != 1) + RETURN_ERR(NLOPT_INVALID_ARGS, opt, "inner_gradients must be 0 or 1"); + if (always_improve != 0 && always_improve != 1) + RETURN_ERR(NLOPT_INVALID_ARGS, opt, "always_improve must be 0 or 1"); verbosity = verbosity < 0 ? 0 : verbosity; #define LO(param, def) (opt->local_opt ? 
opt->local_opt->param : (def)) @@ -817,9 +823,9 @@ static nlopt_result nlopt_optimize_(nlopt_opt opt, double *x, double *minf) nlopt_set_maxeval(dual_opt, (int)nlopt_get_param(opt, "dual_maxeval", LO(maxeval, 100000))); #undef LO if (algorithm == NLOPT_LD_MMA) - ret = mma_minimize(n, f, f_data, opt->m, opt->fc, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, opt->dx); + ret = mma_minimize(n, f, f_data, opt->m, opt->fc, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, inner_gradients, always_improve, opt->dx); else - ret = ccsa_quadratic_minimize(n, f, f_data, opt->m, opt->fc, opt->pre, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, opt->dx); + ret = ccsa_quadratic_minimize(n, f, f_data, opt->m, opt->fc, opt->pre, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, inner_gradients, always_improve, opt->dx); nlopt_destroy(dual_opt); return ret; } From 0b4031ac439d0ddb5da5abce9b43d5aaf2136bff Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 23 Aug 2025 15:01:34 -0400 Subject: [PATCH 2/4] docs for new CCSA params --- doc/docs/NLopt_Algorithms.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/docs/NLopt_Algorithms.md b/doc/docs/NLopt_Algorithms.md index 786c8097..975dd7a5 100644 --- a/doc/docs/NLopt_Algorithms.md +++ b/doc/docs/NLopt_Algorithms.md @@ -337,6 +337,8 @@ specified using the [`nlopt_set_param` API](NLopt_Reference.md#algorithm-specifi * `dual_algorithm` (defaults to `NLOPT_LD_MMA`), `dual_ftol_rel` (defaults to `1e-14`), `dual_ftol_abs` (defaults to `0`), `dual_xtol_rel` (defaults to `0`), `dual_xtol_abs` (defaults to `0`), `dual_maxeval` (defaults to `100000`): These specify how the algorithm internally solves the "dual" optimization problem for its approximate objective. 
Because this subsidiary solve requires no evaluations of the user's objective function, it is typically fast enough that we can solve it to high precision without worrying too much about the details. Howeve,r in high-dimensional problems you may notice that MMA/CCSA is taking a long time between optimization steps, in which case you may want to increase `dual_ftol_rel` or make other changes. If these parameters are not specified, NLopt takes them from the [subsidiary-optimizer algorithm](NLopt_Reference.md#localsubsidiary-optimization-algorithm) if that has been specified, and otherwise uses the defaults indicated here. * `verbosity`: If > 0, causes the algorithm to print internal status information on each iteration. * `rho_init`: if specified, should be a rough upper bound for the second derivative (the biggest eigenvalue of the Hessian of the objective or constraints); defaults to `1.0`. CCSA/MMA will adaptively adjust this as the optimization progresses, so even it if `rho_init` is completely wrong the algorithm will still converge. A `rho_init` that is too large will cause the algorithm to take overly small steps at the beginning, while a `rho_init` that is too small will cause it to take overly large steps (and have to backtrack) at the beginning. Similarly, you can also use the "initial stepsize" option ([NLopt reference](NLopt_Reference.md#initial-step-size)) to control the maximum size of the initial steps (half the diameter of the trust region). +* `inner_gradients` (defaults to `1`): if `0`, the gradient is *not* computed on "inner" iterations of the CCSA algorithm. This can be useful in order to save computations on inner iterations, and also for the caller to distinguish inner and outer iterations. 
However, that means that your functions are called twice on outer iterations: once for the last inner iteration (without gradients), and again with the gradient; to save wasted computation, you may want to cache the last function value and not recompute it (only compute the gradient) if `x` does not change between subsequent calls. +* `always_improve` (defaults to `1`): if `0`, only update the current point (whence the gradients and model) on outer iterations, as in the original Svanberg paper; if `1`, we are slightly more aggressive about updating the current point, even doing so on inner iterations if the new point is strictly better (e.g. feasible and better objective) than the previous point. ### SLSQP From 68df46238e3347335ca1d77f6b1ea31259e86bd2 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 23 Aug 2025 15:16:01 -0400 Subject: [PATCH 3/4] no unicode dash in comment --- src/algs/mma/ccsa_quadratic.c | 2 +- src/algs/mma/mma.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/algs/mma/ccsa_quadratic.c b/src/algs/mma/ccsa_quadratic.c index 25e1c9e3..614f0d4c 100644 --- a/src/algs/mma/ccsa_quadratic.c +++ b/src/algs/mma/ccsa_quadratic.c @@ -488,7 +488,7 @@ nlopt_result ccsa_quadratic_minimize( if (!inner_gradients) { /* evaluate again, this time with gradients */ fcur = f(n, xcur, dfdx_cur, f_data); - /* don't update *(stop->nevals_p) — hope user caches xcur + /* don't update *(stop->nevals_p) -- hope user caches xcur so that they don't actually recompute f */ if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; } diff --git a/src/algs/mma/mma.c b/src/algs/mma/mma.c index 309066e3..6a5b33e0 100644 --- a/src/algs/mma/mma.c +++ b/src/algs/mma/mma.c @@ -337,7 +337,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, if (!inner_gradients) { /* evaluate again, this time with gradients */ fcur = f(n, xcur, dfdx_cur, f_data); - /* don't update *(stop->nevals_p) — hope user caches xcur + /* don't update 
*(stop->nevals_p) -- hope user caches xcur so that they don't actually recompute f */ if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; } From 4d720b189ee2efd2dec9280fe2b2e0c4d6bcb5d4 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 25 Aug 2025 09:52:24 -0400 Subject: [PATCH 4/4] undocumented internal parameter sigma_min for experiments --- src/algs/mma/ccsa_quadratic.c | 4 +++- src/algs/mma/mma.c | 4 +++- src/algs/mma/mma.h | 4 ++-- src/api/optimize.c | 7 +++++-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/algs/mma/ccsa_quadratic.c b/src/algs/mma/ccsa_quadratic.c index 614f0d4c..77a3a89b 100644 --- a/src/algs/mma/ccsa_quadratic.c +++ b/src/algs/mma/ccsa_quadratic.c @@ -219,7 +219,7 @@ nlopt_result ccsa_quadratic_minimize( double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, - int inner_gradients, int always_improve, + int inner_gradients, int always_improve, double sigma_min, const double *sigma_init) { nlopt_result ret = NLOPT_SUCCESS; @@ -330,6 +330,7 @@ nlopt_result ccsa_quadratic_minimize( sigma[j] = 1.0; /* arbitrary default */ else sigma[j] = 0.5 * (ub[j] - lb[j]); + sigma[j] = MAX(sigma[j], sigma_min); } rho = rho_init; for (i = 0; i < m; ++i) { @@ -585,6 +586,7 @@ nlopt_result ccsa_quadratic_minimize( 0.01*(ub-lb), which seems unnecessarily large */ sigma[j] = MAX(sigma[j], 1e-8*(ub[j]-lb[j])); } + sigma[j] = MAX(sigma[j], sigma_min); } for (j = 0; j < MIN(verbose, n); ++j) printf(" CCSA sigma[%u] -> %g\n", diff --git a/src/algs/mma/mma.c b/src/algs/mma/mma.c index 6a5b33e0..505cedd4 100644 --- a/src/algs/mma/mma.c +++ b/src/algs/mma/mma.c @@ -149,7 +149,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, - int inner_gradients, int always_improve, + int inner_gradients, int always_improve, double sigma_min, const double 
*sigma_init) { nlopt_result ret = NLOPT_SUCCESS; @@ -206,6 +206,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, sigma[j] = 1.0; /* arbitrary default */ else sigma[j] = 0.5 * (ub[j] - lb[j]); + sigma[j] = MAX(sigma[j], sigma_min); } rho = rho_init; for (i = 0; i < m; ++i) { @@ -437,6 +438,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j])); sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j])); } + sigma[j] = MAX(sigma[j], sigma_min); } for (j = 0; j < MIN(verbose, n); ++j) printf(" MMA sigma[%u] -> %g\n", diff --git a/src/algs/mma/mma.h b/src/algs/mma/mma.h index 48c36bca..72a813c6 100644 --- a/src/algs/mma/mma.h +++ b/src/algs/mma/mma.h @@ -41,7 +41,7 @@ nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data, double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, - int inner_gradients, int always_improve, + int inner_gradients, int always_improve, double sigma_min, const double *sigma_init); nlopt_result ccsa_quadratic_minimize( @@ -55,7 +55,7 @@ nlopt_result ccsa_quadratic_minimize( double *minf, nlopt_stopping *stop, nlopt_opt dual_opt, int inner_maxeval, unsigned verbose, double rho_init, - int inner_gradients, int always_improve, + int inner_gradients, int always_improve, double sigma_min, const double *sigma_init); #ifdef __cplusplus diff --git a/src/api/optimize.c b/src/api/optimize.c index 58938614..20268f9a 100644 --- a/src/api/optimize.c +++ b/src/api/optimize.c @@ -800,6 +800,7 @@ static nlopt_result nlopt_optimize_(nlopt_opt opt, double *x, double *minf) double rho_init = nlopt_get_param(opt, "rho_init",1.0); int inner_gradients = (int)nlopt_get_param(opt, "inner_gradients",1); int always_improve = (int)nlopt_get_param(opt, "always_improve",1); + double sigma_min = nlopt_get_param(opt, "sigma_min",0.0); nlopt_opt dual_opt; nlopt_result ret; @@ -809,6 +810,8 @@ static nlopt_result nlopt_optimize_(nlopt_opt opt, 
double *x, double *minf) RETURN_ERR(NLOPT_INVALID_ARGS, opt, "inner_gradients must be 0 or 1"); if (always_improve != 0 && always_improve != 1) RETURN_ERR(NLOPT_INVALID_ARGS, opt, "always_improve must be 0 or 1"); + if (sigma_min < 0.0) + RETURN_ERR(NLOPT_INVALID_ARGS, opt, "sigma_min must be non-negative"); verbosity = verbosity < 0 ? 0 : verbosity; #define LO(param, def) (opt->local_opt ? opt->local_opt->param : (def)) @@ -823,9 +826,9 @@ static nlopt_result nlopt_optimize_(nlopt_opt opt, double *x, double *minf) nlopt_set_maxeval(dual_opt, (int)nlopt_get_param(opt, "dual_maxeval", LO(maxeval, 100000))); #undef LO if (algorithm == NLOPT_LD_MMA) - ret = mma_minimize(n, f, f_data, opt->m, opt->fc, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, inner_gradients, always_improve, opt->dx); + ret = mma_minimize(n, f, f_data, opt->m, opt->fc, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, inner_gradients, always_improve, sigma_min, opt->dx); else - ret = ccsa_quadratic_minimize(n, f, f_data, opt->m, opt->fc, opt->pre, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, inner_gradients, always_improve, opt->dx); + ret = ccsa_quadratic_minimize(n, f, f_data, opt->m, opt->fc, opt->pre, lb, ub, x, minf, &stop, dual_opt, inner_maxeval, (unsigned)verbosity, rho_init, inner_gradients, always_improve, sigma_min, opt->dx); nlopt_destroy(dual_opt); return ret; }