version 0.4.1
trustregion.hh
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2021-2025 The Ikarus Developers mueller@ibb.uni-stuttgart.de
2// SPDX-License-Identifier: LGPL-3.0-or-later
3
9#pragma once
10
11#include <iosfwd>
12
13#include <dune/common/float_cmp.hh>
14
15#include <spdlog/spdlog.h>
16
17#include <Eigen/Sparse>
18
26
27namespace Ikarus {
28
34{
38};
39
41{
42 int verbosity = 5;
43 double maxtime = std::numeric_limits<double>::infinity();
44 int minIter = 3;
45 int maxIter = 1000;
46 int debug = 0;
47 double grad_tol = 1e-6;
48 double corr_tol = 1e-6;
49 double rho_prime = 0.01;
50 bool useRand = false;
51 double rho_reg = 1e6;
52 double Delta_bar = std::numeric_limits<double>::infinity();
53 double Delta0 = 10;
54};
55
61template <PreConditioner preConditioner = PreConditioner::IncompleteCholesky, typename UF = utils::UpdateDefault>
63{
64 static_assert(std::copy_constructible<UF>,
65 " The update function should be copy constructable. If it is a lambda wrap it in a std::function");
66
67 using UpdateFunction = UF;
68
70 static constexpr PreConditioner preConditionerType = preConditioner;
72 template <typename UF2>
75 .updateFunction = std::forward<UF2>(updateFunction)};
76 return settings;
77 }
78};
79
80template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
81 typename UF = utils::UpdateDefault>
82class TrustRegion;
83
92template <typename NLO, typename TRConfig>
93requires traits::isSpecializationNonTypeAndTypes<TrustRegionConfig, std::remove_cvref_t<TRConfig>>::value
94auto createNonlinearSolver(TRConfig&& config, NLO&& nonLinearOperator) {
95 static constexpr PreConditioner preConditioner = std::remove_cvref_t<TRConfig>::preConditionerType;
96 using UF = std::remove_cvref_t<TRConfig>::UpdateFunction;
97 static_assert(std::remove_cvref_t<NLO>::numberOfFunctions == 3,
98 "The number of derivatives in the nonlinear operator have to be exactly 3.");
99 auto solver = std::make_shared<TrustRegion<NLO, preConditioner, UF>>(nonLinearOperator,
100 std::forward<TRConfig>(config).updateFunction);
101
102 solver->setup(config.parameters);
103 return solver;
104}
105
110enum class StopReason
111{
117};
118
124{
128 std::string stopReasonString;
129 std::string trstr;
130 std::string accstr;
132 std::string cauchystr = " ";
134 bool used_cauchy = false;
136 std::string reasonString;
137};
138
143struct Stats
144{
145 double gradNorm{1};
146 double etaNorm{1};
147 double time{0};
148 double energy{0};
149 double energyProposal{0};
150 double rho{0};
151 int outerIter{0};
153};
154
167template <typename NLO, PreConditioner preConditioner, typename UF>
168class TrustRegion : public IObservable<NonLinearSolverMessages>
169{
170public:
172 using ValueType = typename NLO::template ParameterValue<0>;
174 using CorrectionType = typename NLO::DerivativeType;
175 using UpdateFunction = UF;
176
177 using NonLinearOperator = NLO;
178
179 using ScalarType = std::remove_cvref_t<typename NLO::template FunctionReturnType<0>>;
181
182 using MatrixType = std::remove_cvref_t<typename NLO::template FunctionReturnType<2>>;
183
189 template <typename UF2 = UF>
190 explicit TrustRegion(const NLO& nonLinearOperator, UF2&& updateFunction = {})
191 : nonLinearOperator_{nonLinearOperator},
192 updateFunction_{std::forward<UF2>(updateFunction)},
193 xOld_{this->nonLinearOperator().firstParameter()} {
194 eta_.setZero(gradient().size());
195 Heta_.setZero(gradient().size());
196 truncatedConjugateGradient_.analyzePattern(hessian());
197 }
198
203 void setup(const Settings& settings) {
204 settings_ = settings;
205 assert(settings_.rho_prime < 0.25 && "options.rho_prime must be strictly smaller than 1/4.");
206 assert(settings_.Delta_bar > 0 && "options.Delta_bar must be positive.");
207 assert(settings_.Delta0 > 0 && settings_.Delta0 < settings_.Delta_bar &&
208 "options.Delta0 must be positive and smaller than Delta_bar.");
209 }
210
211#ifndef DOXYGEN
212 struct NoPredictor
213 {
214 };
215#endif
222 template <typename SolutionType = NoPredictor>
223 requires std::is_same_v<SolutionType, NoPredictor> || std::is_convertible_v<SolutionType, CorrectionType>
224 NonLinearSolverInformation solve(const SolutionType& dxPredictor = NoPredictor{}) {
226 stats_ = Stats{};
227 info_ = AlgoInfo{};
228
229 NonLinearSolverInformation solverInformation;
230 nonLinearOperator().updateAll();
231 stats_.energy = energy();
232 auto& x = nonLinearOperator().firstParameter();
233 xOld_ = x;
234 stats_.gradNorm = norm(gradient());
235 if constexpr (not std::is_same_v<SolutionType, NoPredictor>)
236 updateFunction(x, dxPredictor);
237 truncatedConjugateGradient_.analyzePattern(hessian());
238
239 innerInfo_.Delta = settings_.Delta0;
240 spdlog::info(
241 " | iter | inner_i | rho | energy | energy_p | energy_inc | norm(g) | Delta | norm(corr) | "
242 "InnerBreakReason");
243 spdlog::info("{:-^143}", "-");
244 while (not stoppingCriterion()) {
246 if (settings_.useRand) {
247 if (stats_.outerIter == 0) {
248 eta_.setRandom();
249 while (eta_.dot(hessian() * eta_) > innerInfo_.Delta * innerInfo_.Delta)
250 eta_ *= eps_; // eps is sqrt(sqrt(maschine-precision))
251 } else
252 eta_.setZero();
253 } else
254 eta_.setZero();
255
256 solveInnerProblem();
257 stats_.innerIterSum += innerInfo_.numInnerIter;
258
259 info_.stopReasonString = tcg_stop_reason_[static_cast<int>(innerInfo_.stop_tCG)];
260 Heta_ = hessian() * eta_;
261 if (settings_.useRand and stats_.outerIter == 0) {
262 info_.used_cauchy = false;
263 info_.randomPredictionString = " Used Random correction predictor";
264 info_.cauchystr = " ";
265 double tauC;
266 // Check the curvature
267 const Eigen::VectorXd Hg = hessian() * gradient();
268 const auto g_Hg = (gradient().dot(Hg));
269 if (g_Hg <= 0)
270 tauC = 1;
271 else
272 tauC = std::min(Dune::power(stats_.gradNorm, 3) / (innerInfo_.Delta * g_Hg), 1.0);
273
274 // generate the Cauchy point.
275 const Eigen::VectorXd etaC = -tauC * innerInfo_.Delta / stats_.gradNorm * gradient();
276 const Eigen::VectorXd HetaC = -tauC * innerInfo_.Delta / stats_.gradNorm * Hg;
277
278 const double mdle = stats_.energy + gradient().dot(eta_) + .5 * Heta_.dot(eta_);
279 const double mdlec = stats_.energy + gradient().dot(etaC) + .5 * HetaC.dot(etaC);
280 if (mdlec < mdle && stats_.outerIter == 0) {
281 eta_ = etaC;
282 Heta_ = HetaC;
283 info_.used_cauchy = true;
284
285 info_.cauchystr = " USED CAUCHY POINT!";
286 }
287 } else
288 info_.cauchystr = " ";
289
290 stats_.etaNorm = eta_.norm();
291
292 updateFunction_(x, eta_);
293
294 // Calculate energy of our proposed update step
295 nonLinearOperator().template update<0>();
296 stats_.energyProposal = energy();
297
298 // Will we accept the proposal or not?
299 // Check the performance of the quadratic model against the actual energy.
300 auto rhonum = stats_.energy - stats_.energyProposal;
301 auto rhoden = -eta_.dot(gradient() + 0.5 * Heta_);
302
303 /* Close to convergence the proposed energy and the real energy almost coincide.
304 * Therefore, the performance check of our model becomes ill-conditioned
305 * The regularisation fixes this */
306 const auto rhoReg = std::max(1.0, abs(stats_.energy)) * eps_ * settings_.rho_reg;
307 rhonum = rhonum + rhoReg;
308 rhoden = rhoden + rhoReg;
309
310 const bool modelDecreased = rhoden > 0.0;
311
312 if (!modelDecreased)
313 info_.stopReasonString.append(", model did not decrease");
314
315 stats_.rho = rhonum / rhoden;
316 stats_.rho = stats_.rho < 0.0 ? -1.0 : stats_.rho; // move rho to the domain [-1.0,inf]
317
318 info_.trstr = " ";
319
320 // measure if energy decreased
321 const bool energyDecreased = Dune::FloatCmp::ge(stats_.energy - stats_.energyProposal, -1e-12);
322
323 // If the model behaves badly or if the energy increased we reduce the trust region size
324 if (stats_.rho < 1e-4 || not modelDecreased || std::isnan(stats_.rho) || not energyDecreased) {
325 info_.trstr = "TR-";
326 innerInfo_.Delta /= 4.0;
327 info_.consecutive_TRplus = 0;
328 info_.consecutive_TRminus++;
329 if (info_.consecutive_TRminus >= 5 && settings_.verbosity >= 1) {
330 info_.consecutive_TRminus = -std::numeric_limits<int>::infinity();
331 spdlog::info(" +++ Detected many consecutive TR- (radius decreases).");
332 spdlog::info(" +++ Consider decreasing options.Delta_bar by an order of magnitude.");
333 }
334
335 } else if (stats_.rho > 0.99 && (innerInfo_.stop_tCG == Eigen::TCGStopReason::negativeCurvature ||
337 info_.trstr = "TR+";
338 innerInfo_.Delta = std::min(3.5 * innerInfo_.Delta, settings_.Delta_bar);
339 info_.consecutive_TRminus = 0;
340 info_.consecutive_TRplus++;
341 if (info_.consecutive_TRplus >= 5 && settings_.verbosity >= 1) {
342 info_.consecutive_TRplus = -std::numeric_limits<int>::infinity();
343 spdlog::info(" +++ Detected many consecutive TR+ (radius increases)");
344 spdlog::info(" +++ Consider increasing options.Delta_bar by an order of magnitude");
345
346 } else {
347 info_.consecutive_TRplus = 0;
348 info_.consecutive_TRminus = 0;
349 }
350 }
351
352 if (modelDecreased && stats_.rho > settings_.rho_prime && energyDecreased) {
353 if (stats_.energyProposal > stats_.energy)
354 spdlog::info(
355 "Energy function increased by {} (step size: {}). Since this is small we accept the step and hope for "
356 "convergence of the gradient norm.",
357 stats_.energyProposal - stats_.energy, stats_.etaNorm);
358
359 info_.acceptProposal = true;
360 info_.accstr = "acc";
361 info_.consecutive_Rejected = 0;
362 } else {
363 info_.acceptProposal = false;
364 info_.accstr = "REJ";
365
366 if (info_.consecutive_Rejected >= 5)
367 innerInfo_.Delta /= 2;
368 else
369 innerInfo_.Delta = std::min(innerInfo_.Delta, stats_.etaNorm / 2.0);
370 ++info_.consecutive_Rejected;
371 }
372
373 stats_.outerIter++;
374
375 if (settings_.verbosity == 1)
376 logState();
377
378 info_.randomPredictionString = "";
379
380 if (info_.acceptProposal) {
381 stats_.energy = stats_.energyProposal;
382 nonLinearOperator_.updateAll();
383 xOld_ = x;
387 } else {
388 x = xOld_;
389 eta_.setZero();
390 }
391 nonLinearOperator_.updateAll();
392 stats_.gradNorm = gradient().norm();
394 }
395 spdlog::info("{}", info_.reasonString);
396 spdlog::info("Total iterations: {} Total CG Iterations: {}", stats_.outerIter, stats_.innerIterSum);
397
398 solverInformation.success =
400
401 solverInformation.iterations = stats_.outerIter;
402 solverInformation.residualNorm = stats_.gradNorm;
403 if (solverInformation.success)
404 this->notify(NonLinearSolverMessages::FINISHED_SUCESSFULLY, solverInformation.iterations);
405 return solverInformation;
406 }
411 auto& nonLinearOperator() { return nonLinearOperator_; }
412
413private:
414 void logState() const {
415 spdlog::info(
416 "{:>3s} {:>3s} {:>6d} {:>9d} {:>6.2f} {:>9.2e} {:>9.2e} {:>11.2e} {:>9.2e} {:>9.2e} {:>11.2e} "
417 "{:<73}",
418 info_.accstr, info_.trstr, stats_.outerIter, innerInfo_.numInnerIter, stats_.rho, stats_.energy,
419 stats_.energyProposal, stats_.energyProposal - stats_.energy, stats_.gradNorm, innerInfo_.Delta, stats_.etaNorm,
421 }
422
423 void logFinalState() {
424 spdlog::info("{:>3s} {:>3s} {:>6d} {:>9d} {: ^6} {: ^9} {: ^9} {: ^11} {:>9.2e} {: ^9} {: ^11} {:<73}",
425 info_.accstr, info_.trstr, stats_.outerIter, innerInfo_.numInnerIter, " ", " ", " ", " ",
426 stats_.gradNorm, " ", " ", info_.stopReasonString + info_.cauchystr + info_.randomPredictionString);
427 }
428
429 inline const auto& energy() { return nonLinearOperator().value(); }
430 inline const auto& gradient() { return nonLinearOperator().derivative(); }
431 inline const auto& hessian() { return nonLinearOperator().secondDerivative(); }
432
433 bool stoppingCriterion() {
434 std::ostringstream stream;
436 if (stats_.gradNorm < settings_.grad_tol && stats_.outerIter != 0) {
437 logFinalState();
438 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(gradient): {:1.16e}", nonLinearOperator().value(),
439 stats_.gradNorm);
440 stream << "Gradient norm tolerance reached; options.tolerance = " << settings_.grad_tol;
441
442 info_.reasonString = stream.str();
443
445 return true;
446 } else if (stats_.etaNorm < settings_.corr_tol && stats_.outerIter != 0) {
447 logFinalState();
448 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(correction): {:1.16e}", nonLinearOperator().value(),
449 stats_.etaNorm);
450 stream << "Displacement norm tolerance reached; = " << settings_.corr_tol << "." << std::endl;
451
452 info_.reasonString = stream.str();
454 return true;
455 }
456
458 if (stats_.time >= settings_.maxtime) {
459 logFinalState();
460 stream << "Max time exceeded; options.maxtime = " << settings_.maxtime << ".";
461 info_.reasonString = stream.str();
463 return true;
464 }
465
467 if (stats_.outerIter >= settings_.maxIter) {
468 logFinalState();
469 stream << "Max iteration count reached; options.maxiter = " << settings_.maxIter << ".";
470 info_.reasonString = stream.str();
472 return true;
473 }
474 return false;
475 }
476
477 void solveInnerProblem() {
478 truncatedConjugateGradient_.setInfo(innerInfo_);
479 int attempts = 0;
480 truncatedConjugateGradient_.factorize(hessian());
481 // If the preconditioner is IncompleteCholesky the factorization may fail if we have negative diagonal entries and
482 // the initial shift is too small. Therefore, if the factorization fails we increase the initial shift by a factor
483 // of 5.
484 if constexpr (preConditioner == PreConditioner::IncompleteCholesky) {
485 while (truncatedConjugateGradient_.info() != Eigen::Success) {
486 choleskyInitialShift_ *= 5;
487 truncatedConjugateGradient_.preconditioner().setInitialShift(choleskyInitialShift_);
488 truncatedConjugateGradient_.factorize(hessian());
489 if (attempts > 5)
490 DUNE_THROW(Dune::MathError, "Factorization of preconditioner failed!");
491 ++attempts;
492 }
493 if (truncatedConjugateGradient_.info() == Eigen::Success)
494 choleskyInitialShift_ = 1e-3;
495 }
496 eta_ = truncatedConjugateGradient_.solveWithGuess(-gradient(), eta_);
497 innerInfo_ = truncatedConjugateGradient_.getInfo();
498 }
499
500 NLO nonLinearOperator_;
501 UpdateFunction updateFunction_;
502 typename NLO::template ParameterValue<0> xOld_;
503 CorrectionType eta_;
504 CorrectionType Heta_;
505 Settings settings_;
506 AlgoInfo info_;
507 double choleskyInitialShift_ = 1e-3;
508 Eigen::TCGInfo<double> innerInfo_;
509 Stats stats_;
510 static constexpr double eps_ = 0.0001220703125; // 0.0001220703125 is sqrt(sqrt(maschine-precision))
511 std::array<std::string, 6> tcg_stop_reason_{
512 {"negative curvature", "exceeded trust region", "reached target residual-kappa (linear)",
513 "reached target residual-theta (superlinear)", "maximum inner iterations", "model increased"}
514 };
515
516 using PreConditionerType =
517 std::conditional_t<preConditioner == PreConditioner::IdentityPreconditioner, Eigen::IdentityPreconditioner,
518 std::conditional_t<preConditioner == PreConditioner::DiagonalPreconditioner,
519 typename Eigen::DiagonalPreconditioner<ScalarType>,
520 typename Eigen::IncompleteCholesky<ScalarType>>>;
522 truncatedConjugateGradient_;
523};
524
535template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
536 typename UF = utils::UpdateDefault>
537auto makeTrustRegion(const NLO& nonLinearOperator, UF&& updateFunction = {}) {
538 return std::make_shared<TrustRegion<NLO, preConditioner, UF>>(nonLinearOperator, updateFunction);
539}
540
541template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
542 typename UF2 = utils::UpdateDefault>
543TrustRegion(const NLO& nonLinearOperator,
544 UF2&& updateFunction = {}) -> TrustRegion<NLO, preConditioner, std::remove_cvref_t<UF2>>;
545
546} // namespace Ikarus
Contains stl-like type traits.
Collection of fallback default functions.
Helper for the autodiff library.
Definition of TruncatedConjugateGradient class for solving linear systems using truncated conjugate g...
Implementation of the Newton-Raphson method for solving nonlinear equations.
Enums for observer messages.
Implementation of the observer design pattern.
auto norm(const Eigen::MatrixBase< Derived > &v)
Adding free norm function to Eigen types.
Definition: linearalgebrahelper.hh:259
Definition: assemblermanipulatorbuildingblocks.hh:22
auto makeTrustRegion(const NLO &nonLinearOperator, UF &&updateFunction={})
Creates an instance of the TrustRegion solver.
Definition: trustregion.hh:537
::value auto createNonlinearSolver(NRConfig &&config, NLO &&nonLinearOperator)
Function to create a NewtonRaphson solver instance.
Definition: newtonraphson.hh:65
TrustRegion(const NLO &nonLinearOperator, UF2 &&updateFunction={}) -> TrustRegion< NLO, preConditioner, std::remove_cvref_t< UF2 > >
StopReason
Enumeration of reasons for stopping the TrustRegion solver.
Definition: trustregion.hh:111
PreConditioner
Enumeration of available preconditioners for the trust region solver.
Definition: trustregion.hh:34
Scalar Delta
Definition: truncatedconjugategradient.hh:38
TCGStopReason stop_tCG
Definition: truncatedconjugategradient.hh:37
Eigen::Index numInnerIter
Definition: truncatedconjugategradient.hh:43
TCGInfo< typename MatrixType::RealScalar > getInfo()
Get information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:227
void setInfo(TCGInfo< typename MatrixType::RealScalar > alginfo)
Set information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:233
Information about the result of a non-linear solver.
Definition: solverinfos.hh:19
Definition: trustregion.hh:41
double grad_tol
Gradient tolerance.
Definition: trustregion.hh:47
double Delta_bar
Maximum trust region radius.
Definition: trustregion.hh:52
double rho_reg
Regularization value for rho.
Definition: trustregion.hh:51
double Delta0
Initial trust region radius.
Definition: trustregion.hh:53
bool useRand
Flag for using random correction predictor.
Definition: trustregion.hh:50
int verbosity
Verbosity level.
Definition: trustregion.hh:42
int debug
Debugging flag.
Definition: trustregion.hh:46
int minIter
Minimum number of iterations.
Definition: trustregion.hh:44
double maxtime
Maximum allowable time for solving.
Definition: trustregion.hh:43
double corr_tol
Correction tolerance.
Definition: trustregion.hh:48
int maxIter
Maximum number of iterations.
Definition: trustregion.hh:45
double rho_prime
Rho prime value.
Definition: trustregion.hh:49
Definition: trustregion.hh:63
static constexpr PreConditioner preConditionerType
Definition: trustregion.hh:70
TRSettings parameters
Definition: trustregion.hh:69
UF UpdateFunction
Definition: trustregion.hh:67
UF updateFunction
Definition: trustregion.hh:71
auto rebindUpdateFunction(UF2 &&updateFunction) const
Definition: trustregion.hh:73
Trust Region solver for non-linear optimization problems.
Definition: trustregion.hh:169
typename NLO::DerivativeType CorrectionType
Type of the correction of x += deltaX.
Definition: trustregion.hh:174
std::remove_cvref_t< typename NLO::template FunctionReturnType< 0 > > ScalarType
Definition: trustregion.hh:180
void setup(const Settings &settings)
Sets up the TrustRegion solver with the provided settings and checks feasibility.
Definition: trustregion.hh:203
TRSettings Settings
Type of the settings for the TrustRegion solver.
Definition: trustregion.hh:171
typename NLO::template ParameterValue< 0 > ValueType
Definition: trustregion.hh:173
auto & nonLinearOperator()
Access the nonlinear operator.
Definition: trustregion.hh:411
std::remove_cvref_t< typename NLO::template FunctionReturnType< 2 > > MatrixType
Type of the Hessian.
Definition: trustregion.hh:182
NLO NonLinearOperator
Type of the non-linear operator.
Definition: trustregion.hh:177
UF UpdateFunction
Type of the update function.
Definition: trustregion.hh:175
TrustRegion(const NLO &nonLinearOperator, UF2 &&updateFunction={})
Constructs a TrustRegion solver instance.
Definition: trustregion.hh:190
NonLinearSolverInformation solve(const SolutionType &dxPredictor=NoPredictor{})
Solves the nonlinear optimization problem using the TrustRegion algorithm.
Definition: trustregion.hh:224
Additional information about the TrustRegion algorithm.
Definition: trustregion.hh:124
std::string cauchystr
Used Cauchy step string.
Definition: trustregion.hh:132
StopReason stop
Stopping reason.
Definition: trustregion.hh:135
std::string randomPredictionString
Random prediction string.
Definition: trustregion.hh:131
bool acceptProposal
Flag indicating whether the proposal is accepted.
Definition: trustregion.hh:133
std::string reasonString
String describing the stopping reason.
Definition: trustregion.hh:136
int consecutive_TRminus
Consecutive trust region decreases.
Definition: trustregion.hh:126
bool used_cauchy
Flag indicating whether Cauchy point was used.
Definition: trustregion.hh:134
int consecutive_Rejected
Consecutive rejected proposals.
Definition: trustregion.hh:127
std::string trstr
Trust region change string (TR+, TR-).
Definition: trustregion.hh:129
std::string stopReasonString
String describing the stopping reason.
Definition: trustregion.hh:128
std::string accstr
Acceptance string (REJ, acc).
Definition: trustregion.hh:130
int consecutive_TRplus
Consecutive trust region increases.
Definition: trustregion.hh:125
Information about the TrustRegion solver.
Definition: trustregion.hh:144
double rho
Definition: trustregion.hh:150
double etaNorm
Definition: trustregion.hh:146
double energyProposal
Definition: trustregion.hh:149
double energy
Definition: trustregion.hh:148
double gradNorm
Definition: trustregion.hh:145
int outerIter
Definition: trustregion.hh:151
double time
Definition: trustregion.hh:147
int innerIterSum
Definition: trustregion.hh:152
Generic observable interface for the Observer design pattern. See for a description of the design pa...
Definition: observer.hh:129
void notify(NonLinearSolverMessages message)
Notify observers about a specific message type.