version 0.4.1
trustregion.hh
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2021-2024 The Ikarus Developers mueller@ibb.uni-stuttgart.de
2// SPDX-License-Identifier: LGPL-3.0-or-later
3
9#pragma once
10
11#include <iosfwd>
12
13#include <dune/common/float_cmp.hh>
14
15#include <spdlog/spdlog.h>
16
17#include <Eigen/Sparse>
18
26
27namespace Ikarus {
28
34{
38};
39
41{
42 int verbosity = 5;
43 double maxtime = std::numeric_limits<double>::infinity();
44 int minIter = 3;
45 int maxIter = 1000;
46 int debug = 0;
47 double grad_tol = 1e-6;
48 double corr_tol = 1e-6;
49 double rho_prime = 0.01;
50 bool useRand = false;
51 double rho_reg = 1e6;
52 double Delta_bar = std::numeric_limits<double>::infinity();
53 double Delta0 = 10;
54};
55
61template <PreConditioner preConditioner = PreConditioner::IncompleteCholesky, typename UF = utils::UpdateDefault>
63{
64 static_assert(std::copy_constructible<UF>,
65 " The update function should be copy constructable. If it is a lambda wrap it in a std::function");
66
67 using UpdateFunction = UF;
68
70 static constexpr PreConditioner preConditionerType = preConditioner;
72 template <typename UF2>
75 .updateFunction = std::forward<UF2>(updateFunction)};
76 return settings;
77 }
78};
79
80template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
81 typename UF = utils::UpdateDefault>
82class TrustRegion;
83
92template <typename NLO, typename TRConfig>
93requires traits::isSpecializationNonTypeAndTypes<TrustRegionConfig, std::remove_cvref_t<TRConfig>>::value
94auto createNonlinearSolver(TRConfig&& config, NLO&& nonLinearOperator) {
95 static constexpr PreConditioner preConditioner = std::remove_cvref_t<TRConfig>::preConditionerType;
96 using UF = std::remove_cvref_t<TRConfig>::UpdateFunction;
97 static_assert(std::remove_cvref_t<NLO>::numberOfFunctions == 3,
98 "The number of derivatives in the nonlinear operator have to be exactly 3.");
99 auto solver = std::make_shared<TrustRegion<NLO, preConditioner, UF>>(nonLinearOperator,
100 std::forward<TRConfig>(config).updateFunction);
101
102 solver->setup(config.parameters);
103 return solver;
104}
105
110enum class StopReason
111{
117};
118
124{
128 std::string stopReasonString;
129 std::string trstr;
130 std::string accstr;
132 std::string cauchystr = " ";
134 bool used_cauchy = false;
136 std::string reasonString;
137};
138
143struct Stats
144{
145 double gradNorm{1};
146 double etaNorm{1};
147 double time{0};
148 double energy{0};
149 double energyProposal{0};
150 double rho{0};
151 int outerIter{0};
153};
154
167template <typename NLO, PreConditioner preConditioner, typename UF>
168class TrustRegion : public IObservable<NonLinearSolverMessages>
169{
170public:
172 using ValueType = typename NLO::template ParameterValue<0>;
174 using CorrectionType = typename NLO::DerivativeType;
175 using UpdateFunction = UF;
176
177 using NonLinearOperator = NLO;
178
179 using ScalarType = std::remove_cvref_t<typename NLO::template FunctionReturnType<0>>;
181
182 using MatrixType = std::remove_cvref_t<typename NLO::template FunctionReturnType<2>>;
183
189 template <typename UF2 = UF>
190 explicit TrustRegion(const NLO& nonLinearOperator, UF2&& updateFunction = {})
191 : nonLinearOperator_{nonLinearOperator},
192 updateFunction_{std::forward<UF2>(updateFunction)},
193 xOld_{this->nonLinearOperator().firstParameter()} {
194 eta_.setZero(gradient().size());
195 Heta_.setZero(gradient().size());
196 truncatedConjugateGradient_.analyzePattern(hessian());
197 }
198
203 void setup(const Settings& settings) {
204 settings_ = settings;
205 assert(settings_.rho_prime < 0.25 && "options.rho_prime must be strictly smaller than 1/4.");
206 assert(settings_.Delta_bar > 0 && "options.Delta_bar must be positive.");
207 assert(settings_.Delta0 > 0 && settings_.Delta0 < settings_.Delta_bar &&
208 "options.Delta0 must be positive and smaller than Delta_bar.");
209 }
210
211#ifndef DOXYGEN
212 struct NoPredictor
213 {
214 };
215#endif
222 template <typename SolutionType = NoPredictor>
223 requires std::is_same_v<SolutionType, NoPredictor> || std::is_convertible_v<SolutionType, CorrectionType>
224 NonLinearSolverInformation solve(const SolutionType& dxPredictor = NoPredictor{}) {
226 stats_ = Stats{};
227 info_ = AlgoInfo{};
228
229 NonLinearSolverInformation solverInformation;
230 nonLinearOperator().updateAll();
231 stats_.energy = energy();
232 auto& x = nonLinearOperator().firstParameter();
233 xOld_ = x;
234 stats_.gradNorm = norm(gradient());
235 if constexpr (not std::is_same_v<SolutionType, NoPredictor>)
236 updateFunction(x, dxPredictor);
237 truncatedConjugateGradient_.analyzePattern(hessian());
238
239 innerInfo_.Delta = settings_.Delta0;
240 spdlog::info(
241 " | iter | inner_i | rho | energy | energy_p | energy_inc | norm(g) | Delta | norm(corr) | "
242 "InnerBreakReason");
243 spdlog::info("{:-^143}", "-");
244 while (not stoppingCriterion()) {
246 if (settings_.useRand) {
247 if (stats_.outerIter == 0) {
248 eta_.setRandom();
249 while (eta_.dot(hessian() * eta_) > innerInfo_.Delta * innerInfo_.Delta)
250 eta_ *= eps_; // eps is sqrt(sqrt(maschine-precision))
251 } else
252 eta_.setZero();
253 } else
254 eta_.setZero();
255
256 solveInnerProblem();
257 stats_.innerIterSum += innerInfo_.numInnerIter;
258
259 info_.stopReasonString = tcg_stop_reason_[static_cast<int>(innerInfo_.stop_tCG)];
260 Heta_ = hessian() * eta_;
261 if (settings_.useRand and stats_.outerIter == 0) {
262 info_.used_cauchy = false;
263 info_.randomPredictionString = " Used Random correction predictor";
264 info_.cauchystr = " ";
265 double tauC;
266 // Check the curvature
267 const Eigen::VectorXd Hg = hessian() * gradient();
268 const auto g_Hg = (gradient().dot(Hg));
269 if (g_Hg <= 0)
270 tauC = 1;
271 else
272 tauC = std::min(Dune::power(stats_.gradNorm, 3) / (innerInfo_.Delta * g_Hg), 1.0);
273
274 // generate the Cauchy point.
275 const Eigen::VectorXd etaC = -tauC * innerInfo_.Delta / stats_.gradNorm * gradient();
276 const Eigen::VectorXd HetaC = -tauC * innerInfo_.Delta / stats_.gradNorm * Hg;
277
278 const double mdle = stats_.energy + gradient().dot(eta_) + .5 * Heta_.dot(eta_);
279 const double mdlec = stats_.energy + gradient().dot(etaC) + .5 * HetaC.dot(etaC);
280 if (mdlec < mdle && stats_.outerIter == 0) {
281 eta_ = etaC;
282 Heta_ = HetaC;
283 info_.used_cauchy = true;
284
285 info_.cauchystr = " USED CAUCHY POINT!";
286 }
287 } else
288 info_.cauchystr = " ";
289
290 stats_.etaNorm = eta_.norm();
291
292 updateFunction_(x, eta_);
293
294 // Calculate energy of our proposed update step
295 nonLinearOperator().template update<0>();
296 stats_.energyProposal = energy();
297
298 // Will we accept the proposal or not?
299 // Check the performance of the quadratic model against the actual energy.
300 auto rhonum = stats_.energy - stats_.energyProposal;
301 auto rhoden = -eta_.dot(gradient() + 0.5 * Heta_);
302
303 /* Close to convergence the proposed energy and the real energy almost coincide.
304 * Therefore, the performance check of our model becomes ill-conditioned
305 * The regularisation fixes this */
306 const auto rhoReg = std::max(1.0, abs(stats_.energy)) * eps_ * settings_.rho_reg;
307 rhonum = rhonum + rhoReg;
308 rhoden = rhoden + rhoReg;
309
310 const bool modelDecreased = rhoden > 0.0;
311
312 if (!modelDecreased)
313 info_.stopReasonString.append(", model did not decrease");
314
315 stats_.rho = rhonum / rhoden;
316 stats_.rho = stats_.rho < 0.0 ? -1.0 : stats_.rho; // move rho to the domain [-1.0,inf]
317
318 info_.trstr = " ";
319
320 // measure if energy decreased
321 const bool energyDecreased = Dune::FloatCmp::ge(stats_.energy - stats_.energyProposal, -1e-12);
322
323 // If the model behaves badly or if the energy increased we reduce the trust region size
324 if (stats_.rho < 1e-4 || not modelDecreased || std::isnan(stats_.rho) || not energyDecreased) {
325 info_.trstr = "TR-";
326 innerInfo_.Delta /= 4.0;
327 info_.consecutive_TRplus = 0;
328 info_.consecutive_TRminus++;
329 if (info_.consecutive_TRminus >= 5 && settings_.verbosity >= 1) {
330 info_.consecutive_TRminus = -std::numeric_limits<int>::infinity();
331 spdlog::info(" +++ Detected many consecutive TR- (radius decreases).");
332 spdlog::info(" +++ Consider decreasing options.Delta_bar by an order of magnitude.");
333 }
334
335 } else if (stats_.rho > 0.99 && (innerInfo_.stop_tCG == Eigen::TCGStopReason::negativeCurvature ||
337 info_.trstr = "TR+";
338 innerInfo_.Delta = std::min(3.5 * innerInfo_.Delta, settings_.Delta_bar);
339 info_.consecutive_TRminus = 0;
340 info_.consecutive_TRplus++;
341 if (info_.consecutive_TRplus >= 5 && settings_.verbosity >= 1) {
342 info_.consecutive_TRplus = -std::numeric_limits<int>::infinity();
343 spdlog::info(" +++ Detected many consecutive TR+ (radius increases)");
344 spdlog::info(" +++ Consider increasing options.Delta_bar by an order of magnitude");
345
346 } else {
347 info_.consecutive_TRplus = 0;
348 info_.consecutive_TRminus = 0;
349 }
350 }
351
352 if (modelDecreased && stats_.rho > settings_.rho_prime && energyDecreased) {
353 if (stats_.energyProposal > stats_.energy)
354 spdlog::info(
355 "Energy function increased by {} (step size: {}). Since this is small we accept the step and hope for "
356 "convergence of the gradient norm.",
357 stats_.energyProposal - stats_.energy, stats_.etaNorm);
358
359 info_.acceptProposal = true;
360 info_.accstr = "acc";
361 info_.consecutive_Rejected = 0;
362 } else {
363 info_.acceptProposal = false;
364 info_.accstr = "REJ";
365
366 if (info_.consecutive_Rejected >= 5)
367 innerInfo_.Delta /= 2;
368 else
369 innerInfo_.Delta = std::min(innerInfo_.Delta, stats_.etaNorm / 2.0);
370 ++info_.consecutive_Rejected;
371 }
372
373 stats_.outerIter++;
374
375 if (settings_.verbosity == 1)
376 logState();
377
378 info_.randomPredictionString = "";
379
380 if (info_.acceptProposal) {
381 stats_.energy = stats_.energyProposal;
382 nonLinearOperator_.updateAll();
383 xOld_ = x;
387 } else {
388 x = xOld_;
389 eta_.setZero();
390 }
391 nonLinearOperator_.updateAll();
392 stats_.gradNorm = gradient().norm();
394 }
395 spdlog::info("{}", info_.reasonString);
396 spdlog::info("Total iterations: {} Total CG Iterations: {}", stats_.outerIter, stats_.innerIterSum);
397
398 solverInformation.success =
400
401 solverInformation.iterations = stats_.outerIter;
402 solverInformation.residualNorm = stats_.gradNorm;
403 if (solverInformation.success)
404 this->notify(NonLinearSolverMessages::FINISHED_SUCESSFULLY, solverInformation.iterations);
405 return solverInformation;
406 }
411 auto& nonLinearOperator() { return nonLinearOperator_; }
412
413private:
414 void logState() const {
415 spdlog::info(
416 "{:>3s} {:>3s} {:>6d} {:>9d} {:>6.2f} {:>9.2e} {:>9.2e} {:>11.2e} {:>9.2e} {:>9.2e} {:>11.2e} "
417 "{:<73}",
418 info_.accstr, info_.trstr, stats_.outerIter, innerInfo_.numInnerIter, stats_.rho, stats_.energy,
419 stats_.energyProposal, stats_.energyProposal - stats_.energy, stats_.gradNorm, innerInfo_.Delta, stats_.etaNorm,
421 }
422
423 void logFinalState() {
424 spdlog::info("{:>3s} {:>3s} {:>6d} {:>9d} {: ^6} {: ^9} {: ^9} {: ^11} {:>9.2e} {: ^9} {: ^11} {:<73}",
425 info_.accstr, info_.trstr, stats_.outerIter, innerInfo_.numInnerIter, " ", " ", " ", " ",
426 stats_.gradNorm, " ", " ", info_.stopReasonString + info_.cauchystr + info_.randomPredictionString);
427 }
428
429 inline const auto& energy() { return nonLinearOperator().value(); }
430 inline const auto& gradient() { return nonLinearOperator().derivative(); }
431 inline const auto& hessian() { return nonLinearOperator().secondDerivative(); }
432
433 bool stoppingCriterion() {
434 std::ostringstream stream;
436 if (stats_.gradNorm < settings_.grad_tol && stats_.outerIter != 0) {
437 logFinalState();
438 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(gradient): {:1.16e}", nonLinearOperator().value(),
439 stats_.gradNorm);
440 stream << "Gradient norm tolerance reached; options.tolerance = " << settings_.grad_tol;
441
442 info_.reasonString = stream.str();
443
445 return true;
446 } else if (stats_.etaNorm < settings_.corr_tol && stats_.outerIter != 0) {
447 logFinalState();
448 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(correction): {:1.16e}", nonLinearOperator().value(),
449 stats_.etaNorm);
450 stream << "Displacement norm tolerance reached; = " << settings_.corr_tol << "." << std::endl;
451
452 info_.reasonString = stream.str();
454 return true;
455 }
456
458 if (stats_.time >= settings_.maxtime) {
459 logFinalState();
460 stream << "Max time exceeded; options.maxtime = " << settings_.maxtime << ".";
461 info_.reasonString = stream.str();
463 return true;
464 }
465
467 if (stats_.outerIter >= settings_.maxIter) {
468 logFinalState();
469 stream << "Max iteration count reached; options.maxiter = " << settings_.maxIter << ".";
470 info_.reasonString = stream.str();
472 return true;
473 }
474 return false;
475 }
476
477 void solveInnerProblem() {
478 truncatedConjugateGradient_.setInfo(innerInfo_);
479 int attempts = 0;
480 truncatedConjugateGradient_.factorize(hessian());
481 // If the preconditioner is IncompleteCholesky the factorization may fail if we have negative diagonal entries and
482 // the initial shift is too small. Therefore, if the factorization fails we increase the initial shift by a factor
483 // of 5.
484 if constexpr (preConditioner == PreConditioner::IncompleteCholesky) {
485 while (truncatedConjugateGradient_.info() != Eigen::Success) {
486 choleskyInitialShift_ *= 5;
487 truncatedConjugateGradient_.preconditioner().setInitialShift(choleskyInitialShift_);
488 truncatedConjugateGradient_.factorize(hessian());
489 if (attempts > 5)
490 DUNE_THROW(Dune::MathError, "Factorization of preconditioner failed!");
491 ++attempts;
492 }
493 if (truncatedConjugateGradient_.info() == Eigen::Success)
494 choleskyInitialShift_ = 1e-3;
495 }
496 eta_ = truncatedConjugateGradient_.solveWithGuess(-gradient(), eta_);
497 innerInfo_ = truncatedConjugateGradient_.getInfo();
498 }
499
500 NLO nonLinearOperator_;
501 UpdateFunction updateFunction_;
502 typename NLO::template ParameterValue<0> xOld_;
503 CorrectionType eta_;
504 CorrectionType Heta_;
505 Settings settings_;
506 AlgoInfo info_;
507 double choleskyInitialShift_ = 1e-3;
508 Eigen::TCGInfo<double> innerInfo_;
509 Stats stats_;
510 static constexpr double eps_ = 0.0001220703125; // 0.0001220703125 is sqrt(sqrt(maschine-precision))
511 std::array<std::string, 6> tcg_stop_reason_{
512 {"negative curvature", "exceeded trust region", "reached target residual-kappa (linear)",
513 "reached target residual-theta (superlinear)", "maximum inner iterations", "model increased"}
514 };
515
516 using PreConditionerType =
517 std::conditional_t<preConditioner == PreConditioner::IdentityPreconditioner, Eigen::IdentityPreconditioner,
518 std::conditional_t<preConditioner == PreConditioner::DiagonalPreconditioner,
519 typename Eigen::DiagonalPreconditioner<ScalarType>,
520 typename Eigen::IncompleteCholesky<ScalarType>>>;
522 truncatedConjugateGradient_;
523};
524
535template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
536 typename UF = utils::UpdateDefault>
537auto makeTrustRegion(const NLO& nonLinearOperator, UF&& updateFunction = {}) {
538 return std::make_shared<TrustRegion<NLO, preConditioner, UF>>(nonLinearOperator, updateFunction);
539}
540
541template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
542 typename UF2 = utils::UpdateDefault>
543TrustRegion(const NLO& nonLinearOperator,
544 UF2&& updateFunction = {}) -> TrustRegion<NLO, preConditioner, std::remove_cvref_t<UF2>>;
545
546} // namespace Ikarus
Contains stl-like type traits.
Helper for the autodiff library.
Collection of fallback default functions.
Definition of TruncatedConjugateGradient class for solving linear systems using truncated conjugate g...
Enums for observer messages.
Implementation of the observer design pattern.
Implementation of the Newton-Raphson method for solving nonlinear equations.
auto norm(const Eigen::MatrixBase< Derived > &v)
Adding free norm function to Eigen types.
Definition: linearalgebrahelper.hh:259
Definition: assemblermanipulatorbuildingblocks.hh:22
auto makeTrustRegion(const NLO &nonLinearOperator, UF &&updateFunction={})
Creates an instance of the TrustRegion solver.
Definition: trustregion.hh:537
::value auto createNonlinearSolver(NRConfig &&config, NLO &&nonLinearOperator)
Function to create a NewtonRaphson solver instance.
Definition: newtonraphson.hh:65
TrustRegion(const NLO &nonLinearOperator, UF2 &&updateFunction={}) -> TrustRegion< NLO, preConditioner, std::remove_cvref_t< UF2 > >
StopReason
Enumeration of reasons for stopping the TrustRegion solver.
Definition: trustregion.hh:111
PreConditioner
Enumeration of available preconditioners for the trust region solver.
Definition: trustregion.hh:34
Scalar Delta
Definition: truncatedconjugategradient.hh:38
TCGStopReason stop_tCG
Definition: truncatedconjugategradient.hh:37
Eigen::Index numInnerIter
Definition: truncatedconjugategradient.hh:43
TCGInfo< typename MatrixType::RealScalar > getInfo()
Get information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:227
void setInfo(TCGInfo< typename MatrixType::RealScalar > alginfo)
Set information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:233
Information about the result of a non-linear solver.
Definition: solverinfos.hh:19
Definition: trustregion.hh:41
double grad_tol
Gradient tolerance.
Definition: trustregion.hh:47
double Delta_bar
Maximum trust region radius.
Definition: trustregion.hh:52
double rho_reg
Regularization value for rho.
Definition: trustregion.hh:51
double Delta0
Initial trust region radius.
Definition: trustregion.hh:53
bool useRand
Flag for using random correction predictor.
Definition: trustregion.hh:50
int verbosity
Verbosity level.
Definition: trustregion.hh:42
int debug
Debugging flag.
Definition: trustregion.hh:46
int minIter
Minimum number of iterations.
Definition: trustregion.hh:44
double maxtime
Maximum allowable time for solving.
Definition: trustregion.hh:43
double corr_tol
Correction tolerance.
Definition: trustregion.hh:48
int maxIter
Maximum number of iterations.
Definition: trustregion.hh:45
double rho_prime
Rho prime value.
Definition: trustregion.hh:49
Definition: trustregion.hh:63
static constexpr PreConditioner preConditionerType
Definition: trustregion.hh:70
TRSettings parameters
Definition: trustregion.hh:69
UF UpdateFunction
Definition: trustregion.hh:67
UF updateFunction
Definition: trustregion.hh:71
auto rebindUpdateFunction(UF2 &&updateFunction) const
Definition: trustregion.hh:73
Trust Region solver for non-linear optimization problems.
Definition: trustregion.hh:169
typename NLO::DerivativeType CorrectionType
Type of the correction of x += deltaX.
Definition: trustregion.hh:174
std::remove_cvref_t< typename NLO::template FunctionReturnType< 0 > > ScalarType
Definition: trustregion.hh:180
void setup(const Settings &settings)
Sets up the TrustRegion solver with the provided settings and checks feasibility.
Definition: trustregion.hh:203
TRSettings Settings
Type of the settings for the TrustRegion solver.
Definition: trustregion.hh:171
typename NLO::template ParameterValue< 0 > ValueType
Definition: trustregion.hh:173
auto & nonLinearOperator()
Access the nonlinear operator.
Definition: trustregion.hh:411
std::remove_cvref_t< typename NLO::template FunctionReturnType< 2 > > MatrixType
Type of the Hessian.
Definition: trustregion.hh:182
NLO NonLinearOperator
Type of the non-linear operator.
Definition: trustregion.hh:177
UF UpdateFunction
Type of the update function.
Definition: trustregion.hh:175
TrustRegion(const NLO &nonLinearOperator, UF2 &&updateFunction={})
Constructs a TrustRegion solver instance.
Definition: trustregion.hh:190
NonLinearSolverInformation solve(const SolutionType &dxPredictor=NoPredictor{})
Solves the nonlinear optimization problem using the TrustRegion algorithm.
Definition: trustregion.hh:224
Additional information about the TrustRegion algorithm.
Definition: trustregion.hh:124
std::string cauchystr
Used Cauchy step string.
Definition: trustregion.hh:132
StopReason stop
Stopping reason.
Definition: trustregion.hh:135
std::string randomPredictionString
Random prediction string.
Definition: trustregion.hh:131
bool acceptProposal
Flag indicating whether the proposal is accepted.
Definition: trustregion.hh:133
std::string reasonString
String describing the stopping reason.
Definition: trustregion.hh:136
int consecutive_TRminus
Consecutive trust region decreases.
Definition: trustregion.hh:126
bool used_cauchy
Flag indicating whether Cauchy point was used.
Definition: trustregion.hh:134
int consecutive_Rejected
Consecutive rejected proposals.
Definition: trustregion.hh:127
std::string trstr
Trust region change string (TR+, TR-).
Definition: trustregion.hh:129
std::string stopReasonString
String describing the stopping reason.
Definition: trustregion.hh:128
std::string accstr
Acceptance string (REJ, acc).
Definition: trustregion.hh:130
int consecutive_TRplus
Consecutive trust region increases.
Definition: trustregion.hh:125
Information about the TrustRegion solver.
Definition: trustregion.hh:144
double rho
Definition: trustregion.hh:150
double etaNorm
Definition: trustregion.hh:146
double energyProposal
Definition: trustregion.hh:149
double energy
Definition: trustregion.hh:148
double gradNorm
Definition: trustregion.hh:145
int outerIter
Definition: trustregion.hh:151
double time
Definition: trustregion.hh:147
int innerIterSum
Definition: trustregion.hh:152
Generic observable interface for the Observer design pattern. See for a description of the design pa...
Definition: observer.hh:129
void notify(NonLinearSolverMessages message)
Notify observers about a specific message type.