version 0.4.1
trustregion.hh
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2021-2024 The Ikarus Developers mueller@ibb.uni-stuttgart.de
2// SPDX-License-Identifier: LGPL-3.0-or-later
3
9#pragma once
10
11#include <iosfwd>
12
13#include <dune/common/float_cmp.hh>
14
15#include <spdlog/spdlog.h>
16
17#include <Eigen/Sparse>
18
26
27namespace Ikarus {
28
34{
38};
44{
45 int verbosity = 5;
46 double maxtime = std::numeric_limits<double>::infinity();
47 int minIter = 3;
48 int maxIter = 1000;
49 int debug = 0;
50 double grad_tol = 1e-6;
51 double corr_tol = 1e-6;
52 double rho_prime = 0.01;
53 bool useRand = false;
54 double rho_reg = 1e6;
55 double Delta_bar = std::numeric_limits<double>::infinity();
56 double Delta0 = 10;
57};
58
63enum class StopReason
64{
70};
71
77{
81 std::string stopReasonString;
82 std::string trstr;
83 std::string accstr;
85 std::string cauchystr = " ";
87 bool used_cauchy = false;
89 std::string reasonString;
90};
91
96struct Stats
97{
98 double gradNorm{1};
99 double etaNorm{1};
100 double time{0};
101 double energy{0};
102 double energyProposal{0};
103 double rho{0};
104 int outerIter{0};
106};
107
120template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
121 typename UF = utils::UpdateDefault>
122class TrustRegion : public IObservable<NonLinearSolverMessages>
123{
124public:
125 using ValueType = typename NLO::template ParameterValue<0>;
127 using CorrectionType = typename NLO::DerivativeType;
128 using UpdateFunction = UF;
129
130 using NonLinearOperator = NLO;
131
132 using ScalarType = std::remove_cvref_t<typename NLO::template FunctionReturnType<0>>;
134
135 using MatrixType = std::remove_cvref_t<typename NLO::template FunctionReturnType<2>>;
136
142 explicit TrustRegion(const NLO& nonLinearOperator, UF updateFunction = {})
143 : nonLinearOperator_{nonLinearOperator},
144 updateFunction_{updateFunction},
145 xOld_{this->nonLinearOperator().firstParameter()} {
146 eta_.setZero(gradient().size());
147 Heta_.setZero(gradient().size());
148 truncatedConjugateGradient_.analyzePattern(hessian());
149 }
150
155 void setup(const TrustRegionSettings& settings) {
156 options_ = settings;
157 assert(options_.rho_prime < 0.25 && "options.rho_prime must be strictly smaller than 1/4.");
158 assert(options_.Delta_bar > 0 && "options.Delta_bar must be positive.");
159 assert(options_.Delta0 > 0 && options_.Delta0 < options_.Delta_bar &&
160 "options.Delta0 must be positive and smaller than Delta_bar.");
161 }
162
163#ifndef DOXYGEN
164 struct NoPredictor
165 {
166 };
167#endif
174 template <typename SolutionType = NoPredictor>
175 requires std::is_same_v<SolutionType, NoPredictor> || std::is_convertible_v<SolutionType, CorrectionType>
176 NonLinearSolverInformation solve(const SolutionType& dxPredictor = NoPredictor{}) {
178 stats_ = Stats{};
179 info_ = AlgoInfo{};
180
181 NonLinearSolverInformation solverInformation;
182 nonLinearOperator().updateAll();
183 stats_.energy = energy();
184 auto& x = nonLinearOperator().firstParameter();
185 xOld_ = x;
186 stats_.gradNorm = norm(gradient());
187 if constexpr (not std::is_same_v<SolutionType, NoPredictor>)
188 updateFunction(x, dxPredictor);
189 truncatedConjugateGradient_.analyzePattern(hessian());
190
191 innerInfo_.Delta = options_.Delta0;
192 spdlog::info(
193 " | iter | inner_i | rho | energy | energy_p | energy_inc | norm(g) | Delta | norm(corr) | "
194 "InnerBreakReason");
195 spdlog::info("{:-^143}", "-");
196 while (not stoppingCriterion()) {
198 if (options_.useRand) {
199 if (stats_.outerIter == 0) {
200 eta_.setRandom();
201 while (eta_.dot(hessian() * eta_) > innerInfo_.Delta * innerInfo_.Delta)
202 eta_ *= eps_; // eps is sqrt(sqrt(maschine-precision))
203 } else
204 eta_.setZero();
205 } else
206 eta_.setZero();
207
208 solveInnerProblem();
209 stats_.innerIterSum += innerInfo_.numInnerIter;
210
211 info_.stopReasonString = tcg_stop_reason_[static_cast<int>(innerInfo_.stop_tCG)];
212 Heta_ = hessian() * eta_;
213 if (options_.useRand and stats_.outerIter == 0) {
214 info_.used_cauchy = false;
215 info_.randomPredictionString = " Used Random correction predictor";
216 info_.cauchystr = " ";
217 double tauC;
218 // Check the curvature
219 const Eigen::VectorXd Hg = hessian() * gradient();
220 const auto g_Hg = (gradient().dot(Hg));
221 if (g_Hg <= 0)
222 tauC = 1;
223 else
224 tauC = std::min(Dune::power(stats_.gradNorm, 3) / (innerInfo_.Delta * g_Hg), 1.0);
225
226 // generate the Cauchy point.
227 const Eigen::VectorXd etaC = -tauC * innerInfo_.Delta / stats_.gradNorm * gradient();
228 const Eigen::VectorXd HetaC = -tauC * innerInfo_.Delta / stats_.gradNorm * Hg;
229
230 const double mdle = stats_.energy + gradient().dot(eta_) + .5 * Heta_.dot(eta_);
231 const double mdlec = stats_.energy + gradient().dot(etaC) + .5 * HetaC.dot(etaC);
232 if (mdlec < mdle && stats_.outerIter == 0) {
233 eta_ = etaC;
234 Heta_ = HetaC;
235 info_.used_cauchy = true;
236
237 info_.cauchystr = " USED CAUCHY POINT!";
238 }
239 } else
240 info_.cauchystr = " ";
241
242 stats_.etaNorm = eta_.norm();
243
244 updateFunction_(x, eta_);
245
246 // Calculate energy of our proposed update step
247 nonLinearOperator().template update<0>();
248 stats_.energyProposal = energy();
249
250 // Will we accept the proposal or not?
251 // Check the performance of the quadratic model against the actual energy.
252 auto rhonum = stats_.energy - stats_.energyProposal;
253 auto rhoden = -eta_.dot(gradient() + 0.5 * Heta_);
254
255 /* Close to convergence the proposed energy and the real energy almost coincide.
256 * Therefore, the performance check of our model becomes ill-conditioned
257 * The regularisation fixes this */
258 const auto rhoReg = std::max(1.0, abs(stats_.energy)) * eps_ * options_.rho_reg;
259 rhonum = rhonum + rhoReg;
260 rhoden = rhoden + rhoReg;
261
262 const bool modelDecreased = rhoden > 0.0;
263
264 if (!modelDecreased)
265 info_.stopReasonString.append(", model did not decrease");
266
267 stats_.rho = rhonum / rhoden;
268 stats_.rho = stats_.rho < 0.0 ? -1.0 : stats_.rho; // move rho to the domain [-1.0,inf]
269
270 info_.trstr = " ";
271
272 // measure if energy decreased
273 const bool energyDecreased = Dune::FloatCmp::ge(stats_.energy - stats_.energyProposal, -1e-12);
274
275 // If the model behaves badly or if the energy increased we reduce the trust region size
276 if (stats_.rho < 1e-4 || not modelDecreased || std::isnan(stats_.rho) || not energyDecreased) {
277 info_.trstr = "TR-";
278 innerInfo_.Delta /= 4.0;
279 info_.consecutive_TRplus = 0;
280 info_.consecutive_TRminus++;
281 if (info_.consecutive_TRminus >= 5 && options_.verbosity >= 1) {
282 info_.consecutive_TRminus = -std::numeric_limits<int>::infinity();
283 spdlog::info(" +++ Detected many consecutive TR- (radius decreases).");
284 spdlog::info(" +++ Consider decreasing options.Delta_bar by an order of magnitude.");
285 }
286
287 } else if (stats_.rho > 0.99 && (innerInfo_.stop_tCG == Eigen::TCGStopReason::negativeCurvature ||
289 info_.trstr = "TR+";
290 innerInfo_.Delta = std::min(3.5 * innerInfo_.Delta, options_.Delta_bar);
291 info_.consecutive_TRminus = 0;
292 info_.consecutive_TRplus++;
293 if (info_.consecutive_TRplus >= 5 && options_.verbosity >= 1) {
294 info_.consecutive_TRplus = -std::numeric_limits<int>::infinity();
295 spdlog::info(" +++ Detected many consecutive TR+ (radius increases)");
296 spdlog::info(" +++ Consider increasing options.Delta_bar by an order of magnitude");
297
298 } else {
299 info_.consecutive_TRplus = 0;
300 info_.consecutive_TRminus = 0;
301 }
302 }
303
304 if (modelDecreased && stats_.rho > options_.rho_prime && energyDecreased) {
305 if (stats_.energyProposal > stats_.energy)
306 spdlog::info(
307 "Energy function increased by {} (step size: {}). Since this is small we accept the step and hope for "
308 "convergence of the gradient norm.",
309 stats_.energyProposal - stats_.energy, stats_.etaNorm);
310
311 info_.acceptProposal = true;
312 info_.accstr = "acc";
313 info_.consecutive_Rejected = 0;
314 } else {
315 info_.acceptProposal = false;
316 info_.accstr = "REJ";
317
318 if (info_.consecutive_Rejected >= 5)
319 innerInfo_.Delta /= 2;
320 else
321 innerInfo_.Delta = std::min(innerInfo_.Delta, stats_.etaNorm / 2.0);
322 ++info_.consecutive_Rejected;
323 }
324
325 stats_.outerIter++;
326
327 if (options_.verbosity == 1)
328 logState();
329
330 info_.randomPredictionString = "";
331
332 if (info_.acceptProposal) {
333 stats_.energy = stats_.energyProposal;
334 nonLinearOperator_.updateAll();
335 xOld_ = x;
339 } else {
340 x = xOld_;
341 eta_.setZero();
342 }
343 nonLinearOperator_.updateAll();
344 stats_.gradNorm = gradient().norm();
346 }
347 spdlog::info("{}", info_.reasonString);
348 spdlog::info("Total iterations: {} Total CG Iterations: {}", stats_.outerIter, stats_.innerIterSum);
349
350 solverInformation.success =
352
353 solverInformation.iterations = stats_.outerIter;
354 solverInformation.residualNorm = stats_.gradNorm;
355 if (solverInformation.success)
356 this->notify(NonLinearSolverMessages::FINISHED_SUCESSFULLY, solverInformation.iterations);
357 return solverInformation;
358 }
363 auto& nonLinearOperator() { return nonLinearOperator_; }
364
365private:
366 void logState() const {
367 spdlog::info(
368 "{:>3s} {:>3s} {:>6d} {:>9d} {:>6.2f} {:>9.2e} {:>9.2e} {:>11.2e} {:>9.2e} {:>9.2e} {:>11.2e} "
369 "{:<73}",
370 info_.accstr, info_.trstr, stats_.outerIter, innerInfo_.numInnerIter, stats_.rho, stats_.energy,
371 stats_.energyProposal, stats_.energyProposal - stats_.energy, stats_.gradNorm, innerInfo_.Delta, stats_.etaNorm,
373 }
374
375 void logFinalState() {
376 spdlog::info("{:>3s} {:>3s} {:>6d} {:>9d} {: ^6} {: ^9} {: ^9} {: ^11} {:>9.2e} {: ^9} {: ^11} {:<73}",
377 info_.accstr, info_.trstr, stats_.outerIter, innerInfo_.numInnerIter, " ", " ", " ", " ",
378 stats_.gradNorm, " ", " ", info_.stopReasonString + info_.cauchystr + info_.randomPredictionString);
379 }
380
381 inline const auto& energy() { return nonLinearOperator().value(); }
382 inline const auto& gradient() { return nonLinearOperator().derivative(); }
383 inline const auto& hessian() { return nonLinearOperator().secondDerivative(); }
384
385 bool stoppingCriterion() {
386 std::ostringstream stream;
388 if (stats_.gradNorm < options_.grad_tol && stats_.outerIter != 0) {
389 logFinalState();
390 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(gradient): {:1.16e}", nonLinearOperator().value(),
391 stats_.gradNorm);
392 stream << "Gradient norm tolerance reached; options.tolerance = " << options_.grad_tol;
393
394 info_.reasonString = stream.str();
395
397 return true;
398 } else if (stats_.etaNorm < options_.corr_tol && stats_.outerIter != 0) {
399 logFinalState();
400 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(correction): {:1.16e}", nonLinearOperator().value(),
401 stats_.etaNorm);
402 stream << "Displacement norm tolerance reached; = " << options_.corr_tol << "." << std::endl;
403
404 info_.reasonString = stream.str();
406 return true;
407 }
408
410 if (stats_.time >= options_.maxtime) {
411 logFinalState();
412 stream << "Max time exceeded; options.maxtime = " << options_.maxtime << ".";
413 info_.reasonString = stream.str();
415 return true;
416 }
417
419 if (stats_.outerIter >= options_.maxIter) {
420 logFinalState();
421 stream << "Max iteration count reached; options.maxiter = " << options_.maxIter << ".";
422 info_.reasonString = stream.str();
424 return true;
425 }
426 return false;
427 }
428
429 void solveInnerProblem() {
430 truncatedConjugateGradient_.setInfo(innerInfo_);
431 int attempts = 0;
432 truncatedConjugateGradient_.factorize(hessian());
433 // If the preconditioner is IncompleteCholesky the factorization may fail if we have negative diagonal entries and
434 // the initial shift is too small. Therefore, if the factorization fails we increase the initial shift by a factor
435 // of 5.
436 if constexpr (preConditioner == PreConditioner::IncompleteCholesky) {
437 while (truncatedConjugateGradient_.info() != Eigen::Success) {
438 choleskyInitialShift_ *= 5;
439 truncatedConjugateGradient_.preconditioner().setInitialShift(choleskyInitialShift_);
440 truncatedConjugateGradient_.factorize(hessian());
441 if (attempts > 5)
442 DUNE_THROW(Dune::MathError, "Factorization of preconditioner failed!");
443 ++attempts;
444 }
445 if (truncatedConjugateGradient_.info() == Eigen::Success)
446 choleskyInitialShift_ = 1e-3;
447 }
448 eta_ = truncatedConjugateGradient_.solveWithGuess(-gradient(), eta_);
449 innerInfo_ = truncatedConjugateGradient_.getInfo();
450 }
451
452 NLO nonLinearOperator_;
453 UpdateFunction updateFunction_;
454 typename NLO::template ParameterValue<0> xOld_;
455 CorrectionType eta_;
456 CorrectionType Heta_;
457 TrustRegionSettings options_;
458 AlgoInfo info_;
459 double choleskyInitialShift_ = 1e-3;
460 Eigen::TCGInfo<double> innerInfo_;
461 Stats stats_;
462 static constexpr double eps_ = 0.0001220703125; // 0.0001220703125 is sqrt(sqrt(maschine-precision))
463 std::array<std::string, 6> tcg_stop_reason_{
464 {"negative curvature", "exceeded trust region", "reached target residual-kappa (linear)",
465 "reached target residual-theta (superlinear)", "maximum inner iterations", "model increased"}
466 };
467
468 using PreConditionerType =
469 std::conditional_t<preConditioner == PreConditioner::IdentityPreconditioner, Eigen::IdentityPreconditioner,
470 std::conditional_t<preConditioner == PreConditioner::DiagonalPreconditioner,
471 typename Eigen::DiagonalPreconditioner<ScalarType>,
472 typename Eigen::IncompleteCholesky<ScalarType>>>;
474 truncatedConjugateGradient_;
475};
476
487template <typename NLO, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
488 typename UF = utils::UpdateDefault>
489auto makeTrustRegion(const NLO& nonLinearOperator, UF&& updateFunction = {}) {
490 return std::make_shared<TrustRegion<NLO, preConditioner, UF>>(nonLinearOperator, updateFunction);
491}
492
493} // namespace Ikarus
Contains stl-like type traits.
Collection of fallback default functions.
Helper for the autodiff library.
Implementation of the Newton-Raphson method for solving nonlinear equations.
Definition of TruncatedConjugateGradient class for solving linear systems using truncated conjugate g...
Enums for observer messages.
Implementation of the observer design pattern.
auto norm(const Eigen::MatrixBase< Derived > &v)
Adding free norm function to Eigen types.
Definition: linearalgebrahelper.hh:259
Definition: simpleassemblers.hh:22
auto makeTrustRegion(const NLO &nonLinearOperator, UF &&updateFunction={})
Creates an instance of the TrustRegion solver.
Definition: trustregion.hh:489
StopReason
Enumeration of reasons for stopping the TrustRegion solver.
Definition: trustregion.hh:64
PreConditioner
Enumeration of available preconditioners for the trust region solver.
Definition: trustregion.hh:34
Scalar Delta
Definition: truncatedconjugategradient.hh:38
TCGStopReason stop_tCG
Definition: truncatedconjugategradient.hh:37
Eigen::Index numInnerIter
Definition: truncatedconjugategradient.hh:43
TCGInfo< typename MatrixType::RealScalar > getInfo()
Get information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:227
void setInfo(TCGInfo< typename MatrixType::RealScalar > alginfo)
Set information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:233
Information about the result of a non-linear solver.
Definition: solverinfos.hh:19
Configuration settings for the TrustRegion solver.
Definition: trustregion.hh:44
double rho_reg
Regularization value for rho.
Definition: trustregion.hh:54
int minIter
Minimum number of iterations.
Definition: trustregion.hh:47
int verbosity
Verbosity level.
Definition: trustregion.hh:45
int maxIter
Maximum number of iterations.
Definition: trustregion.hh:48
double grad_tol
Gradient tolerance.
Definition: trustregion.hh:50
double Delta0
Initial trust region radius.
Definition: trustregion.hh:56
int debug
Debugging flag.
Definition: trustregion.hh:49
double Delta_bar
Maximum trust region radius.
Definition: trustregion.hh:55
bool useRand
Flag for using random correction predictor.
Definition: trustregion.hh:53
double rho_prime
Rho prime value.
Definition: trustregion.hh:52
double maxtime
Maximum allowable time for solving.
Definition: trustregion.hh:46
double corr_tol
Correction tolerance.
Definition: trustregion.hh:51
Additional information about the TrustRegion algorithm.
Definition: trustregion.hh:77
std::string cauchystr
Used Cauchy step string.
Definition: trustregion.hh:85
StopReason stop
Stopping reason.
Definition: trustregion.hh:88
std::string randomPredictionString
Random prediction string.
Definition: trustregion.hh:84
bool acceptProposal
Flag indicating whether the proposal is accepted.
Definition: trustregion.hh:86
std::string reasonString
String describing the stopping reason.
Definition: trustregion.hh:89
int consecutive_TRminus
Consecutive trust region decreases.
Definition: trustregion.hh:79
bool used_cauchy
Flag indicating whether Cauchy point was used.
Definition: trustregion.hh:87
int consecutive_Rejected
Consecutive rejected proposals.
Definition: trustregion.hh:80
std::string trstr
Trust region change string (TR+, TR-).
Definition: trustregion.hh:82
std::string stopReasonString
String describing the stopping reason.
Definition: trustregion.hh:81
std::string accstr
Acceptance string (REJ, acc).
Definition: trustregion.hh:83
int consecutive_TRplus
Consecutive trust region increases.
Definition: trustregion.hh:78
Information about the TrustRegion solver.
Definition: trustregion.hh:97
double rho
Definition: trustregion.hh:103
double etaNorm
Definition: trustregion.hh:99
double energyProposal
Definition: trustregion.hh:102
double energy
Definition: trustregion.hh:101
double gradNorm
Definition: trustregion.hh:98
int outerIter
Definition: trustregion.hh:104
double time
Definition: trustregion.hh:100
int innerIterSum
Definition: trustregion.hh:105
Trust Region solver for non-linear optimization problems.
Definition: trustregion.hh:123
typename NLO::DerivativeType CorrectionType
Type of the correction of x += deltaX.
Definition: trustregion.hh:127
std::remove_cvref_t< typename NLO::template FunctionReturnType< 0 > > ScalarType
Definition: trustregion.hh:133
typename NLO::template ParameterValue< 0 > ValueType
Definition: trustregion.hh:126
void setup(const TrustRegionSettings &settings)
Sets up the TrustRegion solver with the provided settings and checks feasibility.
Definition: trustregion.hh:155
auto & nonLinearOperator()
Access the nonlinear operator.
Definition: trustregion.hh:363
std::remove_cvref_t< typename NLO::template FunctionReturnType< 2 > > MatrixType
Type of the Hessian.
Definition: trustregion.hh:135
NLO NonLinearOperator
Type of the non-linear operator.
Definition: trustregion.hh:130
UF UpdateFunction
Type of the update function.
Definition: trustregion.hh:128
TrustRegion(const NLO &nonLinearOperator, UF updateFunction={})
Constructs a TrustRegion solver instance.
Definition: trustregion.hh:142
NonLinearSolverInformation solve(const SolutionType &dxPredictor=NoPredictor{})
Solves the nonlinear optimization problem using the TrustRegion algorithm.
Definition: trustregion.hh:176
Generic observable interface for the Observer design pattern. See for a description of the design pa...
Definition: observer.hh:129
void notify(NonLinearSolverMessages message)
Notify observers about a specific message type.