version 0.4
trustregion.hh
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2021-2024 The Ikarus Developers mueller@ibb.uni-stuttgart.de
2// SPDX-License-Identifier: LGPL-3.0-or-later
3
9#pragma once
10
11#include <iosfwd>
12
13#include <dune/common/float_cmp.hh>
14
15#include <spdlog/spdlog.h>
16
17#include <Eigen/Sparse>
18
26
27namespace Ikarus {
28
39 int verbosity = 5;
40 double maxtime = std::numeric_limits<double>::infinity();
41 int miniter = 3;
42 int maxiter = 1000;
43 int debug = 0;
44 double grad_tol = 1e-6;
45 double corr_tol = 1e-6;
46 double rho_prime = 0.01;
47 bool useRand = false;
48 double rho_reg = 1e6;
49 double Delta_bar = std::numeric_limits<double>::infinity();
50 double Delta0 = 10;
51 };
52
57 enum class StopReason {
63 };
64
69 struct AlgoInfo {
73 std::string stopReasonString;
74 std::string trstr;
75 std::string accstr;
77 std::string cauchystr = " ";
79 bool used_cauchy = false;
81 std::string reasonString;
82 };
83
88 struct Stats {
89 double gradNorm{1};
90 double etaNorm{1};
91 double time{0};
92 double energy{0};
93 double energyProposal{0};
94 double rho{0};
95 int outerIter{0};
97 };
98
111 template <typename NonLinearOperatorImpl, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
112 typename UpdateFunctionTypeImpl = utils::UpdateDefault>
113 class TrustRegion : public IObservable<NonLinearSolverMessages> {
114 public:
115 using ValueType = typename NonLinearOperatorImpl::template ParameterValue<0>;
117 using CorrectionType = typename NonLinearOperatorImpl::DerivativeType;
118 using UpdateFunctionType = UpdateFunctionTypeImpl;
119
120 using NonLinearOperator = NonLinearOperatorImpl;
121
123 = std::remove_cvref_t<typename NonLinearOperatorImpl::template FunctionReturnType<0>>;
125
127 = std::remove_cvref_t<typename NonLinearOperatorImpl::template FunctionReturnType<2>>;
128
134 explicit TrustRegion(const NonLinearOperatorImpl& p_nonLinearOperator, UpdateFunctionTypeImpl p_updateFunction = {})
135 : nonLinearOperator_{p_nonLinearOperator},
136 updateFunction{p_updateFunction},
137 xOld{nonLinearOperator().firstParameter()} {
138 eta.setZero(gradient().size());
139 Heta.setZero(gradient().size());
140 }
141
146 void setup(const TrustRegionSettings& p_settings) {
147 options = p_settings;
148 assert(options.rho_prime < 0.25 && "options.rho_prime must be strictly smaller than 1/4.");
149 assert(options.Delta_bar > 0 && "options.Delta_bar must be positive.");
150 assert(options.Delta0 > 0 && options.Delta0 < options.Delta_bar
151 && "options.Delta0 must be positive and smaller than Delta_bar.");
152 }
153
154#ifndef DOXYGEN
155 struct NoPredictor {};
156#endif
163 template <typename SolutionType = NoPredictor>
164 requires std::is_same_v<SolutionType, NoPredictor> || std::is_convertible_v<SolutionType, CorrectionType>
165 NonLinearSolverInformation solve(const SolutionType& dx_predictor = NoPredictor{}) {
167 stats = Stats{};
168 info = AlgoInfo{};
169
170 NonLinearSolverInformation solverInformation;
171 nonLinearOperator().updateAll();
172 stats.energy = energy();
173 auto& x = nonLinearOperator().firstParameter();
174 xOld = x;
175 stats.gradNorm = norm(gradient());
176 if constexpr (not std::is_same_v<SolutionType, NoPredictor>) updateFunction(x, dx_predictor);
177 truncatedConjugateGradient.analyzePattern(hessian());
178
179 innerInfo.Delta = options.Delta0;
180 spdlog::info(
181 " | iter | inner_i | rho | energy | energy_p | energy_inc | norm(g) | Delta | norm(corr) | "
182 "InnerBreakReason");
183 spdlog::info("{:-^143}", "-");
184 while (not stoppingCriterion()) {
186 if (options.useRand) {
187 if (stats.outerIter == 0) {
188 eta.setRandom();
189 while (eta.dot(hessian() * eta) > innerInfo.Delta * innerInfo.Delta)
190 eta *= eps; // eps is sqrt(sqrt(maschine-precision))
191 } else
192 eta.setZero();
193 } else
194 eta.setZero();
195
196 solveInnerProblem();
197 stats.innerIterSum += innerInfo.numInnerIter;
198
199 info.stopReasonString = tcg_stop_reason[static_cast<int>(innerInfo.stop_tCG)];
200 Heta = hessian() * eta;
201 if (options.useRand and stats.outerIter == 0) {
202 info.used_cauchy = false;
203 info.randomPredictionString = " Used Random correction predictor";
204 info.cauchystr = " ";
205 double tau_c;
206 // Check the curvature
207 const Eigen::VectorXd Hg = hessian() * gradient();
208 const auto g_Hg = (gradient().dot(Hg));
209 if (g_Hg <= 0)
210 tau_c = 1;
211 else
212 tau_c = std::min(Dune::power(stats.gradNorm, 3) / (innerInfo.Delta * g_Hg), 1.0);
213
214 // generate the Cauchy point.
215 const Eigen::VectorXd eta_c = -tau_c * innerInfo.Delta / stats.gradNorm * gradient();
216 const Eigen::VectorXd Heta_c = -tau_c * innerInfo.Delta / stats.gradNorm * Hg;
217
218 const double mdle = stats.energy + gradient().dot(eta) + .5 * Heta.dot(eta);
219 const double mdlec = stats.energy + gradient().dot(eta_c) + .5 * Heta_c.dot(eta_c);
220 if (mdlec < mdle && stats.outerIter == 0) {
221 eta = eta_c;
222 Heta = Heta_c;
223 info.used_cauchy = true;
224
225 info.cauchystr = " USED CAUCHY POINT!";
226 }
227 } else
228 info.cauchystr = " ";
229
230 stats.etaNorm = eta.norm();
231
232 updateFunction(x, eta);
233
234 // Calculate energy of our proposed update step
235 nonLinearOperator().template update<0>();
236 stats.energyProposal = energy();
237
238 // Will we accept the proposal or not?
239 // Check the performance of the quadratic model against the actual energy.
240 auto rhonum = stats.energy - stats.energyProposal;
241 auto rhoden = -eta.dot(gradient() + 0.5 * Heta);
242
243 /* Close to convergence the proposed energy and the real energy almost coincide.
244 * Therefore, the performance check of our model becomes ill-conditioned
245 * The regularisation fixes this */
246 const auto rho_reg = std::max(1.0, abs(stats.energy)) * eps * options.rho_reg;
247 rhonum = rhonum + rho_reg;
248 rhoden = rhoden + rho_reg;
249
250 const bool model_decreased = rhoden > 0.0;
251
252 if (!model_decreased) info.stopReasonString.append(", model did not decrease");
253
254 stats.rho = rhonum / rhoden;
255 stats.rho = stats.rho < 0.0 ? -1.0 : stats.rho; // move rho to the domain [-1.0,inf]
256
257 info.trstr = " ";
258
259 // measure if energy decreased
260 const bool energyDecreased = Dune::FloatCmp::ge(stats.energy - stats.energyProposal, -1e-12);
261
262 // If the model behaves badly or if the energy increased we reduce the trust region size
263 if (stats.rho < 1e-4 || not model_decreased || std::isnan(stats.rho) || not energyDecreased) {
264 info.trstr = "TR-";
265 innerInfo.Delta /= 4.0;
266 info.consecutive_TRplus = 0;
267 info.consecutive_TRminus++;
268 if (info.consecutive_TRminus >= 5 && options.verbosity >= 1) {
269 info.consecutive_TRminus = -std::numeric_limits<int>::infinity();
270 spdlog::info(" +++ Detected many consecutive TR- (radius decreases).");
271 spdlog::info(" +++ Consider decreasing options.Delta_bar by an order of magnitude.");
272 }
273
274 } else if (stats.rho > 0.99
277 info.trstr = "TR+";
278 innerInfo.Delta = std::min(3.5 * innerInfo.Delta, options.Delta_bar);
279 info.consecutive_TRminus = 0;
280 info.consecutive_TRplus++;
281 if (info.consecutive_TRplus >= 5 && options.verbosity >= 1) {
282 info.consecutive_TRplus = -std::numeric_limits<int>::infinity();
283 spdlog::info(" +++ Detected many consecutive TR+ (radius increases)");
284 spdlog::info(" +++ Consider increasing options.Delta_bar by an order of magnitude");
285
286 } else {
287 info.consecutive_TRplus = 0;
288 info.consecutive_TRminus = 0;
289 }
290 }
291
292 if (model_decreased && stats.rho > options.rho_prime && energyDecreased) {
293 if (stats.energyProposal > stats.energy)
294 spdlog::info(
295 "Energy function increased by {} (step size: {}). Since this is small we accept the step and hope for "
296 "convergence of the gradient norm.",
297 stats.energyProposal - stats.energy, stats.etaNorm);
298
299 info.acceptProposal = true;
300 info.accstr = "acc";
301 info.consecutive_Rejected = 0;
302 } else {
303 info.acceptProposal = false;
304 info.accstr = "REJ";
305
306 if (info.consecutive_Rejected >= 5)
307 innerInfo.Delta /= 2;
308 else
309 innerInfo.Delta = std::min(innerInfo.Delta, stats.etaNorm / 2.0);
311 }
312
313 stats.outerIter++;
314
315 if (options.verbosity == 1) logState();
316
317 info.randomPredictionString = "";
318
319 if (info.acceptProposal) {
320 stats.energy = stats.energyProposal;
321 nonLinearOperator_.updateAll();
322 xOld = x;
326 } else {
327 x = xOld;
328 eta.setZero();
329 }
330 nonLinearOperator_.updateAll();
331 stats.gradNorm = gradient().norm();
333 }
334 spdlog::info("{}", info.reasonString);
335 spdlog::info("Total iterations: {} Total CG Iterations: {}", stats.outerIter, stats.innerIterSum);
336
337 solverInformation.success
339
340 solverInformation.iterations = stats.outerIter;
341 solverInformation.residualNorm = stats.gradNorm;
342 if (solverInformation.success)
343 this->notify(NonLinearSolverMessages::FINISHED_SUCESSFULLY, solverInformation.iterations);
344 return solverInformation;
345 }
350 auto& nonLinearOperator() { return nonLinearOperator_; }
351
352 private:
353 void logState() const {
354 spdlog::info(
355 "{:>3s} {:>3s} {:>6d} {:>9d} {:>6.2f} {:>9.2e} {:>9.2e} {:>11.2e} {:>9.2e} {:>9.2e} {:>11.2e} "
356 "{:<73}",
357 info.accstr, info.trstr, stats.outerIter, innerInfo.numInnerIter, stats.rho, stats.energy,
358 stats.energyProposal, stats.energyProposal - stats.energy, stats.gradNorm, innerInfo.Delta, stats.etaNorm,
360 }
361
362 void logFinalState() {
363 spdlog::info("{:>3s} {:>3s} {:>6d} {:>9d} {: ^6} {: ^9} {: ^9} {: ^11} {:>9.2e} {: ^9} {: ^11} {:<73}",
364 info.accstr, info.trstr, stats.outerIter, innerInfo.numInnerIter, " ", " ", " ", " ", stats.gradNorm,
365 " ", " ", info.stopReasonString + info.cauchystr + info.randomPredictionString);
366 }
367
368 inline const auto& energy() { return nonLinearOperator().value(); }
369 inline const auto& gradient() { return nonLinearOperator().derivative(); }
370 inline const auto& hessian() { return nonLinearOperator().secondDerivative(); }
371
372 bool stoppingCriterion() {
373 std::ostringstream stream;
375 if (stats.gradNorm < options.grad_tol && stats.outerIter != 0) {
376 logFinalState();
377 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(gradient): {:1.16e}", nonLinearOperator().value(),
378 stats.gradNorm);
379 stream << "Gradient norm tolerance reached; options.tolerance = " << options.grad_tol;
380
381 info.reasonString = stream.str();
382
384 return true;
385 } else if (stats.etaNorm < options.corr_tol && stats.outerIter != 0) {
386 logFinalState();
387 spdlog::info("CONVERGENCE: Energy: {:1.16e} norm(correction): {:1.16e}", nonLinearOperator().value(),
388 stats.etaNorm);
389 stream << "Displacement norm tolerance reached; = " << options.corr_tol << "." << std::endl;
390
391 info.reasonString = stream.str();
393 return true;
394 }
395
397 if (stats.time >= options.maxtime) {
398 logFinalState();
399 stream << "Max time exceeded; options.maxtime = " << options.maxtime << ".";
400 info.reasonString = stream.str();
402 return true;
403 }
404
406 if (stats.outerIter >= options.maxiter) {
407 logFinalState();
408 stream << "Max iteration count reached; options.maxiter = " << options.maxiter << ".";
409 info.reasonString = stream.str();
411 return true;
412 }
413 return false;
414 }
415
416 void solveInnerProblem() {
417 truncatedConjugateGradient.setInfo(innerInfo);
418 int attempts = 0;
419 truncatedConjugateGradient.factorize(hessian());
420 // If the preconditioner is IncompleteCholesky the factorization may fail if we have negative diagonal entries and
421 // the initial shift is too small. Therefore, if the factorization fails we increase the initial shift by a factor
422 // of 5.
423 if constexpr (preConditioner == PreConditioner::IncompleteCholesky) {
424 while (truncatedConjugateGradient.info() != Eigen::Success) {
425 choleskyInitialShift *= 5;
426 truncatedConjugateGradient.preconditioner().setInitialShift(choleskyInitialShift);
427 truncatedConjugateGradient.factorize(hessian());
428 if (attempts > 5) DUNE_THROW(Dune::MathError, "Factorization of preconditioner failed!");
429 ++attempts;
430 }
431 if (truncatedConjugateGradient.info() == Eigen::Success) choleskyInitialShift = 1e-3;
432 }
433 eta = truncatedConjugateGradient.solveWithGuess(-gradient(), eta);
434 innerInfo = truncatedConjugateGradient.getInfo();
435 }
436
437 NonLinearOperatorImpl nonLinearOperator_;
438 UpdateFunctionType updateFunction;
439 typename NonLinearOperatorImpl::template ParameterValue<0> xOld;
440 CorrectionType eta;
441 CorrectionType Heta;
442 TrustRegionSettings options;
443 AlgoInfo info;
444 double choleskyInitialShift = 1e-3;
445 Eigen::TCGInfo<double> innerInfo;
446 Stats stats;
447 static constexpr double eps = 0.0001220703125; // 0.0001220703125 is sqrt(sqrt(maschine-precision))
448 std::array<std::string, 6> tcg_stop_reason{
449 {"negative curvature", "exceeded trust region", "reached target residual-kappa (linear)",
450 "reached target residual-theta (superlinear)", "maximum inner iterations", "model increased"}};
451
452 using PreConditionerType
453 = std::conditional_t<preConditioner == PreConditioner::IdentityPreconditioner, Eigen::IdentityPreconditioner,
454 std::conditional_t<preConditioner == PreConditioner::DiagonalPreconditioner,
455 typename Eigen::DiagonalPreconditioner<ScalarType>,
456 typename Eigen::IncompleteCholesky<ScalarType>>>;
458 truncatedConjugateGradient;
459 };
460
471 template <typename NonLinearOperatorImpl, PreConditioner preConditioner = PreConditioner::IncompleteCholesky,
472 typename UpdateFunctionType = utils::UpdateDefault>
473 auto makeTrustRegion(const NonLinearOperatorImpl& p_nonLinearOperator, UpdateFunctionType&& p_updateFunction = {}) {
474 return std::make_shared<TrustRegion<NonLinearOperatorImpl, preConditioner, UpdateFunctionType>>(p_nonLinearOperator,
475 p_updateFunction);
476 }
477
478} // namespace Ikarus
Collection of fallback default functions.
Contains stl-like type traits.
Helper for the autodiff library.
Definition of TruncatedConjugateGradient class for solving linear systems using truncated conjugate g...
Enums for observer messages.
Implementation of the observer design pattern.
Implementation of the Newton-Raphson method for solving nonlinear equations.
auto norm(const Eigen::MatrixBase< Derived > &v)
Adding free norm function to Eigen types.
Definition: linearalgebrahelper.hh:258
Definition: simpleassemblers.hh:21
auto makeTrustRegion(const NonLinearOperatorImpl &p_nonLinearOperator, UpdateFunctionType &&p_updateFunction={})
Creates an instance of the TrustRegion solver.
Definition: trustregion.hh:473
StopReason
Enumeration of reasons for stopping the TrustRegion solver.
Definition: trustregion.hh:57
PreConditioner
Enumeration of available preconditioners for the trust region solver.
Definition: trustregion.hh:33
Scalar Delta
Definition: truncatedconjugategradient.hh:36
TCGStopReason stop_tCG
Definition: truncatedconjugategradient.hh:35
Eigen::Index numInnerIter
Definition: truncatedconjugategradient.hh:41
TCGInfo< typename MatrixType::RealScalar > getInfo()
Get information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:219
void setInfo(TCGInfo< typename MatrixType::RealScalar > _alginfo)
Set information about the truncated conjugate gradient algorithm.
Definition: truncatedconjugategradient.hh:225
Information about the result of a non-linear solver.
Definition: solverinfos.hh:18
Configuration settings for the TrustRegion solver.
Definition: trustregion.hh:38
int miniter
Minimum number of iterations.
Definition: trustregion.hh:41
double rho_reg
Regularization value for rho.
Definition: trustregion.hh:48
int verbosity
Verbosity level.
Definition: trustregion.hh:39
double grad_tol
Gradient tolerance.
Definition: trustregion.hh:44
double Delta0
Initial trust region radius.
Definition: trustregion.hh:50
int debug
Debugging flag.
Definition: trustregion.hh:43
double Delta_bar
Maximum trust region radius.
Definition: trustregion.hh:49
bool useRand
Flag for using random correction predictor.
Definition: trustregion.hh:47
int maxiter
Maximum number of iterations.
Definition: trustregion.hh:42
double rho_prime
Rho prime value.
Definition: trustregion.hh:46
double maxtime
Maximum allowable time for solving.
Definition: trustregion.hh:40
double corr_tol
Correction tolerance.
Definition: trustregion.hh:45
Additional information about the TrustRegion algorithm.
Definition: trustregion.hh:69
std::string cauchystr
Used Cauchy step string.
Definition: trustregion.hh:77
StopReason stop
Stopping reason.
Definition: trustregion.hh:80
std::string randomPredictionString
Random prediction string.
Definition: trustregion.hh:76
bool acceptProposal
Flag indicating whether the proposal is accepted.
Definition: trustregion.hh:78
std::string reasonString
String describing the stopping reason.
Definition: trustregion.hh:81
int consecutive_TRminus
Consecutive trust region decreases.
Definition: trustregion.hh:71
bool used_cauchy
Flag indicating whether Cauchy point was used.
Definition: trustregion.hh:79
int consecutive_Rejected
Consecutive rejected proposals.
Definition: trustregion.hh:72
std::string trstr
Trust region change string (TR+, TR-).
Definition: trustregion.hh:74
std::string stopReasonString
String describing the stopping reason.
Definition: trustregion.hh:73
std::string accstr
Acceptance string (REJ, acc).
Definition: trustregion.hh:75
int consecutive_TRplus
Consecutive trust region increases.
Definition: trustregion.hh:70
Information about the TrustRegion solver.
Definition: trustregion.hh:88
double rho
Definition: trustregion.hh:94
double etaNorm
Definition: trustregion.hh:90
double energyProposal
Definition: trustregion.hh:93
double energy
Definition: trustregion.hh:92
double gradNorm
Definition: trustregion.hh:89
int outerIter
Definition: trustregion.hh:95
double time
Definition: trustregion.hh:91
int innerIterSum
Definition: trustregion.hh:96
Trust Region solver for non-linear optimization problems.
Definition: trustregion.hh:113
auto & nonLinearOperator()
Access the nonlinear operator.
Definition: trustregion.hh:350
void setup(const TrustRegionSettings &p_settings)
Sets up the TrustRegion solver with the provided settings and checks feasibility.
Definition: trustregion.hh:146
NonLinearSolverInformation solve(const SolutionType &dx_predictor=NoPredictor{})
Solves the nonlinear optimization problem using the TrustRegion algorithm.
Definition: trustregion.hh:165
UpdateFunctionTypeImpl UpdateFunctionType
Type of the update function.
Definition: trustregion.hh:118
typename NonLinearOperatorImpl::template ParameterValue< 0 > ValueType
Definition: trustregion.hh:116
NonLinearOperatorImpl NonLinearOperator
Type of the non-linear operator.
Definition: trustregion.hh:120
std::remove_cvref_t< typename NonLinearOperatorImpl::template FunctionReturnType< 0 > > ScalarType
Definition: trustregion.hh:124
typename NonLinearOperatorImpl::DerivativeType CorrectionType
Type of the correction of x += deltaX.
Definition: trustregion.hh:117
std::remove_cvref_t< typename NonLinearOperatorImpl::template FunctionReturnType< 2 > > MatrixType
Type of the Hessian.
Definition: trustregion.hh:127
TrustRegion(const NonLinearOperatorImpl &p_nonLinearOperator, UpdateFunctionTypeImpl p_updateFunction={})
Constructs a TrustRegion solver instance.
Definition: trustregion.hh:134
Generic observable interface for the Observer design pattern. See for a description of the design pa...
Definition: observer.hh:125
void notify(NonLinearSolverMessages message)
Notify observers about a specific message type.
Definition: observer.hh:254