Implement a fallback watchdog option
Sometimes our initial watchdog action is not enough to recover the host
from the state it transitioned into. However, always using a more
powerful form of power cycle is not desirable as we can lose useful CPU
crash state. It is desirable in this case to have two levels of watchdog
timers.
This patch implements the ability for the service to specify a fallback
watchdog action and interval. After the initial watchdog timeout is
encountered, the watchdog will be re-armed with the new parameters. Once
the watchdog times out again it will execute the fallback action.
Attempts to update the timeRemaining will reset the fallback just in
case something is still alive.
Change-Id: I69f4422c7e3963f02200815f3cef620af9e6cf8b
Signed-off-by: William A. Kennington III <wak@google.com>
diff --git a/mainapp.cpp b/mainapp.cpp
index d935bfb..365fd50 100644
--- a/mainapp.cpp
+++ b/mainapp.cpp
@@ -15,6 +15,7 @@
*/
#include <iostream>
+#include <experimental/optional>
#include <phosphor-logging/log.hpp>
#include <phosphor-logging/elog.hpp>
#include <phosphor-logging/elog-errors.hpp>
@@ -133,6 +134,47 @@
}
printActionTargets(actionTargets);
+ // Parse out the fallback settings for the watchdog. Note that we require
+ // both of the fallback arguments to do anything here, but having a fallback
+ // is entirely optional.
+ auto fallbackActionParam = (options)["fallback_action"];
+ auto fallbackIntervalParam = (options)["fallback_interval"];
+ if (fallbackActionParam.empty() ^ fallbackIntervalParam.empty())
+ {
+ exitWithError("Only one of the fallback options was specified.", argv);
+ }
+ if (fallbackActionParam.size() > 1 || fallbackIntervalParam.size() > 1)
+ {
+ exitWithError("Multiple fallbacks specified.", argv);
+ }
+ std::experimental::optional<Watchdog::Fallback> fallback;
+ if (!fallbackActionParam.empty())
+ {
+ Watchdog::Action action;
+ try
+ {
+ action = Watchdog::convertActionFromString(
+ fallbackActionParam.back());
+ }
+ catch (const sdbusplus::exception::InvalidEnumString &)
+ {
+ exitWithError("Bad action specified.", argv);
+ }
+ uint64_t interval;
+ try
+ {
+ interval = std::stoull(fallbackIntervalParam.back());
+ }
+ catch (const std::logic_error &)
+ {
+ exitWithError("Failed to convert fallback interval to integer.", argv);
+ }
+ fallback = Watchdog::Fallback{
+ .action = action,
+ .interval = interval,
+ };
+ }
+
sd_event* event = nullptr;
auto r = sd_event_default(&event);
if (r < 0)
@@ -155,7 +197,9 @@
try
{
// Create a watchdog object
- Watchdog watchdog(bus, path.c_str(), eventP, std::move(actionTargets));
+ Watchdog watchdog(bus, path.c_str(), eventP, std::move(actionTargets),
+ std::move(fallback));
+
// Claim the bus
bus.request_name(service.c_str());