watchdog: add support for systemd watchdog
Add interfaces and handling for systemd watchdog petting.
Systemd service files can specify a watchdog timeout and systemd will
expect the application to periodically poke a software watchdog, or
else the service will be restarted. This is enabled with the
'WatchdogSec=' service file directive. Add primitives for
interacting with the watchdog APIs.
Enable automatic support in the `async::context` for this watchdog
handling, such that if the watchdog is required (by checking
sd_watchdog_enabled) the daemon will automatically pet at the
appropriate rate, assuming that the `async::context` is functioning
correctly.
Signed-off-by: Patrick Williams <patrick@stwcx.xyz>
Change-Id: I68caf7b2c7166ca402b07ecee3db65f75365aa72
diff --git a/include/sdbusplus/bus.hpp b/include/sdbusplus/bus.hpp
index e0a16e6..a0b0aea 100644
--- a/include/sdbusplus/bus.hpp
+++ b/include/sdbusplus/bus.hpp
@@ -531,6 +531,18 @@
current_exception = exception;
}
+ /** @brief Determine if the systemd watchdog feature is enabled.
+ *
+ * Queries sd_watchdog_enabled() through this bus's sdbus interface.
+ *
+ * @return 0 - watchdog disabled, N - watchdog interval in uSec.
+ * @throws exception::SdBusError if sd_watchdog_enabled() returns a
+ * negative value.
+ */
+ uint64_t watchdog_enabled();
+
+ /** @brief Pet the systemd watchdog for this application.
+ *
+ * Sends the "WATCHDOG=1" state through sd_notify().
+ *
+ * @throws exception::SdBusError if sd_notify() returns a negative value.
+ */
+ void watchdog_pet();
+
+ /** @brief Trigger a systemd watchdog failure for this application.
+ *
+ * Sends the "WATCHDOG=trigger" state through sd_notify().
+ *
+ * @throws exception::SdBusError if sd_notify() returns a negative value.
+ */
+ void watchdog_trigger();
+
friend struct details::bus_friend;
protected:
diff --git a/include/sdbusplus/sdbus.hpp b/include/sdbusplus/sdbus.hpp
index ac14868..5f3b621 100644
--- a/include/sdbusplus/sdbus.hpp
+++ b/include/sdbusplus/sdbus.hpp
@@ -1,6 +1,7 @@
#pragma once
#include <systemd/sd-bus.h>
+#include <systemd/sd-daemon.h>
#include <chrono>
@@ -166,6 +167,10 @@
virtual int sd_bus_wait(sd_bus* bus, uint64_t timeout_usec) = 0;
+ // Indirection for ::sd_notify (declared in <systemd/sd-daemon.h>); the
+ // concrete implementation in this header forwards the arguments unchanged.
+ virtual int sd_notify(int unset_environment, const char* state) = 0;
+
+ // Indirection for ::sd_watchdog_enabled (declared in
+ // <systemd/sd-daemon.h>); the concrete implementation in this header
+ // forwards the arguments unchanged.
+ virtual int sd_watchdog_enabled(int unset_environment, uint64_t* usec) = 0;
+
virtual int sd_bus_message_append_array(sd_bus_message* m, char type,
const void* ptr, size_t size) = 0;
virtual int sd_bus_message_read_array(sd_bus_message* m, char type,
@@ -571,6 +576,16 @@
return ::sd_bus_wait(bus, timeout_usec);
}
+    /** Pass-through to the global ::sd_notify; the result is returned as-is. */
+    int sd_notify(int unset_environment, const char* state) override
+    {
+        const int rc = ::sd_notify(unset_environment, state);
+        return rc;
+    }
+
+    /** Pass-through to the global ::sd_watchdog_enabled; the result is
+     *  returned as-is and `usec` is handed to the callee untouched. */
+    int sd_watchdog_enabled(int unset_environment, uint64_t* usec) override
+    {
+        const int rc = ::sd_watchdog_enabled(unset_environment, usec);
+        return rc;
+    }
+
int sd_bus_message_append_array(sd_bus_message* m, char type,
const void* ptr, size_t size) override
{
diff --git a/include/sdbusplus/test/sdbus_mock.hpp b/include/sdbusplus/test/sdbus_mock.hpp
index fa65ac5..5c14281 100644
--- a/include/sdbusplus/test/sdbus_mock.hpp
+++ b/include/sdbusplus/test/sdbus_mock.hpp
@@ -164,6 +164,11 @@
MOCK_METHOD(void, sd_bus_close, (sd_bus*), (override));
MOCK_METHOD(int, sd_bus_is_open, (sd_bus*), (override));
MOCK_METHOD(int, sd_bus_wait, (sd_bus*, uint64_t), (override));
+
+ // Mock of sd_notify(unset_environment, state).
+ MOCK_METHOD(int, sd_notify, (int, const char*), (override));
+
+ // Mock of sd_watchdog_enabled(unset_environment, usec); `usec` is a
+ // pointer argument that a test action may write through.
+ MOCK_METHOD(int, sd_watchdog_enabled, (int, uint64_t* usec), (override));
+
MOCK_METHOD(int, sd_bus_message_append_array,
(sd_bus_message*, char, const void*, size_t), (override));
MOCK_METHOD(int, sd_bus_message_read_array,
diff --git a/src/async/context.cpp b/src/async/context.cpp
index 3381cb3..d4ec5e9 100644
--- a/src/async/context.cpp
+++ b/src/async/context.cpp
@@ -1,6 +1,8 @@
#include <systemd/sd-bus.h>
#include <sdbusplus/async/context.hpp>
+#include <sdbusplus/async/task.hpp>
+#include <sdbusplus/async/timer.hpp>
#include <chrono>
@@ -146,8 +148,29 @@
}
}
+/** Internal task that keeps the systemd watchdog fed.
+ *
+ *  Asks the context's bus once for the watchdog timeout.  A timeout of zero
+ *  ends the task immediately.  Otherwise the watchdog is petted and the task
+ *  sleeps for half the timeout, repeating until the context reports that a
+ *  stop was requested.
+ *
+ *  @param[in] ctx - The async context whose bus and stop state are used.
+ */
+static auto watchdog_loop(sdbusplus::async::context& ctx) -> task<>
+{
+    using std::chrono::microseconds;
+
+    const auto timeout = microseconds(ctx.get_bus().watchdog_enabled());
+    if (timeout == microseconds::zero())
+    {
+        // Nothing to do; no pet is ever sent.
+        co_return;
+    }
+
+    // Recommended interval is half of WATCHDOG_USEC
+    const auto pet_interval = timeout / 2;
+
+    for (;;)
+    {
+        if (ctx.stop_requested())
+        {
+            break;
+        }
+
+        ctx.get_bus().watchdog_pet();
+        co_await sleep_for(ctx, pet_interval);
+    }
+}
+
void context::worker_run()
{
+ internal_tasks.spawn(watchdog_loop(*this));
+
// Start the sdbus 'wait/process' loop; treat it as an internal task.
internal_tasks.spawn(details::wait_process_completion::loop(*this));
diff --git a/src/bus.cpp b/src/bus.cpp
index 95dfc61..5d542c8 100644
--- a/src/bus.cpp
+++ b/src/bus.cpp
@@ -141,4 +141,33 @@
}
}
+/** Send the "WATCHDOG=1" notification through the sdbus interface.
+ *
+ *  @throws exception::SdBusError when sd_notify returns a negative value; the
+ *          negated return value is passed as the error number.
+ */
+void bus::watchdog_pet()
+{
+    if (const int rc = _intf->sd_notify(0, "WATCHDOG=1"); rc < 0)
+    {
+        throw exception::SdBusError(-rc, "sd_notify WATCHDOG=1");
+    }
+}
+
+/** Send the "WATCHDOG=trigger" notification through the sdbus interface.
+ *
+ *  @throws exception::SdBusError when sd_notify returns a negative value; the
+ *          negated return value is passed as the error number.
+ */
+void bus::watchdog_trigger()
+{
+    if (const int rc = _intf->sd_notify(0, "WATCHDOG=trigger"); rc < 0)
+    {
+        throw exception::SdBusError(-rc, "sd_notify WATCHDOG=trigger");
+    }
+}
+
+/** Query the watchdog interval through the sdbus interface.
+ *
+ *  The interval starts out as 0 and is returned exactly as the callee left
+ *  it, unless the call reports an error.
+ *
+ *  @return The interval in microseconds (0 if the callee did not set one).
+ *  @throws exception::SdBusError when sd_watchdog_enabled returns a negative
+ *          value; the negated return value is passed as the error number.
+ */
+uint64_t bus::watchdog_enabled()
+{
+    uint64_t interval_usec = 0;
+
+    if (const int rc = _intf->sd_watchdog_enabled(0, &interval_usec); rc < 0)
+    {
+        throw exception::SdBusError(-rc, "sd_watchdog_enabled");
+    }
+
+    return interval_usec;
+}
+
} // namespace sdbusplus::bus
diff --git a/test/async/watchdog.cpp b/test/async/watchdog.cpp
new file mode 100644
index 0000000..9ab9604
--- /dev/null
+++ b/test/async/watchdog.cpp
@@ -0,0 +1,100 @@
+#include <sdbusplus/async.hpp>
+#include <sdbusplus/bus.hpp>
+#include <sdbusplus/test/sdbus_mock.hpp>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace sdbusplus::async
+{
+/** SdBusImpl variant for the watchdog tests.
+ *
+ *  Only sd_notify and sd_watchdog_enabled are replaced with gmock methods;
+ *  everything else is inherited from SdBusImpl.  The object opens a bus
+ *  handle in its constructor and drops its reference in the destructor.
+ */
+struct WatchdogMock : sdbusplus::SdBusImpl
+{
+    WatchdogMock() : sdbusplus::SdBusImpl()
+    {
+        // The return value is intentionally not inspected here.
+        this->sd_bus_open(&busp);
+    }
+
+    ~WatchdogMock()
+    {
+        this->sd_bus_unref(busp);
+    }
+
+    MOCK_METHOD(int, sd_notify, (int, const char*), (override));
+
+    MOCK_METHOD(int, sd_watchdog_enabled, (int, uint64_t* usec), (override));
+
+    /** Bus handle filled in by sd_bus_open in the constructor. */
+    sdbusplus::bus::busp_t busp = nullptr;
+};
+
+/** Test fixture: an async context built on a bus that routes its sdbus calls
+ *  through a WatchdogMock.
+ *
+ *  sdbusMock is declared before ctx so that it is constructed first and can
+ *  be handed to the bus used by ctx.
+ */
+struct WatchdogContext : testing::Test
+{
+    WatchdogMock sdbusMock;
+    sdbusplus::async::context ctx;
+
+    WatchdogContext() : ctx(sdbusplus::bus_t(sdbusMock.busp, &sdbusMock)) {}
+
+    ~WatchdogContext() noexcept override = default;
+
+    void TearDown() override {}
+
+    /** Queue a task on the context that asks the context to stop. */
+    void spawnStop()
+    {
+        auto requestStop = [this]() { ctx.request_stop(); };
+        ctx.spawn(stdexec::just() | stdexec::then(requestStop));
+    }
+
+    /** Queue the stop request, then run the context. */
+    void runToStop()
+    {
+        spawnStop();
+        ctx.run();
+    }
+};
+
+/** The watchdog is reported as enabled; any heartbeat sent must be
+ *  "WATCHDOG=1". */
+TEST_F(WatchdogContext, WatchdogEnabledAndHeartbeat)
+{
+    using namespace testing;
+    using namespace std::literals;
+
+    // Expect sd_watchdog_enabled to be called once and report a .1 second
+    // timeout.  sd_watchdog_enabled(3) signals "enabled" with a positive
+    // return value (0 means "disabled"), so the mock returns 1 to mirror the
+    // real API rather than relying on the caller ignoring the distinction.
+    EXPECT_CALL(sdbusMock, sd_watchdog_enabled(_, _))
+        .WillOnce([](int, uint64_t* usec) {
+            *usec = 100000; // .1 second
+            return 1;
+        });
+
+    // The watchdog_loop divides the time by 2, so heartbeats are sent every
+    // 0.05 seconds while it runs.  WillRepeatedly without a Times() clause
+    // accepts any number of calls, including none: this expectation pins the
+    // notification string, not a minimum heartbeat count.
+    EXPECT_CALL(sdbusMock, sd_notify(_, StrEq("WATCHDOG=1")))
+        .WillRepeatedly(Return(0));
+
+    // Keep a sleeping task queued while the context runs.
+    // NOTE(review): runToStop() also queues an immediate stop request, so
+    // confirm how long the context really stays up before relying on a
+    // particular number of heartbeats.
+    ctx.spawn(sdbusplus::async::sleep_for(ctx, 1s));
+    runToStop();
+
+    // A pass means sd_watchdog_enabled was called exactly once and every
+    // sd_notify call carried "WATCHDOG=1".
+}
+
+/** When the watchdog interval comes back as zero, no sd_notify call may be
+ *  made at all. */
+TEST_F(WatchdogContext, WatchdogDisabled)
+{
+    using namespace testing;
+    using namespace std::literals;
+
+    // Action for the single expected sd_watchdog_enabled call: report a zero
+    // interval, i.e. watchdog disabled.
+    auto reportDisabled = [](int, uint64_t* usec) {
+        *usec = 0;
+        return 0;
+    };
+    EXPECT_CALL(sdbusMock, sd_watchdog_enabled(_, _)).WillOnce(reportDisabled);
+
+    // Any sd_notify call, whatever its arguments, fails the test.
+    EXPECT_CALL(sdbusMock, sd_notify(_, _)).Times(0);
+
+    // No extra sleeping task is spawned: the watchdog loop is expected to
+    // return right away.
+    runToStop();
+}
+
+} // namespace sdbusplus::async
diff --git a/test/meson.build b/test/meson.build
index 4fe9b15..300c3bc 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -24,6 +24,7 @@
'async/task',
'async/timer',
'async/fdio',
+ 'async/watchdog',
'bus/exception',
'bus/list_names',
'bus/match',