George Hung | 0a40897 | 2020-07-09 22:22:22 +0800 | [diff] [blame^] | 1 | From ac0c216ac2c273c620579fd1308c8c225e0cca36 Mon Sep 17 00:00:00 2001 |
| 2 | From: James Feist <james.feist@linux.intel.com> |
| 3 | Date: Mon, 17 Jun 2019 12:00:58 -0700 |
| 4 | Subject: [PATCH] Customize phosphor-watchdog for Intel platforms |
| 5 | |
| 6 | This patch adds various changes to phosphor-watchdog that are |
| 7 | required for compatibility with Intel platforms. |
| 8 | |
| 9 | 1. Add Redfish messages for watchdog timeout and pre-interrupt |
| 10 | 2. Use dbus properties for power control instead of service files |
| 11 | 3. Use host status to enable/disable watchdog |
| 12 | 4. Set preTimeoutInterruptOccurFlag |
| 13 | |
| 14 | Signed-off-by: James Feist <james.feist@linux.intel.com> |
| 15 | Signed-off-by: Ren Yu <yux.ren@intel.com> |
| 16 | Signed-off-by: Yong Li <yong.b.li@linux.intel.com> |
| 17 | Signed-off-by: Jason M. Bills <jason.m.bills@linux.intel.com> |
| 18 | --- |
| 19 | watchdog.cpp | 193 ++++++++++++++++++++++++++++++++++++++++++++++++--- |
| 20 | watchdog.hpp | 23 +++++- |
| 21 | 2 files changed, 206 insertions(+), 10 deletions(-) |
| 22 | |
| 23 | diff --git a/watchdog.cpp b/watchdog.cpp |
| 24 | index 57e9050..3b5356f 100644 |
| 25 | --- a/watchdog.cpp |
| 26 | +++ b/watchdog.cpp |
| 27 | @@ -1,11 +1,14 @@ |
| 28 | #include "watchdog.hpp" |
| 29 | |
| 30 | +#include <systemd/sd-journal.h> |
| 31 | + |
| 32 | #include <algorithm> |
| 33 | #include <chrono> |
| 34 | #include <phosphor-logging/elog.hpp> |
| 35 | #include <phosphor-logging/log.hpp> |
| 36 | #include <sdbusplus/exception.hpp> |
| 37 | #include <xyz/openbmc_project/Common/error.hpp> |
| 38 | +#include <xyz/openbmc_project/State/Host/server.hpp> |
| 39 | |
| 40 | namespace phosphor |
| 41 | { |
| 42 | @@ -18,10 +21,69 @@ using namespace phosphor::logging; |
| 43 | using sdbusplus::exception::SdBusError; |
| 44 | using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; |
| 45 | |
| 46 | -// systemd service to kick start a target. |
| 47 | -constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1"; |
| 48 | -constexpr auto SYSTEMD_ROOT = "/org/freedesktop/systemd1"; |
| 49 | -constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager"; |
| 50 | +const static constexpr char* currentHostState = "CurrentHostState"; |
| 51 | +const static constexpr char* hostStatusOff = |
| 52 | + "xyz.openbmc_project.State.Host.HostState.Off"; |
| 53 | + |
| 54 | +const static constexpr char* actionDescription = " due to Watchdog timeout"; |
| 55 | +const static constexpr char* hardResetDescription = "Hard Reset - System reset"; |
| 56 | +const static constexpr char* powerOffDescription = |
| 57 | + "Power Down - System power down"; |
| 58 | +const static constexpr char* powerCycleDescription = |
| 59 | + "Power Cycle - System power cycle"; |
| 60 | +const static constexpr char* timerExpiredDescription = "Timer expired"; |
| 61 | + |
| 62 | +const static constexpr char* preInterruptActionNone = |
| 63 | + "xyz.openbmc_project.State.Watchdog.PreTimeoutInterruptAction.None"; |
| 64 | + |
| 65 | +const static constexpr char* preInterruptDescriptionSMI = "SMI"; |
| 66 | +const static constexpr char* preInterruptDescriptionNMI = "NMI"; |
| 67 | +const static constexpr char* preInterruptDescriptionMI = "Messaging Interrupt"; |
| 68 | + |
| 69 | +const static constexpr char* reservedDescription = "Reserved"; |
| 70 | + |
| 71 | +const static constexpr char* timerUseDescriptionBIOSFRB2 = "BIOS FRB2"; |
| 72 | +const static constexpr char* timerUseDescriptionBIOSPOST = "BIOS/POST"; |
| 73 | +const static constexpr char* timerUseDescriptionOSLoad = "OSLoad"; |
| 74 | +const static constexpr char* timerUseDescriptionSMSOS = "SMS/OS"; |
| 75 | +const static constexpr char* timerUseDescriptionOEM = "OEM"; |
| 76 | + |
| 77 | +namespace restart |
| 78 | +{ |
| 79 | +static constexpr const char* busName = |
| 80 | + "xyz.openbmc_project.Control.Host.RestartCause"; |
| 81 | +static constexpr const char* path = |
| 82 | + "/xyz/openbmc_project/control/host0/restart_cause"; |
| 83 | +static constexpr const char* interface = |
| 84 | + "xyz.openbmc_project.Control.Host.RestartCause"; |
| 85 | +static constexpr const char* property = "RequestedRestartCause"; |
| 86 | +} // namespace restart |
| 87 | + |
| 88 | +// chassis state manager service |
| 89 | +namespace chassis |
| 90 | +{ |
| 91 | +static constexpr const char* busName = "xyz.openbmc_project.State.Chassis"; |
| 92 | +static constexpr const char* path = "/xyz/openbmc_project/state/chassis0"; |
| 93 | +static constexpr const char* interface = "xyz.openbmc_project.State.Chassis"; |
| 94 | +static constexpr const char* request = "RequestedPowerTransition"; |
| 95 | +} // namespace chassis |
| 96 | + |
| 97 | +void Watchdog::powerStateChangedHandler( |
| 98 | + const std::map<std::string, std::variant<std::string>>& props) |
| 99 | +{ |
| 100 | + const auto iter = props.find(currentHostState); |
| 101 | + if (iter != props.end()) |
| 102 | + { |
| 103 | + const std::string* powerState = std::get_if<std::string>(&iter->second); |
| 104 | + if (powerState && (*powerState == hostStatusOff)) |
| 105 | + { |
| 106 | + if (timerEnabled()) |
| 107 | + { |
| 108 | + enabled(false); |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | +} |
| 113 | |
| 114 | void Watchdog::resetTimeRemaining(bool enableWatchdog) |
| 115 | { |
| 116 | @@ -107,13 +169,102 @@ uint64_t Watchdog::interval(uint64_t value) |
| 117 | // Optional callback function on timer expiration |
| 118 | void Watchdog::timeOutHandler() |
| 119 | { |
| 120 | + PreTimeoutInterruptAction preTimeoutInterruptAction = preTimeoutInterrupt(); |
| 121 | + std::string preInterruptActionMessageArgs{}; |
| 122 | + |
| 123 | Action action = expireAction(); |
| 124 | + std::string actionMessageArgs{}; |
| 125 | + |
| 126 | + expiredTimerUse(currentTimerUse()); |
| 127 | + |
| 128 | + TimerUse timeUser = expiredTimerUse(); |
| 129 | + std::string timeUserMessage{}; |
| 130 | + |
| 131 | if (!this->enabled()) |
| 132 | { |
| 133 | action = fallback->action; |
| 134 | } |
| 135 | |
| 136 | - expiredTimerUse(currentTimerUse()); |
| 137 | + switch (timeUser) |
| 138 | + { |
| 139 | + case Watchdog::TimerUse::BIOSFRB2: |
| 140 | + timeUserMessage = timerUseDescriptionBIOSFRB2; |
| 141 | + break; |
| 142 | + case Watchdog::TimerUse::BIOSPOST: |
| 143 | + timeUserMessage = timerUseDescriptionBIOSPOST; |
| 144 | + break; |
| 145 | + case Watchdog::TimerUse::OSLoad: |
| 146 | + timeUserMessage = timerUseDescriptionOSLoad; |
| 147 | + break; |
| 148 | + case Watchdog::TimerUse::SMSOS: |
| 149 | + timeUserMessage = timerUseDescriptionSMSOS; |
| 150 | + break; |
| 151 | + case Watchdog::TimerUse::OEM: |
| 152 | + timeUserMessage = timerUseDescriptionOEM; |
| 153 | + break; |
| 154 | + default: |
| 155 | + timeUserMessage = reservedDescription; |
| 156 | + break; |
| 157 | + } |
| 158 | + |
| 159 | + switch (action) |
| 160 | + { |
| 161 | + case Watchdog::Action::HardReset: |
| 162 | + actionMessageArgs = std::string(hardResetDescription) + |
| 163 | + std::string(actionDescription); |
| 164 | + break; |
| 165 | + case Watchdog::Action::PowerOff: |
| 166 | + actionMessageArgs = std::string(powerOffDescription) + |
| 167 | + std::string(actionDescription); |
| 168 | + break; |
| 169 | + case Watchdog::Action::PowerCycle: |
| 170 | + actionMessageArgs = std::string(powerCycleDescription) + |
| 171 | + std::string(actionDescription); |
| 172 | + break; |
| 173 | + case Watchdog::Action::None: |
| 174 | + actionMessageArgs = timerExpiredDescription; |
| 175 | + break; |
| 176 | + default: |
| 177 | + actionMessageArgs = reservedDescription; |
| 178 | + break; |
| 179 | + } |
| 180 | + |
| 181 | + // Log into redfish event log |
| 182 | + sd_journal_send("MESSAGE=IPMIWatchdog: Timed out ACTION=%s", |
| 183 | + convertForMessage(action).c_str(), "PRIORITY=%i", LOG_INFO, |
| 184 | + "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.IPMIWatchdog", |
| 185 | + "REDFISH_MESSAGE_ARGS=%s. timer use: %s", |
| 186 | + actionMessageArgs.c_str(), timeUserMessage.c_str(), NULL); |
| 187 | + |
| 188 | + switch (preTimeoutInterruptAction) |
| 189 | + { |
| 190 | + case Watchdog::PreTimeoutInterruptAction::SMI: |
| 191 | + preInterruptActionMessageArgs = preInterruptDescriptionSMI; |
| 192 | + break; |
| 193 | + case Watchdog::PreTimeoutInterruptAction::NMI: |
| 194 | + preInterruptActionMessageArgs = preInterruptDescriptionNMI; |
| 195 | + break; |
| 196 | + case Watchdog::PreTimeoutInterruptAction::MI: |
| 197 | + preInterruptActionMessageArgs = preInterruptDescriptionMI; |
| 198 | + break; |
| 199 | + default: |
| 200 | + preInterruptActionMessageArgs = reservedDescription; |
| 201 | + break; |
| 202 | + } |
| 203 | + |
| 204 | + if (preInterruptActionNone != convertForMessage(preTimeoutInterruptAction)) |
| 205 | + { |
| 206 | + preTimeoutInterruptOccurFlag(true); |
| 207 | + |
| 208 | + sd_journal_send("MESSAGE=IPMIWatchdog: Pre Timed out Interrupt=%s", |
| 209 | + convertForMessage(preTimeoutInterruptAction).c_str(), |
| 210 | + "PRIORITY=%i", LOG_INFO, "REDFISH_MESSAGE_ID=%s", |
| 211 | + "OpenBMC.0.1.IPMIWatchdog", |
| 212 | + "REDFISH_MESSAGE_ARGS=Timer interrupt - %s due to " |
| 213 | + "Watchdog timeout. timer use: %s", |
| 214 | + preInterruptActionMessageArgs.c_str(), |
| 215 | + timeUserMessage.c_str(), NULL); |
| 216 | + } |
| 217 | |
| 218 | auto target = actionTargetMap.find(action); |
| 219 | if (target == actionTargetMap.end()) |
| 220 | @@ -133,10 +284,11 @@ void Watchdog::timeOutHandler() |
| 221 | |
| 222 | try |
| 223 | { |
| 224 | - auto method = bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_ROOT, |
| 225 | - SYSTEMD_INTERFACE, "StartUnit"); |
| 226 | - method.append(target->second); |
| 227 | - method.append("replace"); |
| 228 | + auto method = |
| 229 | + bus.new_method_call(chassis::busName, chassis::path, |
| 230 | + "org.freedesktop.DBus.Properties", "Set"); |
| 231 | + method.append(chassis::interface, chassis::request, |
| 232 | + std::variant<std::string>(target->second)); |
| 233 | |
| 234 | bus.call_noreply(method); |
| 235 | } |
| 236 | @@ -147,6 +299,29 @@ void Watchdog::timeOutHandler() |
| 237 | entry("ERROR=%s", e.what())); |
| 238 | commit<InternalFailure>(); |
| 239 | } |
| 240 | + |
| 241 | + // set restart cause for watchdog HardReset & PowerCycle actions |
| 242 | + if ((action == Watchdog::Action::HardReset) || |
| 243 | + (action == Watchdog::Action::PowerCycle)) |
| 244 | + { |
| 245 | + try |
| 246 | + { |
| 247 | + auto method = bus.new_method_call( |
| 248 | + restart::busName, restart::path, |
| 249 | + "org.freedesktop.DBus.Properties", "Set"); |
| 250 | + method.append( |
| 251 | + restart::interface, restart::property, |
| 252 | + std::variant<std::string>("xyz.openbmc_project.State.Host." |
| 253 | + "RestartCause.WatchdogTimer")); |
| 254 | + bus.call(method); |
| 255 | + } |
| 256 | + catch (sdbusplus::exception_t& e) |
| 257 | + { |
| 258 | + log<level::ERR>("Failed to set HostRestartCause property", |
| 259 | + entry("ERROR=%s", e.what())); |
| 260 | + commit<InternalFailure>(); |
| 261 | + } |
| 262 | + } |
| 263 | } |
| 264 | |
| 265 | tryFallbackOrDisable(); |
| 266 | diff --git a/watchdog.hpp b/watchdog.hpp |
| 267 | index 7de9bb3..b004b7a 100644 |
| 268 | --- a/watchdog.hpp |
| 269 | +++ b/watchdog.hpp |
| 270 | @@ -68,7 +68,18 @@ class Watchdog : public WatchdogInherits |
| 271 | WatchdogInherits(bus, objPath), |
| 272 | bus(bus), actionTargetMap(std::move(actionTargetMap)), |
| 273 | fallback(std::move(fallback)), minInterval(minInterval), |
| 274 | - timer(event, std::bind(&Watchdog::timeOutHandler, this)) |
| 275 | + timer(event, std::bind(&Watchdog::timeOutHandler, this)), |
| 276 | + powerStateChangedSignal( |
| 277 | + bus, |
| 278 | + sdbusplus::bus::match::rules::propertiesChanged( |
| 279 | + "/xyz/openbmc_project/state/host0", |
| 280 | + "xyz.openbmc_project.State.Host"), |
| 281 | + [this](sdbusplus::message::message& msg) { |
| 282 | + std::string objectName; |
| 283 | + std::map<std::string, std::variant<std::string>> props; |
| 284 | + msg.read(objectName, props); |
| 285 | + powerStateChangedHandler(props); |
| 286 | + }) |
| 287 | { |
| 288 | // We set the watchdog interval with the default value. |
| 289 | interval(interval()); |
| 290 | @@ -77,6 +88,12 @@ class Watchdog : public WatchdogInherits |
| 291 | tryFallbackOrDisable(); |
| 292 | } |
| 293 | |
| 294 | + /** @brief Disable the watchdog when the host state changes to |
| 295 | + * Off and the timer is currently enabled |
| 296 | + */ |
| 297 | + void powerStateChangedHandler( |
| 298 | + const std::map<std::string, std::variant<std::string>>& props); |
| 299 | + |
| 300 | /** @brief Resets the TimeRemaining to the configured Interval |
| 301 | * Optionally enables the watchdog. |
| 302 | * |
| 303 | @@ -165,6 +182,10 @@ class Watchdog : public WatchdogInherits |
| 304 | /** @brief Contained timer object */ |
| 305 | sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic> timer; |
| 306 | |
| 307 | + /** @brief Match on host state property changes; its callback |
| 308 | + * forwards the changed properties to powerStateChangedHandler */ |
| 309 | + sdbusplus::bus::match_t powerStateChangedSignal; |
| 310 | + |
| 311 | /** @brief Optional Callback handler on timer expirartion */ |
| 312 | void timeOutHandler(); |
| 313 | |