blob: 3cbc677dee1d320c109eee0223ecc3afbb1a119b [file] [log] [blame]
Alexander Hansen46a755f2025-10-27 16:31:08 +01001// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright 2017 Google Inc
3
Pete O_o765a6d82025-07-23 21:44:14 -07004#include "config.h"
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -06005
Patrick Ventureda4a5dd2018-08-31 09:42:48 -07006#include "dbuspassive.hpp"
7
Ed Tanousf8b6e552025-06-27 13:27:50 -07008#include "conf.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -07009#include "dbushelper_interface.hpp"
James Feist98b704e2019-06-03 16:24:53 -070010#include "dbuspassiveredundancy.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070011#include "dbusutil.hpp"
James Zheng6df8bb52024-11-27 23:38:47 +000012#include "failsafeloggers/failsafe_logger_utility.hpp"
Ed Tanousf8b6e552025-06-27 13:27:50 -070013#include "interfaces.hpp"
James Feist0c8223b2019-05-08 15:33:33 -070014#include "util.hpp"
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070015
Ed Tanousf8b6e552025-06-27 13:27:50 -070016#include <systemd/sd-bus.h>
17
Patrick Venturea83a3ec2020-08-04 09:52:05 -070018#include <sdbusplus/bus.hpp>
Ed Tanousf8b6e552025-06-27 13:27:50 -070019#include <sdbusplus/message.hpp>
Patrick Venturea83a3ec2020-08-04 09:52:05 -070020
Patrick Venture863b9242018-03-08 08:29:23 -080021#include <chrono>
22#include <cmath>
Ed Tanousf8b6e552025-06-27 13:27:50 -070023#include <cstdint>
24#include <exception>
25#include <limits>
26#include <map>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070027#include <memory>
Patrick Venture863b9242018-03-08 08:29:23 -080028#include <mutex>
Ed Tanousf8b6e552025-06-27 13:27:50 -070029#include <set>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070030#include <string>
Ed Tanousf8b6e552025-06-27 13:27:50 -070031#include <utility>
James Feist1f802f52019-02-08 13:51:43 -080032#include <variant>
Patrick Venture863b9242018-03-08 08:29:23 -080033
James Zheng6df8bb52024-11-27 23:38:47 +000034#include "failsafeloggers/failsafe_logger.cpp"
35
Patrick Venturea0764872020-08-08 07:48:43 -070036namespace pid_control
37{
38
Patrick Venture563a3562018-10-30 09:31:26 -070039std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050040 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070041 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
James Feist98b704e2019-06-03 16:24:53 -070042 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
Patrick Venture0ef1faf2018-06-13 12:50:53 -070043{
44 if (helper == nullptr)
45 {
46 return nullptr;
47 }
Patrick Venture7af157b2018-10-30 11:24:40 -070048 if (!validType(type))
Patrick Venture0ef1faf2018-06-13 12:50:53 -070049 {
50 return nullptr;
51 }
52
Patrick Venture863b9242018-03-08 08:29:23 -080053 /* Need to get the scale and initial value */
Patrick Venture863b9242018-03-08 08:29:23 -080054 /* service == busname */
Harvey.Wuf2efcbb2022-02-09 10:24:30 +080055 std::string path;
56 if (info->readPath.empty())
57 {
58 path = getSensorPath(type, id);
59 }
60 else
61 {
62 path = info->readPath;
63 }
Patrick Venture34ddc902018-10-30 11:05:17 -070064
Patrick Venture1df9e872020-10-08 15:35:01 -070065 SensorProperties settings;
Patrick Venturef8cb4642018-10-30 12:02:53 -070066 bool failed;
Eric Yang3bfece82025-08-26 10:20:01 +000067 bool objectMissing = false;
Eric Yang897f31c2025-05-16 20:40:56 +080068 std::string service;
Patrick Venture863b9242018-03-08 08:29:23 -080069
Patrick Venturef8cb4642018-10-30 12:02:53 -070070 try
71 {
Eric Yang897f31c2025-05-16 20:40:56 +080072 service = helper->getService(sensorintf, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070073 }
74 catch (const std::exception& e)
75 {
Chaul Lya552fe22024-11-15 10:20:28 +000076#ifndef HANDLE_MISSING_OBJECT_PATHS
Patrick Venturef8cb4642018-10-30 12:02:53 -070077 return nullptr;
Chaul Lya552fe22024-11-15 10:20:28 +000078#else
79 // CASE1: The sensor is not on DBus, but as it is not in the
80 // MissingIsAcceptable list, the sensor should be built with a failed
81 // state to send the zone to failsafe mode. Everything will recover if
82 // all important sensors are back to DBus. swampd will be informed
83 // through InterfacesAdded signals and the sensors will be built again.
84
Eric Yang897f31c2025-05-16 20:40:56 +080085 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
86 // fails (e.g., D-Bus error or property fetch failure). In this case,
87 // handle-missing-object-paths does not apply. The sensor build fails,
88 // and the control loop will keep restarting until getProperties
89 // succeeds.
Chaul Lya552fe22024-11-15 10:20:28 +000090
Eric Yang897f31c2025-05-16 20:40:56 +080091 // Only CASE1 may send the zone to failsafe mode if the sensor is not
92 // in MissingIsAcceptable. CASE2 results in continuous restart until
93 // recovery.
Chaul Lya552fe22024-11-15 10:20:28 +000094
95 failed = true;
Eric Yang3bfece82025-08-26 10:20:01 +000096 objectMissing = true;
Chaul Lya552fe22024-11-15 10:20:28 +000097 settings.value = std::numeric_limits<double>::quiet_NaN();
98 settings.unit = getSensorUnit(type);
99 settings.available = false;
Eric Yang897f31c2025-05-16 20:40:56 +0800100 settings.unavailableAsFailed = true;
101 if (info->ignoreDbusMinMax)
102 {
103 settings.min = 0;
104 settings.max = 0;
105 }
Chaul Lya552fe22024-11-15 10:20:28 +0000106 std::cerr << "DbusPassive: Sensor " << path
107 << " is missing from D-Bus, build this sensor as failed\n";
Eric Yang897f31c2025-05-16 20:40:56 +0800108 return std::make_unique<DbusPassive>(
Eric Yang3bfece82025-08-26 10:20:01 +0000109 bus, type, id, std::move(helper), settings, failed, objectMissing,
110 path, redundancy);
Chaul Lya552fe22024-11-15 10:20:28 +0000111#endif
Patrick Venturef8cb4642018-10-30 12:02:53 -0700112 }
113
Eric Yang897f31c2025-05-16 20:40:56 +0800114 try
115 {
116 helper->getProperties(service, path, &settings);
117 failed = helper->thresholdsAsserted(service, path);
118 }
119 catch (const std::exception& e)
120 {
121 return nullptr;
122 }
123
Patrick Venture6b9f5992019-09-10 09:18:28 -0700124 /* if these values are zero, they're ignored. */
125 if (info->ignoreDbusMinMax)
126 {
127 settings.min = 0;
128 settings.max = 0;
129 }
130
Alex.Song8f73ad72021-10-07 00:18:27 +0800131 settings.unavailableAsFailed = info->unavailableAsFailed;
132
Eric Yang3bfece82025-08-26 10:20:01 +0000133 return std::make_unique<DbusPassive>(
134 bus, type, id, std::move(helper), settings, failed, objectMissing, path,
135 redundancy);
Patrick Venturef8cb4642018-10-30 12:02:53 -0700136}
137
James Feist98b704e2019-06-03 16:24:53 -0700138DbusPassive::DbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -0500139 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -0700140 std::unique_ptr<DbusHelperInterface> helper,
Eric Yang3bfece82025-08-26 10:20:01 +0000141 const SensorProperties& settings, bool failed, bool objectMissing,
142 const std::string& path,
James Feist98b704e2019-06-03 16:24:53 -0700143 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400144 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
Eric Yang3bfece82025-08-26 10:20:01 +0000145 _id(id), _helper(std::move(helper)), _failed(failed),
146 _objectMissing(objectMissing), path(path), redundancy(redundancy)
James Feist98b704e2019-06-03 16:24:53 -0700147
Patrick Venturef8cb4642018-10-30 12:02:53 -0700148{
Patrick Venture863b9242018-03-08 08:29:23 -0800149 _scale = settings.scale;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700150 _min = settings.min * std::pow(10.0, _scale);
151 _max = settings.max * std::pow(10.0, _scale);
Alex.Song8f73ad72021-10-07 00:18:27 +0800152 _available = settings.available;
153 _unavailableAsFailed = settings.unavailableAsFailed;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700154
155 // Cache this type knowledge, to avoid repeated string comparison
156 _typeMargin = (type == "margin");
Alex.Song8f73ad72021-10-07 00:18:27 +0800157 _typeFan = (type == "fan");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700158
159 // Force value to be stored, otherwise member would be uninitialized
160 updateValue(settings.value, true);
Patrick Venture863b9242018-03-08 08:29:23 -0800161}
162
163ReadReturn DbusPassive::read(void)
164{
165 std::lock_guard<std::mutex> guard(_lock);
166
Josh Lehanb3005752022-02-22 20:48:07 -0800167 ReadReturn r = {_value, _updated, _unscaled};
Patrick Venture863b9242018-03-08 08:29:23 -0800168
169 return r;
170}
171
Josh Lehanb3005752022-02-22 20:48:07 -0800172void DbusPassive::setValue(double value, double unscaled)
Patrick Venture863b9242018-03-08 08:29:23 -0800173{
174 std::lock_guard<std::mutex> guard(_lock);
175
176 _value = value;
Josh Lehanb3005752022-02-22 20:48:07 -0800177 _unscaled = unscaled;
Patrick Venture863b9242018-03-08 08:29:23 -0800178 _updated = std::chrono::high_resolution_clock::now();
179}
180
Josh Lehanb3005752022-02-22 20:48:07 -0800181void DbusPassive::setValue(double value)
182{
183 // First param is scaled, second param is unscaled, assume same here
184 setValue(value, value);
185}
186
James Feist36b7d8e2018-10-05 15:39:01 -0700187bool DbusPassive::getFailed(void) const
188{
James Feist98b704e2019-06-03 16:24:53 -0700189 if (redundancy)
190 {
191 const std::set<std::string>& failures = redundancy->getFailed();
192 if (failures.find(path) != failures.end())
193 {
James Zheng6df8bb52024-11-27 23:38:47 +0000194 outputFailsafeLogWithSensor(_id, true, _id,
195 "The sensor path is marked redundant.");
James Feist98b704e2019-06-03 16:24:53 -0700196 return true;
197 }
198 }
James Feist4b36f262020-07-07 16:56:41 -0700199
Alex.Song8f73ad72021-10-07 00:18:27 +0800200 /*
Eric Yang3bfece82025-08-26 10:20:01 +0000201 * If handle-missing-object-paths is enabled, and the expected D-Bus object
202 * path is not exported, this sensor is created to represent that condition.
203 * Indicate this sensor has failed so the zone enters failSafe mode.
204 */
205 if (_objectMissing)
206 {
207 outputFailsafeLogWithSensor(_id, true, _id,
208 "The sensor D-Bus object is missing.");
209 return true;
210 }
211
212 /*
Alex.Song8f73ad72021-10-07 00:18:27 +0800213 * Unavailable thermal sensors, who are not present or
214 * power-state-not-matching, should not trigger the failSafe mode. For
215 * example, when a system stays at a powered-off state, its CPU Temp
216 * sensors will be unavailable, these unavailable sensors should not be
217 * treated as failed and trigger failSafe.
218 * This is important for systems whose Fans are always on.
219 */
220 if (!_typeFan && !_available && !_unavailableAsFailed)
221 {
222 return false;
223 }
224
Josh Lehan3e2f7582020-09-20 22:06:03 -0700225 // If a reading has came in,
226 // but its value bad in some way (determined by sensor type),
227 // indicate this sensor has failed,
228 // until another value comes in that is no longer bad.
229 // This is different from the overall _failed flag,
230 // which is set and cleared by other causes.
231 if (_badReading)
232 {
James Zheng6df8bb52024-11-27 23:38:47 +0000233 outputFailsafeLogWithSensor(_id, true, _id,
234 "The sensor has bad readings.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700235 return true;
236 }
237
238 // If a reading has came in, and it is not a bad reading,
239 // but it indicates there is no more thermal margin left,
240 // that is bad, something is wrong with the PID loops,
241 // they are not cooling the system, enable failsafe mode also.
242 if (_marginHot)
243 {
James Zheng6df8bb52024-11-27 23:38:47 +0000244 outputFailsafeLogWithSensor(_id, true, _id,
245 "The sensor has no thermal margin left.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700246 return true;
247 }
248
James Zheng6df8bb52024-11-27 23:38:47 +0000249 if (_failed)
250 {
251 outputFailsafeLogWithSensor(
252 _id, true, _id, "The sensor has failed with a critical issue.");
253 return true;
254 }
255
256 if (!_available)
257 {
258 outputFailsafeLogWithSensor(_id, true, _id,
259 "The sensor is unavailable.");
260 return true;
261 }
262
263 if (!_functional)
264 {
265 outputFailsafeLogWithSensor(_id, true, _id,
266 "The sensor is not functional.");
267 return true;
268 }
269
270 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
271
272 return false;
James Feist36b7d8e2018-10-05 15:39:01 -0700273}
274
Harvey Wua4270072024-05-29 16:11:13 +0800275std::string DbusPassive::getFailReason(void) const
276{
Eric Yang3bfece82025-08-26 10:20:01 +0000277 if (_objectMissing)
278 {
279 return "Sensor D-Bus object missing";
280 }
Harvey Wua4270072024-05-29 16:11:13 +0800281 if (_badReading)
282 {
283 return "Sensor reading bad";
284 }
285 if (_marginHot)
286 {
287 return "Margin hot";
288 }
289 if (_failed)
290 {
291 return "Sensor threshold asserted";
292 }
293 if (!_available)
294 {
295 return "Sensor unavailable";
296 }
297 if (!_functional)
298 {
299 return "Sensor not functional";
300 }
301 return "Unknown";
302}
303
James Feist36b7d8e2018-10-05 15:39:01 -0700304void DbusPassive::setFailed(bool value)
305{
306 _failed = value;
307}
308
James Feist4b36f262020-07-07 16:56:41 -0700309void DbusPassive::setFunctional(bool value)
310{
311 _functional = value;
312}
313
Alex.Song8f73ad72021-10-07 00:18:27 +0800314void DbusPassive::setAvailable(bool value)
315{
316 _available = value;
317}
318
Patrick Venture863b9242018-03-08 08:29:23 -0800319int64_t DbusPassive::getScale(void)
320{
321 return _scale;
322}
323
Patrick Venture563a3562018-10-30 09:31:26 -0700324std::string DbusPassive::getID(void)
Patrick Venture863b9242018-03-08 08:29:23 -0800325{
326 return _id;
327}
328
James Feist75eb7692019-02-25 12:50:02 -0800329double DbusPassive::getMax(void)
330{
331 return _max;
332}
333
334double DbusPassive::getMin(void)
335{
336 return _min;
337}
338
Josh Lehan3e2f7582020-09-20 22:06:03 -0700339void DbusPassive::updateValue(double value, bool force)
340{
341 _badReading = false;
342
343 // Do not let a NAN, or other floating-point oddity, be used to update
344 // the value, as that indicates the sensor has no valid reading.
345 if (!(std::isfinite(value)))
346 {
347 _badReading = true;
348
349 // Do not continue with a bad reading, unless caller forcing
350 if (!force)
351 {
352 return;
353 }
354 }
355
356 value *= std::pow(10.0, _scale);
357
358 auto unscaled = value;
359 scaleSensorReading(_min, _max, value);
360
361 if (_typeMargin)
362 {
363 _marginHot = false;
364
365 // Unlike an absolute temperature sensor,
366 // where 0 degrees C is a good reading,
367 // a value received of 0 (or negative) margin is worrisome,
368 // and should be flagged.
369 // Either it indicates margin not calculated properly,
370 // or somebody forgot to set the margin-zero setpoint,
371 // or the system is really overheating that much.
372 // This is a different condition from _failed
373 // and _badReading, so it merits its own flag.
374 // The sensor has not failed, the reading is good, but the zone
375 // still needs to know that it should go to failsafe mode.
376 if (unscaled <= 0.0)
377 {
378 _marginHot = true;
379 }
380 }
381
Josh Lehanb3005752022-02-22 20:48:07 -0800382 setValue(value, unscaled);
Josh Lehan3e2f7582020-09-20 22:06:03 -0700383}
384
Patrick Williamsb228bc32022-07-22 19:26:56 -0500385int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
Patrick Venture863b9242018-03-08 08:29:23 -0800386{
Patrick Venture863b9242018-03-08 08:29:23 -0800387 std::string msgSensor;
James Feist1f802f52019-02-08 13:51:43 -0800388 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
Patrick Ventured0c75662018-06-12 19:03:21 -0700389
390 msg.read(msgSensor, msgData);
Patrick Venture863b9242018-03-08 08:29:23 -0800391
392 if (msgSensor == "xyz.openbmc_project.Sensor.Value")
393 {
394 auto valPropMap = msgData.find("Value");
395 if (valPropMap != msgData.end())
396 {
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400397 double value =
398 std::visit(VariantToDoubleVisitor(), valPropMap->second);
Patrick Venture863b9242018-03-08 08:29:23 -0800399
Josh Lehan3e2f7582020-09-20 22:06:03 -0700400 owner->updateValue(value, false);
Patrick Venture863b9242018-03-08 08:29:23 -0800401 }
402 }
James Feist36b7d8e2018-10-05 15:39:01 -0700403 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
404 {
405 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
406 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
407 if (criticalAlarmHigh == msgData.end() &&
408 criticalAlarmLow == msgData.end())
409 {
410 return 0;
411 }
412
413 bool asserted = false;
414 if (criticalAlarmLow != msgData.end())
415 {
James Feist1f802f52019-02-08 13:51:43 -0800416 asserted = std::get<bool>(criticalAlarmLow->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700417 }
418
419 // checking both as in theory you could de-assert one threshold and
420 // assert the other at the same moment
421 if (!asserted && criticalAlarmHigh != msgData.end())
422 {
James Feist1f802f52019-02-08 13:51:43 -0800423 asserted = std::get<bool>(criticalAlarmHigh->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700424 }
425 owner->setFailed(asserted);
426 }
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -0600427#ifdef UNC_FAILSAFE
428 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
429 {
430 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
431 if (warningAlarmHigh == msgData.end())
432 {
433 return 0;
434 }
435
436 bool asserted = false;
437 if (warningAlarmHigh != msgData.end())
438 {
439 asserted = std::get<bool>(warningAlarmHigh->second);
440 }
441 owner->setFailed(asserted);
442 }
443#endif
Alex.Song8f73ad72021-10-07 00:18:27 +0800444 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
445 {
446 auto available = msgData.find("Available");
447 if (available == msgData.end())
448 {
449 return 0;
450 }
451 bool asserted = std::get<bool>(available->second);
452 owner->setAvailable(asserted);
453 if (!asserted)
454 {
455 // A thermal controller will continue its PID calculation and not
456 // trigger a 'failsafe' when some inputs are unavailable.
457 // So, forced to clear the value here to prevent a historical
458 // value to participate in a latter PID calculation.
459 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
460 }
461 }
James Feist4b36f262020-07-07 16:56:41 -0700462 else if (msgSensor ==
463 "xyz.openbmc_project.State.Decorator.OperationalStatus")
464 {
465 auto functional = msgData.find("Functional");
466 if (functional == msgData.end())
467 {
468 return 0;
469 }
470 bool asserted = std::get<bool>(functional->second);
471 owner->setFunctional(asserted);
472 }
Patrick Venture863b9242018-03-08 08:29:23 -0800473
474 return 0;
475}
Patrick Ventured0c75662018-06-12 19:03:21 -0700476
Harvey.Wua1ae4fa2022-10-28 17:38:35 +0800477int dbusHandleSignal(sd_bus_message* msg, void* usrData,
478 [[maybe_unused]] sd_bus_error* err)
Patrick Ventured0c75662018-06-12 19:03:21 -0700479{
Patrick Williamsb228bc32022-07-22 19:26:56 -0500480 auto sdbpMsg = sdbusplus::message_t(msg);
Patrick Ventured0c75662018-06-12 19:03:21 -0700481 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
482
Patrick Venture7af157b2018-10-30 11:24:40 -0700483 return handleSensorValue(sdbpMsg, obj);
Patrick Ventured0c75662018-06-12 19:03:21 -0700484}
Patrick Venturea0764872020-08-08 07:48:43 -0700485
486} // namespace pid_control