blob: 3e56b52795fc00c8bf5bbb796e76fee62e788758 [file] [log] [blame]
Alexander Hansen46a755f2025-10-27 16:31:08 +01001// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright 2017 Google Inc
3
Pete O_o765a6d82025-07-23 21:44:14 -07004#include "config.h"
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -06005
Patrick Ventureda4a5dd2018-08-31 09:42:48 -07006#include "dbuspassive.hpp"
7
Ed Tanousf8b6e552025-06-27 13:27:50 -07008#include "conf.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -07009#include "dbushelper_interface.hpp"
James Feist98b704e2019-06-03 16:24:53 -070010#include "dbuspassiveredundancy.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070011#include "dbusutil.hpp"
James Zheng6df8bb52024-11-27 23:38:47 +000012#include "failsafeloggers/failsafe_logger_utility.hpp"
Ed Tanousf8b6e552025-06-27 13:27:50 -070013#include "interfaces.hpp"
James Feist0c8223b2019-05-08 15:33:33 -070014#include "util.hpp"
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070015
Ed Tanousf8b6e552025-06-27 13:27:50 -070016#include <systemd/sd-bus.h>
17
Patrick Venturea83a3ec2020-08-04 09:52:05 -070018#include <sdbusplus/bus.hpp>
Ed Tanousf8b6e552025-06-27 13:27:50 -070019#include <sdbusplus/message.hpp>
Alexander Hansendae4a3a2025-11-11 17:12:01 +010020#include <xyz/openbmc_project/Sensor/Value/client.hpp>
Patrick Venturea83a3ec2020-08-04 09:52:05 -070021
Patrick Venture863b9242018-03-08 08:29:23 -080022#include <chrono>
23#include <cmath>
Ed Tanousf8b6e552025-06-27 13:27:50 -070024#include <cstdint>
25#include <exception>
26#include <limits>
27#include <map>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070028#include <memory>
Patrick Venture863b9242018-03-08 08:29:23 -080029#include <mutex>
Ed Tanousf8b6e552025-06-27 13:27:50 -070030#include <set>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070031#include <string>
Ed Tanousf8b6e552025-06-27 13:27:50 -070032#include <utility>
James Feist1f802f52019-02-08 13:51:43 -080033#include <variant>
Patrick Venture863b9242018-03-08 08:29:23 -080034
James Zheng6df8bb52024-11-27 23:38:47 +000035#include "failsafeloggers/failsafe_logger.cpp"
36
Alexander Hansendae4a3a2025-11-11 17:12:01 +010037using SensorValue = sdbusplus::common::xyz::openbmc_project::sensor::Value;
38
Patrick Venturea0764872020-08-08 07:48:43 -070039namespace pid_control
40{
41
Patrick Venture563a3562018-10-30 09:31:26 -070042std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050043 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070044 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
James Feist98b704e2019-06-03 16:24:53 -070045 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
Patrick Venture0ef1faf2018-06-13 12:50:53 -070046{
47 if (helper == nullptr)
48 {
49 return nullptr;
50 }
Patrick Venture7af157b2018-10-30 11:24:40 -070051 if (!validType(type))
Patrick Venture0ef1faf2018-06-13 12:50:53 -070052 {
53 return nullptr;
54 }
55
Patrick Venture863b9242018-03-08 08:29:23 -080056 /* Need to get the scale and initial value */
Patrick Venture863b9242018-03-08 08:29:23 -080057 /* service == busname */
Harvey.Wuf2efcbb2022-02-09 10:24:30 +080058 std::string path;
59 if (info->readPath.empty())
60 {
61 path = getSensorPath(type, id);
62 }
63 else
64 {
65 path = info->readPath;
66 }
Patrick Venture34ddc902018-10-30 11:05:17 -070067
Patrick Venture1df9e872020-10-08 15:35:01 -070068 SensorProperties settings;
Patrick Venturef8cb4642018-10-30 12:02:53 -070069 bool failed;
Eric Yang3bfece82025-08-26 10:20:01 +000070 bool objectMissing = false;
Eric Yang897f31c2025-05-16 20:40:56 +080071 std::string service;
Patrick Venture863b9242018-03-08 08:29:23 -080072
Patrick Venturef8cb4642018-10-30 12:02:53 -070073 try
74 {
Alexander Hansendae4a3a2025-11-11 17:12:01 +010075 service = helper->getService(SensorValue::interface, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070076 }
77 catch (const std::exception& e)
78 {
Chaul Lya552fe22024-11-15 10:20:28 +000079#ifndef HANDLE_MISSING_OBJECT_PATHS
Patrick Venturef8cb4642018-10-30 12:02:53 -070080 return nullptr;
Chaul Lya552fe22024-11-15 10:20:28 +000081#else
82 // CASE1: The sensor is not on DBus, but as it is not in the
83 // MissingIsAcceptable list, the sensor should be built with a failed
84 // state to send the zone to failsafe mode. Everything will recover if
85 // all important sensors are back to DBus. swampd will be informed
86 // through InterfacesAdded signals and the sensors will be built again.
87
Eric Yang897f31c2025-05-16 20:40:56 +080088 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
89 // fails (e.g., D-Bus error or property fetch failure). In this case,
90 // handle-missing-object-paths does not apply. The sensor build fails,
91 // and the control loop will keep restarting until getProperties
92 // succeeds.
Chaul Lya552fe22024-11-15 10:20:28 +000093
Eric Yang897f31c2025-05-16 20:40:56 +080094 // Only CASE1 may send the zone to failsafe mode if the sensor is not
95 // in MissingIsAcceptable. CASE2 results in continuous restart until
96 // recovery.
Chaul Lya552fe22024-11-15 10:20:28 +000097
98 failed = true;
Eric Yang3bfece82025-08-26 10:20:01 +000099 objectMissing = true;
Chaul Lya552fe22024-11-15 10:20:28 +0000100 settings.value = std::numeric_limits<double>::quiet_NaN();
101 settings.unit = getSensorUnit(type);
102 settings.available = false;
Eric Yang897f31c2025-05-16 20:40:56 +0800103 settings.unavailableAsFailed = true;
104 if (info->ignoreDbusMinMax)
105 {
106 settings.min = 0;
107 settings.max = 0;
108 }
Chaul Lya552fe22024-11-15 10:20:28 +0000109 std::cerr << "DbusPassive: Sensor " << path
110 << " is missing from D-Bus, build this sensor as failed\n";
Eric Yang897f31c2025-05-16 20:40:56 +0800111 return std::make_unique<DbusPassive>(
Eric Yang3bfece82025-08-26 10:20:01 +0000112 bus, type, id, std::move(helper), settings, failed, objectMissing,
113 path, redundancy);
Chaul Lya552fe22024-11-15 10:20:28 +0000114#endif
Patrick Venturef8cb4642018-10-30 12:02:53 -0700115 }
116
Eric Yang897f31c2025-05-16 20:40:56 +0800117 try
118 {
119 helper->getProperties(service, path, &settings);
120 failed = helper->thresholdsAsserted(service, path);
121 }
122 catch (const std::exception& e)
123 {
124 return nullptr;
125 }
126
Patrick Venture6b9f5992019-09-10 09:18:28 -0700127 /* if these values are zero, they're ignored. */
128 if (info->ignoreDbusMinMax)
129 {
130 settings.min = 0;
131 settings.max = 0;
132 }
133
Alex.Song8f73ad72021-10-07 00:18:27 +0800134 settings.unavailableAsFailed = info->unavailableAsFailed;
135
Eric Yang3bfece82025-08-26 10:20:01 +0000136 return std::make_unique<DbusPassive>(
137 bus, type, id, std::move(helper), settings, failed, objectMissing, path,
138 redundancy);
Patrick Venturef8cb4642018-10-30 12:02:53 -0700139}
140
James Feist98b704e2019-06-03 16:24:53 -0700141DbusPassive::DbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -0500142 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -0700143 std::unique_ptr<DbusHelperInterface> helper,
Eric Yang3bfece82025-08-26 10:20:01 +0000144 const SensorProperties& settings, bool failed, bool objectMissing,
145 const std::string& path,
James Feist98b704e2019-06-03 16:24:53 -0700146 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400147 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
Eric Yang3bfece82025-08-26 10:20:01 +0000148 _id(id), _helper(std::move(helper)), _failed(failed),
149 _objectMissing(objectMissing), path(path), redundancy(redundancy)
James Feist98b704e2019-06-03 16:24:53 -0700150
Patrick Venturef8cb4642018-10-30 12:02:53 -0700151{
Patrick Venture863b9242018-03-08 08:29:23 -0800152 _scale = settings.scale;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700153 _min = settings.min * std::pow(10.0, _scale);
154 _max = settings.max * std::pow(10.0, _scale);
Alex.Song8f73ad72021-10-07 00:18:27 +0800155 _available = settings.available;
156 _unavailableAsFailed = settings.unavailableAsFailed;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700157
158 // Cache this type knowledge, to avoid repeated string comparison
159 _typeMargin = (type == "margin");
Alex.Song8f73ad72021-10-07 00:18:27 +0800160 _typeFan = (type == "fan");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700161
162 // Force value to be stored, otherwise member would be uninitialized
163 updateValue(settings.value, true);
Patrick Venture863b9242018-03-08 08:29:23 -0800164}
165
166ReadReturn DbusPassive::read(void)
167{
168 std::lock_guard<std::mutex> guard(_lock);
169
Josh Lehanb3005752022-02-22 20:48:07 -0800170 ReadReturn r = {_value, _updated, _unscaled};
Patrick Venture863b9242018-03-08 08:29:23 -0800171
172 return r;
173}
174
Josh Lehanb3005752022-02-22 20:48:07 -0800175void DbusPassive::setValue(double value, double unscaled)
Patrick Venture863b9242018-03-08 08:29:23 -0800176{
177 std::lock_guard<std::mutex> guard(_lock);
178
179 _value = value;
Josh Lehanb3005752022-02-22 20:48:07 -0800180 _unscaled = unscaled;
Patrick Venture863b9242018-03-08 08:29:23 -0800181 _updated = std::chrono::high_resolution_clock::now();
182}
183
Josh Lehanb3005752022-02-22 20:48:07 -0800184void DbusPassive::setValue(double value)
185{
186 // First param is scaled, second param is unscaled, assume same here
187 setValue(value, value);
188}
189
James Feist36b7d8e2018-10-05 15:39:01 -0700190bool DbusPassive::getFailed(void) const
191{
James Feist98b704e2019-06-03 16:24:53 -0700192 if (redundancy)
193 {
194 const std::set<std::string>& failures = redundancy->getFailed();
195 if (failures.find(path) != failures.end())
196 {
James Zheng6df8bb52024-11-27 23:38:47 +0000197 outputFailsafeLogWithSensor(_id, true, _id,
198 "The sensor path is marked redundant.");
James Feist98b704e2019-06-03 16:24:53 -0700199 return true;
200 }
201 }
James Feist4b36f262020-07-07 16:56:41 -0700202
Alex.Song8f73ad72021-10-07 00:18:27 +0800203 /*
Eric Yang3bfece82025-08-26 10:20:01 +0000204 * If handle-missing-object-paths is enabled, and the expected D-Bus object
205 * path is not exported, this sensor is created to represent that condition.
206 * Indicate this sensor has failed so the zone enters failSafe mode.
207 */
208 if (_objectMissing)
209 {
210 outputFailsafeLogWithSensor(_id, true, _id,
211 "The sensor D-Bus object is missing.");
212 return true;
213 }
214
215 /*
Alex.Song8f73ad72021-10-07 00:18:27 +0800216 * Unavailable thermal sensors, who are not present or
217 * power-state-not-matching, should not trigger the failSafe mode. For
218 * example, when a system stays at a powered-off state, its CPU Temp
219 * sensors will be unavailable, these unavailable sensors should not be
220 * treated as failed and trigger failSafe.
221 * This is important for systems whose Fans are always on.
222 */
223 if (!_typeFan && !_available && !_unavailableAsFailed)
224 {
225 return false;
226 }
227
Josh Lehan3e2f7582020-09-20 22:06:03 -0700228 // If a reading has came in,
229 // but its value bad in some way (determined by sensor type),
230 // indicate this sensor has failed,
231 // until another value comes in that is no longer bad.
232 // This is different from the overall _failed flag,
233 // which is set and cleared by other causes.
234 if (_badReading)
235 {
James Zheng6df8bb52024-11-27 23:38:47 +0000236 outputFailsafeLogWithSensor(_id, true, _id,
237 "The sensor has bad readings.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700238 return true;
239 }
240
241 // If a reading has came in, and it is not a bad reading,
242 // but it indicates there is no more thermal margin left,
243 // that is bad, something is wrong with the PID loops,
244 // they are not cooling the system, enable failsafe mode also.
245 if (_marginHot)
246 {
James Zheng6df8bb52024-11-27 23:38:47 +0000247 outputFailsafeLogWithSensor(_id, true, _id,
248 "The sensor has no thermal margin left.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700249 return true;
250 }
251
James Zheng6df8bb52024-11-27 23:38:47 +0000252 if (_failed)
253 {
254 outputFailsafeLogWithSensor(
255 _id, true, _id, "The sensor has failed with a critical issue.");
256 return true;
257 }
258
259 if (!_available)
260 {
261 outputFailsafeLogWithSensor(_id, true, _id,
262 "The sensor is unavailable.");
263 return true;
264 }
265
266 if (!_functional)
267 {
268 outputFailsafeLogWithSensor(_id, true, _id,
269 "The sensor is not functional.");
270 return true;
271 }
272
273 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
274
275 return false;
James Feist36b7d8e2018-10-05 15:39:01 -0700276}
277
Harvey Wua4270072024-05-29 16:11:13 +0800278std::string DbusPassive::getFailReason(void) const
279{
Eric Yang3bfece82025-08-26 10:20:01 +0000280 if (_objectMissing)
281 {
282 return "Sensor D-Bus object missing";
283 }
Harvey Wua4270072024-05-29 16:11:13 +0800284 if (_badReading)
285 {
286 return "Sensor reading bad";
287 }
288 if (_marginHot)
289 {
290 return "Margin hot";
291 }
292 if (_failed)
293 {
294 return "Sensor threshold asserted";
295 }
296 if (!_available)
297 {
298 return "Sensor unavailable";
299 }
300 if (!_functional)
301 {
302 return "Sensor not functional";
303 }
304 return "Unknown";
305}
306
James Feist36b7d8e2018-10-05 15:39:01 -0700307void DbusPassive::setFailed(bool value)
308{
309 _failed = value;
310}
311
James Feist4b36f262020-07-07 16:56:41 -0700312void DbusPassive::setFunctional(bool value)
313{
314 _functional = value;
315}
316
Alex.Song8f73ad72021-10-07 00:18:27 +0800317void DbusPassive::setAvailable(bool value)
318{
319 _available = value;
320}
321
Patrick Venture863b9242018-03-08 08:29:23 -0800322int64_t DbusPassive::getScale(void)
323{
324 return _scale;
325}
326
Patrick Venture563a3562018-10-30 09:31:26 -0700327std::string DbusPassive::getID(void)
Patrick Venture863b9242018-03-08 08:29:23 -0800328{
329 return _id;
330}
331
James Feist75eb7692019-02-25 12:50:02 -0800332double DbusPassive::getMax(void)
333{
334 return _max;
335}
336
337double DbusPassive::getMin(void)
338{
339 return _min;
340}
341
Josh Lehan3e2f7582020-09-20 22:06:03 -0700342void DbusPassive::updateValue(double value, bool force)
343{
344 _badReading = false;
345
346 // Do not let a NAN, or other floating-point oddity, be used to update
347 // the value, as that indicates the sensor has no valid reading.
348 if (!(std::isfinite(value)))
349 {
350 _badReading = true;
351
352 // Do not continue with a bad reading, unless caller forcing
353 if (!force)
354 {
355 return;
356 }
357 }
358
359 value *= std::pow(10.0, _scale);
360
361 auto unscaled = value;
362 scaleSensorReading(_min, _max, value);
363
364 if (_typeMargin)
365 {
366 _marginHot = false;
367
368 // Unlike an absolute temperature sensor,
369 // where 0 degrees C is a good reading,
370 // a value received of 0 (or negative) margin is worrisome,
371 // and should be flagged.
372 // Either it indicates margin not calculated properly,
373 // or somebody forgot to set the margin-zero setpoint,
374 // or the system is really overheating that much.
375 // This is a different condition from _failed
376 // and _badReading, so it merits its own flag.
377 // The sensor has not failed, the reading is good, but the zone
378 // still needs to know that it should go to failsafe mode.
379 if (unscaled <= 0.0)
380 {
381 _marginHot = true;
382 }
383 }
384
Josh Lehanb3005752022-02-22 20:48:07 -0800385 setValue(value, unscaled);
Josh Lehan3e2f7582020-09-20 22:06:03 -0700386}
387
Patrick Williamsb228bc32022-07-22 19:26:56 -0500388int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
Patrick Venture863b9242018-03-08 08:29:23 -0800389{
Patrick Venture863b9242018-03-08 08:29:23 -0800390 std::string msgSensor;
James Feist1f802f52019-02-08 13:51:43 -0800391 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
Patrick Ventured0c75662018-06-12 19:03:21 -0700392
393 msg.read(msgSensor, msgData);
Patrick Venture863b9242018-03-08 08:29:23 -0800394
Alexander Hansendae4a3a2025-11-11 17:12:01 +0100395 if (msgSensor == SensorValue::interface)
Patrick Venture863b9242018-03-08 08:29:23 -0800396 {
Alexander Hansendae4a3a2025-11-11 17:12:01 +0100397 auto valPropMap = msgData.find(SensorValue::property_names::value);
Patrick Venture863b9242018-03-08 08:29:23 -0800398 if (valPropMap != msgData.end())
399 {
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400400 double value =
401 std::visit(VariantToDoubleVisitor(), valPropMap->second);
Patrick Venture863b9242018-03-08 08:29:23 -0800402
Josh Lehan3e2f7582020-09-20 22:06:03 -0700403 owner->updateValue(value, false);
Patrick Venture863b9242018-03-08 08:29:23 -0800404 }
405 }
James Feist36b7d8e2018-10-05 15:39:01 -0700406 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
407 {
408 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
409 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
410 if (criticalAlarmHigh == msgData.end() &&
411 criticalAlarmLow == msgData.end())
412 {
413 return 0;
414 }
415
416 bool asserted = false;
417 if (criticalAlarmLow != msgData.end())
418 {
James Feist1f802f52019-02-08 13:51:43 -0800419 asserted = std::get<bool>(criticalAlarmLow->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700420 }
421
422 // checking both as in theory you could de-assert one threshold and
423 // assert the other at the same moment
424 if (!asserted && criticalAlarmHigh != msgData.end())
425 {
James Feist1f802f52019-02-08 13:51:43 -0800426 asserted = std::get<bool>(criticalAlarmHigh->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700427 }
428 owner->setFailed(asserted);
429 }
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -0600430#ifdef UNC_FAILSAFE
431 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
432 {
433 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
434 if (warningAlarmHigh == msgData.end())
435 {
436 return 0;
437 }
438
439 bool asserted = false;
440 if (warningAlarmHigh != msgData.end())
441 {
442 asserted = std::get<bool>(warningAlarmHigh->second);
443 }
444 owner->setFailed(asserted);
445 }
446#endif
Alex.Song8f73ad72021-10-07 00:18:27 +0800447 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
448 {
449 auto available = msgData.find("Available");
450 if (available == msgData.end())
451 {
452 return 0;
453 }
454 bool asserted = std::get<bool>(available->second);
455 owner->setAvailable(asserted);
456 if (!asserted)
457 {
458 // A thermal controller will continue its PID calculation and not
459 // trigger a 'failsafe' when some inputs are unavailable.
460 // So, forced to clear the value here to prevent a historical
461 // value to participate in a latter PID calculation.
462 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
463 }
464 }
James Feist4b36f262020-07-07 16:56:41 -0700465 else if (msgSensor ==
466 "xyz.openbmc_project.State.Decorator.OperationalStatus")
467 {
468 auto functional = msgData.find("Functional");
469 if (functional == msgData.end())
470 {
471 return 0;
472 }
473 bool asserted = std::get<bool>(functional->second);
474 owner->setFunctional(asserted);
475 }
Patrick Venture863b9242018-03-08 08:29:23 -0800476
477 return 0;
478}
Patrick Ventured0c75662018-06-12 19:03:21 -0700479
Harvey.Wua1ae4fa2022-10-28 17:38:35 +0800480int dbusHandleSignal(sd_bus_message* msg, void* usrData,
481 [[maybe_unused]] sd_bus_error* err)
Patrick Ventured0c75662018-06-12 19:03:21 -0700482{
Patrick Williamsb228bc32022-07-22 19:26:56 -0500483 auto sdbpMsg = sdbusplus::message_t(msg);
Patrick Ventured0c75662018-06-12 19:03:21 -0700484 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
485
Patrick Venture7af157b2018-10-30 11:24:40 -0700486 return handleSensorValue(sdbpMsg, obj);
Patrick Ventured0c75662018-06-12 19:03:21 -0700487}
Patrick Venturea0764872020-08-08 07:48:43 -0700488
489} // namespace pid_control