blob: 6ca624973cc9db161a23493652842392296e4660 [file] [log] [blame]
Patrick Venture863b9242018-03-08 08:29:23 -08001/**
2 * Copyright 2017 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Pete O_o765a6d82025-07-23 21:44:14 -070016#include "config.h"
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -060017
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070018#include "dbuspassive.hpp"
19
Ed Tanousf8b6e552025-06-27 13:27:50 -070020#include "conf.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070021#include "dbushelper_interface.hpp"
James Feist98b704e2019-06-03 16:24:53 -070022#include "dbuspassiveredundancy.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070023#include "dbusutil.hpp"
James Zheng6df8bb52024-11-27 23:38:47 +000024#include "failsafeloggers/failsafe_logger_utility.hpp"
Ed Tanousf8b6e552025-06-27 13:27:50 -070025#include "interfaces.hpp"
James Feist0c8223b2019-05-08 15:33:33 -070026#include "util.hpp"
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070027
Ed Tanousf8b6e552025-06-27 13:27:50 -070028#include <systemd/sd-bus.h>
29
Patrick Venturea83a3ec2020-08-04 09:52:05 -070030#include <sdbusplus/bus.hpp>
Ed Tanousf8b6e552025-06-27 13:27:50 -070031#include <sdbusplus/message.hpp>
Patrick Venturea83a3ec2020-08-04 09:52:05 -070032
Patrick Venture863b9242018-03-08 08:29:23 -080033#include <chrono>
34#include <cmath>
Ed Tanousf8b6e552025-06-27 13:27:50 -070035#include <cstdint>
36#include <exception>
37#include <limits>
38#include <map>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070039#include <memory>
Patrick Venture863b9242018-03-08 08:29:23 -080040#include <mutex>
Ed Tanousf8b6e552025-06-27 13:27:50 -070041#include <set>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070042#include <string>
Ed Tanousf8b6e552025-06-27 13:27:50 -070043#include <utility>
James Feist1f802f52019-02-08 13:51:43 -080044#include <variant>
Patrick Venture863b9242018-03-08 08:29:23 -080045
James Zheng6df8bb52024-11-27 23:38:47 +000046#include "failsafeloggers/failsafe_logger.cpp"
47
Patrick Venturea0764872020-08-08 07:48:43 -070048namespace pid_control
49{
50
Patrick Venture563a3562018-10-30 09:31:26 -070051std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050052 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070053 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
James Feist98b704e2019-06-03 16:24:53 -070054 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
Patrick Venture0ef1faf2018-06-13 12:50:53 -070055{
56 if (helper == nullptr)
57 {
58 return nullptr;
59 }
Patrick Venture7af157b2018-10-30 11:24:40 -070060 if (!validType(type))
Patrick Venture0ef1faf2018-06-13 12:50:53 -070061 {
62 return nullptr;
63 }
64
Patrick Venture863b9242018-03-08 08:29:23 -080065 /* Need to get the scale and initial value */
Patrick Venture863b9242018-03-08 08:29:23 -080066 /* service == busname */
Harvey.Wuf2efcbb2022-02-09 10:24:30 +080067 std::string path;
68 if (info->readPath.empty())
69 {
70 path = getSensorPath(type, id);
71 }
72 else
73 {
74 path = info->readPath;
75 }
Patrick Venture34ddc902018-10-30 11:05:17 -070076
Patrick Venture1df9e872020-10-08 15:35:01 -070077 SensorProperties settings;
Patrick Venturef8cb4642018-10-30 12:02:53 -070078 bool failed;
Eric Yang897f31c2025-05-16 20:40:56 +080079 std::string service;
Patrick Venture863b9242018-03-08 08:29:23 -080080
Patrick Venturef8cb4642018-10-30 12:02:53 -070081 try
82 {
Eric Yang897f31c2025-05-16 20:40:56 +080083 service = helper->getService(sensorintf, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070084 }
85 catch (const std::exception& e)
86 {
Chaul Lya552fe22024-11-15 10:20:28 +000087#ifndef HANDLE_MISSING_OBJECT_PATHS
Patrick Venturef8cb4642018-10-30 12:02:53 -070088 return nullptr;
Chaul Lya552fe22024-11-15 10:20:28 +000089#else
90 // CASE1: The sensor is not on DBus, but as it is not in the
91 // MissingIsAcceptable list, the sensor should be built with a failed
92 // state to send the zone to failsafe mode. Everything will recover if
93 // all important sensors are back to DBus. swampd will be informed
94 // through InterfacesAdded signals and the sensors will be built again.
95
Eric Yang897f31c2025-05-16 20:40:56 +080096 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
97 // fails (e.g., D-Bus error or property fetch failure). In this case,
98 // handle-missing-object-paths does not apply. The sensor build fails,
99 // and the control loop will keep restarting until getProperties
100 // succeeds.
Chaul Lya552fe22024-11-15 10:20:28 +0000101
Eric Yang897f31c2025-05-16 20:40:56 +0800102 // Only CASE1 may send the zone to failsafe mode if the sensor is not
103 // in MissingIsAcceptable. CASE2 results in continuous restart until
104 // recovery.
Chaul Lya552fe22024-11-15 10:20:28 +0000105
106 failed = true;
107 settings.value = std::numeric_limits<double>::quiet_NaN();
108 settings.unit = getSensorUnit(type);
109 settings.available = false;
Eric Yang897f31c2025-05-16 20:40:56 +0800110 settings.unavailableAsFailed = true;
111 if (info->ignoreDbusMinMax)
112 {
113 settings.min = 0;
114 settings.max = 0;
115 }
Chaul Lya552fe22024-11-15 10:20:28 +0000116 std::cerr << "DbusPassive: Sensor " << path
117 << " is missing from D-Bus, build this sensor as failed\n";
Eric Yang897f31c2025-05-16 20:40:56 +0800118 return std::make_unique<DbusPassive>(
119 bus, type, id, std::move(helper), settings, failed, path,
120 redundancy);
Chaul Lya552fe22024-11-15 10:20:28 +0000121#endif
Patrick Venturef8cb4642018-10-30 12:02:53 -0700122 }
123
Eric Yang897f31c2025-05-16 20:40:56 +0800124 try
125 {
126 helper->getProperties(service, path, &settings);
127 failed = helper->thresholdsAsserted(service, path);
128 }
129 catch (const std::exception& e)
130 {
131 return nullptr;
132 }
133
Patrick Venture6b9f5992019-09-10 09:18:28 -0700134 /* if these values are zero, they're ignored. */
135 if (info->ignoreDbusMinMax)
136 {
137 settings.min = 0;
138 settings.max = 0;
139 }
140
Alex.Song8f73ad72021-10-07 00:18:27 +0800141 settings.unavailableAsFailed = info->unavailableAsFailed;
142
Patrick Venture8729eb92020-08-10 10:38:44 -0700143 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
144 settings, failed, path, redundancy);
Patrick Venturef8cb4642018-10-30 12:02:53 -0700145}
146
James Feist98b704e2019-06-03 16:24:53 -0700147DbusPassive::DbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -0500148 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -0700149 std::unique_ptr<DbusHelperInterface> helper,
Patrick Venture1df9e872020-10-08 15:35:01 -0700150 const SensorProperties& settings, bool failed, const std::string& path,
James Feist98b704e2019-06-03 16:24:53 -0700151 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400152 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
153 _id(id), _helper(std::move(helper)), _failed(failed), path(path),
James Feist98b704e2019-06-03 16:24:53 -0700154 redundancy(redundancy)
155
Patrick Venturef8cb4642018-10-30 12:02:53 -0700156{
Patrick Venture863b9242018-03-08 08:29:23 -0800157 _scale = settings.scale;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700158 _min = settings.min * std::pow(10.0, _scale);
159 _max = settings.max * std::pow(10.0, _scale);
Alex.Song8f73ad72021-10-07 00:18:27 +0800160 _available = settings.available;
161 _unavailableAsFailed = settings.unavailableAsFailed;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700162
163 // Cache this type knowledge, to avoid repeated string comparison
164 _typeMargin = (type == "margin");
Alex.Song8f73ad72021-10-07 00:18:27 +0800165 _typeFan = (type == "fan");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700166
167 // Force value to be stored, otherwise member would be uninitialized
168 updateValue(settings.value, true);
Patrick Venture863b9242018-03-08 08:29:23 -0800169}
170
171ReadReturn DbusPassive::read(void)
172{
173 std::lock_guard<std::mutex> guard(_lock);
174
Josh Lehanb3005752022-02-22 20:48:07 -0800175 ReadReturn r = {_value, _updated, _unscaled};
Patrick Venture863b9242018-03-08 08:29:23 -0800176
177 return r;
178}
179
Josh Lehanb3005752022-02-22 20:48:07 -0800180void DbusPassive::setValue(double value, double unscaled)
Patrick Venture863b9242018-03-08 08:29:23 -0800181{
182 std::lock_guard<std::mutex> guard(_lock);
183
184 _value = value;
Josh Lehanb3005752022-02-22 20:48:07 -0800185 _unscaled = unscaled;
Patrick Venture863b9242018-03-08 08:29:23 -0800186 _updated = std::chrono::high_resolution_clock::now();
187}
188
Josh Lehanb3005752022-02-22 20:48:07 -0800189void DbusPassive::setValue(double value)
190{
191 // First param is scaled, second param is unscaled, assume same here
192 setValue(value, value);
193}
194
James Feist36b7d8e2018-10-05 15:39:01 -0700195bool DbusPassive::getFailed(void) const
196{
James Feist98b704e2019-06-03 16:24:53 -0700197 if (redundancy)
198 {
199 const std::set<std::string>& failures = redundancy->getFailed();
200 if (failures.find(path) != failures.end())
201 {
James Zheng6df8bb52024-11-27 23:38:47 +0000202 outputFailsafeLogWithSensor(_id, true, _id,
203 "The sensor path is marked redundant.");
James Feist98b704e2019-06-03 16:24:53 -0700204 return true;
205 }
206 }
James Feist4b36f262020-07-07 16:56:41 -0700207
Alex.Song8f73ad72021-10-07 00:18:27 +0800208 /*
209 * Unavailable thermal sensors, who are not present or
210 * power-state-not-matching, should not trigger the failSafe mode. For
211 * example, when a system stays at a powered-off state, its CPU Temp
212 * sensors will be unavailable, these unavailable sensors should not be
213 * treated as failed and trigger failSafe.
214 * This is important for systems whose Fans are always on.
215 */
216 if (!_typeFan && !_available && !_unavailableAsFailed)
217 {
218 return false;
219 }
220
Josh Lehan3e2f7582020-09-20 22:06:03 -0700221 // If a reading has came in,
222 // but its value bad in some way (determined by sensor type),
223 // indicate this sensor has failed,
224 // until another value comes in that is no longer bad.
225 // This is different from the overall _failed flag,
226 // which is set and cleared by other causes.
227 if (_badReading)
228 {
James Zheng6df8bb52024-11-27 23:38:47 +0000229 outputFailsafeLogWithSensor(_id, true, _id,
230 "The sensor has bad readings.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700231 return true;
232 }
233
234 // If a reading has came in, and it is not a bad reading,
235 // but it indicates there is no more thermal margin left,
236 // that is bad, something is wrong with the PID loops,
237 // they are not cooling the system, enable failsafe mode also.
238 if (_marginHot)
239 {
James Zheng6df8bb52024-11-27 23:38:47 +0000240 outputFailsafeLogWithSensor(_id, true, _id,
241 "The sensor has no thermal margin left.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700242 return true;
243 }
244
James Zheng6df8bb52024-11-27 23:38:47 +0000245 if (_failed)
246 {
247 outputFailsafeLogWithSensor(
248 _id, true, _id, "The sensor has failed with a critical issue.");
249 return true;
250 }
251
252 if (!_available)
253 {
254 outputFailsafeLogWithSensor(_id, true, _id,
255 "The sensor is unavailable.");
256 return true;
257 }
258
259 if (!_functional)
260 {
261 outputFailsafeLogWithSensor(_id, true, _id,
262 "The sensor is not functional.");
263 return true;
264 }
265
266 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
267
268 return false;
James Feist36b7d8e2018-10-05 15:39:01 -0700269}
270
Harvey Wua4270072024-05-29 16:11:13 +0800271std::string DbusPassive::getFailReason(void) const
272{
273 if (_badReading)
274 {
275 return "Sensor reading bad";
276 }
277 if (_marginHot)
278 {
279 return "Margin hot";
280 }
281 if (_failed)
282 {
283 return "Sensor threshold asserted";
284 }
285 if (!_available)
286 {
287 return "Sensor unavailable";
288 }
289 if (!_functional)
290 {
291 return "Sensor not functional";
292 }
293 return "Unknown";
294}
295
James Feist36b7d8e2018-10-05 15:39:01 -0700296void DbusPassive::setFailed(bool value)
297{
298 _failed = value;
299}
300
James Feist4b36f262020-07-07 16:56:41 -0700301void DbusPassive::setFunctional(bool value)
302{
303 _functional = value;
304}
305
Alex.Song8f73ad72021-10-07 00:18:27 +0800306void DbusPassive::setAvailable(bool value)
307{
308 _available = value;
309}
310
Patrick Venture863b9242018-03-08 08:29:23 -0800311int64_t DbusPassive::getScale(void)
312{
313 return _scale;
314}
315
Patrick Venture563a3562018-10-30 09:31:26 -0700316std::string DbusPassive::getID(void)
Patrick Venture863b9242018-03-08 08:29:23 -0800317{
318 return _id;
319}
320
James Feist75eb7692019-02-25 12:50:02 -0800321double DbusPassive::getMax(void)
322{
323 return _max;
324}
325
326double DbusPassive::getMin(void)
327{
328 return _min;
329}
330
Josh Lehan3e2f7582020-09-20 22:06:03 -0700331void DbusPassive::updateValue(double value, bool force)
332{
333 _badReading = false;
334
335 // Do not let a NAN, or other floating-point oddity, be used to update
336 // the value, as that indicates the sensor has no valid reading.
337 if (!(std::isfinite(value)))
338 {
339 _badReading = true;
340
341 // Do not continue with a bad reading, unless caller forcing
342 if (!force)
343 {
344 return;
345 }
346 }
347
348 value *= std::pow(10.0, _scale);
349
350 auto unscaled = value;
351 scaleSensorReading(_min, _max, value);
352
353 if (_typeMargin)
354 {
355 _marginHot = false;
356
357 // Unlike an absolute temperature sensor,
358 // where 0 degrees C is a good reading,
359 // a value received of 0 (or negative) margin is worrisome,
360 // and should be flagged.
361 // Either it indicates margin not calculated properly,
362 // or somebody forgot to set the margin-zero setpoint,
363 // or the system is really overheating that much.
364 // This is a different condition from _failed
365 // and _badReading, so it merits its own flag.
366 // The sensor has not failed, the reading is good, but the zone
367 // still needs to know that it should go to failsafe mode.
368 if (unscaled <= 0.0)
369 {
370 _marginHot = true;
371 }
372 }
373
Josh Lehanb3005752022-02-22 20:48:07 -0800374 setValue(value, unscaled);
Josh Lehan3e2f7582020-09-20 22:06:03 -0700375}
376
Patrick Williamsb228bc32022-07-22 19:26:56 -0500377int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
Patrick Venture863b9242018-03-08 08:29:23 -0800378{
Patrick Venture863b9242018-03-08 08:29:23 -0800379 std::string msgSensor;
James Feist1f802f52019-02-08 13:51:43 -0800380 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
Patrick Ventured0c75662018-06-12 19:03:21 -0700381
382 msg.read(msgSensor, msgData);
Patrick Venture863b9242018-03-08 08:29:23 -0800383
384 if (msgSensor == "xyz.openbmc_project.Sensor.Value")
385 {
386 auto valPropMap = msgData.find("Value");
387 if (valPropMap != msgData.end())
388 {
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400389 double value =
390 std::visit(VariantToDoubleVisitor(), valPropMap->second);
Patrick Venture863b9242018-03-08 08:29:23 -0800391
Josh Lehan3e2f7582020-09-20 22:06:03 -0700392 owner->updateValue(value, false);
Patrick Venture863b9242018-03-08 08:29:23 -0800393 }
394 }
James Feist36b7d8e2018-10-05 15:39:01 -0700395 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
396 {
397 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
398 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
399 if (criticalAlarmHigh == msgData.end() &&
400 criticalAlarmLow == msgData.end())
401 {
402 return 0;
403 }
404
405 bool asserted = false;
406 if (criticalAlarmLow != msgData.end())
407 {
James Feist1f802f52019-02-08 13:51:43 -0800408 asserted = std::get<bool>(criticalAlarmLow->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700409 }
410
411 // checking both as in theory you could de-assert one threshold and
412 // assert the other at the same moment
413 if (!asserted && criticalAlarmHigh != msgData.end())
414 {
James Feist1f802f52019-02-08 13:51:43 -0800415 asserted = std::get<bool>(criticalAlarmHigh->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700416 }
417 owner->setFailed(asserted);
418 }
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -0600419#ifdef UNC_FAILSAFE
420 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
421 {
422 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
423 if (warningAlarmHigh == msgData.end())
424 {
425 return 0;
426 }
427
428 bool asserted = false;
429 if (warningAlarmHigh != msgData.end())
430 {
431 asserted = std::get<bool>(warningAlarmHigh->second);
432 }
433 owner->setFailed(asserted);
434 }
435#endif
Alex.Song8f73ad72021-10-07 00:18:27 +0800436 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
437 {
438 auto available = msgData.find("Available");
439 if (available == msgData.end())
440 {
441 return 0;
442 }
443 bool asserted = std::get<bool>(available->second);
444 owner->setAvailable(asserted);
445 if (!asserted)
446 {
447 // A thermal controller will continue its PID calculation and not
448 // trigger a 'failsafe' when some inputs are unavailable.
449 // So, forced to clear the value here to prevent a historical
450 // value to participate in a latter PID calculation.
451 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
452 }
453 }
James Feist4b36f262020-07-07 16:56:41 -0700454 else if (msgSensor ==
455 "xyz.openbmc_project.State.Decorator.OperationalStatus")
456 {
457 auto functional = msgData.find("Functional");
458 if (functional == msgData.end())
459 {
460 return 0;
461 }
462 bool asserted = std::get<bool>(functional->second);
463 owner->setFunctional(asserted);
464 }
Patrick Venture863b9242018-03-08 08:29:23 -0800465
466 return 0;
467}
Patrick Ventured0c75662018-06-12 19:03:21 -0700468
Harvey.Wua1ae4fa2022-10-28 17:38:35 +0800469int dbusHandleSignal(sd_bus_message* msg, void* usrData,
470 [[maybe_unused]] sd_bus_error* err)
Patrick Ventured0c75662018-06-12 19:03:21 -0700471{
Patrick Williamsb228bc32022-07-22 19:26:56 -0500472 auto sdbpMsg = sdbusplus::message_t(msg);
Patrick Ventured0c75662018-06-12 19:03:21 -0700473 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
474
Patrick Venture7af157b2018-10-30 11:24:40 -0700475 return handleSensorValue(sdbpMsg, obj);
Patrick Ventured0c75662018-06-12 19:03:21 -0700476}
Patrick Venturea0764872020-08-08 07:48:43 -0700477
478} // namespace pid_control