blob: d53e59ecb6ca47462afef9f98b6ca81b732c2bae [file] [log] [blame]
/**
 * Copyright 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -060016
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070017#include "dbuspassive.hpp"
18
Ed Tanousf8b6e552025-06-27 13:27:50 -070019#include "conf.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070020#include "dbushelper_interface.hpp"
James Feist98b704e2019-06-03 16:24:53 -070021#include "dbuspassiveredundancy.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070022#include "dbusutil.hpp"
James Zheng6df8bb52024-11-27 23:38:47 +000023#include "failsafeloggers/failsafe_logger_utility.hpp"
Ed Tanousf8b6e552025-06-27 13:27:50 -070024#include "interfaces.hpp"
James Feist0c8223b2019-05-08 15:33:33 -070025#include "util.hpp"
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070026
Ed Tanousf8b6e552025-06-27 13:27:50 -070027#include <systemd/sd-bus.h>
28
Patrick Venturea83a3ec2020-08-04 09:52:05 -070029#include <sdbusplus/bus.hpp>
Ed Tanousf8b6e552025-06-27 13:27:50 -070030#include <sdbusplus/message.hpp>
Patrick Venturea83a3ec2020-08-04 09:52:05 -070031
Patrick Venture863b9242018-03-08 08:29:23 -080032#include <chrono>
33#include <cmath>
Ed Tanousf8b6e552025-06-27 13:27:50 -070034#include <cstdint>
35#include <exception>
36#include <limits>
37#include <map>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070038#include <memory>
Patrick Venture863b9242018-03-08 08:29:23 -080039#include <mutex>
Ed Tanousf8b6e552025-06-27 13:27:50 -070040#include <set>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070041#include <string>
Ed Tanousf8b6e552025-06-27 13:27:50 -070042#include <utility>
James Feist1f802f52019-02-08 13:51:43 -080043#include <variant>
Patrick Venture863b9242018-03-08 08:29:23 -080044
James Zheng6df8bb52024-11-27 23:38:47 +000045#include "failsafeloggers/failsafe_logger.cpp"
46
Patrick Venturea0764872020-08-08 07:48:43 -070047namespace pid_control
48{
49
Patrick Venture563a3562018-10-30 09:31:26 -070050std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050051 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070052 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
James Feist98b704e2019-06-03 16:24:53 -070053 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
Patrick Venture0ef1faf2018-06-13 12:50:53 -070054{
55 if (helper == nullptr)
56 {
57 return nullptr;
58 }
Patrick Venture7af157b2018-10-30 11:24:40 -070059 if (!validType(type))
Patrick Venture0ef1faf2018-06-13 12:50:53 -070060 {
61 return nullptr;
62 }
63
Patrick Venture863b9242018-03-08 08:29:23 -080064 /* Need to get the scale and initial value */
Patrick Venture863b9242018-03-08 08:29:23 -080065 /* service == busname */
Harvey.Wuf2efcbb2022-02-09 10:24:30 +080066 std::string path;
67 if (info->readPath.empty())
68 {
69 path = getSensorPath(type, id);
70 }
71 else
72 {
73 path = info->readPath;
74 }
Patrick Venture34ddc902018-10-30 11:05:17 -070075
Patrick Venture1df9e872020-10-08 15:35:01 -070076 SensorProperties settings;
Patrick Venturef8cb4642018-10-30 12:02:53 -070077 bool failed;
Eric Yang897f31c2025-05-16 20:40:56 +080078 std::string service;
Patrick Venture863b9242018-03-08 08:29:23 -080079
Patrick Venturef8cb4642018-10-30 12:02:53 -070080 try
81 {
Eric Yang897f31c2025-05-16 20:40:56 +080082 service = helper->getService(sensorintf, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070083 }
84 catch (const std::exception& e)
85 {
Chaul Lya552fe22024-11-15 10:20:28 +000086#ifndef HANDLE_MISSING_OBJECT_PATHS
Patrick Venturef8cb4642018-10-30 12:02:53 -070087 return nullptr;
Chaul Lya552fe22024-11-15 10:20:28 +000088#else
89 // CASE1: The sensor is not on DBus, but as it is not in the
90 // MissingIsAcceptable list, the sensor should be built with a failed
91 // state to send the zone to failsafe mode. Everything will recover if
92 // all important sensors are back to DBus. swampd will be informed
93 // through InterfacesAdded signals and the sensors will be built again.
94
Eric Yang897f31c2025-05-16 20:40:56 +080095 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
96 // fails (e.g., D-Bus error or property fetch failure). In this case,
97 // handle-missing-object-paths does not apply. The sensor build fails,
98 // and the control loop will keep restarting until getProperties
99 // succeeds.
Chaul Lya552fe22024-11-15 10:20:28 +0000100
Eric Yang897f31c2025-05-16 20:40:56 +0800101 // Only CASE1 may send the zone to failsafe mode if the sensor is not
102 // in MissingIsAcceptable. CASE2 results in continuous restart until
103 // recovery.
Chaul Lya552fe22024-11-15 10:20:28 +0000104
105 failed = true;
106 settings.value = std::numeric_limits<double>::quiet_NaN();
107 settings.unit = getSensorUnit(type);
108 settings.available = false;
Eric Yang897f31c2025-05-16 20:40:56 +0800109 settings.unavailableAsFailed = true;
110 if (info->ignoreDbusMinMax)
111 {
112 settings.min = 0;
113 settings.max = 0;
114 }
Chaul Lya552fe22024-11-15 10:20:28 +0000115 std::cerr << "DbusPassive: Sensor " << path
116 << " is missing from D-Bus, build this sensor as failed\n";
Eric Yang897f31c2025-05-16 20:40:56 +0800117 return std::make_unique<DbusPassive>(
118 bus, type, id, std::move(helper), settings, failed, path,
119 redundancy);
Chaul Lya552fe22024-11-15 10:20:28 +0000120#endif
Patrick Venturef8cb4642018-10-30 12:02:53 -0700121 }
122
Eric Yang897f31c2025-05-16 20:40:56 +0800123 try
124 {
125 helper->getProperties(service, path, &settings);
126 failed = helper->thresholdsAsserted(service, path);
127 }
128 catch (const std::exception& e)
129 {
130 return nullptr;
131 }
132
Patrick Venture6b9f5992019-09-10 09:18:28 -0700133 /* if these values are zero, they're ignored. */
134 if (info->ignoreDbusMinMax)
135 {
136 settings.min = 0;
137 settings.max = 0;
138 }
139
Alex.Song8f73ad72021-10-07 00:18:27 +0800140 settings.unavailableAsFailed = info->unavailableAsFailed;
141
Patrick Venture8729eb92020-08-10 10:38:44 -0700142 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
143 settings, failed, path, redundancy);
Patrick Venturef8cb4642018-10-30 12:02:53 -0700144}
145
James Feist98b704e2019-06-03 16:24:53 -0700146DbusPassive::DbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -0500147 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -0700148 std::unique_ptr<DbusHelperInterface> helper,
Patrick Venture1df9e872020-10-08 15:35:01 -0700149 const SensorProperties& settings, bool failed, const std::string& path,
James Feist98b704e2019-06-03 16:24:53 -0700150 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400151 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
152 _id(id), _helper(std::move(helper)), _failed(failed), path(path),
James Feist98b704e2019-06-03 16:24:53 -0700153 redundancy(redundancy)
154
Patrick Venturef8cb4642018-10-30 12:02:53 -0700155{
Patrick Venture863b9242018-03-08 08:29:23 -0800156 _scale = settings.scale;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700157 _min = settings.min * std::pow(10.0, _scale);
158 _max = settings.max * std::pow(10.0, _scale);
Alex.Song8f73ad72021-10-07 00:18:27 +0800159 _available = settings.available;
160 _unavailableAsFailed = settings.unavailableAsFailed;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700161
162 // Cache this type knowledge, to avoid repeated string comparison
163 _typeMargin = (type == "margin");
Alex.Song8f73ad72021-10-07 00:18:27 +0800164 _typeFan = (type == "fan");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700165
166 // Force value to be stored, otherwise member would be uninitialized
167 updateValue(settings.value, true);
Patrick Venture863b9242018-03-08 08:29:23 -0800168}
169
170ReadReturn DbusPassive::read(void)
171{
172 std::lock_guard<std::mutex> guard(_lock);
173
Josh Lehanb3005752022-02-22 20:48:07 -0800174 ReadReturn r = {_value, _updated, _unscaled};
Patrick Venture863b9242018-03-08 08:29:23 -0800175
176 return r;
177}
178
Josh Lehanb3005752022-02-22 20:48:07 -0800179void DbusPassive::setValue(double value, double unscaled)
Patrick Venture863b9242018-03-08 08:29:23 -0800180{
181 std::lock_guard<std::mutex> guard(_lock);
182
183 _value = value;
Josh Lehanb3005752022-02-22 20:48:07 -0800184 _unscaled = unscaled;
Patrick Venture863b9242018-03-08 08:29:23 -0800185 _updated = std::chrono::high_resolution_clock::now();
186}
187
Josh Lehanb3005752022-02-22 20:48:07 -0800188void DbusPassive::setValue(double value)
189{
190 // First param is scaled, second param is unscaled, assume same here
191 setValue(value, value);
192}
193
James Feist36b7d8e2018-10-05 15:39:01 -0700194bool DbusPassive::getFailed(void) const
195{
James Feist98b704e2019-06-03 16:24:53 -0700196 if (redundancy)
197 {
198 const std::set<std::string>& failures = redundancy->getFailed();
199 if (failures.find(path) != failures.end())
200 {
James Zheng6df8bb52024-11-27 23:38:47 +0000201 outputFailsafeLogWithSensor(_id, true, _id,
202 "The sensor path is marked redundant.");
James Feist98b704e2019-06-03 16:24:53 -0700203 return true;
204 }
205 }
James Feist4b36f262020-07-07 16:56:41 -0700206
Alex.Song8f73ad72021-10-07 00:18:27 +0800207 /*
208 * Unavailable thermal sensors, who are not present or
209 * power-state-not-matching, should not trigger the failSafe mode. For
210 * example, when a system stays at a powered-off state, its CPU Temp
211 * sensors will be unavailable, these unavailable sensors should not be
212 * treated as failed and trigger failSafe.
213 * This is important for systems whose Fans are always on.
214 */
215 if (!_typeFan && !_available && !_unavailableAsFailed)
216 {
217 return false;
218 }
219
Josh Lehan3e2f7582020-09-20 22:06:03 -0700220 // If a reading has came in,
221 // but its value bad in some way (determined by sensor type),
222 // indicate this sensor has failed,
223 // until another value comes in that is no longer bad.
224 // This is different from the overall _failed flag,
225 // which is set and cleared by other causes.
226 if (_badReading)
227 {
James Zheng6df8bb52024-11-27 23:38:47 +0000228 outputFailsafeLogWithSensor(_id, true, _id,
229 "The sensor has bad readings.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700230 return true;
231 }
232
233 // If a reading has came in, and it is not a bad reading,
234 // but it indicates there is no more thermal margin left,
235 // that is bad, something is wrong with the PID loops,
236 // they are not cooling the system, enable failsafe mode also.
237 if (_marginHot)
238 {
James Zheng6df8bb52024-11-27 23:38:47 +0000239 outputFailsafeLogWithSensor(_id, true, _id,
240 "The sensor has no thermal margin left.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700241 return true;
242 }
243
James Zheng6df8bb52024-11-27 23:38:47 +0000244 if (_failed)
245 {
246 outputFailsafeLogWithSensor(
247 _id, true, _id, "The sensor has failed with a critical issue.");
248 return true;
249 }
250
251 if (!_available)
252 {
253 outputFailsafeLogWithSensor(_id, true, _id,
254 "The sensor is unavailable.");
255 return true;
256 }
257
258 if (!_functional)
259 {
260 outputFailsafeLogWithSensor(_id, true, _id,
261 "The sensor is not functional.");
262 return true;
263 }
264
265 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
266
267 return false;
James Feist36b7d8e2018-10-05 15:39:01 -0700268}
269
Harvey Wua4270072024-05-29 16:11:13 +0800270std::string DbusPassive::getFailReason(void) const
271{
272 if (_badReading)
273 {
274 return "Sensor reading bad";
275 }
276 if (_marginHot)
277 {
278 return "Margin hot";
279 }
280 if (_failed)
281 {
282 return "Sensor threshold asserted";
283 }
284 if (!_available)
285 {
286 return "Sensor unavailable";
287 }
288 if (!_functional)
289 {
290 return "Sensor not functional";
291 }
292 return "Unknown";
293}
294
James Feist36b7d8e2018-10-05 15:39:01 -0700295void DbusPassive::setFailed(bool value)
296{
297 _failed = value;
298}
299
James Feist4b36f262020-07-07 16:56:41 -0700300void DbusPassive::setFunctional(bool value)
301{
302 _functional = value;
303}
304
Alex.Song8f73ad72021-10-07 00:18:27 +0800305void DbusPassive::setAvailable(bool value)
306{
307 _available = value;
308}
309
Patrick Venture863b9242018-03-08 08:29:23 -0800310int64_t DbusPassive::getScale(void)
311{
312 return _scale;
313}
314
Patrick Venture563a3562018-10-30 09:31:26 -0700315std::string DbusPassive::getID(void)
Patrick Venture863b9242018-03-08 08:29:23 -0800316{
317 return _id;
318}
319
James Feist75eb7692019-02-25 12:50:02 -0800320double DbusPassive::getMax(void)
321{
322 return _max;
323}
324
325double DbusPassive::getMin(void)
326{
327 return _min;
328}
329
Josh Lehan3e2f7582020-09-20 22:06:03 -0700330void DbusPassive::updateValue(double value, bool force)
331{
332 _badReading = false;
333
334 // Do not let a NAN, or other floating-point oddity, be used to update
335 // the value, as that indicates the sensor has no valid reading.
336 if (!(std::isfinite(value)))
337 {
338 _badReading = true;
339
340 // Do not continue with a bad reading, unless caller forcing
341 if (!force)
342 {
343 return;
344 }
345 }
346
347 value *= std::pow(10.0, _scale);
348
349 auto unscaled = value;
350 scaleSensorReading(_min, _max, value);
351
352 if (_typeMargin)
353 {
354 _marginHot = false;
355
356 // Unlike an absolute temperature sensor,
357 // where 0 degrees C is a good reading,
358 // a value received of 0 (or negative) margin is worrisome,
359 // and should be flagged.
360 // Either it indicates margin not calculated properly,
361 // or somebody forgot to set the margin-zero setpoint,
362 // or the system is really overheating that much.
363 // This is a different condition from _failed
364 // and _badReading, so it merits its own flag.
365 // The sensor has not failed, the reading is good, but the zone
366 // still needs to know that it should go to failsafe mode.
367 if (unscaled <= 0.0)
368 {
369 _marginHot = true;
370 }
371 }
372
Josh Lehanb3005752022-02-22 20:48:07 -0800373 setValue(value, unscaled);
Josh Lehan3e2f7582020-09-20 22:06:03 -0700374}
375
Patrick Williamsb228bc32022-07-22 19:26:56 -0500376int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
Patrick Venture863b9242018-03-08 08:29:23 -0800377{
Patrick Venture863b9242018-03-08 08:29:23 -0800378 std::string msgSensor;
James Feist1f802f52019-02-08 13:51:43 -0800379 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
Patrick Ventured0c75662018-06-12 19:03:21 -0700380
381 msg.read(msgSensor, msgData);
Patrick Venture863b9242018-03-08 08:29:23 -0800382
383 if (msgSensor == "xyz.openbmc_project.Sensor.Value")
384 {
385 auto valPropMap = msgData.find("Value");
386 if (valPropMap != msgData.end())
387 {
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400388 double value =
389 std::visit(VariantToDoubleVisitor(), valPropMap->second);
Patrick Venture863b9242018-03-08 08:29:23 -0800390
Josh Lehan3e2f7582020-09-20 22:06:03 -0700391 owner->updateValue(value, false);
Patrick Venture863b9242018-03-08 08:29:23 -0800392 }
393 }
James Feist36b7d8e2018-10-05 15:39:01 -0700394 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
395 {
396 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
397 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
398 if (criticalAlarmHigh == msgData.end() &&
399 criticalAlarmLow == msgData.end())
400 {
401 return 0;
402 }
403
404 bool asserted = false;
405 if (criticalAlarmLow != msgData.end())
406 {
James Feist1f802f52019-02-08 13:51:43 -0800407 asserted = std::get<bool>(criticalAlarmLow->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700408 }
409
410 // checking both as in theory you could de-assert one threshold and
411 // assert the other at the same moment
412 if (!asserted && criticalAlarmHigh != msgData.end())
413 {
James Feist1f802f52019-02-08 13:51:43 -0800414 asserted = std::get<bool>(criticalAlarmHigh->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700415 }
416 owner->setFailed(asserted);
417 }
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -0600418#ifdef UNC_FAILSAFE
419 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
420 {
421 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
422 if (warningAlarmHigh == msgData.end())
423 {
424 return 0;
425 }
426
427 bool asserted = false;
428 if (warningAlarmHigh != msgData.end())
429 {
430 asserted = std::get<bool>(warningAlarmHigh->second);
431 }
432 owner->setFailed(asserted);
433 }
434#endif
Alex.Song8f73ad72021-10-07 00:18:27 +0800435 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
436 {
437 auto available = msgData.find("Available");
438 if (available == msgData.end())
439 {
440 return 0;
441 }
442 bool asserted = std::get<bool>(available->second);
443 owner->setAvailable(asserted);
444 if (!asserted)
445 {
446 // A thermal controller will continue its PID calculation and not
447 // trigger a 'failsafe' when some inputs are unavailable.
448 // So, forced to clear the value here to prevent a historical
449 // value to participate in a latter PID calculation.
450 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
451 }
452 }
James Feist4b36f262020-07-07 16:56:41 -0700453 else if (msgSensor ==
454 "xyz.openbmc_project.State.Decorator.OperationalStatus")
455 {
456 auto functional = msgData.find("Functional");
457 if (functional == msgData.end())
458 {
459 return 0;
460 }
461 bool asserted = std::get<bool>(functional->second);
462 owner->setFunctional(asserted);
463 }
Patrick Venture863b9242018-03-08 08:29:23 -0800464
465 return 0;
466}
Patrick Ventured0c75662018-06-12 19:03:21 -0700467
Harvey.Wua1ae4fa2022-10-28 17:38:35 +0800468int dbusHandleSignal(sd_bus_message* msg, void* usrData,
469 [[maybe_unused]] sd_bus_error* err)
Patrick Ventured0c75662018-06-12 19:03:21 -0700470{
Patrick Williamsb228bc32022-07-22 19:26:56 -0500471 auto sdbpMsg = sdbusplus::message_t(msg);
Patrick Ventured0c75662018-06-12 19:03:21 -0700472 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
473
Patrick Venture7af157b2018-10-30 11:24:40 -0700474 return handleSensorValue(sdbpMsg, obj);
Patrick Ventured0c75662018-06-12 19:03:21 -0700475}
Patrick Venturea0764872020-08-08 07:48:43 -0700476
477} // namespace pid_control