blob: a0fa217ed97080aaedb84c27e33ec97c19cf62bd [file] [log] [blame]
Patrick Venture863b9242018-03-08 08:29:23 -08001/**
2 * Copyright 2017 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -060016#include "config.h"
17
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070018#include "dbuspassive.hpp"
19
Patrick Ventureaadb30d2020-08-10 09:17:11 -070020#include "dbushelper_interface.hpp"
James Feist98b704e2019-06-03 16:24:53 -070021#include "dbuspassiveredundancy.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070022#include "dbusutil.hpp"
James Zheng6df8bb52024-11-27 23:38:47 +000023#include "failsafeloggers/builder.hpp"
24#include "failsafeloggers/failsafe_logger_utility.hpp"
James Feist0c8223b2019-05-08 15:33:33 -070025#include "util.hpp"
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070026
Patrick Venturea83a3ec2020-08-04 09:52:05 -070027#include <sdbusplus/bus.hpp>
28
Patrick Venture863b9242018-03-08 08:29:23 -080029#include <chrono>
30#include <cmath>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070031#include <memory>
Patrick Venture863b9242018-03-08 08:29:23 -080032#include <mutex>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070033#include <string>
James Feist1f802f52019-02-08 13:51:43 -080034#include <variant>
Patrick Venture863b9242018-03-08 08:29:23 -080035
James Zheng6df8bb52024-11-27 23:38:47 +000036#include "failsafeloggers/failsafe_logger.cpp"
37
Patrick Venturea0764872020-08-08 07:48:43 -070038namespace pid_control
39{
40
Patrick Venture563a3562018-10-30 09:31:26 -070041std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050042 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070043 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
James Feist98b704e2019-06-03 16:24:53 -070044 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
Patrick Venture0ef1faf2018-06-13 12:50:53 -070045{
46 if (helper == nullptr)
47 {
48 return nullptr;
49 }
Patrick Venture7af157b2018-10-30 11:24:40 -070050 if (!validType(type))
Patrick Venture0ef1faf2018-06-13 12:50:53 -070051 {
52 return nullptr;
53 }
54
Patrick Venture863b9242018-03-08 08:29:23 -080055 /* Need to get the scale and initial value */
Patrick Venture863b9242018-03-08 08:29:23 -080056 /* service == busname */
Harvey.Wuf2efcbb2022-02-09 10:24:30 +080057 std::string path;
58 if (info->readPath.empty())
59 {
60 path = getSensorPath(type, id);
61 }
62 else
63 {
64 path = info->readPath;
65 }
Patrick Venture34ddc902018-10-30 11:05:17 -070066
Patrick Venture1df9e872020-10-08 15:35:01 -070067 SensorProperties settings;
Patrick Venturef8cb4642018-10-30 12:02:53 -070068 bool failed;
Eric Yang897f31c2025-05-16 20:40:56 +080069 std::string service;
Patrick Venture863b9242018-03-08 08:29:23 -080070
Patrick Venturef8cb4642018-10-30 12:02:53 -070071 try
72 {
Eric Yang897f31c2025-05-16 20:40:56 +080073 service = helper->getService(sensorintf, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070074 }
75 catch (const std::exception& e)
76 {
Chaul Lya552fe22024-11-15 10:20:28 +000077#ifndef HANDLE_MISSING_OBJECT_PATHS
Patrick Venturef8cb4642018-10-30 12:02:53 -070078 return nullptr;
Chaul Lya552fe22024-11-15 10:20:28 +000079#else
80 // CASE1: The sensor is not on DBus, but as it is not in the
81 // MissingIsAcceptable list, the sensor should be built with a failed
82 // state to send the zone to failsafe mode. Everything will recover if
83 // all important sensors are back to DBus. swampd will be informed
84 // through InterfacesAdded signals and the sensors will be built again.
85
Eric Yang897f31c2025-05-16 20:40:56 +080086 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
87 // fails (e.g., D-Bus error or property fetch failure). In this case,
88 // handle-missing-object-paths does not apply. The sensor build fails,
89 // and the control loop will keep restarting until getProperties
90 // succeeds.
Chaul Lya552fe22024-11-15 10:20:28 +000091
Eric Yang897f31c2025-05-16 20:40:56 +080092 // Only CASE1 may send the zone to failsafe mode if the sensor is not
93 // in MissingIsAcceptable. CASE2 results in continuous restart until
94 // recovery.
Chaul Lya552fe22024-11-15 10:20:28 +000095
96 failed = true;
97 settings.value = std::numeric_limits<double>::quiet_NaN();
98 settings.unit = getSensorUnit(type);
99 settings.available = false;
Eric Yang897f31c2025-05-16 20:40:56 +0800100 settings.unavailableAsFailed = true;
101 if (info->ignoreDbusMinMax)
102 {
103 settings.min = 0;
104 settings.max = 0;
105 }
Chaul Lya552fe22024-11-15 10:20:28 +0000106 std::cerr << "DbusPassive: Sensor " << path
107 << " is missing from D-Bus, build this sensor as failed\n";
Eric Yang897f31c2025-05-16 20:40:56 +0800108 return std::make_unique<DbusPassive>(
109 bus, type, id, std::move(helper), settings, failed, path,
110 redundancy);
Chaul Lya552fe22024-11-15 10:20:28 +0000111#endif
Patrick Venturef8cb4642018-10-30 12:02:53 -0700112 }
113
Eric Yang897f31c2025-05-16 20:40:56 +0800114 try
115 {
116 helper->getProperties(service, path, &settings);
117 failed = helper->thresholdsAsserted(service, path);
118 }
119 catch (const std::exception& e)
120 {
121 return nullptr;
122 }
123
Patrick Venture6b9f5992019-09-10 09:18:28 -0700124 /* if these values are zero, they're ignored. */
125 if (info->ignoreDbusMinMax)
126 {
127 settings.min = 0;
128 settings.max = 0;
129 }
130
Alex.Song8f73ad72021-10-07 00:18:27 +0800131 settings.unavailableAsFailed = info->unavailableAsFailed;
132
Patrick Venture8729eb92020-08-10 10:38:44 -0700133 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
134 settings, failed, path, redundancy);
Patrick Venturef8cb4642018-10-30 12:02:53 -0700135}
136
James Feist98b704e2019-06-03 16:24:53 -0700137DbusPassive::DbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -0500138 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -0700139 std::unique_ptr<DbusHelperInterface> helper,
Patrick Venture1df9e872020-10-08 15:35:01 -0700140 const SensorProperties& settings, bool failed, const std::string& path,
James Feist98b704e2019-06-03 16:24:53 -0700141 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400142 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
143 _id(id), _helper(std::move(helper)), _failed(failed), path(path),
James Feist98b704e2019-06-03 16:24:53 -0700144 redundancy(redundancy)
145
Patrick Venturef8cb4642018-10-30 12:02:53 -0700146{
Patrick Venture863b9242018-03-08 08:29:23 -0800147 _scale = settings.scale;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700148 _min = settings.min * std::pow(10.0, _scale);
149 _max = settings.max * std::pow(10.0, _scale);
Alex.Song8f73ad72021-10-07 00:18:27 +0800150 _available = settings.available;
151 _unavailableAsFailed = settings.unavailableAsFailed;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700152
153 // Cache this type knowledge, to avoid repeated string comparison
154 _typeMargin = (type == "margin");
Alex.Song8f73ad72021-10-07 00:18:27 +0800155 _typeFan = (type == "fan");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700156
157 // Force value to be stored, otherwise member would be uninitialized
158 updateValue(settings.value, true);
Patrick Venture863b9242018-03-08 08:29:23 -0800159}
160
161ReadReturn DbusPassive::read(void)
162{
163 std::lock_guard<std::mutex> guard(_lock);
164
Josh Lehanb3005752022-02-22 20:48:07 -0800165 ReadReturn r = {_value, _updated, _unscaled};
Patrick Venture863b9242018-03-08 08:29:23 -0800166
167 return r;
168}
169
Josh Lehanb3005752022-02-22 20:48:07 -0800170void DbusPassive::setValue(double value, double unscaled)
Patrick Venture863b9242018-03-08 08:29:23 -0800171{
172 std::lock_guard<std::mutex> guard(_lock);
173
174 _value = value;
Josh Lehanb3005752022-02-22 20:48:07 -0800175 _unscaled = unscaled;
Patrick Venture863b9242018-03-08 08:29:23 -0800176 _updated = std::chrono::high_resolution_clock::now();
177}
178
Josh Lehanb3005752022-02-22 20:48:07 -0800179void DbusPassive::setValue(double value)
180{
181 // First param is scaled, second param is unscaled, assume same here
182 setValue(value, value);
183}
184
James Feist36b7d8e2018-10-05 15:39:01 -0700185bool DbusPassive::getFailed(void) const
186{
James Feist98b704e2019-06-03 16:24:53 -0700187 if (redundancy)
188 {
189 const std::set<std::string>& failures = redundancy->getFailed();
190 if (failures.find(path) != failures.end())
191 {
James Zheng6df8bb52024-11-27 23:38:47 +0000192 outputFailsafeLogWithSensor(_id, true, _id,
193 "The sensor path is marked redundant.");
James Feist98b704e2019-06-03 16:24:53 -0700194 return true;
195 }
196 }
James Feist4b36f262020-07-07 16:56:41 -0700197
Alex.Song8f73ad72021-10-07 00:18:27 +0800198 /*
199 * Unavailable thermal sensors, who are not present or
200 * power-state-not-matching, should not trigger the failSafe mode. For
201 * example, when a system stays at a powered-off state, its CPU Temp
202 * sensors will be unavailable, these unavailable sensors should not be
203 * treated as failed and trigger failSafe.
204 * This is important for systems whose Fans are always on.
205 */
206 if (!_typeFan && !_available && !_unavailableAsFailed)
207 {
208 return false;
209 }
210
Josh Lehan3e2f7582020-09-20 22:06:03 -0700211 // If a reading has came in,
212 // but its value bad in some way (determined by sensor type),
213 // indicate this sensor has failed,
214 // until another value comes in that is no longer bad.
215 // This is different from the overall _failed flag,
216 // which is set and cleared by other causes.
217 if (_badReading)
218 {
James Zheng6df8bb52024-11-27 23:38:47 +0000219 outputFailsafeLogWithSensor(_id, true, _id,
220 "The sensor has bad readings.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700221 return true;
222 }
223
224 // If a reading has came in, and it is not a bad reading,
225 // but it indicates there is no more thermal margin left,
226 // that is bad, something is wrong with the PID loops,
227 // they are not cooling the system, enable failsafe mode also.
228 if (_marginHot)
229 {
James Zheng6df8bb52024-11-27 23:38:47 +0000230 outputFailsafeLogWithSensor(_id, true, _id,
231 "The sensor has no thermal margin left.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700232 return true;
233 }
234
James Zheng6df8bb52024-11-27 23:38:47 +0000235 if (_failed)
236 {
237 outputFailsafeLogWithSensor(
238 _id, true, _id, "The sensor has failed with a critical issue.");
239 return true;
240 }
241
242 if (!_available)
243 {
244 outputFailsafeLogWithSensor(_id, true, _id,
245 "The sensor is unavailable.");
246 return true;
247 }
248
249 if (!_functional)
250 {
251 outputFailsafeLogWithSensor(_id, true, _id,
252 "The sensor is not functional.");
253 return true;
254 }
255
256 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
257
258 return false;
James Feist36b7d8e2018-10-05 15:39:01 -0700259}
260
Harvey Wua4270072024-05-29 16:11:13 +0800261std::string DbusPassive::getFailReason(void) const
262{
263 if (_badReading)
264 {
265 return "Sensor reading bad";
266 }
267 if (_marginHot)
268 {
269 return "Margin hot";
270 }
271 if (_failed)
272 {
273 return "Sensor threshold asserted";
274 }
275 if (!_available)
276 {
277 return "Sensor unavailable";
278 }
279 if (!_functional)
280 {
281 return "Sensor not functional";
282 }
283 return "Unknown";
284}
285
James Feist36b7d8e2018-10-05 15:39:01 -0700286void DbusPassive::setFailed(bool value)
287{
288 _failed = value;
289}
290
James Feist4b36f262020-07-07 16:56:41 -0700291void DbusPassive::setFunctional(bool value)
292{
293 _functional = value;
294}
295
Alex.Song8f73ad72021-10-07 00:18:27 +0800296void DbusPassive::setAvailable(bool value)
297{
298 _available = value;
299}
300
Patrick Venture863b9242018-03-08 08:29:23 -0800301int64_t DbusPassive::getScale(void)
302{
303 return _scale;
304}
305
Patrick Venture563a3562018-10-30 09:31:26 -0700306std::string DbusPassive::getID(void)
Patrick Venture863b9242018-03-08 08:29:23 -0800307{
308 return _id;
309}
310
James Feist75eb7692019-02-25 12:50:02 -0800311double DbusPassive::getMax(void)
312{
313 return _max;
314}
315
316double DbusPassive::getMin(void)
317{
318 return _min;
319}
320
Josh Lehan3e2f7582020-09-20 22:06:03 -0700321void DbusPassive::updateValue(double value, bool force)
322{
323 _badReading = false;
324
325 // Do not let a NAN, or other floating-point oddity, be used to update
326 // the value, as that indicates the sensor has no valid reading.
327 if (!(std::isfinite(value)))
328 {
329 _badReading = true;
330
331 // Do not continue with a bad reading, unless caller forcing
332 if (!force)
333 {
334 return;
335 }
336 }
337
338 value *= std::pow(10.0, _scale);
339
340 auto unscaled = value;
341 scaleSensorReading(_min, _max, value);
342
343 if (_typeMargin)
344 {
345 _marginHot = false;
346
347 // Unlike an absolute temperature sensor,
348 // where 0 degrees C is a good reading,
349 // a value received of 0 (or negative) margin is worrisome,
350 // and should be flagged.
351 // Either it indicates margin not calculated properly,
352 // or somebody forgot to set the margin-zero setpoint,
353 // or the system is really overheating that much.
354 // This is a different condition from _failed
355 // and _badReading, so it merits its own flag.
356 // The sensor has not failed, the reading is good, but the zone
357 // still needs to know that it should go to failsafe mode.
358 if (unscaled <= 0.0)
359 {
360 _marginHot = true;
361 }
362 }
363
Josh Lehanb3005752022-02-22 20:48:07 -0800364 setValue(value, unscaled);
Josh Lehan3e2f7582020-09-20 22:06:03 -0700365}
366
Patrick Williamsb228bc32022-07-22 19:26:56 -0500367int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
Patrick Venture863b9242018-03-08 08:29:23 -0800368{
Patrick Venture863b9242018-03-08 08:29:23 -0800369 std::string msgSensor;
James Feist1f802f52019-02-08 13:51:43 -0800370 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
Patrick Ventured0c75662018-06-12 19:03:21 -0700371
372 msg.read(msgSensor, msgData);
Patrick Venture863b9242018-03-08 08:29:23 -0800373
374 if (msgSensor == "xyz.openbmc_project.Sensor.Value")
375 {
376 auto valPropMap = msgData.find("Value");
377 if (valPropMap != msgData.end())
378 {
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400379 double value =
380 std::visit(VariantToDoubleVisitor(), valPropMap->second);
Patrick Venture863b9242018-03-08 08:29:23 -0800381
Josh Lehan3e2f7582020-09-20 22:06:03 -0700382 owner->updateValue(value, false);
Patrick Venture863b9242018-03-08 08:29:23 -0800383 }
384 }
James Feist36b7d8e2018-10-05 15:39:01 -0700385 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
386 {
387 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
388 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
389 if (criticalAlarmHigh == msgData.end() &&
390 criticalAlarmLow == msgData.end())
391 {
392 return 0;
393 }
394
395 bool asserted = false;
396 if (criticalAlarmLow != msgData.end())
397 {
James Feist1f802f52019-02-08 13:51:43 -0800398 asserted = std::get<bool>(criticalAlarmLow->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700399 }
400
401 // checking both as in theory you could de-assert one threshold and
402 // assert the other at the same moment
403 if (!asserted && criticalAlarmHigh != msgData.end())
404 {
James Feist1f802f52019-02-08 13:51:43 -0800405 asserted = std::get<bool>(criticalAlarmHigh->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700406 }
407 owner->setFailed(asserted);
408 }
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -0600409#ifdef UNC_FAILSAFE
410 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
411 {
412 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
413 if (warningAlarmHigh == msgData.end())
414 {
415 return 0;
416 }
417
418 bool asserted = false;
419 if (warningAlarmHigh != msgData.end())
420 {
421 asserted = std::get<bool>(warningAlarmHigh->second);
422 }
423 owner->setFailed(asserted);
424 }
425#endif
Alex.Song8f73ad72021-10-07 00:18:27 +0800426 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
427 {
428 auto available = msgData.find("Available");
429 if (available == msgData.end())
430 {
431 return 0;
432 }
433 bool asserted = std::get<bool>(available->second);
434 owner->setAvailable(asserted);
435 if (!asserted)
436 {
437 // A thermal controller will continue its PID calculation and not
438 // trigger a 'failsafe' when some inputs are unavailable.
439 // So, forced to clear the value here to prevent a historical
440 // value to participate in a latter PID calculation.
441 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
442 }
443 }
James Feist4b36f262020-07-07 16:56:41 -0700444 else if (msgSensor ==
445 "xyz.openbmc_project.State.Decorator.OperationalStatus")
446 {
447 auto functional = msgData.find("Functional");
448 if (functional == msgData.end())
449 {
450 return 0;
451 }
452 bool asserted = std::get<bool>(functional->second);
453 owner->setFunctional(asserted);
454 }
Patrick Venture863b9242018-03-08 08:29:23 -0800455
456 return 0;
457}
Patrick Ventured0c75662018-06-12 19:03:21 -0700458
Harvey.Wua1ae4fa2022-10-28 17:38:35 +0800459int dbusHandleSignal(sd_bus_message* msg, void* usrData,
460 [[maybe_unused]] sd_bus_error* err)
Patrick Ventured0c75662018-06-12 19:03:21 -0700461{
Patrick Williamsb228bc32022-07-22 19:26:56 -0500462 auto sdbpMsg = sdbusplus::message_t(msg);
Patrick Ventured0c75662018-06-12 19:03:21 -0700463 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
464
Patrick Venture7af157b2018-10-30 11:24:40 -0700465 return handleSensorValue(sdbpMsg, obj);
Patrick Ventured0c75662018-06-12 19:03:21 -0700466}
Patrick Venturea0764872020-08-08 07:48:43 -0700467
468} // namespace pid_control