blob: 160dbde307c24b05f4b454143dbb8e3687c45f55 [file] [log] [blame]
Patrick Venture863b9242018-03-08 08:29:23 -08001/**
2 * Copyright 2017 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -060016#include "config.h"
17
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070018#include "dbuspassive.hpp"
19
Patrick Ventureaadb30d2020-08-10 09:17:11 -070020#include "dbushelper_interface.hpp"
James Feist98b704e2019-06-03 16:24:53 -070021#include "dbuspassiveredundancy.hpp"
Patrick Ventureaadb30d2020-08-10 09:17:11 -070022#include "dbusutil.hpp"
James Zheng6df8bb52024-11-27 23:38:47 +000023#include "failsafeloggers/builder.hpp"
24#include "failsafeloggers/failsafe_logger_utility.hpp"
James Feist0c8223b2019-05-08 15:33:33 -070025#include "util.hpp"
Patrick Ventureda4a5dd2018-08-31 09:42:48 -070026
Patrick Venturea83a3ec2020-08-04 09:52:05 -070027#include <sdbusplus/bus.hpp>
28
Patrick Venture863b9242018-03-08 08:29:23 -080029#include <chrono>
30#include <cmath>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070031#include <memory>
Patrick Venture863b9242018-03-08 08:29:23 -080032#include <mutex>
Patrick Venture0ef1faf2018-06-13 12:50:53 -070033#include <string>
James Feist1f802f52019-02-08 13:51:43 -080034#include <variant>
Patrick Venture863b9242018-03-08 08:29:23 -080035
James Zheng6df8bb52024-11-27 23:38:47 +000036#include "failsafeloggers/failsafe_logger.cpp"
37
Patrick Venturea0764872020-08-08 07:48:43 -070038namespace pid_control
39{
40
Patrick Venture563a3562018-10-30 09:31:26 -070041std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050042 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070043 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
James Feist98b704e2019-06-03 16:24:53 -070044 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
Patrick Venture0ef1faf2018-06-13 12:50:53 -070045{
46 if (helper == nullptr)
47 {
48 return nullptr;
49 }
Patrick Venture7af157b2018-10-30 11:24:40 -070050 if (!validType(type))
Patrick Venture0ef1faf2018-06-13 12:50:53 -070051 {
52 return nullptr;
53 }
54
Patrick Venture863b9242018-03-08 08:29:23 -080055 /* Need to get the scale and initial value */
Patrick Venture863b9242018-03-08 08:29:23 -080056 /* service == busname */
Harvey.Wuf2efcbb2022-02-09 10:24:30 +080057 std::string path;
58 if (info->readPath.empty())
59 {
60 path = getSensorPath(type, id);
61 }
62 else
63 {
64 path = info->readPath;
65 }
Patrick Venture34ddc902018-10-30 11:05:17 -070066
Patrick Venture1df9e872020-10-08 15:35:01 -070067 SensorProperties settings;
Patrick Venturef8cb4642018-10-30 12:02:53 -070068 bool failed;
Patrick Venture863b9242018-03-08 08:29:23 -080069
Patrick Venturef8cb4642018-10-30 12:02:53 -070070 try
71 {
Patrick Venture9b936922020-08-10 11:28:39 -070072 std::string service = helper->getService(sensorintf, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070073
Patrick Venture9b936922020-08-10 11:28:39 -070074 helper->getProperties(service, path, &settings);
75 failed = helper->thresholdsAsserted(service, path);
Patrick Venturef8cb4642018-10-30 12:02:53 -070076 }
77 catch (const std::exception& e)
78 {
79 return nullptr;
80 }
81
Patrick Venture6b9f5992019-09-10 09:18:28 -070082 /* if these values are zero, they're ignored. */
83 if (info->ignoreDbusMinMax)
84 {
85 settings.min = 0;
86 settings.max = 0;
87 }
88
Alex.Song8f73ad72021-10-07 00:18:27 +080089 settings.unavailableAsFailed = info->unavailableAsFailed;
90
Patrick Venture8729eb92020-08-10 10:38:44 -070091 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
92 settings, failed, path, redundancy);
Patrick Venturef8cb4642018-10-30 12:02:53 -070093}
94
James Feist98b704e2019-06-03 16:24:53 -070095DbusPassive::DbusPassive(
Patrick Williamsb228bc32022-07-22 19:26:56 -050096 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
Patrick Venture8729eb92020-08-10 10:38:44 -070097 std::unique_ptr<DbusHelperInterface> helper,
Patrick Venture1df9e872020-10-08 15:35:01 -070098 const SensorProperties& settings, bool failed, const std::string& path,
James Feist98b704e2019-06-03 16:24:53 -070099 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400100 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
101 _id(id), _helper(std::move(helper)), _failed(failed), path(path),
James Feist98b704e2019-06-03 16:24:53 -0700102 redundancy(redundancy)
103
Patrick Venturef8cb4642018-10-30 12:02:53 -0700104{
Patrick Venture863b9242018-03-08 08:29:23 -0800105 _scale = settings.scale;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700106 _min = settings.min * std::pow(10.0, _scale);
107 _max = settings.max * std::pow(10.0, _scale);
Alex.Song8f73ad72021-10-07 00:18:27 +0800108 _available = settings.available;
109 _unavailableAsFailed = settings.unavailableAsFailed;
Josh Lehan3e2f7582020-09-20 22:06:03 -0700110
111 // Cache this type knowledge, to avoid repeated string comparison
112 _typeMargin = (type == "margin");
Alex.Song8f73ad72021-10-07 00:18:27 +0800113 _typeFan = (type == "fan");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700114
115 // Force value to be stored, otherwise member would be uninitialized
116 updateValue(settings.value, true);
Patrick Venture863b9242018-03-08 08:29:23 -0800117}
118
119ReadReturn DbusPassive::read(void)
120{
121 std::lock_guard<std::mutex> guard(_lock);
122
Josh Lehanb3005752022-02-22 20:48:07 -0800123 ReadReturn r = {_value, _updated, _unscaled};
Patrick Venture863b9242018-03-08 08:29:23 -0800124
125 return r;
126}
127
Josh Lehanb3005752022-02-22 20:48:07 -0800128void DbusPassive::setValue(double value, double unscaled)
Patrick Venture863b9242018-03-08 08:29:23 -0800129{
130 std::lock_guard<std::mutex> guard(_lock);
131
132 _value = value;
Josh Lehanb3005752022-02-22 20:48:07 -0800133 _unscaled = unscaled;
Patrick Venture863b9242018-03-08 08:29:23 -0800134 _updated = std::chrono::high_resolution_clock::now();
135}
136
Josh Lehanb3005752022-02-22 20:48:07 -0800137void DbusPassive::setValue(double value)
138{
139 // First param is scaled, second param is unscaled, assume same here
140 setValue(value, value);
141}
142
James Feist36b7d8e2018-10-05 15:39:01 -0700143bool DbusPassive::getFailed(void) const
144{
James Feist98b704e2019-06-03 16:24:53 -0700145 if (redundancy)
146 {
147 const std::set<std::string>& failures = redundancy->getFailed();
148 if (failures.find(path) != failures.end())
149 {
James Zheng6df8bb52024-11-27 23:38:47 +0000150 outputFailsafeLogWithSensor(_id, true, _id,
151 "The sensor path is marked redundant.");
James Feist98b704e2019-06-03 16:24:53 -0700152 return true;
153 }
154 }
James Feist4b36f262020-07-07 16:56:41 -0700155
Alex.Song8f73ad72021-10-07 00:18:27 +0800156 /*
157 * Unavailable thermal sensors, who are not present or
158 * power-state-not-matching, should not trigger the failSafe mode. For
159 * example, when a system stays at a powered-off state, its CPU Temp
160 * sensors will be unavailable, these unavailable sensors should not be
161 * treated as failed and trigger failSafe.
162 * This is important for systems whose Fans are always on.
163 */
164 if (!_typeFan && !_available && !_unavailableAsFailed)
165 {
166 return false;
167 }
168
Josh Lehan3e2f7582020-09-20 22:06:03 -0700169 // If a reading has came in,
170 // but its value bad in some way (determined by sensor type),
171 // indicate this sensor has failed,
172 // until another value comes in that is no longer bad.
173 // This is different from the overall _failed flag,
174 // which is set and cleared by other causes.
175 if (_badReading)
176 {
James Zheng6df8bb52024-11-27 23:38:47 +0000177 outputFailsafeLogWithSensor(_id, true, _id,
178 "The sensor has bad readings.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700179 return true;
180 }
181
182 // If a reading has came in, and it is not a bad reading,
183 // but it indicates there is no more thermal margin left,
184 // that is bad, something is wrong with the PID loops,
185 // they are not cooling the system, enable failsafe mode also.
186 if (_marginHot)
187 {
James Zheng6df8bb52024-11-27 23:38:47 +0000188 outputFailsafeLogWithSensor(_id, true, _id,
189 "The sensor has no thermal margin left.");
Josh Lehan3e2f7582020-09-20 22:06:03 -0700190 return true;
191 }
192
James Zheng6df8bb52024-11-27 23:38:47 +0000193 if (_failed)
194 {
195 outputFailsafeLogWithSensor(
196 _id, true, _id, "The sensor has failed with a critical issue.");
197 return true;
198 }
199
200 if (!_available)
201 {
202 outputFailsafeLogWithSensor(_id, true, _id,
203 "The sensor is unavailable.");
204 return true;
205 }
206
207 if (!_functional)
208 {
209 outputFailsafeLogWithSensor(_id, true, _id,
210 "The sensor is not functional.");
211 return true;
212 }
213
214 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
215
216 return false;
James Feist36b7d8e2018-10-05 15:39:01 -0700217}
218
219void DbusPassive::setFailed(bool value)
220{
221 _failed = value;
222}
223
James Feist4b36f262020-07-07 16:56:41 -0700224void DbusPassive::setFunctional(bool value)
225{
226 _functional = value;
227}
228
Alex.Song8f73ad72021-10-07 00:18:27 +0800229void DbusPassive::setAvailable(bool value)
230{
231 _available = value;
232}
233
Patrick Venture863b9242018-03-08 08:29:23 -0800234int64_t DbusPassive::getScale(void)
235{
236 return _scale;
237}
238
Patrick Venture563a3562018-10-30 09:31:26 -0700239std::string DbusPassive::getID(void)
Patrick Venture863b9242018-03-08 08:29:23 -0800240{
241 return _id;
242}
243
James Feist75eb7692019-02-25 12:50:02 -0800244double DbusPassive::getMax(void)
245{
246 return _max;
247}
248
249double DbusPassive::getMin(void)
250{
251 return _min;
252}
253
Josh Lehan3e2f7582020-09-20 22:06:03 -0700254void DbusPassive::updateValue(double value, bool force)
255{
256 _badReading = false;
257
258 // Do not let a NAN, or other floating-point oddity, be used to update
259 // the value, as that indicates the sensor has no valid reading.
260 if (!(std::isfinite(value)))
261 {
262 _badReading = true;
263
264 // Do not continue with a bad reading, unless caller forcing
265 if (!force)
266 {
267 return;
268 }
269 }
270
271 value *= std::pow(10.0, _scale);
272
273 auto unscaled = value;
274 scaleSensorReading(_min, _max, value);
275
276 if (_typeMargin)
277 {
278 _marginHot = false;
279
280 // Unlike an absolute temperature sensor,
281 // where 0 degrees C is a good reading,
282 // a value received of 0 (or negative) margin is worrisome,
283 // and should be flagged.
284 // Either it indicates margin not calculated properly,
285 // or somebody forgot to set the margin-zero setpoint,
286 // or the system is really overheating that much.
287 // This is a different condition from _failed
288 // and _badReading, so it merits its own flag.
289 // The sensor has not failed, the reading is good, but the zone
290 // still needs to know that it should go to failsafe mode.
291 if (unscaled <= 0.0)
292 {
293 _marginHot = true;
294 }
295 }
296
Josh Lehanb3005752022-02-22 20:48:07 -0800297 setValue(value, unscaled);
Josh Lehan3e2f7582020-09-20 22:06:03 -0700298}
299
Patrick Williamsb228bc32022-07-22 19:26:56 -0500300int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
Patrick Venture863b9242018-03-08 08:29:23 -0800301{
Patrick Venture863b9242018-03-08 08:29:23 -0800302 std::string msgSensor;
James Feist1f802f52019-02-08 13:51:43 -0800303 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
Patrick Ventured0c75662018-06-12 19:03:21 -0700304
305 msg.read(msgSensor, msgData);
Patrick Venture863b9242018-03-08 08:29:23 -0800306
307 if (msgSensor == "xyz.openbmc_project.Sensor.Value")
308 {
309 auto valPropMap = msgData.find("Value");
310 if (valPropMap != msgData.end())
311 {
Patrick Williamsbd63bca2024-08-16 15:21:10 -0400312 double value =
313 std::visit(VariantToDoubleVisitor(), valPropMap->second);
Patrick Venture863b9242018-03-08 08:29:23 -0800314
Josh Lehan3e2f7582020-09-20 22:06:03 -0700315 owner->updateValue(value, false);
Patrick Venture863b9242018-03-08 08:29:23 -0800316 }
317 }
James Feist36b7d8e2018-10-05 15:39:01 -0700318 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
319 {
320 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
321 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
322 if (criticalAlarmHigh == msgData.end() &&
323 criticalAlarmLow == msgData.end())
324 {
325 return 0;
326 }
327
328 bool asserted = false;
329 if (criticalAlarmLow != msgData.end())
330 {
James Feist1f802f52019-02-08 13:51:43 -0800331 asserted = std::get<bool>(criticalAlarmLow->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700332 }
333
334 // checking both as in theory you could de-assert one threshold and
335 // assert the other at the same moment
336 if (!asserted && criticalAlarmHigh != msgData.end())
337 {
James Feist1f802f52019-02-08 13:51:43 -0800338 asserted = std::get<bool>(criticalAlarmHigh->second);
James Feist36b7d8e2018-10-05 15:39:01 -0700339 }
340 owner->setFailed(asserted);
341 }
Jonico Eustaquioaf97d8e2024-01-02 14:35:07 -0600342#ifdef UNC_FAILSAFE
343 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
344 {
345 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
346 if (warningAlarmHigh == msgData.end())
347 {
348 return 0;
349 }
350
351 bool asserted = false;
352 if (warningAlarmHigh != msgData.end())
353 {
354 asserted = std::get<bool>(warningAlarmHigh->second);
355 }
356 owner->setFailed(asserted);
357 }
358#endif
Alex.Song8f73ad72021-10-07 00:18:27 +0800359 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
360 {
361 auto available = msgData.find("Available");
362 if (available == msgData.end())
363 {
364 return 0;
365 }
366 bool asserted = std::get<bool>(available->second);
367 owner->setAvailable(asserted);
368 if (!asserted)
369 {
370 // A thermal controller will continue its PID calculation and not
371 // trigger a 'failsafe' when some inputs are unavailable.
372 // So, forced to clear the value here to prevent a historical
373 // value to participate in a latter PID calculation.
374 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
375 }
376 }
James Feist4b36f262020-07-07 16:56:41 -0700377 else if (msgSensor ==
378 "xyz.openbmc_project.State.Decorator.OperationalStatus")
379 {
380 auto functional = msgData.find("Functional");
381 if (functional == msgData.end())
382 {
383 return 0;
384 }
385 bool asserted = std::get<bool>(functional->second);
386 owner->setFunctional(asserted);
387 }
Patrick Venture863b9242018-03-08 08:29:23 -0800388
389 return 0;
390}
Patrick Ventured0c75662018-06-12 19:03:21 -0700391
Harvey.Wua1ae4fa2022-10-28 17:38:35 +0800392int dbusHandleSignal(sd_bus_message* msg, void* usrData,
393 [[maybe_unused]] sd_bus_error* err)
Patrick Ventured0c75662018-06-12 19:03:21 -0700394{
Patrick Williamsb228bc32022-07-22 19:26:56 -0500395 auto sdbpMsg = sdbusplus::message_t(msg);
Patrick Ventured0c75662018-06-12 19:03:21 -0700396 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
397
Patrick Venture7af157b2018-10-30 11:24:40 -0700398 return handleSensorValue(sdbpMsg, obj);
Patrick Ventured0c75662018-06-12 19:03:21 -0700399}
Patrick Venturea0764872020-08-08 07:48:43 -0700400
401} // namespace pid_control