/**
 * Copyright © 2021 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "config.h"

#include "threshold_alarm_logger.hpp"

#include "sdbusplus.hpp"

#include <unistd.h>

#include <phosphor-logging/lg2.hpp>
#include <xyz/openbmc_project/Logging/Entry/server.hpp>

#include <cctype>

Matt Spinler403d1f52021-02-01 15:35:25 -060027namespace sensor::monitor
28{
29
Matt Spinler50bf8162021-02-01 16:24:01 -060030using namespace sdbusplus::xyz::openbmc_project::Logging::server;
Matt Spinler66e75a72021-05-14 10:32:47 -050031using namespace phosphor::fan;
Matt Spinler50bf8162021-02-01 16:24:01 -060032using namespace phosphor::fan::util;
33
Matt Spinler403d1f52021-02-01 15:35:25 -060034const std::string warningInterface =
35 "xyz.openbmc_project.Sensor.Threshold.Warning";
36const std::string criticalInterface =
37 "xyz.openbmc_project.Sensor.Threshold.Critical";
38const std::string perfLossInterface =
39 "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss";
Matt Spinler2f182672021-02-01 16:51:38 -060040constexpr auto loggingService = "xyz.openbmc_project.Logging";
41constexpr auto loggingPath = "/xyz/openbmc_project/logging";
42constexpr auto loggingCreateIface = "xyz.openbmc_project.Logging.Create";
43constexpr auto errorNameBase = "xyz.openbmc_project.Sensor.Threshold.Error.";
44constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value";
45constexpr auto assocInterface = "xyz.openbmc_project.Association";
Matt Spinler403d1f52021-02-01 15:35:25 -060046
Matt Spinler8ce65072022-11-03 15:15:55 -040047const std::vector<std::string> thresholdIfaceNames{
48 warningInterface, criticalInterface, perfLossInterface};
49
Delphine CC Chiu99914e52024-05-21 17:38:58 +080050using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
Matt Spinler50bf8162021-02-01 16:24:01 -060051
52/**
53 * Map of threshold interfaces and alarm properties and values to error data.
54 */
55const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56 thresholdData{
57
58 {warningInterface,
59 {{"WarningAlarmHigh",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080060 {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
Matt Spinler50bf8162021-02-01 16:24:01 -060061 {false,
Delphine CC Chiu99914e52024-05-21 17:38:58 +080062 ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060063 {"WarningAlarmLow",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080064 {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
Matt Spinler50bf8162021-02-01 16:24:01 -060065 {false,
Delphine CC Chiu99914e52024-05-21 17:38:58 +080066 ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060067
68 {criticalInterface,
69 {{"CriticalAlarmHigh",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080070 {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
Matt Spinler50bf8162021-02-01 16:24:01 -060071 {false,
Delphine CC Chiu99914e52024-05-21 17:38:58 +080072 ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060073 {"CriticalAlarmLow",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080074 {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75 {false, ErrorData{"CriticalLow", "Clear",
76 Entry::Level::Informational}}}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060077
78 {perfLossInterface,
79 {{"PerfLossAlarmHigh",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080080 {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81 {false, ErrorData{"PerformanceLossHigh", "Clear",
82 Entry::Level::Informational}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060083 {"PerfLossAlarmLow",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080084 {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85 {false, ErrorData{"PerformanceLossLow", "Clear",
86 Entry::Level::Informational}}}}}}};
Matt Spinler50bf8162021-02-01 16:24:01 -060087
Matt Spinler7f6946b2021-05-14 12:43:50 -050088ThresholdAlarmLogger::ThresholdAlarmLogger(
Patrick Williamscb356d42022-07-22 19:26:53 -050089 sdbusplus::bus_t& bus, sdeventplus::Event& event,
Matt Spinler7f6946b2021-05-14 12:43:50 -050090 std::shared_ptr<PowerState> powerState) :
Patrick Williamsdfddd642024-08-16 15:21:51 -040091 bus(bus), event(event), _powerState(std::move(powerState)),
Matt Spinler403d1f52021-02-01 15:35:25 -060092 warningMatch(bus,
93 "type='signal',member='PropertiesChanged',"
94 "path_namespace='/xyz/openbmc_project/sensors',"
95 "arg0='" +
96 warningInterface + "'",
97 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
98 std::placeholders::_1)),
99 criticalMatch(bus,
100 "type='signal',member='PropertiesChanged',"
101 "path_namespace='/xyz/openbmc_project/sensors',"
102 "arg0='" +
103 criticalInterface + "'",
104 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
105 std::placeholders::_1)),
106 perfLossMatch(bus,
107 "type='signal',member='PropertiesChanged',"
108 "path_namespace='/xyz/openbmc_project/sensors',"
109 "arg0='" +
110 perfLossInterface + "'",
111 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
Matt Spinlerb7a55402021-10-11 13:45:35 -0500112 std::placeholders::_1)),
113 ifacesRemovedMatch(bus,
114 "type='signal',member='InterfacesRemoved',arg0path="
115 "'/xyz/openbmc_project/sensors/'",
116 std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
Matt Spinler8ce65072022-11-03 15:15:55 -0400117 std::placeholders::_1)),
118 ifacesAddedMatch(bus,
119 "type='signal',member='InterfacesAdded',arg0path="
120 "'/xyz/openbmc_project/sensors/'",
121 std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
122 std::placeholders::_1))
Matt Spinler50bf8162021-02-01 16:24:01 -0600123{
Matt Spinler7f6946b2021-05-14 12:43:50 -0500124 _powerState->addCallback("thresholdMon",
125 std::bind(&ThresholdAlarmLogger::powerStateChanged,
126 this, std::placeholders::_1));
127
Matt Spinler50bf8162021-02-01 16:24:01 -0600128 // check for any currently asserted threshold alarms
Patrick Williamsdfddd642024-08-16 15:21:51 -0400129 std::for_each(
130 thresholdData.begin(), thresholdData.end(),
131 [this](const auto& thresholdInterface) {
132 const auto& interface = thresholdInterface.first;
133 auto objects =
134 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
135 std::for_each(objects.begin(), objects.end(),
136 [interface, this](const auto& object) {
137 const auto& path = object.first;
138 const auto& service =
139 object.second.begin()->first;
140 checkThresholds(interface, path, service);
141 });
Matt Spinler50bf8162021-02-01 16:24:01 -0600142 });
143}
Matt Spinler403d1f52021-02-01 15:35:25 -0600144
Patrick Williamscb356d42022-07-22 19:26:53 -0500145void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
Matt Spinler403d1f52021-02-01 15:35:25 -0600146{
Matt Spinlerf5d3be42021-02-01 16:38:01 -0600147 std::map<std::string, std::variant<bool>> properties;
148 std::string sensorPath = msg.get_path();
149 std::string interface;
150
151 msg.read(interface, properties);
152
Matt Spinler8ce65072022-11-03 15:15:55 -0400153 checkProperties(sensorPath, interface, properties);
154}
155
156void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
157{
158 sdbusplus::message::object_path path;
159 std::vector<std::string> interfaces;
160
161 msg.read(path, interfaces);
162
163 for (const auto& interface : interfaces)
164 {
165 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
166 interface) != thresholdIfaceNames.end())
167 {
168 alarms.erase(InterfaceKey{path, interface});
169 }
170 }
171}
172
173void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
174{
175 sdbusplus::message::object_path path;
176 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
177
178 msg.read(path, interfaces);
179
180 for (const auto& [interface, properties] : interfaces)
181 {
182 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
183 interface) != thresholdIfaceNames.end())
184 {
185 checkProperties(path, interface, properties);
186 }
187 }
188}
189
190void ThresholdAlarmLogger::checkProperties(
191 const std::string& sensorPath, const std::string& interface,
192 const std::map<std::string, std::variant<bool>>& properties)
193{
Matt Spinlerf5d3be42021-02-01 16:38:01 -0600194 auto alarmProperties = thresholdData.find(interface);
195 if (alarmProperties == thresholdData.end())
196 {
197 return;
198 }
199
200 for (const auto& [propertyName, propertyValue] : properties)
201 {
202 if (alarmProperties->second.find(propertyName) !=
203 alarmProperties->second.end())
204 {
205 // If this is the first time we've seen this alarm, then
206 // assume it was off before so it doesn't create an event
207 // log for a value of false.
208
209 InterfaceKey key{sensorPath, interface};
210 if (alarms.find(key) == alarms.end())
211 {
212 alarms[key][propertyName] = false;
213 }
214
215 // Check if the value changed from what was there before.
216 auto alarmValue = std::get<bool>(propertyValue);
217 if (alarmValue != alarms[key][propertyName])
218 {
219 alarms[key][propertyName] = alarmValue;
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800220#ifndef SKIP_POWER_CHECKING
Matt Spinler66e75a72021-05-14 10:32:47 -0500221 if (_powerState->isPowerOn())
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800222#endif
Matt Spinler66e75a72021-05-14 10:32:47 -0500223 {
224 createEventLog(sensorPath, interface, propertyName,
225 alarmValue);
226 }
Matt Spinlerf5d3be42021-02-01 16:38:01 -0600227 }
228 }
229 }
Matt Spinler403d1f52021-02-01 15:35:25 -0600230}
231
Matt Spinler50bf8162021-02-01 16:24:01 -0600232void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
233 const std::string& sensorPath,
234 const std::string& service)
235{
236 auto properties = thresholdData.find(interface);
237 if (properties == thresholdData.end())
238 {
239 return;
240 }
241
242 for (const auto& [property, unused] : properties->second)
243 {
244 try
245 {
246 auto alarmValue = SDBusPlus::getProperty<bool>(
247 bus, service, sensorPath, interface, property);
248 alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
249
250 // This is just for checking alarms on startup,
251 // so only look for active alarms.
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800252#ifdef SKIP_POWER_CHECKING
253 if (alarmValue)
254#else
Matt Spinler66e75a72021-05-14 10:32:47 -0500255 if (alarmValue && _powerState->isPowerOn())
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800256#endif
Matt Spinler50bf8162021-02-01 16:24:01 -0600257 {
258 createEventLog(sensorPath, interface, property, alarmValue);
259 }
260 }
Patrick Williamscb356d42022-07-22 19:26:53 -0500261 catch (const sdbusplus::exception_t& e)
Matt Spinler50bf8162021-02-01 16:24:01 -0600262 {
Matt Spinler4b515922021-10-11 14:55:50 -0500263 // Sensor daemons that get their direction from entity manager
264 // may only be putting either the high alarm or low alarm on
265 // D-Bus, not both.
Matt Spinler50bf8162021-02-01 16:24:01 -0600266 continue;
267 }
268 }
269}
270
Patrick Williamsdfddd642024-08-16 15:21:51 -0400271void ThresholdAlarmLogger::createEventLog(
272 const std::string& sensorPath, const std::string& interface,
273 const std::string& alarmProperty, bool alarmValue)
Matt Spinler50bf8162021-02-01 16:24:01 -0600274{
Matt Spinler2f182672021-02-01 16:51:38 -0600275 std::map<std::string, std::string> ad;
276
277 auto type = getSensorType(sensorPath);
278 if (skipSensorType(type))
279 {
280 return;
281 }
282
283 auto it = thresholdData.find(interface);
284 if (it == thresholdData.end())
285 {
286 return;
287 }
288
289 auto properties = it->second.find(alarmProperty);
290 if (properties == it->second.end())
291 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000292 lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
293 "ALARM_PROPERTY", alarmProperty);
Matt Spinler2f182672021-02-01 16:51:38 -0600294 return;
295 }
296
297 ad.emplace("SENSOR_NAME", sensorPath);
Matt Spinler3efec612021-05-11 15:26:17 -0500298 ad.emplace("_PID", std::to_string(getpid()));
Matt Spinler2f182672021-02-01 16:51:38 -0600299
300 try
301 {
302 auto sensorValue = SDBusPlus::getProperty<double>(
303 bus, sensorPath, valueInterface, "Value");
304
305 ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
306
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000307 lg2::info(
308 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
309 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
310 "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
Matt Spinler2f182672021-02-01 16:51:38 -0600311 }
312 catch (const DBusServiceError& e)
313 {
314 // If the sensor was just added, the Value interface for it may
315 // not be in the mapper yet. This could only happen if the sensor
316 // application was started up after this one and the value exceeded the
317 // threshold immediately.
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000318 lg2::info(
319 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
320 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
321 "ALARM_VALUE", alarmValue);
Matt Spinler2f182672021-02-01 16:51:38 -0600322 }
323
324 auto callout = getCallout(sensorPath);
325 if (!callout.empty())
326 {
327 ad.emplace("CALLOUT_INVENTORY_PATH", callout);
328 }
329
330 auto errorData = properties->second.find(alarmValue);
331
332 // Add the base error name and the sensor type (like Temperature) to the
333 // error name that's in the thresholdData name to get something like
334 // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800335 const auto& [name, status, severity] = errorData->second;
336
337 try
338 {
Patrick Williamsdfddd642024-08-16 15:21:51 -0400339 auto thresholdValue =
340 SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800341
342 ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
343
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000344 lg2::info(
345 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
346 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
347 "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800348 }
349 catch (const DBusServiceError& e)
350 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000351 lg2::info(
352 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
353 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
354 "ALARM_VALUE", alarmValue);
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800355 }
356
Matt Spinler2f182672021-02-01 16:51:38 -0600357 type.front() = toupper(type.front());
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800358 std::string errorName = errorNameBase + type + name + status;
Patrick Rudolph235adf92025-02-17 08:11:30 +0100359 if (LOG_SENSOR_NAME_ON_ERROR != 0)
360 {
361 errorName += " on sensor " + getSensorName(sensorPath);
362 }
Matt Spinler2f182672021-02-01 16:51:38 -0600363
364 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
365 "Create", errorName, convertForMessage(severity), ad);
366}
367
Patrick Rudolph235adf92025-02-17 08:11:30 +0100368std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
369{
370 auto pos = sensorPath.find_last_of('/');
371 if ((sensorPath.back() == '/') || (pos == std::string::npos))
372 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000373 lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
374 "SENSOR_PATH", sensorPath);
Patrick Rudolph235adf92025-02-17 08:11:30 +0100375 return "unknown_sensor";
376 }
377
378 return sensorPath.substr(pos + 1);
379}
380
Matt Spinler2f182672021-02-01 16:51:38 -0600381std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
382{
383 auto pos = sensorPath.find_last_of('/');
384 if ((sensorPath.back() == '/') || (pos == std::string::npos))
385 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000386 lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
387 "SENSOR_PATH", sensorPath);
Matt Spinler2f182672021-02-01 16:51:38 -0600388 throw std::runtime_error("Invalid sensor path");
389 }
390
391 sensorPath = sensorPath.substr(0, pos);
392 return sensorPath.substr(sensorPath.find_last_of('/') + 1);
393}
394
395bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
396{
397 return (type == "utilization");
398}
399
400std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
401{
402 const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
403
404 // Different implementations handle the association to the FRU
405 // differently:
406 // * phosphor-inventory-manager uses the 'inventory' association
407 // to point to the FRU.
408 // * dbus-sensors/entity-manager uses the 'chassis' association'.
409 // * For virtual sensors, no association.
410
411 for (const auto& assocType : assocTypes)
412 {
413 auto assocPath = sensorPath + "/" + assocType;
414
415 try
416 {
417 auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
418 bus, assocPath, assocInterface, "endpoints");
419
420 if (!endpoints.empty())
421 {
422 return endpoints[0];
423 }
424 }
425 catch (const DBusServiceError& e)
426 {
427 // The association doesn't exist
428 continue;
429 }
430 }
431
432 return std::string{};
Matt Spinler50bf8162021-02-01 16:24:01 -0600433}
434
Matt Spinler66e75a72021-05-14 10:32:47 -0500435void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
436{
437 if (powerStateOn)
438 {
439 checkThresholds();
440 }
441}
442
443void ThresholdAlarmLogger::checkThresholds()
444{
Matt Spinlereee25802022-11-03 15:20:13 -0400445 std::vector<InterfaceKey> toErase;
446
Matt Spinler66e75a72021-05-14 10:32:47 -0500447 for (const auto& [interfaceKey, alarmMap] : alarms)
448 {
449 for (const auto& [propertyName, alarmValue] : alarmMap)
450 {
451 if (alarmValue)
452 {
453 const auto& sensorPath = std::get<0>(interfaceKey);
454 const auto& interface = std::get<1>(interfaceKey);
Matt Spinlereee25802022-11-03 15:20:13 -0400455 std::string service;
Matt Spinler66e75a72021-05-14 10:32:47 -0500456
Matt Spinlereee25802022-11-03 15:20:13 -0400457 try
458 {
459 // Check that the service that provides the alarm is still
460 // running, because if it died when the alarm was active
461 // there would be no indication of it unless we listened
462 // for NameOwnerChanged and tracked services, and this is
463 // easier.
464 service = SDBusPlus::getService(bus, sensorPath, interface);
465 }
466 catch (const DBusServiceError& e)
467 {
468 // No longer on D-Bus delete the alarm entry
469 toErase.emplace_back(sensorPath, interface);
470 }
471
472 if (!service.empty())
473 {
474 createEventLog(sensorPath, interface, propertyName,
475 alarmValue);
476 }
Matt Spinler66e75a72021-05-14 10:32:47 -0500477 }
478 }
479 }
Matt Spinlereee25802022-11-03 15:20:13 -0400480
481 for (const auto& e : toErase)
482 {
483 alarms.erase(e);
484 }
Matt Spinler66e75a72021-05-14 10:32:47 -0500485}
486
Matt Spinler403d1f52021-02-01 15:35:25 -0600487} // namespace sensor::monitor