blob: 646b1fe34414f23d6af877ead688019f9b5a4644 [file] [log] [blame]
Matt Spinler403d1f52021-02-01 15:35:25 -06001/**
2 * Copyright © 2021 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Jerry C Chen35fb3a02024-08-30 14:54:30 +080016#include "config.h"
17
Matt Spinler403d1f52021-02-01 15:35:25 -060018#include "threshold_alarm_logger.hpp"
19
Matt Spinler50bf8162021-02-01 16:24:01 -060020#include "sdbusplus.hpp"
21
Matt Spinler3efec612021-05-11 15:26:17 -050022#include <unistd.h>
Matt Spinler50bf8162021-02-01 16:24:01 -060023
Anwaar Hadi32c4fef2025-04-02 16:08:27 +000024#include <phosphor-logging/lg2.hpp>
Matt Spinler50bf8162021-02-01 16:24:01 -060025#include <xyz/openbmc_project/Logging/Entry/server.hpp>
26
Matt Spinler403d1f52021-02-01 15:35:25 -060027namespace sensor::monitor
28{
29
Matt Spinler50bf8162021-02-01 16:24:01 -060030using namespace sdbusplus::xyz::openbmc_project::Logging::server;
Matt Spinler66e75a72021-05-14 10:32:47 -050031using namespace phosphor::fan;
Matt Spinler50bf8162021-02-01 16:24:01 -060032using namespace phosphor::fan::util;
33
/** D-Bus interfaces that hold the threshold alarm properties. */
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

/** Service, path and interface used when calling the logging Create method. */
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

/** Prefix that every generated threshold error name starts with. */
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

/** Interfaces read for the sensor value and for association endpoints. */
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

/** All threshold interface names, used to filter InterfacesAdded/Removed. */
const std::vector<std::string> thresholdIfaceNames{
    warningInterface,
    criticalInterface,
    perfLossInterface,
};
49
Delphine CC Chiu99914e52024-05-21 17:38:58 +080050using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
Matt Spinler50bf8162021-02-01 16:24:01 -060051
52/**
53 * Map of threshold interfaces and alarm properties and values to error data.
54 */
55const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56 thresholdData{
57
58 {warningInterface,
59 {{"WarningAlarmHigh",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080060 {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
Matt Spinler50bf8162021-02-01 16:24:01 -060061 {false,
Delphine CC Chiu99914e52024-05-21 17:38:58 +080062 ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060063 {"WarningAlarmLow",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080064 {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
Matt Spinler50bf8162021-02-01 16:24:01 -060065 {false,
Delphine CC Chiu99914e52024-05-21 17:38:58 +080066 ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060067
68 {criticalInterface,
69 {{"CriticalAlarmHigh",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080070 {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
Matt Spinler50bf8162021-02-01 16:24:01 -060071 {false,
Delphine CC Chiu99914e52024-05-21 17:38:58 +080072 ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060073 {"CriticalAlarmLow",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080074 {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75 {false, ErrorData{"CriticalLow", "Clear",
76 Entry::Level::Informational}}}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060077
78 {perfLossInterface,
79 {{"PerfLossAlarmHigh",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080080 {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81 {false, ErrorData{"PerformanceLossHigh", "Clear",
82 Entry::Level::Informational}}}},
Matt Spinler50bf8162021-02-01 16:24:01 -060083 {"PerfLossAlarmLow",
Delphine CC Chiu99914e52024-05-21 17:38:58 +080084 {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85 {false, ErrorData{"PerformanceLossLow", "Clear",
86 Entry::Level::Informational}}}}}}};
Matt Spinler50bf8162021-02-01 16:24:01 -060087
Matt Spinler7f6946b2021-05-14 12:43:50 -050088ThresholdAlarmLogger::ThresholdAlarmLogger(
Matt Spinlerc14fd612025-08-14 09:32:13 -050089 sdbusplus::bus_t& bus, std::shared_ptr<PowerState> powerState) :
90 bus(bus), _powerState(std::move(powerState)),
Matt Spinler403d1f52021-02-01 15:35:25 -060091 warningMatch(bus,
92 "type='signal',member='PropertiesChanged',"
93 "path_namespace='/xyz/openbmc_project/sensors',"
94 "arg0='" +
95 warningInterface + "'",
96 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
97 std::placeholders::_1)),
98 criticalMatch(bus,
99 "type='signal',member='PropertiesChanged',"
100 "path_namespace='/xyz/openbmc_project/sensors',"
101 "arg0='" +
102 criticalInterface + "'",
103 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
104 std::placeholders::_1)),
105 perfLossMatch(bus,
106 "type='signal',member='PropertiesChanged',"
107 "path_namespace='/xyz/openbmc_project/sensors',"
108 "arg0='" +
109 perfLossInterface + "'",
110 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
Matt Spinlerb7a55402021-10-11 13:45:35 -0500111 std::placeholders::_1)),
112 ifacesRemovedMatch(bus,
113 "type='signal',member='InterfacesRemoved',arg0path="
114 "'/xyz/openbmc_project/sensors/'",
115 std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
Matt Spinler8ce65072022-11-03 15:15:55 -0400116 std::placeholders::_1)),
117 ifacesAddedMatch(bus,
118 "type='signal',member='InterfacesAdded',arg0path="
119 "'/xyz/openbmc_project/sensors/'",
120 std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
121 std::placeholders::_1))
Matt Spinler50bf8162021-02-01 16:24:01 -0600122{
Matt Spinler7f6946b2021-05-14 12:43:50 -0500123 _powerState->addCallback("thresholdMon",
124 std::bind(&ThresholdAlarmLogger::powerStateChanged,
125 this, std::placeholders::_1));
126
Matt Spinler50bf8162021-02-01 16:24:01 -0600127 // check for any currently asserted threshold alarms
Patrick Williamsdfddd642024-08-16 15:21:51 -0400128 std::for_each(
129 thresholdData.begin(), thresholdData.end(),
130 [this](const auto& thresholdInterface) {
131 const auto& interface = thresholdInterface.first;
132 auto objects =
133 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
134 std::for_each(objects.begin(), objects.end(),
135 [interface, this](const auto& object) {
136 const auto& path = object.first;
137 const auto& service =
138 object.second.begin()->first;
Matt Spinlercd4d31b2025-08-15 10:00:44 -0500139 this->checkThresholds(interface, path, service);
Patrick Williamsdfddd642024-08-16 15:21:51 -0400140 });
Matt Spinler50bf8162021-02-01 16:24:01 -0600141 });
142}
Matt Spinler403d1f52021-02-01 15:35:25 -0600143
Patrick Williamscb356d42022-07-22 19:26:53 -0500144void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
Matt Spinler403d1f52021-02-01 15:35:25 -0600145{
Matt Spinlerf5d3be42021-02-01 16:38:01 -0600146 std::map<std::string, std::variant<bool>> properties;
147 std::string sensorPath = msg.get_path();
148 std::string interface;
149
150 msg.read(interface, properties);
151
Matt Spinler8ce65072022-11-03 15:15:55 -0400152 checkProperties(sensorPath, interface, properties);
153}
154
155void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
156{
157 sdbusplus::message::object_path path;
158 std::vector<std::string> interfaces;
159
160 msg.read(path, interfaces);
161
162 for (const auto& interface : interfaces)
163 {
164 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
165 interface) != thresholdIfaceNames.end())
166 {
167 alarms.erase(InterfaceKey{path, interface});
168 }
169 }
170}
171
172void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
173{
174 sdbusplus::message::object_path path;
175 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
176
177 msg.read(path, interfaces);
178
179 for (const auto& [interface, properties] : interfaces)
180 {
181 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
182 interface) != thresholdIfaceNames.end())
183 {
184 checkProperties(path, interface, properties);
185 }
186 }
187}
188
189void ThresholdAlarmLogger::checkProperties(
190 const std::string& sensorPath, const std::string& interface,
191 const std::map<std::string, std::variant<bool>>& properties)
192{
Matt Spinlerf5d3be42021-02-01 16:38:01 -0600193 auto alarmProperties = thresholdData.find(interface);
194 if (alarmProperties == thresholdData.end())
195 {
196 return;
197 }
198
199 for (const auto& [propertyName, propertyValue] : properties)
200 {
201 if (alarmProperties->second.find(propertyName) !=
202 alarmProperties->second.end())
203 {
204 // If this is the first time we've seen this alarm, then
205 // assume it was off before so it doesn't create an event
206 // log for a value of false.
207
208 InterfaceKey key{sensorPath, interface};
209 if (alarms.find(key) == alarms.end())
210 {
211 alarms[key][propertyName] = false;
212 }
213
214 // Check if the value changed from what was there before.
215 auto alarmValue = std::get<bool>(propertyValue);
216 if (alarmValue != alarms[key][propertyName])
217 {
218 alarms[key][propertyName] = alarmValue;
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800219#ifndef SKIP_POWER_CHECKING
Matt Spinler66e75a72021-05-14 10:32:47 -0500220 if (_powerState->isPowerOn())
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800221#endif
Matt Spinler66e75a72021-05-14 10:32:47 -0500222 {
223 createEventLog(sensorPath, interface, propertyName,
224 alarmValue);
225 }
Matt Spinlerf5d3be42021-02-01 16:38:01 -0600226 }
227 }
228 }
Matt Spinler403d1f52021-02-01 15:35:25 -0600229}
230
Matt Spinler50bf8162021-02-01 16:24:01 -0600231void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
232 const std::string& sensorPath,
233 const std::string& service)
234{
235 auto properties = thresholdData.find(interface);
236 if (properties == thresholdData.end())
237 {
238 return;
239 }
240
241 for (const auto& [property, unused] : properties->second)
242 {
243 try
244 {
245 auto alarmValue = SDBusPlus::getProperty<bool>(
246 bus, service, sensorPath, interface, property);
247 alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
248
249 // This is just for checking alarms on startup,
250 // so only look for active alarms.
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800251#ifdef SKIP_POWER_CHECKING
252 if (alarmValue)
253#else
Matt Spinler66e75a72021-05-14 10:32:47 -0500254 if (alarmValue && _powerState->isPowerOn())
Jerry C Chen35fb3a02024-08-30 14:54:30 +0800255#endif
Matt Spinler50bf8162021-02-01 16:24:01 -0600256 {
257 createEventLog(sensorPath, interface, property, alarmValue);
258 }
259 }
Patrick Williamscb356d42022-07-22 19:26:53 -0500260 catch (const sdbusplus::exception_t& e)
Matt Spinler50bf8162021-02-01 16:24:01 -0600261 {
Matt Spinler4b515922021-10-11 14:55:50 -0500262 // Sensor daemons that get their direction from entity manager
263 // may only be putting either the high alarm or low alarm on
264 // D-Bus, not both.
Matt Spinler50bf8162021-02-01 16:24:01 -0600265 continue;
266 }
267 }
268}
269
Patrick Williamsdfddd642024-08-16 15:21:51 -0400270void ThresholdAlarmLogger::createEventLog(
271 const std::string& sensorPath, const std::string& interface,
272 const std::string& alarmProperty, bool alarmValue)
Matt Spinler50bf8162021-02-01 16:24:01 -0600273{
Matt Spinler2f182672021-02-01 16:51:38 -0600274 std::map<std::string, std::string> ad;
275
276 auto type = getSensorType(sensorPath);
277 if (skipSensorType(type))
278 {
279 return;
280 }
281
282 auto it = thresholdData.find(interface);
283 if (it == thresholdData.end())
284 {
285 return;
286 }
287
288 auto properties = it->second.find(alarmProperty);
289 if (properties == it->second.end())
290 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000291 lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
292 "ALARM_PROPERTY", alarmProperty);
Matt Spinler2f182672021-02-01 16:51:38 -0600293 return;
294 }
295
296 ad.emplace("SENSOR_NAME", sensorPath);
Matt Spinler3efec612021-05-11 15:26:17 -0500297 ad.emplace("_PID", std::to_string(getpid()));
Matt Spinler2f182672021-02-01 16:51:38 -0600298
299 try
300 {
301 auto sensorValue = SDBusPlus::getProperty<double>(
302 bus, sensorPath, valueInterface, "Value");
303
304 ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
305
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000306 lg2::info(
307 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
308 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
309 "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
Matt Spinler2f182672021-02-01 16:51:38 -0600310 }
311 catch (const DBusServiceError& e)
312 {
313 // If the sensor was just added, the Value interface for it may
314 // not be in the mapper yet. This could only happen if the sensor
315 // application was started up after this one and the value exceeded the
316 // threshold immediately.
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000317 lg2::info(
318 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
319 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
320 "ALARM_VALUE", alarmValue);
Matt Spinler2f182672021-02-01 16:51:38 -0600321 }
322
323 auto callout = getCallout(sensorPath);
324 if (!callout.empty())
325 {
326 ad.emplace("CALLOUT_INVENTORY_PATH", callout);
327 }
328
329 auto errorData = properties->second.find(alarmValue);
330
331 // Add the base error name and the sensor type (like Temperature) to the
332 // error name that's in the thresholdData name to get something like
333 // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800334 const auto& [name, status, severity] = errorData->second;
335
336 try
337 {
Patrick Williamsdfddd642024-08-16 15:21:51 -0400338 auto thresholdValue =
339 SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800340
341 ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
342
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000343 lg2::info(
344 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
345 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
346 "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800347 }
348 catch (const DBusServiceError& e)
349 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000350 lg2::info(
351 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
352 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
353 "ALARM_VALUE", alarmValue);
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800354 }
355
Matt Spinler2f182672021-02-01 16:51:38 -0600356 type.front() = toupper(type.front());
Delphine CC Chiu99914e52024-05-21 17:38:58 +0800357 std::string errorName = errorNameBase + type + name + status;
Patrick Rudolph235adf92025-02-17 08:11:30 +0100358 if (LOG_SENSOR_NAME_ON_ERROR != 0)
359 {
360 errorName += " on sensor " + getSensorName(sensorPath);
361 }
Matt Spinler2f182672021-02-01 16:51:38 -0600362
363 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
364 "Create", errorName, convertForMessage(severity), ad);
365}
366
Patrick Rudolph235adf92025-02-17 08:11:30 +0100367std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
368{
369 auto pos = sensorPath.find_last_of('/');
370 if ((sensorPath.back() == '/') || (pos == std::string::npos))
371 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000372 lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
373 "SENSOR_PATH", sensorPath);
Patrick Rudolph235adf92025-02-17 08:11:30 +0100374 return "unknown_sensor";
375 }
376
377 return sensorPath.substr(pos + 1);
378}
379
Matt Spinler2f182672021-02-01 16:51:38 -0600380std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
381{
382 auto pos = sensorPath.find_last_of('/');
383 if ((sensorPath.back() == '/') || (pos == std::string::npos))
384 {
Anwaar Hadi32c4fef2025-04-02 16:08:27 +0000385 lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
386 "SENSOR_PATH", sensorPath);
Matt Spinler2f182672021-02-01 16:51:38 -0600387 throw std::runtime_error("Invalid sensor path");
388 }
389
390 sensorPath = sensorPath.substr(0, pos);
391 return sensorPath.substr(sensorPath.find_last_of('/') + 1);
392}
393
394bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
395{
396 return (type == "utilization");
397}
398
399std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
400{
401 const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
402
403 // Different implementations handle the association to the FRU
404 // differently:
405 // * phosphor-inventory-manager uses the 'inventory' association
406 // to point to the FRU.
407 // * dbus-sensors/entity-manager uses the 'chassis' association'.
408 // * For virtual sensors, no association.
409
410 for (const auto& assocType : assocTypes)
411 {
412 auto assocPath = sensorPath + "/" + assocType;
413
414 try
415 {
416 auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
417 bus, assocPath, assocInterface, "endpoints");
418
419 if (!endpoints.empty())
420 {
421 return endpoints[0];
422 }
423 }
424 catch (const DBusServiceError& e)
425 {
426 // The association doesn't exist
427 continue;
428 }
429 }
430
431 return std::string{};
Matt Spinler50bf8162021-02-01 16:24:01 -0600432}
433
Matt Spinler66e75a72021-05-14 10:32:47 -0500434void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
435{
436 if (powerStateOn)
437 {
438 checkThresholds();
439 }
440}
441
442void ThresholdAlarmLogger::checkThresholds()
443{
Matt Spinlereee25802022-11-03 15:20:13 -0400444 std::vector<InterfaceKey> toErase;
445
Matt Spinler66e75a72021-05-14 10:32:47 -0500446 for (const auto& [interfaceKey, alarmMap] : alarms)
447 {
448 for (const auto& [propertyName, alarmValue] : alarmMap)
449 {
450 if (alarmValue)
451 {
452 const auto& sensorPath = std::get<0>(interfaceKey);
453 const auto& interface = std::get<1>(interfaceKey);
Matt Spinlereee25802022-11-03 15:20:13 -0400454 std::string service;
Matt Spinler66e75a72021-05-14 10:32:47 -0500455
Matt Spinlereee25802022-11-03 15:20:13 -0400456 try
457 {
458 // Check that the service that provides the alarm is still
459 // running, because if it died when the alarm was active
460 // there would be no indication of it unless we listened
461 // for NameOwnerChanged and tracked services, and this is
462 // easier.
463 service = SDBusPlus::getService(bus, sensorPath, interface);
464 }
465 catch (const DBusServiceError& e)
466 {
467 // No longer on D-Bus delete the alarm entry
468 toErase.emplace_back(sensorPath, interface);
469 }
470
471 if (!service.empty())
472 {
473 createEventLog(sensorPath, interface, propertyName,
474 alarmValue);
475 }
Matt Spinler66e75a72021-05-14 10:32:47 -0500476 }
477 }
478 }
Matt Spinlereee25802022-11-03 15:20:13 -0400479
480 for (const auto& e : toErase)
481 {
482 alarms.erase(e);
483 }
Matt Spinler66e75a72021-05-14 10:32:47 -0500484}
485
Matt Spinler403d1f52021-02-01 15:35:25 -0600486} // namespace sensor::monitor