platform-mc: Add structured logging for threshold events
Create structured threshold log events upon creation
and alert triggers. These will also clear previously
raised logs.
Tested:
Bump the fan speed to low and let the CPU heat up.
Ensure we emit logs on Upper Critical and Upper Hard Shutdown.
```
root@sled325945102-oob:~# busctl introspect -l xyz.openbmc_project.Logging /xyz/openbmc_project/logging/entry/19 xyz.openbmc_project.Logging.Entry | grep "AdditionalData\|Message\|Resolved"
.AdditionalData property a{ss} 8 "READING_VALUE" "90.125" "SENSOR_NAME" "/xyz/openbmc_project/sensors/temperature/Sentinel_Dome_Slot_1_MB_CPU_TEMP_C" "THRESHOLD_VALUE" "90.0" "UNITS" "xyz.openbmc_project.Sensor.Value.Unit.DegreesC" <snip>
.Message property s "xyz.openbmc_project.Sensor.Threshold.ReadingAboveUpperCriticalThreshold" emits-change writable
.Resolved property b false emits-change writable
root@sled325945102-oob:~# busctl introspect -l xyz.openbmc_project.Logging /xyz/openbmc_project/logging/entry/21 xyz.openbmc_project.Logging.Entry | grep "AdditionalData\|Message\|Resolved"
.AdditionalData property a{ss} 8 "READING_VALUE" "95.125" "SENSOR_NAME" "/xyz/openbmc_project/sensors/temperature/Sentinel_Dome_Slot_1_MB_CPU_TEMP_C" "THRESHOLD_VALUE" "95.0" "UNITS" "xyz.openbmc_project.Sensor.Value.Unit.DegreesC" <snip>
.Message property s "xyz.openbmc_project.Sensor.Threshold.ReadingAboveUpperHardShutdownThreshold" emits-change writable
.Resolved property b false emits-change writable
```
Bump the fan speed back to high and let the CPU cool down.
Ensure we emit the SensorReadingNormalRange and that the
existing logs are marked as resolved.
```
root@sled325945102-oob:~# busctl introspect -l xyz.openbmc_project.Logging /xyz/openbmc_project/logging/entry/22 xyz.openbmc_project.Logging.Entry | grep "AdditionalData\|Message\|Resolved"
.AdditionalData property a{ss} 7 "READING_VALUE" "85.75" "SENSOR_NAME" "/xyz/openbmc_project/sensors/temperature/Sentinel_Dome_Slot_1_MB_CPU_TEMP_C" "UNITS" "xyz.openbmc_project.Sensor.Value.Unit.DegreesC" <snip>
.Message property s "xyz.openbmc_project.Sensor.Threshold.SensorReadingNormalRange" emits-change writable
.Resolved property b false emits-change writable
root@sled325945102-oob:~# busctl introspect -l xyz.openbmc_project.Logging /xyz/openbmc_project/logging/entry/19 xyz.openbmc_project.Logging.Entry | grep "AdditionalData\|Message\|Resolved"
.AdditionalData property a{ss} 8 "READING_VALUE" "90.125" "SENSOR_NAME" "/xyz/openbmc_project/sensors/temperature/Sentinel_Dome_Slot_1_MB_CPU_TEMP_C" "THRESHOLD_VALUE" "90.0" "UNITS" "xyz.openbmc_project.Sensor.Value.Unit.DegreesC" <snip>
.Message property s "xyz.openbmc_project.Sensor.Threshold.ReadingAboveUpperCriticalThreshold" emits-change writable
.Resolved property b true emits-change writable
root@sled325945102-oob:~# busctl introspect -l xyz.openbmc_project.Logging /xyz/openbmc_project/logging/entry/21 xyz.openbmc_project.Logging.Entry | grep "AdditionalData\|Message\|Resolved"
.AdditionalData property a{ss} 8 "READING_VALUE" "95.125" "SENSOR_NAME" "/xyz/openbmc_project/sensors/temperature/Sentinel_Dome_Slot_1_MB_CPU_TEMP_C" "THRESHOLD_VALUE" "95.0" "UNITS" "xyz.openbmc_project.Sensor.Value.Unit.DegreesC" <snip>
.Message property s "xyz.openbmc_project.Sensor.Threshold.ReadingAboveUpperHardShutdownThreshold" emits-change writable
.Resolved property b true emits-change writable
```
Change-Id: I72623f7f929a4d7b17f87d102d34747ff6cab3ae
Signed-off-by: Amithash Prasad <amithash@meta.com>
diff --git a/platform-mc/numeric_sensor.cpp b/platform-mc/numeric_sensor.cpp
index 7536fe4..070dbbb 100644
--- a/platform-mc/numeric_sensor.cpp
+++ b/platform-mc/numeric_sensor.cpp
@@ -5,6 +5,11 @@
#include <libpldm/platform.h>
+#include <phosphor-logging/commit.hpp>
+#include <sdbusplus/asio/property.hpp>
+#include <xyz/openbmc_project/Logging/Entry/client.hpp>
+#include <xyz/openbmc_project/Sensor/Threshold/event.hpp>
+
#include <limits>
#include <regex>
@@ -14,6 +19,9 @@
{
namespace platform_mc
{
+
+// This allows code to cleanly iterate through all supported
+// threshold levels and directions. allThresholdLevels lists the
+// three supported levels: warning, critical and hard shutdown.
static const std::array<pldm::utils::Level, 3> allThresholdLevels = {
pldm::utils::Level::WARNING, pldm::utils::Level::CRITICAL,
pldm::utils::Level::HARDSHUTDOWN};
@@ -785,6 +793,20 @@
}
return alarm;
}
+
+// Reports whether any threshold alarm is currently set, checking every
+// supported level/direction combination.
+bool NumericSensor::hasThresholdAlarm()
+{
+    bool anyAsserted = false;
+    for (auto thresholdLevel : allThresholdLevels)
+    {
+        for (auto thresholdDirection : allThresholdDirections)
+        {
+            // Every pair is queried; a set alarm is simply remembered.
+            if (getThresholdAlarm(thresholdLevel, thresholdDirection))
+            {
+                anyAsserted = true;
+            }
+        }
+    }
+    return anyAsserted;
+}
+
void NumericSensor::setWarningThresholdAlarm(pldm::utils::Direction direction,
double value, bool newAlarm)
{
@@ -902,9 +924,136 @@
default:
return PLDM_ERROR;
}
+ if (newAlarm)
+ {
+ createThresholdLog(level, direction, value);
+ }
+ else
+ {
+ auto& log = assertedLog[{level, direction}];
+ if (log.has_value())
+ {
+ clearThresholdLog(log);
+ }
+ // If all alarms have cleared. Log normal range.
+ if (!hasThresholdAlarm())
+ {
+ createNormalRangeLog(value);
+ }
+ }
return PLDM_SUCCESS;
}
+/** @brief Resolve a previously raised threshold log and forget it.
+ *
+ *  Does nothing when @p log holds no value. When it holds an empty path
+ *  (what commit() returns for a filtered log) there is no entry to
+ *  resolve, so the optional is only reset. A std::exception thrown while
+ *  resolving is logged and not propagated; the optional is reset either
+ *  way.
+ *
+ *  @param[in,out] log - D-Bus path of the log entry to resolve.
+ */
+void NumericSensor::clearThresholdLog(
+    std::optional<sdbusplus::message::object_path>& log)
+{
+    if (!log)
+    {
+        return;
+    }
+    try
+    {
+        /* empty log entries are returned by commit() if the
+           requested log is being filtered out */
+        if (!log->str.empty())
+        {
+            lg2::resolve(*log);
+        }
+    }
+    // The handler only reads the exception, so catch it by const reference.
+    catch (const std::exception& ec)
+    {
+        lg2::error("Error trying to resolve: {LOG} : {ERROR}", "LOG", log->str,
+                   "ERROR", ec);
+    }
+    log.reset();
+}
+
+/** @brief helper function template to create a threshold log
+ *
+ * @tparam[in] errorObj - The error object of the log we want to create.
+ * @param[in] sensorObjPath - The object path of the sensor.
+ * @param[in] value - The current value of the sensor.
+ * @param[in] sensorUnit - The units of the sensor.
+ * @param[in] threshold - The threshold value.
+ *
+ * @return optional object holding the object path of the created
+ * log entry. If the log entry is being filtered, we would return
+ * a optional holding an empty string in the object path. This ensures
+ * we follow our state machine properly even if the log is being filtered.
+ */
+template <typename errorObj>
+auto logThresholdHelper(const std::string& sensorObjPath, double value,
+ SensorUnit sensorUnit, double threshold)
+ -> std::optional<sdbusplus::message::object_path>
+{
+ return lg2::commit(
+ errorObj("SENSOR_NAME", sensorObjPath, "READING_VALUE", value, "UNITS",
+ sensorUnit, "THRESHOLD_VALUE", threshold));
+}
+
+/** @brief Raise the threshold log matching the given level and direction.
+ *
+ *  The object path returned by the log helper is stored in assertedLog
+ *  under the {level, direction} key. A std::exception raised during the
+ *  lookup or the commit is logged and not propagated.
+ *
+ *  @param[in] level - The level of the threshold.
+ *  @param[in] direction - The direction of the threshold.
+ *  @param[in] value - The current sensor value.
+ */
+void NumericSensor::createThresholdLog(
+    pldm::utils::Level level, pldm::utils::Direction direction, double value)
+{
+    namespace Errors =
+        sdbusplus::error::xyz::openbmc_project::sensor::Threshold;
+    /* Map from threshold level+direction to an instantiation of
+     * logThresholdHelper with the required error object class */
+    static const std::map<
+        std::tuple<pldm::utils::Level, pldm::utils::Direction>,
+        std::function<std::optional<sdbusplus::message::object_path>(
+            const std::string&, double, SensorUnit, double)>>
+        thresholdEventMap = {
+            {{pldm::utils::Level::WARNING, pldm::utils::Direction::HIGH},
+             &logThresholdHelper<Errors::ReadingAboveUpperWarningThreshold>},
+            {{pldm::utils::Level::WARNING, pldm::utils::Direction::LOW},
+             &logThresholdHelper<Errors::ReadingBelowLowerWarningThreshold>},
+            {{pldm::utils::Level::CRITICAL, pldm::utils::Direction::HIGH},
+             &logThresholdHelper<Errors::ReadingAboveUpperCriticalThreshold>},
+            {{pldm::utils::Level::CRITICAL, pldm::utils::Direction::LOW},
+             &logThresholdHelper<Errors::ReadingBelowLowerCriticalThreshold>},
+            {{pldm::utils::Level::HARDSHUTDOWN, pldm::utils::Direction::HIGH},
+             &logThresholdHelper<
+                 Errors::ReadingAboveUpperHardShutdownThreshold>},
+            {{pldm::utils::Level::HARDSHUTDOWN, pldm::utils::Direction::LOW},
+             &logThresholdHelper<
+                 Errors::ReadingBelowLowerHardShutdownThreshold>},
+        };
+
+    std::string sensorObjPath = sensorNameSpace + sensorName;
+    double threshold = getThreshold(level, direction);
+    try
+    {
+        // Bind by reference: the map is static, so no need to copy the
+        // std::function on every call. at() throws for an unmapped key.
+        const auto& helper = thresholdEventMap.at({level, direction});
+        assertedLog[{level, direction}] =
+            helper(sensorObjPath, value, sensorUnit, threshold);
+    }
+    // The handler only reads the exception, so catch it by const reference.
+    catch (const std::exception& ec)
+    {
+        lg2::error(
+            "Unable to create threshold log entry for {OBJPATH}: {ERROR}",
+            "OBJPATH", sensorObjPath, "ERROR", ec);
+    }
+}
+
+/** @brief Commit a SensorReadingNormalRange event for this sensor.
+ *
+ *  A std::exception raised while committing is logged and not propagated.
+ *
+ *  @param[in] value - The current sensor value in normal range.
+ */
+void NumericSensor::createNormalRangeLog(double value)
+{
+    namespace Events =
+        sdbusplus::event::xyz::openbmc_project::sensor::Threshold;
+    std::string objPath = sensorNameSpace + sensorName;
+    try
+    {
+        lg2::commit(Events::SensorReadingNormalRange(
+            "SENSOR_NAME", objPath, "READING_VALUE", value, "UNITS",
+            sensorUnit));
+    }
+    // The handler only reads the exception, so catch it by const reference.
+    catch (const std::exception& ec)
+    {
+        lg2::error(
+            "Unable to create SensorReadingNormalRange log entry for {OBJPATH}: {ERROR}",
+            "OBJPATH", objPath, "ERROR", ec);
+    }
+}
+
void NumericSensor::updateThresholds()
{
double value = std::numeric_limits<double>::quiet_NaN();
diff --git a/platform-mc/numeric_sensor.hpp b/platform-mc/numeric_sensor.hpp
index b09bcd4..3d115b7 100644
--- a/platform-mc/numeric_sensor.hpp
+++ b/platform-mc/numeric_sensor.hpp
@@ -294,6 +294,12 @@
return false;
}
+ /** @brief Checks whether any threshold alarm is currently set
+ *
+ * @return true if at least one threshold alarm is set, false otherwise
+ */
+ bool hasThresholdAlarm();
+
/* @brief raises the alarm on the warning threshold
*
* @param[in] direction - The threshold direction (HIGH/LOW)
@@ -381,6 +387,31 @@
private:
/**
+ * @brief resolve and clear a log entry
+ *
+ * @param[in,out] log - dbus path to log entry. The log will be resolved
+ * and the optional reset.
+ */
+ void clearThresholdLog(std::optional<sdbusplus::message::object_path>& log);
+
+ /** @brief create a log entry stating that all sensor alarms have cleared
+ * and the sensor is now operating in the normal operating range.
+ *
+ * @param[in] value - The current sensor value in normal range.
+ */
+ void createNormalRangeLog(double value);
+
+ /**
+ * @brief Create a threshold log for the given level/direction tuple.
+ *
+ * The path of the created log entry is remembered in assertedLog under
+ * the same level/direction tuple.
+ *
+ * @param[in] level - The level of the threshold.
+ * @param[in] direction - The direction of the threshold.
+ * @param[in] value - The current sensor value.
+ */
+ void createThresholdLog(pldm::utils::Level level,
+ pldm::utils::Direction direction, double value);
+
+ /**
* @brief Check sensor reading if any threshold has been crossed and update
* Threshold interfaces accordingly
*/
@@ -431,6 +462,12 @@
/** @brief A power-of-10 multiplier for baseUnit */
int8_t baseUnitModifier;
bool useMetricInterface = false;
+
+ /** @brief An internal mapping from each threshold (level, direction)
+ * to the log entry raised for it. The optional holds no value once the
+ * log has been cleared; it may hold an empty path when the log entry
+ * was filtered out at creation. */
+ std::map<std::tuple<pldm::utils::Level, pldm::utils::Direction>,
+ std::optional<sdbusplus::message::object_path>>
+ assertedLog;
};
} // namespace platform_mc
} // namespace pldm