monitor: Event logs for nonfunc fan sensors
This commit adds the code to create event logs calling out the fan when
fan sensors have been nonfunctional for a certain amount of time.
This functionality is configured in the JSON, and will only be enabled
if the 'fault_handling' JSON section is present. It uses the following
new JSON parameters:
nonfunc_rotor_error_delay (per fan):
This specifies how many seconds a fan sensor must be nonfunctional before
the event log will be created.
num_nonfunc_rotors_before_error (under fault_handling):
This specifies how many nonfunctional fan rotors there must be at the
same time before an event log with an error severity is created for the
rotor. When fewer rotors than this are nonfunctional, event logs with an
informational severity are created instead.
A new FanError class is used to create the event logs. It adds the
Logger output as FFDC, plus any JSON data that is passed in with the
commit() API. It uses CALLOUT_INVENTORY_PATH in the AdditionalData
property to specify the faulted fan FRU.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: I365114357580b4f38ec943a769c1ce7f695b51ab
diff --git a/monitor/system.cpp b/monitor/system.cpp
index f9a8804..c8bacca 100644
--- a/monitor/system.cpp
+++ b/monitor/system.cpp
@@ -24,6 +24,8 @@
#include "json_parser.hpp"
#endif
+#include "fan_error.hpp"
+
#include <nlohmann/json.hpp>
#include <phosphor-logging/log.hpp>
#include <sdbusplus/bus.hpp>
@@ -34,6 +36,8 @@
{
using json = nlohmann::json;
+using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
+
using namespace phosphor::logging;
System::System(Mode mode, sdbusplus::bus::bus& bus,
@@ -187,6 +191,8 @@
std::make_shared<PowerInterface>();
_powerOffRules = getPowerOffRules(jsonObj, powerInterface);
+
+ _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
#endif
}
@@ -211,4 +217,71 @@
}
}
+/**
+ * @brief Creates an event log for a faulted fan sensor.
+ *
+ * The log calls out the fan's inventory path and carries the data
+ * returned by captureSensorData(). Its severity is Informational when
+ * the number of nonfunctional sensors that no longer have an error timer
+ * running is below _numNonfuncSensorsBeforeError, and Error otherwise.
+ *
+ * @param[in] fan - The fan to call out; its name is appended to
+ *                  util::INVENTORY_PATH to form the callout path
+ * @param[in] sensor - The sensor whose name is recorded in the event log
+ */
+void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
+{
+    std::string fanPath{util::INVENTORY_PATH + fan.getName()};
+
+    getLogger().log(
+        fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
+                    sensor.name()),
+        Logger::error);
+
+    // In order to know if the event log should have a severity of error or
+    // informational, count the number of existing nonfunctional sensors and
+    // compare it to _numNonfuncSensorsBeforeError.
+    size_t nonfuncSensors = 0;
+
+    // The loop variable is deliberately not called 'fan' so that it does
+    // not shadow this function's 'fan' parameter.
+    for (const auto& systemFan : _fans)
+    {
+        for (const auto& s : systemFan->sensors())
+        {
+            // Don't count nonfunctional sensors that still have their
+            // error timer running as nonfunctional since they haven't
+            // had event logs created for those errors yet.
+            if (!s->functional() && !s->errorTimerRunning())
+            {
+                nonfuncSensors++;
+            }
+        }
+    }
+
+    // Below the threshold the fault is only informational.
+    const Severity severity =
+        (nonfuncSensors < _numNonfuncSensorsBeforeError)
+            ? Severity::Informational
+            : Severity::Error;
+
+    auto error =
+        std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
+                                   fanPath, sensor.name(), severity);
+
+    // Attach a snapshot of every sensor's state to the event log.
+    auto sensorData = captureSensorData();
+    error->commit(sensorData);
+
+    // TODO: save error so it can be committed again on a power off
+}
+
+/**
+ * @brief Builds a JSON snapshot of the sensors on every fan in _fans.
+ *
+ * The returned object has one entry per sensor under the "sensors" key,
+ * keyed by sensor name. Each entry holds the owning fan's "present"
+ * value, the sensor's "functional" and "tach" values, and a "target"
+ * value when the sensor reports that it has one.
+ *
+ * @return json - The captured sensor data
+ */
+json System::captureSensorData()
+{
+    json snapshot;
+
+    for (const auto& fan : _fans)
+    {
+        for (const auto& tach : fan->sensors())
+        {
+            // These three keys are always recorded.
+            json entry = {{"present", fan->present()},
+                          {"functional", tach->functional()},
+                          {"tach", tach->getInput()}};
+
+            // The target is only recorded for sensors that have one.
+            if (tach->hasTarget())
+            {
+                entry["target"] = tach->getTarget();
+            }
+
+            snapshot["sensors"][tach->name()] = entry;
+        }
+    }
+
+    return snapshot;
+}
+
} // namespace phosphor::fan::monitor