sensor: Add UPDATE_FUNCTIONAL_ON_FAIL and its behavior
Add the build switch UPDATE_FUNCTIONAL_ON_FAIL. When enabled, sensor
read failures will not exit the mainloop. Instead, mainloop will update
the Functional property and skip the read of that sensor.
This will skip the "Remove RCs" check during value interface creation in
MainLoop::getObject. However, it will perform the "Remove RCs" checks
during MainLoop::read.
Tested: I was able to use busctl to read the Functional property of a
custom driver to test with UPDATE_FUNCTIONAL_ON_FAIL defined.
1. Negative values were reported, Functional was true
2. Sensor reporting errors had stale values, Functional was set to false
Resolves: openbmc/phosphor-hwmon#10
Signed-off-by: Brandon Kim <brandonkim@google.com>
Change-Id: I0984dad12250e9587ec36de2f9212de0b0e1cda6
diff --git a/configure.ac b/configure.ac
index c189114..289f34a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -161,8 +161,13 @@
)
# When a sensor read fails, set the Value on dbus with -errno.
+# It will also skip the "Remove RCs" check.
+# Incompatible with update-functional-on-fail.
AC_ARG_ENABLE([negative-errno-on-fail],
- AS_HELP_STRING([--enable-negative-errno-on-fail], [Set sensor value to -errno on read failures])
+ AS_HELP_STRING(
+ [--enable-negative-errno-on-fail],
+ [Set sensor value to -errno on read failures. Incompatible with update-functional-on-fail]
+ )
)
AC_ARG_VAR(NEGATIVE_ERRNO_ON_FAIL, [Set sensor value to -errno on read failures])
@@ -171,10 +176,40 @@
[NEGATIVE_ERRNO_ON_FAIL="yes"]
AC_DEFINE_UNQUOTED(
[NEGATIVE_ERRNO_ON_FAIL],
- ["$NEGATIVE_ERRNO_ON_FAIL"], [Set sensor value to -errno on read failures]
+ ["$NEGATIVE_ERRNO_ON_FAIL"],
+ [Set sensor value to -errno on read failures]
)
)
+# When a sensor read fails, update the OperationalState interface's Functional property.
+# This will mark the sensor as not functional and skip reading from that sensor.
+# It will skip the "Remove RCs" check during value interface creation in MainLoop::getObject.
+# However, it will perform the "Remove RCs" checks during MainLoop::read.
+# Incompatible with negative-errno-on-fail.
+AC_ARG_ENABLE([update-functional-on-fail],
+ AS_HELP_STRING(
+ [--enable-update-functional-on-fail],
+ [Update functional property on read failures. Incompatible with negative-errno-on-fail]
+ )
+)
+
+AC_ARG_VAR(UPDATE_FUNCTIONAL_ON_FAIL, [Update functional property on read failures])
+AS_IF(
+ [test "x$enable_update_functional_on_fail" == "xyes"],
+ [UPDATE_FUNCTIONAL_ON_FAIL="yes"]
+ AC_DEFINE_UNQUOTED(
+ [UPDATE_FUNCTIONAL_ON_FAIL],
+ ["$UPDATE_FUNCTIONAL_ON_FAIL"],
+ [Update functional property on sensor read failures]
+ )
+)
+
+AS_IF([test "x$enable_negative_errno_on_fail" == "xyes"], [
+ AS_IF([test "x$enable_update_functional_on_fail" == "xyes"], [
+ AC_MSG_ERROR([Invalid configuration enabling both negative-errno-on-fail and update-functional-on-fail.])
+ ])
+])
+
AC_ARG_VAR(BUSNAME_PREFIX, [The DBus busname prefix.])
AC_ARG_VAR(SENSOR_ROOT, [The DBus sensors namespace root.])
AS_IF(
diff --git a/mainloop.cpp b/mainloop.cpp
index 5fa66cd..d961a10 100644
--- a/mainloop.cpp
+++ b/mainloop.cpp
@@ -393,21 +393,19 @@
input = "";
}
+ int64_t value;
+ auto& objInfo = std::get<ObjectInfo>(i.second);
+ auto& obj = std::get<InterfaceMap>(objInfo);
+ std::unique_ptr<sensor::Sensor>& sensor = _sensorObjects[i.first];
+
+ auto& statusIface = std::any_cast<std::shared_ptr<StatusObject>&>(
+ obj[InterfaceType::STATUS]);
+ // As long as addStatus is called before addValue, statusIface
+ // should never be nullptr.
+ assert(statusIface);
+
try
{
- int64_t value;
- auto& objInfo = std::get<ObjectInfo>(i.second);
- auto& obj = std::get<InterfaceMap>(objInfo);
- std::unique_ptr<sensor::Sensor>& sensor =
- _sensorObjects[i.first];
-
- auto& statusIface =
- std::any_cast<std::shared_ptr<StatusObject>&>(
- obj[InterfaceType::STATUS]);
- // As long as addStatus is called before addValue, statusIface
- // should never be nullptr.
- assert(statusIface);
-
if (sensor->hasFaultFile())
{
auto fault = _ioAccess->read(
@@ -430,6 +428,8 @@
value =
_ioAccess->read(i.first.first, i.first.second, input,
hwmonio::retries, hwmonio::delay);
+ // Set functional property to true if we could read sensor
+ statusIface->functional(true);
value = sensor->adjustValue(value);
}
@@ -438,6 +438,13 @@
}
catch (const std::system_error& e)
{
+#ifdef UPDATE_FUNCTIONAL_ON_FAIL
+ // If UPDATE_FUNCTIONAL_ON_FAIL is defined and an exception was
+ // thrown, set the functional property to false.
+ // We cannot set this with the 'continue' in the lower block
+ // as the code may exit before reaching it.
+ statusIface->functional(false);
+#endif
auto file = sysfs::make_sysfs_path(
_ioAccess->path(), i.first.first, i.first.second,
hwmon::entry::cinput);
@@ -459,7 +466,10 @@
}
continue;
}
-
+#ifdef UPDATE_FUNCTIONAL_ON_FAIL
+ // Do not exit with failure if UPDATE_FUNCTIONAL_ON_FAIL is set
+ continue;
+#endif
using namespace sdbusplus::xyz::openbmc_project::Sensor::
Device::Error;
report<ReadFailure>(
diff --git a/sensor.cpp b/sensor.cpp
index a88410b..6e5e841 100644
--- a/sensor.cpp
+++ b/sensor.cpp
@@ -146,16 +146,32 @@
// Only read the input value if the status is functional
if (statusIface->functional())
{
- // RAII object for GPIO unlock / lock
- GpioLock gpioLock(getGpio());
+#ifdef UPDATE_FUNCTIONAL_ON_FAIL
+ try
+#endif
+ {
+ // RAII object for GPIO unlock / lock
+ GpioLock gpioLock(getGpio());
- // Retry for up to a second if device is busy
- // or has a transient error.
- val = _ioAccess->read(_sensor.first, _sensor.second,
- hwmon::entry::cinput, std::get<size_t>(retryIO),
- std::get<std::chrono::milliseconds>(retryIO));
+ // Retry for up to a second if device is busy
+ // or has a transient error.
+ val =
+ _ioAccess->read(_sensor.first, _sensor.second,
+ hwmon::entry::cinput, std::get<size_t>(retryIO),
+ std::get<std::chrono::milliseconds>(retryIO));
- val = adjustValue(val);
+ val = adjustValue(val);
+ }
+#ifdef UPDATE_FUNCTIONAL_ON_FAIL
+ catch (const std::system_error& e)
+ {
+ // Catch the exception here and update the functional property.
+ // By catching the exception, it will not propagate it up the stack
+ // and thus the code will skip the "Remove RCs" check in
+ // MainLoop::getObject and will not exit on failure.
+ statusIface->functional(false);
+ }
+#endif
}
auto iface =