sensor: Implement sensor "ASYNC_READ_TIMEOUT"
This change will prevent sensors from blocking all other sensor reads
and D-Bus if they do not report failures quickly enough.
If "ASYNC_READ_TIMEOUT" environment variable is defined in the
sensor's config file for a key_type, the sensor read will be
asynchronous with timeout set in milliseconds.
For example for "sensor1":
ASYNC_READ_TIMEOUT_sensor1="1000" // Timeout will be set to 1 sec
If the read times out, the sensor read will be skipped and the
sensor's functional property will be set to 'false'. Timed out futures
will be placed in a map to prevent their destructor from running and
blocking until the read completes (limitation of std::async).
Tested: This patch has been running downstream for over a year to
solve a slow I2C sensor reads causing IPMI slowdown.
Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48
Signed-off-by: Brandon Kim <brandonkim@google.com>
diff --git a/mainloop.cpp b/mainloop.cpp
index 75c8c12..0b6d070 100644
--- a/mainloop.cpp
+++ b/mainloop.cpp
@@ -34,6 +34,7 @@
#include <cassert>
#include <cstdlib>
#include <functional>
+#include <future>
#include <iostream>
#include <memory>
#include <phosphor-logging/elog-errors.hpp>
@@ -242,7 +243,7 @@
{
// Add status interface based on _fault file being present
sensorObj->addStatus(info);
- valueInterface = sensorObj->addValue(retryIO, info);
+ valueInterface = sensorObj->addValue(retryIO, info, _timedoutMap);
}
catch (const std::system_error& e)
{
@@ -484,10 +485,28 @@
// RAII object for GPIO unlock / lock
auto locker = sensor::gpioUnlock(sensor->getGpio());
- // Retry for up to a second if device is busy
- // or has a transient error.
- value = _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
+ // For sensors with attribute ASYNC_READ_TIMEOUT,
+ // spawn a thread with timeout
+ auto asyncReadTimeout =
+ env::getEnv("ASYNC_READ_TIMEOUT", sensorSetKey);
+ if (!asyncReadTimeout.empty())
+ {
+ std::chrono::milliseconds asyncTimeout{
+ std::stoi(asyncReadTimeout)};
+ value = sensor::asyncRead(
+ sensorSetKey, _ioAccess, asyncTimeout, _timedoutMap,
+ sensorSysfsType, sensorSysfsNum, input,
+ hwmonio::retries, hwmonio::delay);
+ }
+ else
+ {
+ // Retry for up to a second if device is busy
+ // or has a transient error.
+ value =
+ _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
hwmonio::retries, hwmonio::delay);
+ }
+
// Set functional property to true if we could read sensor
statusIface->functional(true);
diff --git a/mainloop.hpp b/mainloop.hpp
index af92479..530d4f3 100644
--- a/mainloop.hpp
+++ b/mainloop.hpp
@@ -9,6 +9,7 @@
#include "types.hpp"
#include <any>
+#include <future>
#include <memory>
#include <optional>
#include <sdbusplus/server.hpp>
@@ -120,6 +121,8 @@
/** @brief Store the specifications of sensor objects */
std::map<SensorSet::key_type, std::unique_ptr<sensor::Sensor>>
_sensorObjects;
+ /** @brief Store the async futures of timed out sensor objects */
+ sensor::TimedoutMap _timedoutMap;
/**
* @brief Map of removed sensors
diff --git a/meson.build b/meson.build
index 764cdd6..d080492 100644
--- a/meson.build
+++ b/meson.build
@@ -45,6 +45,7 @@
dependency('sdbusplus'),
dependency('sdeventplus'),
dependency('stdplus'),
+ dependency('threads'),
sysfs_dep,
]
diff --git a/sensor.cpp b/sensor.cpp
index 43af2f6..ddca883 100644
--- a/sensor.cpp
+++ b/sensor.cpp
@@ -15,6 +15,7 @@
#include <cmath>
#include <cstring>
#include <filesystem>
+#include <future>
#include <phosphor-logging/elog-errors.hpp>
#include <thread>
#include <xyz/openbmc_project/Common/error.hpp>
@@ -117,7 +118,8 @@
}
std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
- ObjectInfo& info)
+ ObjectInfo& info,
+ TimedoutMap& timedoutMap)
{
static constexpr bool deferSignals = true;
@@ -144,12 +146,27 @@
// RAII object for GPIO unlock / lock
auto locker = gpioUnlock(getGpio());
- // Retry for up to a second if device is busy
- // or has a transient error.
- val =
- _ioAccess->read(_sensor.first, _sensor.second,
+ // For sensors with attribute ASYNC_READ_TIMEOUT,
+ // spawn a thread with timeout
+ auto asyncReadTimeout = env::getEnv("ASYNC_READ_TIMEOUT", _sensor);
+ if (!asyncReadTimeout.empty())
+ {
+ std::chrono::milliseconds asyncTimeout{
+ std::stoi(asyncReadTimeout)};
+ val = asyncRead(_sensor, _ioAccess, asyncTimeout, timedoutMap,
+ _sensor.first, _sensor.second,
hwmon::entry::cinput, std::get<size_t>(retryIO),
std::get<std::chrono::milliseconds>(retryIO));
+ }
+ else
+ {
+ // Retry for up to a second if device is busy
+ // or has a transient error.
+ val = _ioAccess->read(
+ _sensor.first, _sensor.second, hwmon::entry::cinput,
+ std::get<size_t>(retryIO),
+ std::get<std::chrono::milliseconds>(retryIO));
+ }
}
#if UPDATE_FUNCTIONAL_ON_FAIL
catch (const std::system_error& e)
@@ -266,4 +283,68 @@
return GpioLocker(std::move(handle));
}
+SensorValueType asyncRead(const SensorSet::key_type& sensorSetKey,
+ const hwmonio::HwmonIOInterface* ioAccess,
+ std::chrono::milliseconds asyncTimeout,
+ TimedoutMap& timedoutMap, const std::string& type,
+ const std::string& id, const std::string& sensor,
+ const size_t retries,
+ const std::chrono::milliseconds delay)
+{
+ // Default async read timeout
+ bool valueIsValid = false;
+ std::future<int64_t> asyncThread;
+
+ auto asyncIter = timedoutMap.find(sensorSetKey);
+ if (asyncIter == timedoutMap.end())
+ {
+ // If sensor not found in timedoutMap, spawn an async thread
+ asyncThread =
+ std::async(std::launch::async, &hwmonio::HwmonIOInterface::read,
+ ioAccess, type, id, sensor, retries, delay);
+ valueIsValid = true;
+ }
+ else
+ {
+ // If we already have the async thread in the timedoutMap, it means this
+ // sensor has already timed out in the previous reads. No need to wait
+ // on subsequent reads - proceed to check the future_status to see when
+ // the async thread finishes
+ asyncTimeout = std::chrono::seconds(0);
+ asyncThread = std::move(asyncIter->second);
+ }
+
+ // TODO: This is still not a true asynchronous read as it still blocks the
+ // main thread for asyncTimeout amount of time. To make this completely
+ // asynchronous, schedule a read and register a callback to update the
+ // sensor value
+ std::future_status status = asyncThread.wait_for(asyncTimeout);
+ switch (status)
+ {
+ case std::future_status::ready:
+ // Read has finished
+ if (valueIsValid)
+ {
+ return asyncThread.get();
+ // Good sensor reads should skip the code below
+ }
+ // Async read thread has completed but had previously timed out (was
+ // found in the timedoutMap). Erase from timedoutMap and throw to
+ // allow retry in the next read cycle. Not returning the read value
+ // as the sensor reading may be bad / corrupted if it took so long.
+ timedoutMap.erase(sensorSetKey);
+ throw AsyncSensorReadTimeOut();
+ default:
+ // Read timed out so add the thread to the timedoutMap (if the entry
+ // already exists, operator[] updates it).
+ //
+ // Keeping the timed out futures in a map is required to prevent
+ // their destructor from being called when returning from this
+ // stack. The destructor will otherwise block until the read
+ // completes due to the limitation of std::async.
+ timedoutMap[sensorSetKey] = std::move(asyncThread);
+ throw AsyncSensorReadTimeOut();
+ }
+}
+
} // namespace sensor
diff --git a/sensor.hpp b/sensor.hpp
index 4b2d281..a1e0524 100644
--- a/sensor.hpp
+++ b/sensor.hpp
@@ -4,7 +4,10 @@
#include "sensorset.hpp"
#include "types.hpp"
+#include <cerrno>
+#include <future>
#include <gpioplus/handle.hpp>
+#include <map>
#include <memory>
#include <optional>
#include <stdplus/handle/managed.hpp>
@@ -13,6 +16,8 @@
namespace sensor
{
+using TimedoutMap = std::map<SensorSet::key_type, std::future<int64_t>>;
+
struct valueAdjust
{
double gain = 1.0;
@@ -20,6 +25,17 @@
std::unordered_set<int> rmRCs;
};
+/** @brief Custom exception for async sensor reading timeout
+ */
+struct AsyncSensorReadTimeOut : public std::system_error
+{
+ AsyncSensorReadTimeOut() :
+ system_error(std::error_code(ETIMEDOUT, std::system_category()),
+ "Async sensor read timed out")
+ {
+ }
+};
+
/** @class Sensor
* @brief Sensor object based on a SensorSet container's key type
* @details Sensor object to create and modify an associated device's sensor
@@ -87,10 +103,13 @@
* (number of and delay between)
* @param[in] info - Sensor object information
*
+ * @param[in] timedoutMap - Map to track timed out threads
+ *
* @return - Shared pointer to the value object
*/
std::shared_ptr<ValueObject> addValue(const RetryIO& retryIO,
- ObjectInfo& info);
+ ObjectInfo& info,
+ TimedoutMap& timedoutMap);
/**
* @brief Add status interface and functional property for sensor
@@ -177,4 +196,29 @@
*/
std::optional<GpioLocker> gpioUnlock(const gpioplus::HandleInterface* handle);
+/**
+ * @brief Asynchronously read a sensor with timeout defined by
+ * ASYNC_READ_TIMEOUT environment variable
+ *
+ * @param[in] sensorSetKey - Sensor object's identifiers
+ * @param[in] ioAccess - Hwmon sysfs access
+ * @param[in] asyncTimeout - Async read timeout in milliseconds
+ * @param[in] timedoutMap - Map to track timed out threads
+ *
+ * (Params needed for HwmonIO::read)
+ * @param[in] type - The hwmon type (ex. temp).
+ * @param[in] id - The hwmon id (ex. 1).
+ * @param[in] sensor - The hwmon sensor (ex. input).
+ * @param[in] retries - The number of times to retry.
+ * @param[in] delay - The time to sleep between retry attempts.
+ *
+ * @return - SensorValueType read asynchronously, will throw if timed out
+ */
+SensorValueType asyncRead(const SensorSet::key_type& sensorSetKey,
+ const hwmonio::HwmonIOInterface* ioAccess,
+ std::chrono::milliseconds asyncTimeout,
+ TimedoutMap& timedoutMap, const std::string& type,
+ const std::string& id, const std::string& sensor,
+ const size_t retries,
+ const std::chrono::milliseconds delay);
} // namespace sensor