sensor: Implement sensor "ASYNC_READ_TIMEOUT"
This change will prevent sensors from blocking all other sensor reads
and D-Bus if they do not report failures quickly enough.
If "ASYNC_READ_TIMEOUT" environment variable is defined in the
sensor's config file for a key_type, the sensor read will be
asynchronous with timeout set in milliseconds.
For example for "sensor1":
ASYNC_READ_TIMEOUT_sensor1="1000" // Timeout will be set to 1 sec
If the read times out, the sensor read will be skipped and the
sensor's functional property will be set to 'false'. Timed out futures
will be placed in a map to prevent their destructor from running and
blocking until the read completes (limitation of std::async).
Tested: This patch has been running downstream for over a year to
solve a slow I2C sensor reads causing IPMI slowdown.
Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48
Signed-off-by: Brandon Kim <brandonkim@google.com>
diff --git a/sensor.cpp b/sensor.cpp
index 43af2f6..ddca883 100644
--- a/sensor.cpp
+++ b/sensor.cpp
@@ -15,6 +15,7 @@
#include <cmath>
#include <cstring>
#include <filesystem>
+#include <future>
#include <phosphor-logging/elog-errors.hpp>
#include <thread>
#include <xyz/openbmc_project/Common/error.hpp>
@@ -117,7 +118,8 @@
}
std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
- ObjectInfo& info)
+ ObjectInfo& info,
+ TimedoutMap& timedoutMap)
{
static constexpr bool deferSignals = true;
@@ -144,12 +146,27 @@
// RAII object for GPIO unlock / lock
auto locker = gpioUnlock(getGpio());
- // Retry for up to a second if device is busy
- // or has a transient error.
- val =
- _ioAccess->read(_sensor.first, _sensor.second,
+ // For sensors with attribute ASYNC_READ_TIMEOUT,
+ // spawn a thread with timeout
+ auto asyncReadTimeout = env::getEnv("ASYNC_READ_TIMEOUT", _sensor);
+ if (!asyncReadTimeout.empty())
+ {
+ std::chrono::milliseconds asyncTimeout{
+ std::stoi(asyncReadTimeout)};
+ val = asyncRead(_sensor, _ioAccess, asyncTimeout, timedoutMap,
+ _sensor.first, _sensor.second,
hwmon::entry::cinput, std::get<size_t>(retryIO),
std::get<std::chrono::milliseconds>(retryIO));
+ }
+ else
+ {
+ // Retry for up to a second if device is busy
+ // or has a transient error.
+ val = _ioAccess->read(
+ _sensor.first, _sensor.second, hwmon::entry::cinput,
+ std::get<size_t>(retryIO),
+ std::get<std::chrono::milliseconds>(retryIO));
+ }
}
#if UPDATE_FUNCTIONAL_ON_FAIL
catch (const std::system_error& e)
@@ -266,4 +283,68 @@
return GpioLocker(std::move(handle));
}
+SensorValueType asyncRead(const SensorSet::key_type& sensorSetKey,
+ const hwmonio::HwmonIOInterface* ioAccess,
+ std::chrono::milliseconds asyncTimeout,
+ TimedoutMap& timedoutMap, const std::string& type,
+ const std::string& id, const std::string& sensor,
+ const size_t retries,
+ const std::chrono::milliseconds delay)
+{
+ // Default async read timeout
+ bool valueIsValid = false;
+ std::future<int64_t> asyncThread;
+
+ auto asyncIter = timedoutMap.find(sensorSetKey);
+ if (asyncIter == timedoutMap.end())
+ {
+ // If sensor not found in timedoutMap, spawn an async thread
+ asyncThread =
+ std::async(std::launch::async, &hwmonio::HwmonIOInterface::read,
+ ioAccess, type, id, sensor, retries, delay);
+ valueIsValid = true;
+ }
+ else
+ {
+ // If we already have the async thread in the timedoutMap, it means this
+ // sensor has already timed out in the previous reads. No need to wait
+ // on subsequent reads - proceed to check the future_status to see when
+ // the async thread finishes
+ asyncTimeout = std::chrono::seconds(0);
+ asyncThread = std::move(asyncIter->second);
+ }
+
+ // TODO: This is still not a true asynchronous read as it still blocks the
+ // main thread for asyncTimeout amount of time. To make this completely
+ // asynchronous, schedule a read and register a callback to update the
+ // sensor value
+ std::future_status status = asyncThread.wait_for(asyncTimeout);
+ switch (status)
+ {
+ case std::future_status::ready:
+ // Read has finished
+ if (valueIsValid)
+ {
+ return asyncThread.get();
+ // Good sensor reads should skip the code below
+ }
+ // Async read thread has completed but had previously timed out (was
+ // found in the timedoutMap). Erase from timedoutMap and throw to
+ // allow retry in the next read cycle. Not returning the read value
+ // as the sensor reading may be bad / corrupted if it took so long.
+ timedoutMap.erase(sensorSetKey);
+ throw AsyncSensorReadTimeOut();
+ default:
+ // Read timed out so add the thread to the timedoutMap (if the entry
+ // already exists, operator[] updates it).
+ //
+ // Keeping the timed out futures in a map is required to prevent
+ // their destructor from being called when returning from this
+ // stack. The destructor will otherwise block until the read
+ // completes due to the limitation of std::async.
+ timedoutMap[sensorSetKey] = std::move(asyncThread);
+ throw AsyncSensorReadTimeOut();
+ }
+}
+
} // namespace sensor