| From 9d3c86863cd49f18603f6334b3e09e8963c73ba2 Mon Sep 17 00:00:00 2001 |
| From: Brandon Kim <brandonkim@google.com> |
| Date: Fri, 9 Aug 2019 15:38:53 -0700 |
| Subject: [PATCH] sensor: Implement sensor "ASYNC_READ_TIMEOUT" |
| |
| This change will prevent sensors from blocking all other sensor reads |
| and D-Bus if they do not report failures quickly enough. |
| |
| If "ASYNC_READ_TIMEOUT" environment variable is defined in the |
| sensor's config file for a key_type, the sensor read will be |
| asynchronous with timeout set in milliseconds. |
| |
| For example for "sensor1": |
| ASYNC_READ_TIMEOUT_sensor1="1000" // Timeout will be set to 1 sec |
| |
| If the read times out, the sensor read will be skipped and the |
| sensor's functional property will be set to 'false'. Timed out futures |
| will be placed in a map to prevent their destructor from running and |
| blocking until the read completes (limitation of std::async). |
| |
| Tested: This patch has been running downstream for over a year to |
| solve a slow I2C sensor reads causing IPMI slowdown. |
| |
| Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48 |
| Signed-off-by: Brandon Kim <brandonkim@google.com> |
| --- |
| mainloop.cpp | 27 ++++++++++++--- |
| mainloop.hpp | 3 ++ |
| meson.build | 1 + |
| sensor.cpp | 92 +++++++++++++++++++++++++++++++++++++++++++++++++--- |
| sensor.hpp | 46 +++++++++++++++++++++++++- |
| 5 files changed, 159 insertions(+), 10 deletions(-) |
| |
| diff --git a/mainloop.cpp b/mainloop.cpp |
| index 40c429a..e138db7 100644 |
| --- a/mainloop.cpp |
| +++ b/mainloop.cpp |
| @@ -34,6 +34,7 @@ |
| #include <cassert> |
| #include <cstdlib> |
| #include <functional> |
| +#include <future> |
| #include <iostream> |
| #include <memory> |
| #include <phosphor-logging/elog-errors.hpp> |
| @@ -242,7 +243,7 @@ std::optional<ObjectStateData> |
| { |
| // Add status interface based on _fault file being present |
| sensorObj->addStatus(info); |
| - valueInterface = sensorObj->addValue(retryIO, info); |
| + valueInterface = sensorObj->addValue(retryIO, info, _timedoutMap); |
| } |
| catch (const std::system_error& e) |
| { |
| @@ -478,10 +479,28 @@ void MainLoop::read() |
| // RAII object for GPIO unlock / lock |
| auto locker = sensor::gpioUnlock(sensor->getGpio()); |
| |
| - // Retry for up to a second if device is busy |
| - // or has a transient error. |
| - value = _ioAccess->read(sensorSysfsType, sensorSysfsNum, input, |
| + // For sensors with attribute ASYNC_READ_TIMEOUT, |
| + // spawn a thread with timeout |
| + auto asyncReadTimeout = |
| + env::getEnv("ASYNC_READ_TIMEOUT", sensorSetKey); |
| + if (!asyncReadTimeout.empty()) |
| + { |
| + std::chrono::milliseconds asyncTimeout{ |
| + std::stoi(asyncReadTimeout)}; |
| + value = sensor::asyncRead( |
| + sensorSetKey, _ioAccess, asyncTimeout, _timedoutMap, |
| + sensorSysfsType, sensorSysfsNum, input, |
| + hwmonio::retries, hwmonio::delay); |
| + } |
| + else |
| + { |
| + // Retry for up to a second if device is busy |
| + // or has a transient error. |
| + value = |
| + _ioAccess->read(sensorSysfsType, sensorSysfsNum, input, |
| hwmonio::retries, hwmonio::delay); |
| + } |
| + |
| // Set functional property to true if we could read sensor |
| statusIface->functional(true); |
| |
| diff --git a/mainloop.hpp b/mainloop.hpp |
| index b3de022..047d614 100644 |
| --- a/mainloop.hpp |
| +++ b/mainloop.hpp |
| @@ -9,6 +9,7 @@ |
| #include "types.hpp" |
| |
| #include <any> |
| +#include <future> |
| #include <memory> |
| #include <optional> |
| #include <sdbusplus/server.hpp> |
| @@ -116,6 +117,8 @@ class MainLoop |
| /** @brief Store the specifications of sensor objects */ |
| std::map<SensorSet::key_type, std::unique_ptr<sensor::Sensor>> |
| _sensorObjects; |
| + /** @brief Store the async futures of timed out sensor objects */ |
| + sensor::TimedoutMap _timedoutMap; |
| |
| /** |
| * @brief Map of removed sensors |
| diff --git a/meson.build b/meson.build |
| index 77f7761..e10a3b5 100644 |
| --- a/meson.build |
| +++ b/meson.build |
| @@ -45,6 +45,7 @@ hwmon_deps = [ |
| dependency('sdbusplus'), |
| dependency('sdeventplus'), |
| dependency('stdplus'), |
| + dependency('threads'), |
| sysfs_dep, |
| ] |
| |
| diff --git a/sensor.cpp b/sensor.cpp |
| index 43af2f6..a2e33cb 100644 |
| --- a/sensor.cpp |
| +++ b/sensor.cpp |
| @@ -15,6 +15,7 @@ |
| #include <cmath> |
| #include <cstring> |
| #include <filesystem> |
| +#include <future> |
| #include <phosphor-logging/elog-errors.hpp> |
| #include <thread> |
| #include <xyz/openbmc_project/Common/error.hpp> |
| @@ -117,7 +118,8 @@ SensorValueType Sensor::adjustValue(SensorValueType value) |
| } |
| |
| std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO, |
| - ObjectInfo& info) |
| + ObjectInfo& info, |
| + TimedoutMap& timedoutMap) |
| { |
| static constexpr bool deferSignals = true; |
| |
| @@ -144,12 +146,27 @@ std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO, |
| // RAII object for GPIO unlock / lock |
| auto locker = gpioUnlock(getGpio()); |
| |
| - // Retry for up to a second if device is busy |
| - // or has a transient error. |
| - val = |
| - _ioAccess->read(_sensor.first, _sensor.second, |
| + // For sensors with attribute ASYNC_READ_TIMEOUT, |
| + // spawn a thread with timeout |
| + auto asyncReadTimeout = env::getEnv("ASYNC_READ_TIMEOUT", _sensor); |
| + if (!asyncReadTimeout.empty()) |
| + { |
| + std::chrono::milliseconds asyncTimeout{ |
| + std::stoi(asyncReadTimeout)}; |
| + val = asyncRead(_sensor, _ioAccess, asyncTimeout, timedoutMap, |
| + _sensor.first, _sensor.second, |
| hwmon::entry::cinput, std::get<size_t>(retryIO), |
| std::get<std::chrono::milliseconds>(retryIO)); |
| + } |
| + else |
| + { |
| + // Retry for up to a second if device is busy |
| + // or has a transient error. |
| + val = _ioAccess->read( |
| + _sensor.first, _sensor.second, hwmon::entry::cinput, |
| + std::get<size_t>(retryIO), |
| + std::get<std::chrono::milliseconds>(retryIO)); |
| + } |
| } |
| #if UPDATE_FUNCTIONAL_ON_FAIL |
| catch (const std::system_error& e) |
| @@ -266,4 +283,69 @@ std::optional<GpioLocker> gpioUnlock(const gpioplus::HandleInterface* handle) |
| return GpioLocker(std::move(handle)); |
| } |
| |
| +SensorValueType asyncRead(const SensorSet::key_type& sensorSetKey, |
| + const hwmonio::HwmonIOInterface* ioAccess, |
| + const std::chrono::milliseconds asyncTimeout, |
| + TimedoutMap& timedoutMap, const std::string& type, |
| + const std::string& id, const std::string& sensor, |
| + const size_t retries, |
| + const std::chrono::milliseconds delay) |
| +{ |
| + // Default async read timeout |
| + auto asyncReadTimeout = asyncTimeout; |
| + bool valueIsValid = false; |
| + std::future<int64_t> asyncThread; |
| + |
| + auto asyncIter = timedoutMap.find(sensorSetKey); |
| + if (asyncIter == timedoutMap.end()) |
| + { |
| + // If sensor not found in timedoutMap, spawn an async thread |
| + asyncThread = |
| + std::async(std::launch::async, &hwmonio::HwmonIOInterface::read, |
| + ioAccess, type, id, sensor, retries, delay); |
| + valueIsValid = true; |
| + } |
| + else |
| + { |
| + // If we already have the async thread in the timedoutMap, it means this |
| + // sensor has already timed out in the previous reads. No need to wait |
| + // on subsequent reads - proceed to check the future_status to see when |
| + // the async thread finishes |
| + asyncReadTimeout = std::chrono::seconds(0); |
| + asyncThread = std::move(asyncIter->second); |
| + } |
| + |
| + // TODO: This is still not a true asynchronous read as it still blocks the |
| + // main thread for asyncReadTimeout amount of time. To make this completely |
| + // asynchronous, schedule a read and register a callback to update the |
| + // sensor value |
| + std::future_status status = asyncThread.wait_for(asyncReadTimeout); |
| + switch (status) |
| + { |
| + case std::future_status::ready: |
| + // Read has finished |
| + if (valueIsValid) |
| + { |
| + return asyncThread.get(); |
| + // Good sensor reads should skip the code below |
| + } |
| + // Async read thread has completed but had previously timed out (was |
| + // found in the timedoutMap). Erase from timedoutMap and throw to |
| + // allow retry in the next read cycle. Not returning the read value |
| + // as the sensor reading may be bad / corrupted if it took so long. |
| + timedoutMap.erase(sensorSetKey); |
| + throw AsyncSensorReadTimeOut(); |
| + default: |
| + // Read timed out so add the thread to the timedoutMap (if the entry |
| + // already exists, operator[] updates it). |
| + // |
| + // Keeping the timed out futures in a map is required to prevent |
| + // their destructor from being called when returning from this |
| + // stack. The destructor will otherwise block until the read |
| + // completes due to the limitation of std::async. |
| + timedoutMap[sensorSetKey] = std::move(asyncThread); |
| + throw AsyncSensorReadTimeOut(); |
| + } |
| +} |
| + |
| } // namespace sensor |
| diff --git a/sensor.hpp b/sensor.hpp |
| index 4b2d281..830d403 100644 |
| --- a/sensor.hpp |
| +++ b/sensor.hpp |
| @@ -4,7 +4,10 @@ |
| #include "sensorset.hpp" |
| #include "types.hpp" |
| |
| +#include <cerrno> |
| +#include <future> |
| #include <gpioplus/handle.hpp> |
| +#include <map> |
| #include <memory> |
| #include <optional> |
| #include <stdplus/handle/managed.hpp> |
| @@ -13,6 +16,8 @@ |
| namespace sensor |
| { |
| |
| +using TimedoutMap = std::map<SensorSet::key_type, std::future<int64_t>>; |
| + |
| struct valueAdjust |
| { |
| double gain = 1.0; |
| @@ -20,6 +25,17 @@ struct valueAdjust |
| std::unordered_set<int> rmRCs; |
| }; |
| |
| +/** @brief Custom exception for async sensor reading timeout |
| + */ |
| +struct AsyncSensorReadTimeOut : public std::system_error |
| +{ |
| + AsyncSensorReadTimeOut() : |
| + system_error(std::error_code(ETIMEDOUT, std::system_category()), |
| + "Async sensor read timed out") |
| + { |
| + } |
| +}; |
| + |
| /** @class Sensor |
| * @brief Sensor object based on a SensorSet container's key type |
| * @details Sensor object to create and modify an associated device's sensor |
| @@ -87,10 +103,13 @@ class Sensor |
| * (number of and delay between) |
| * @param[in] info - Sensor object information |
| * |
| + * @param[in] timedoutMap - Map to track timed out threads |
| + * |
| * @return - Shared pointer to the value object |
| */ |
| std::shared_ptr<ValueObject> addValue(const RetryIO& retryIO, |
| - ObjectInfo& info); |
| + ObjectInfo& info, |
| + TimedoutMap& timedoutMap); |
| |
| /** |
| * @brief Add status interface and functional property for sensor |
| @@ -177,4 +196,29 @@ using GpioLocker = |
| */ |
| std::optional<GpioLocker> gpioUnlock(const gpioplus::HandleInterface* handle); |
| |
| +/** |
| + * @brief Asynchronously read a sensor with timeout defined by |
| + * ASYNC_READ_TIMEOUT environment variable |
| + * |
| + * @param[in] sensorSetKey - Sensor object's identifiers |
| + * @param[in] ioAccess - Hwmon sysfs access |
| + * @param[in] asyncTimeout - Async read timeout in milliseconds |
| + * @param[in] timedoutMap - Map to track timed out threads |
| + * |
| + * (Params needed for HwmonIO::read) |
| + * @param[in] type - The hwmon type (ex. temp). |
| + * @param[in] id - The hwmon id (ex. 1). |
| + * @param[in] sensor - The hwmon sensor (ex. input). |
| + * @param[in] retries - The number of times to retry. |
| + * @param[in] delay - The time to sleep between retry attempts. |
| + * |
| + * @return - SensorValueType read asynchronously, will throw if timed out |
| + */ |
| +SensorValueType asyncRead(const SensorSet::key_type& sensorSetKey, |
| + const hwmonio::HwmonIOInterface* ioAccess, |
| + const std::chrono::milliseconds asyncTimeout, |
| + TimedoutMap& timedoutMap, const std::string& type, |
| + const std::string& id, const std::string& sensor, |
| + const size_t retries, |
| + const std::chrono::milliseconds delay); |
| } // namespace sensor |
| -- |
| 2.21.0 |
| |