George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 1 | From c1b3d32430df12ab01f9cacf132aedff8324d55b Mon Sep 17 00:00:00 2001 |
| 2 | From: Brandon Kim <brandonkim@google.com> |
| 3 | Date: Fri, 9 Aug 2019 15:38:53 -0700 |
| 4 | Subject: [PATCH] sensor: Implement sensor "ASYNC_READ_TIMEOUT" |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 5 | |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 6 | This change will prevent sensors from blocking all other sensor reads |
| 7 | and D-Bus if they do not report failures quickly enough. |
| 8 | |
| 9 | If "ASYNC_READ_TIMEOUT" environment variable is defined in the |
| 10 | sensor's config file for a key_type, the sensor read will be |
| 11 | asynchronous with timeout set in milliseconds. |
| 12 | |
| 13 | For example for "sensor1": |
| 14 | ASYNC_READ_TIMEOUT_sensor1="1000" // Timeout will be set to 1 sec |
| 15 | |
| 16 | If the read times out, the sensor read will be skipped and the |
| 17 | sensor's functional property will be set to 'false'. Timed out futures |
| 18 | will be placed in a map to prevent their destructor from running and |
| 19 | blocking until the read completes (limiation of std::async). |
| 20 | |
| 21 | Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48 |
| 22 | Signed-off-by: Brandon Kim <brandonkim@google.com> |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 23 | --- |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 24 | Makefile.am | 2 ++ |
| 25 | mainloop.cpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++--- |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 26 | mainloop.hpp | 3 +++ |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 27 | meson.build | 1 + |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 28 | sensor.cpp | 75 ++++++++++++++++++++++++++++++++++++++++++++++------ |
| 29 | sensor.hpp | 20 ++++++++++++-- |
| 30 | 6 files changed, 160 insertions(+), 14 deletions(-) |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 31 | |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 32 | diff --git a/Makefile.am b/Makefile.am |
| 33 | index 706a6cc..c620fa4 100644 |
| 34 | --- a/Makefile.am |
| 35 | +++ b/Makefile.am |
| 36 | @@ -46,6 +46,7 @@ libhwmon_la_LIBADD = \ |
| 37 | $(SDEVENTPLUS_LIBS) \ |
| 38 | $(PHOSPHOR_DBUS_INTERFACES_LIBS) \ |
| 39 | $(PHOSPHOR_LOGGING_LIBS) \ |
| 40 | + $(PTHREAD_LIBS) \ |
| 41 | $(GPIOPLUS_LIBS) \ |
| 42 | $(STDPLUS_LIBS) \ |
| 43 | $(CODE_COVERAGE_LIBS) \ |
| 44 | @@ -55,6 +56,7 @@ libhwmon_la_CXXFLAGS = \ |
| 45 | $(SDEVENTPLUS_CFLAGS) \ |
| 46 | $(PHOSPHOR_DBUS_INTERFACES_CFLAGS) \ |
| 47 | $(PHOSPHOR_LOGGING_CFLAGS) \ |
| 48 | + $(PTHREAD_CFLAGS) \ |
| 49 | $(STDPLUS_CFLAGS) \ |
| 50 | $(CODE_COVERAGE_CXXFLAGS) |
| 51 | |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 52 | diff --git a/mainloop.cpp b/mainloop.cpp |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 53 | index ecceee5..29dc26a 100644 |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 54 | --- a/mainloop.cpp |
| 55 | +++ b/mainloop.cpp |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 56 | @@ -34,6 +34,7 @@ |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 57 | #include <cassert> |
| 58 | #include <cstdlib> |
| 59 | #include <functional> |
| 60 | +#include <future> |
| 61 | #include <iostream> |
| 62 | #include <memory> |
| 63 | #include <phosphor-logging/elog-errors.hpp> |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 64 | @@ -242,7 +243,7 @@ std::optional<ObjectStateData> |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 65 | { |
| 66 | // Add status interface based on _fault file being present |
| 67 | sensorObj->addStatus(info); |
| 68 | - valueInterface = sensorObj->addValue(retryIO, info); |
| 69 | + valueInterface = sensorObj->addValue(retryIO, info, _timedoutMap); |
| 70 | } |
| 71 | catch (const std::system_error& e) |
| 72 | { |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 73 | @@ -478,10 +479,74 @@ void MainLoop::read() |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 74 | // RAII object for GPIO unlock / lock |
| 75 | auto locker = sensor::gpioUnlock(sensor->getGpio()); |
| 76 | |
| 77 | - // Retry for up to a second if device is busy |
| 78 | - // or has a transient error. |
| 79 | - value = _ioAccess->read(sensorSysfsType, sensorSysfsNum, input, |
| 80 | + // For sensors with attribute ASYNC_READ_TIMEOUT, |
| 81 | + // spawn a thread with timeout |
| 82 | + auto asyncRead = |
| 83 | + env::getEnv("ASYNC_READ_TIMEOUT", sensorSetKey); |
| 84 | + if (!asyncRead.empty()) |
| 85 | + { |
| 86 | + // Default async read timeout |
| 87 | + std::chrono::milliseconds asyncReadTimeout{ |
| 88 | + std::stoi(asyncRead)}; |
| 89 | + bool valueIsValid = false; |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 90 | + std::future<int64_t> asyncThread; |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 91 | + |
| 92 | + auto asyncIter = _timedoutMap.find(sensorSetKey); |
| 93 | + if (asyncIter == _timedoutMap.end()) |
| 94 | + { |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 95 | + // If sensor not found in timedoutMap, spawn an async |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 96 | + // thread |
| 97 | + asyncThread = std::async( |
| 98 | + std::launch::async, |
| 99 | + &hwmonio::HwmonIOInterface::read, _ioAccess, |
| 100 | + sensorSysfsType, sensorSysfsNum, input, |
| 101 | + hwmonio::retries, hwmonio::delay); |
| 102 | + valueIsValid = true; |
| 103 | + } |
| 104 | + else |
| 105 | + { |
| 106 | + // If we already have the async thread in the |
| 107 | + // timedoutMap, it means this sensor has already timed |
| 108 | + // out in the previous reads. No need to wait on |
| 109 | + // subsequent reads |
| 110 | + asyncReadTimeout = std::chrono::seconds(0); |
| 111 | + asyncThread = std::move(asyncIter->second); |
| 112 | + } |
| 113 | + |
| 114 | + std::future_status status = |
| 115 | + asyncThread.wait_for(asyncReadTimeout); |
| 116 | + switch (status) |
| 117 | + { |
| 118 | + // Read has finished |
| 119 | + case std::future_status::ready: |
| 120 | + // Read has finished |
| 121 | + if (valueIsValid) |
| 122 | + { |
| 123 | + value = asyncThread.get(); |
| 124 | + break; |
| 125 | + // Good sensor reads should skip the code below |
| 126 | + } |
| 127 | + // Async read thread has completed, erase from |
| 128 | + // timedoutMap to allow retry then throw |
| 129 | + _timedoutMap.erase(sensorSetKey); |
| 130 | + throw sensor::AsyncSensorReadTimeOut(); |
| 131 | + default: |
| 132 | + // Read timed out so add the thread to the |
| 133 | + // timedoutMap (if the entry already exists, |
| 134 | + // operator[] updates it) |
| 135 | + _timedoutMap[sensorSetKey] = std::move(asyncThread); |
| 136 | + throw sensor::AsyncSensorReadTimeOut(); |
| 137 | + } |
| 138 | + } |
| 139 | + else |
| 140 | + { |
| 141 | + // Retry for up to a second if device is busy |
| 142 | + // or has a transient error. |
| 143 | + value = |
| 144 | + _ioAccess->read(sensorSysfsType, sensorSysfsNum, input, |
| 145 | hwmonio::retries, hwmonio::delay); |
| 146 | + } |
| 147 | + |
| 148 | // Set functional property to true if we could read sensor |
| 149 | statusIface->functional(true); |
| 150 | |
| 151 | diff --git a/mainloop.hpp b/mainloop.hpp |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 152 | index b3de022..6803c4b 100644 |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 153 | --- a/mainloop.hpp |
| 154 | +++ b/mainloop.hpp |
| 155 | @@ -9,6 +9,7 @@ |
| 156 | #include "types.hpp" |
| 157 | |
| 158 | #include <any> |
| 159 | +#include <future> |
| 160 | #include <memory> |
| 161 | #include <optional> |
| 162 | #include <sdbusplus/server.hpp> |
| 163 | @@ -116,6 +117,8 @@ class MainLoop |
| 164 | /** @brief Store the specifications of sensor objects */ |
| 165 | std::map<SensorSet::key_type, std::unique_ptr<sensor::Sensor>> |
| 166 | _sensorObjects; |
| 167 | + /** @brief Store the async futures of timed out sensor objects */ |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 168 | + std::map<SensorSet::key_type, std::future<int64_t>> _timedoutMap; |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 169 | |
| 170 | /** |
| 171 | * @brief Map of removed sensors |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 172 | diff --git a/meson.build b/meson.build |
| 173 | index 66e6801..d6a92f8 100644 |
| 174 | --- a/meson.build |
| 175 | +++ b/meson.build |
| 176 | @@ -84,6 +84,7 @@ libhwmon_all = static_library( |
| 177 | gpioplus, |
| 178 | phosphor_dbus_interfaces, |
| 179 | phosphor_logging, |
| 180 | + threads, |
| 181 | ], |
| 182 | link_with: [ |
| 183 | libaverage, |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 184 | diff --git a/sensor.cpp b/sensor.cpp |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 185 | index 09aeca6..ac2f896 100644 |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 186 | --- a/sensor.cpp |
| 187 | +++ b/sensor.cpp |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 188 | @@ -15,6 +15,7 @@ |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 189 | #include <cmath> |
| 190 | #include <cstring> |
| 191 | #include <filesystem> |
| 192 | +#include <future> |
| 193 | #include <phosphor-logging/elog-errors.hpp> |
| 194 | #include <thread> |
| 195 | #include <xyz/openbmc_project/Common/error.hpp> |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 196 | @@ -116,8 +117,9 @@ SensorValueType Sensor::adjustValue(SensorValueType value) |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 197 | return value; |
| 198 | } |
| 199 | |
| 200 | -std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO, |
| 201 | - ObjectInfo& info) |
| 202 | +std::shared_ptr<ValueObject> Sensor::addValue( |
| 203 | + const RetryIO& retryIO, ObjectInfo& info, |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 204 | + std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap) |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 205 | { |
| 206 | static constexpr bool deferSignals = true; |
| 207 | |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 208 | @@ -144,12 +146,69 @@ std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO, |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 209 | // RAII object for GPIO unlock / lock |
| 210 | auto locker = gpioUnlock(getGpio()); |
| 211 | |
| 212 | - // Retry for up to a second if device is busy |
| 213 | - // or has a transient error. |
| 214 | - val = |
| 215 | - _ioAccess->read(_sensor.first, _sensor.second, |
| 216 | - hwmon::entry::cinput, std::get<size_t>(retryIO), |
| 217 | - std::get<std::chrono::milliseconds>(retryIO)); |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 218 | + // For sensors with attribute ASYNC_READ_TIMEOUT, |
| 219 | + // spawn a thread with timeout |
| 220 | + auto asyncRead = env::getEnv("ASYNC_READ_TIMEOUT", _sensor); |
| 221 | + if (!asyncRead.empty()) |
| 222 | + { |
| 223 | + // Default async read timeout |
| 224 | + std::chrono::milliseconds asyncReadTimeout{ |
| 225 | + std::stoi(asyncRead)}; |
| 226 | + bool valueIsValid = false; |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 227 | + std::future<int64_t> asyncThread; |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 228 | + |
| 229 | + auto asyncIter = timedoutMap.find(_sensor); |
| 230 | + if (asyncIter == timedoutMap.end()) |
| 231 | + { |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 232 | + // If sensor not found in timedoutMap, spawn an async thread |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 233 | + asyncThread = std::async( |
| 234 | + std::launch::async, &hwmonio::HwmonIOInterface::read, |
| 235 | + _ioAccess, _sensor.first, _sensor.second, |
| 236 | + hwmon::entry::cinput, std::get<size_t>(retryIO), |
| 237 | + std::get<std::chrono::milliseconds>(retryIO)); |
| 238 | + valueIsValid = true; |
| 239 | + } |
| 240 | + else |
| 241 | + { |
| 242 | + // If we already have the async thread in the timedoutMap, |
| 243 | + // it means this sensor has already timed out in the |
| 244 | + // previous reads. No need to wait on subsequent reads |
| 245 | + asyncReadTimeout = std::chrono::seconds(0); |
| 246 | + asyncThread = std::move(asyncIter->second); |
| 247 | + } |
| 248 | + |
| 249 | + std::future_status status = |
| 250 | + asyncThread.wait_for(asyncReadTimeout); |
| 251 | + switch (status) |
| 252 | + { |
| 253 | + case std::future_status::ready: |
| 254 | + // Read has finished |
| 255 | + if (valueIsValid) |
| 256 | + { |
| 257 | + val = asyncThread.get(); |
| 258 | + break; |
| 259 | + // Good sensor reads should skip the code below |
| 260 | + } |
| 261 | + // Async read thread has completed, erase from |
| 262 | + // timedoutMap to allow retry then throw |
| 263 | + timedoutMap.erase(_sensor); |
| 264 | + throw AsyncSensorReadTimeOut(); |
| 265 | + default: |
| 266 | + // Read timed out so add the thread to the timedoutMap |
| 267 | + // (if the entry already exists, operator[] updates it) |
| 268 | + timedoutMap[_sensor] = std::move(asyncThread); |
| 269 | + throw AsyncSensorReadTimeOut(); |
| 270 | + } |
| 271 | + } |
| 272 | + else |
| 273 | + { |
| 274 | + // Retry for up to a second if device is busy |
| 275 | + // or has a transient error. |
| 276 | + val = _ioAccess->read( |
| 277 | + _sensor.first, _sensor.second, hwmon::entry::cinput, |
| 278 | + std::get<size_t>(retryIO), |
| 279 | + std::get<std::chrono::milliseconds>(retryIO)); |
| 280 | + } |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 281 | } |
| 282 | #ifdef UPDATE_FUNCTIONAL_ON_FAIL |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 283 | catch (const std::system_error& e) |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 284 | diff --git a/sensor.hpp b/sensor.hpp |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 285 | index 4b2d281..64d6e48 100644 |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 286 | --- a/sensor.hpp |
| 287 | +++ b/sensor.hpp |
| 288 | @@ -4,6 +4,8 @@ |
| 289 | #include "sensorset.hpp" |
| 290 | #include "types.hpp" |
| 291 | |
| 292 | +#include <cerrno> |
| 293 | +#include <future> |
| 294 | #include <gpioplus/handle.hpp> |
| 295 | #include <memory> |
| 296 | #include <optional> |
| 297 | @@ -20,6 +22,17 @@ struct valueAdjust |
| 298 | std::unordered_set<int> rmRCs; |
| 299 | }; |
| 300 | |
| 301 | +/** @brief Custom exception for async sensor reading timeout |
| 302 | + */ |
| 303 | +struct AsyncSensorReadTimeOut : public std::system_error |
| 304 | +{ |
| 305 | + AsyncSensorReadTimeOut() : |
| 306 | + system_error(std::error_code(ETIMEDOUT, std::system_category()), |
| 307 | + "Async sensor read timed out") |
| 308 | + { |
| 309 | + } |
| 310 | +}; |
| 311 | + |
| 312 | /** @class Sensor |
| 313 | * @brief Sensor object based on a SensorSet container's key type |
| 314 | * @details Sensor object to create and modify an associated device's sensor |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 315 | @@ -87,10 +100,13 @@ class Sensor |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 316 | * (number of and delay between) |
| 317 | * @param[in] info - Sensor object information |
| 318 | * |
| 319 | + * @param[in] timedoutMap - Map to track timed out threads |
| 320 | + * |
| 321 | * @return - Shared pointer to the value object |
| 322 | */ |
| 323 | - std::shared_ptr<ValueObject> addValue(const RetryIO& retryIO, |
| 324 | - ObjectInfo& info); |
George Hung | 1e04fb6 | 2020-07-31 18:45:32 +0800 | [diff] [blame] | 325 | + std::shared_ptr<ValueObject> addValue( |
| 326 | + const RetryIO& retryIO, ObjectInfo& info, |
| 327 | + std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap); |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 328 | |
| 329 | /** |
| 330 | * @brief Add status interface and functional property for sensor |
| 331 | -- |
George Hung | 95f04fd | 2021-04-28 11:00:27 +0800 | [diff] [blame^] | 332 | 2.21.0 |
George Hung | 712a27e | 2020-05-19 15:52:33 +0800 | [diff] [blame] | 333 | |