blob: 88f55d055b4e3d2bf75eb2b71a2bf9d83a53d38b [file] [log] [blame]
George Hung95f04fd2021-04-28 11:00:27 +08001From c1b3d32430df12ab01f9cacf132aedff8324d55b Mon Sep 17 00:00:00 2001
2From: Brandon Kim <brandonkim@google.com>
3Date: Fri, 9 Aug 2019 15:38:53 -0700
4Subject: [PATCH] sensor: Implement sensor "ASYNC_READ_TIMEOUT"
George Hung712a27e2020-05-19 15:52:33 +08005
George Hung95f04fd2021-04-28 11:00:27 +08006This change will prevent sensors from blocking all other sensor reads
7and D-Bus if they do not report failures quickly enough.
8
9If "ASYNC_READ_TIMEOUT" environment variable is defined in the
10sensor's config file for a key_type, the sensor read will be
11asynchronous with timeout set in milliseconds.
12
13For example for "sensor1":
14ASYNC_READ_TIMEOUT_sensor1="1000" // Timeout will be set to 1 sec
15
16If the read times out, the sensor read will be skipped and the
17sensor's functional property will be set to 'false'. Timed out futures
18will be placed in a map to prevent their destructor from running and
19blocking until the read completes (limiation of std::async).
20
21Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48
22Signed-off-by: Brandon Kim <brandonkim@google.com>
George Hung712a27e2020-05-19 15:52:33 +080023---
George Hung95f04fd2021-04-28 11:00:27 +080024 Makefile.am | 2 ++
25 mainloop.cpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++---
George Hung712a27e2020-05-19 15:52:33 +080026 mainloop.hpp | 3 +++
George Hung1e04fb62020-07-31 18:45:32 +080027 meson.build | 1 +
George Hung95f04fd2021-04-28 11:00:27 +080028 sensor.cpp | 75 ++++++++++++++++++++++++++++++++++++++++++++++------
29 sensor.hpp | 20 ++++++++++++--
30 6 files changed, 160 insertions(+), 14 deletions(-)
George Hung712a27e2020-05-19 15:52:33 +080031
George Hung95f04fd2021-04-28 11:00:27 +080032diff --git a/Makefile.am b/Makefile.am
33index 706a6cc..c620fa4 100644
34--- a/Makefile.am
35+++ b/Makefile.am
36@@ -46,6 +46,7 @@ libhwmon_la_LIBADD = \
37 $(SDEVENTPLUS_LIBS) \
38 $(PHOSPHOR_DBUS_INTERFACES_LIBS) \
39 $(PHOSPHOR_LOGGING_LIBS) \
40+ $(PTHREAD_LIBS) \
41 $(GPIOPLUS_LIBS) \
42 $(STDPLUS_LIBS) \
43 $(CODE_COVERAGE_LIBS) \
44@@ -55,6 +56,7 @@ libhwmon_la_CXXFLAGS = \
45 $(SDEVENTPLUS_CFLAGS) \
46 $(PHOSPHOR_DBUS_INTERFACES_CFLAGS) \
47 $(PHOSPHOR_LOGGING_CFLAGS) \
48+ $(PTHREAD_CFLAGS) \
49 $(STDPLUS_CFLAGS) \
50 $(CODE_COVERAGE_CXXFLAGS)
51
George Hung712a27e2020-05-19 15:52:33 +080052diff --git a/mainloop.cpp b/mainloop.cpp
George Hung95f04fd2021-04-28 11:00:27 +080053index ecceee5..29dc26a 100644
George Hung712a27e2020-05-19 15:52:33 +080054--- a/mainloop.cpp
55+++ b/mainloop.cpp
George Hung1e04fb62020-07-31 18:45:32 +080056@@ -34,6 +34,7 @@
George Hung712a27e2020-05-19 15:52:33 +080057 #include <cassert>
58 #include <cstdlib>
59 #include <functional>
60+#include <future>
61 #include <iostream>
62 #include <memory>
63 #include <phosphor-logging/elog-errors.hpp>
George Hung95f04fd2021-04-28 11:00:27 +080064@@ -242,7 +243,7 @@ std::optional<ObjectStateData>
George Hung712a27e2020-05-19 15:52:33 +080065 {
66 // Add status interface based on _fault file being present
67 sensorObj->addStatus(info);
68- valueInterface = sensorObj->addValue(retryIO, info);
69+ valueInterface = sensorObj->addValue(retryIO, info, _timedoutMap);
70 }
71 catch (const std::system_error& e)
72 {
George Hung95f04fd2021-04-28 11:00:27 +080073@@ -478,10 +479,74 @@ void MainLoop::read()
George Hung712a27e2020-05-19 15:52:33 +080074 // RAII object for GPIO unlock / lock
75 auto locker = sensor::gpioUnlock(sensor->getGpio());
76
77- // Retry for up to a second if device is busy
78- // or has a transient error.
79- value = _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
80+ // For sensors with attribute ASYNC_READ_TIMEOUT,
81+ // spawn a thread with timeout
82+ auto asyncRead =
83+ env::getEnv("ASYNC_READ_TIMEOUT", sensorSetKey);
84+ if (!asyncRead.empty())
85+ {
86+ // Default async read timeout
87+ std::chrono::milliseconds asyncReadTimeout{
88+ std::stoi(asyncRead)};
89+ bool valueIsValid = false;
George Hung1e04fb62020-07-31 18:45:32 +080090+ std::future<int64_t> asyncThread;
George Hung712a27e2020-05-19 15:52:33 +080091+
92+ auto asyncIter = _timedoutMap.find(sensorSetKey);
93+ if (asyncIter == _timedoutMap.end())
94+ {
George Hung1e04fb62020-07-31 18:45:32 +080095+ // If sensor not found in timedoutMap, spawn an async
George Hung712a27e2020-05-19 15:52:33 +080096+ // thread
97+ asyncThread = std::async(
98+ std::launch::async,
99+ &hwmonio::HwmonIOInterface::read, _ioAccess,
100+ sensorSysfsType, sensorSysfsNum, input,
101+ hwmonio::retries, hwmonio::delay);
102+ valueIsValid = true;
103+ }
104+ else
105+ {
106+ // If we already have the async thread in the
107+ // timedoutMap, it means this sensor has already timed
108+ // out in the previous reads. No need to wait on
109+ // subsequent reads
110+ asyncReadTimeout = std::chrono::seconds(0);
111+ asyncThread = std::move(asyncIter->second);
112+ }
113+
114+ std::future_status status =
115+ asyncThread.wait_for(asyncReadTimeout);
116+ switch (status)
117+ {
118+ // Read has finished
119+ case std::future_status::ready:
120+ // Read has finished
121+ if (valueIsValid)
122+ {
123+ value = asyncThread.get();
124+ break;
125+ // Good sensor reads should skip the code below
126+ }
127+ // Async read thread has completed, erase from
128+ // timedoutMap to allow retry then throw
129+ _timedoutMap.erase(sensorSetKey);
130+ throw sensor::AsyncSensorReadTimeOut();
131+ default:
132+ // Read timed out so add the thread to the
133+ // timedoutMap (if the entry already exists,
134+ // operator[] updates it)
135+ _timedoutMap[sensorSetKey] = std::move(asyncThread);
136+ throw sensor::AsyncSensorReadTimeOut();
137+ }
138+ }
139+ else
140+ {
141+ // Retry for up to a second if device is busy
142+ // or has a transient error.
143+ value =
144+ _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
145 hwmonio::retries, hwmonio::delay);
146+ }
147+
148 // Set functional property to true if we could read sensor
149 statusIface->functional(true);
150
151diff --git a/mainloop.hpp b/mainloop.hpp
George Hung1e04fb62020-07-31 18:45:32 +0800152index b3de022..6803c4b 100644
George Hung712a27e2020-05-19 15:52:33 +0800153--- a/mainloop.hpp
154+++ b/mainloop.hpp
155@@ -9,6 +9,7 @@
156 #include "types.hpp"
157
158 #include <any>
159+#include <future>
160 #include <memory>
161 #include <optional>
162 #include <sdbusplus/server.hpp>
163@@ -116,6 +117,8 @@ class MainLoop
164 /** @brief Store the specifications of sensor objects */
165 std::map<SensorSet::key_type, std::unique_ptr<sensor::Sensor>>
166 _sensorObjects;
167+ /** @brief Store the async futures of timed out sensor objects */
George Hung1e04fb62020-07-31 18:45:32 +0800168+ std::map<SensorSet::key_type, std::future<int64_t>> _timedoutMap;
George Hung712a27e2020-05-19 15:52:33 +0800169
170 /**
171 * @brief Map of removed sensors
George Hung1e04fb62020-07-31 18:45:32 +0800172diff --git a/meson.build b/meson.build
173index 66e6801..d6a92f8 100644
174--- a/meson.build
175+++ b/meson.build
176@@ -84,6 +84,7 @@ libhwmon_all = static_library(
177 gpioplus,
178 phosphor_dbus_interfaces,
179 phosphor_logging,
180+ threads,
181 ],
182 link_with: [
183 libaverage,
George Hung712a27e2020-05-19 15:52:33 +0800184diff --git a/sensor.cpp b/sensor.cpp
George Hung95f04fd2021-04-28 11:00:27 +0800185index 09aeca6..ac2f896 100644
George Hung712a27e2020-05-19 15:52:33 +0800186--- a/sensor.cpp
187+++ b/sensor.cpp
George Hung1e04fb62020-07-31 18:45:32 +0800188@@ -15,6 +15,7 @@
George Hung712a27e2020-05-19 15:52:33 +0800189 #include <cmath>
190 #include <cstring>
191 #include <filesystem>
192+#include <future>
193 #include <phosphor-logging/elog-errors.hpp>
194 #include <thread>
195 #include <xyz/openbmc_project/Common/error.hpp>
George Hung95f04fd2021-04-28 11:00:27 +0800196@@ -116,8 +117,9 @@ SensorValueType Sensor::adjustValue(SensorValueType value)
George Hung712a27e2020-05-19 15:52:33 +0800197 return value;
198 }
199
200-std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
201- ObjectInfo& info)
202+std::shared_ptr<ValueObject> Sensor::addValue(
203+ const RetryIO& retryIO, ObjectInfo& info,
George Hung1e04fb62020-07-31 18:45:32 +0800204+ std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap)
George Hung712a27e2020-05-19 15:52:33 +0800205 {
206 static constexpr bool deferSignals = true;
207
George Hung95f04fd2021-04-28 11:00:27 +0800208@@ -144,12 +146,69 @@ std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
George Hung712a27e2020-05-19 15:52:33 +0800209 // RAII object for GPIO unlock / lock
210 auto locker = gpioUnlock(getGpio());
211
212- // Retry for up to a second if device is busy
213- // or has a transient error.
214- val =
215- _ioAccess->read(_sensor.first, _sensor.second,
216- hwmon::entry::cinput, std::get<size_t>(retryIO),
217- std::get<std::chrono::milliseconds>(retryIO));
George Hung712a27e2020-05-19 15:52:33 +0800218+ // For sensors with attribute ASYNC_READ_TIMEOUT,
219+ // spawn a thread with timeout
220+ auto asyncRead = env::getEnv("ASYNC_READ_TIMEOUT", _sensor);
221+ if (!asyncRead.empty())
222+ {
223+ // Default async read timeout
224+ std::chrono::milliseconds asyncReadTimeout{
225+ std::stoi(asyncRead)};
226+ bool valueIsValid = false;
George Hung1e04fb62020-07-31 18:45:32 +0800227+ std::future<int64_t> asyncThread;
George Hung712a27e2020-05-19 15:52:33 +0800228+
229+ auto asyncIter = timedoutMap.find(_sensor);
230+ if (asyncIter == timedoutMap.end())
231+ {
George Hung1e04fb62020-07-31 18:45:32 +0800232+ // If sensor not found in timedoutMap, spawn an async thread
George Hung712a27e2020-05-19 15:52:33 +0800233+ asyncThread = std::async(
234+ std::launch::async, &hwmonio::HwmonIOInterface::read,
235+ _ioAccess, _sensor.first, _sensor.second,
236+ hwmon::entry::cinput, std::get<size_t>(retryIO),
237+ std::get<std::chrono::milliseconds>(retryIO));
238+ valueIsValid = true;
239+ }
240+ else
241+ {
242+ // If we already have the async thread in the timedoutMap,
243+ // it means this sensor has already timed out in the
244+ // previous reads. No need to wait on subsequent reads
245+ asyncReadTimeout = std::chrono::seconds(0);
246+ asyncThread = std::move(asyncIter->second);
247+ }
248+
249+ std::future_status status =
250+ asyncThread.wait_for(asyncReadTimeout);
251+ switch (status)
252+ {
253+ case std::future_status::ready:
254+ // Read has finished
255+ if (valueIsValid)
256+ {
257+ val = asyncThread.get();
258+ break;
259+ // Good sensor reads should skip the code below
260+ }
261+ // Async read thread has completed, erase from
262+ // timedoutMap to allow retry then throw
263+ timedoutMap.erase(_sensor);
264+ throw AsyncSensorReadTimeOut();
265+ default:
266+ // Read timed out so add the thread to the timedoutMap
267+ // (if the entry already exists, operator[] updates it)
268+ timedoutMap[_sensor] = std::move(asyncThread);
269+ throw AsyncSensorReadTimeOut();
270+ }
271+ }
272+ else
273+ {
274+ // Retry for up to a second if device is busy
275+ // or has a transient error.
276+ val = _ioAccess->read(
277+ _sensor.first, _sensor.second, hwmon::entry::cinput,
278+ std::get<size_t>(retryIO),
279+ std::get<std::chrono::milliseconds>(retryIO));
280+ }
George Hung712a27e2020-05-19 15:52:33 +0800281 }
282 #ifdef UPDATE_FUNCTIONAL_ON_FAIL
George Hung1e04fb62020-07-31 18:45:32 +0800283 catch (const std::system_error& e)
George Hung712a27e2020-05-19 15:52:33 +0800284diff --git a/sensor.hpp b/sensor.hpp
George Hung95f04fd2021-04-28 11:00:27 +0800285index 4b2d281..64d6e48 100644
George Hung712a27e2020-05-19 15:52:33 +0800286--- a/sensor.hpp
287+++ b/sensor.hpp
288@@ -4,6 +4,8 @@
289 #include "sensorset.hpp"
290 #include "types.hpp"
291
292+#include <cerrno>
293+#include <future>
294 #include <gpioplus/handle.hpp>
295 #include <memory>
296 #include <optional>
297@@ -20,6 +22,17 @@ struct valueAdjust
298 std::unordered_set<int> rmRCs;
299 };
300
301+/** @brief Custom exception for async sensor reading timeout
302+ */
303+struct AsyncSensorReadTimeOut : public std::system_error
304+{
305+ AsyncSensorReadTimeOut() :
306+ system_error(std::error_code(ETIMEDOUT, std::system_category()),
307+ "Async sensor read timed out")
308+ {
309+ }
310+};
311+
312 /** @class Sensor
313 * @brief Sensor object based on a SensorSet container's key type
314 * @details Sensor object to create and modify an associated device's sensor
George Hung1e04fb62020-07-31 18:45:32 +0800315@@ -87,10 +100,13 @@ class Sensor
George Hung712a27e2020-05-19 15:52:33 +0800316 * (number of and delay between)
317 * @param[in] info - Sensor object information
318 *
319+ * @param[in] timedoutMap - Map to track timed out threads
320+ *
321 * @return - Shared pointer to the value object
322 */
323- std::shared_ptr<ValueObject> addValue(const RetryIO& retryIO,
324- ObjectInfo& info);
George Hung1e04fb62020-07-31 18:45:32 +0800325+ std::shared_ptr<ValueObject> addValue(
326+ const RetryIO& retryIO, ObjectInfo& info,
327+ std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap);
George Hung712a27e2020-05-19 15:52:33 +0800328
329 /**
330 * @brief Add status interface and functional property for sensor
331--
George Hung95f04fd2021-04-28 11:00:27 +08003322.21.0
George Hung712a27e2020-05-19 15:52:33 +0800333