blob: 88f55d055b4e3d2bf75eb2b71a2bf9d83a53d38b [file] [log] [blame]
From c1b3d32430df12ab01f9cacf132aedff8324d55b Mon Sep 17 00:00:00 2001
From: Brandon Kim <brandonkim@google.com>
Date: Fri, 9 Aug 2019 15:38:53 -0700
Subject: [PATCH] sensor: Implement sensor "ASYNC_READ_TIMEOUT"
This change will prevent sensors from blocking all other sensor reads
and D-Bus if they do not report failures quickly enough.
If "ASYNC_READ_TIMEOUT" environment variable is defined in the
sensor's config file for a key_type, the sensor read will be
asynchronous with timeout set in milliseconds.
For example for "sensor1":
ASYNC_READ_TIMEOUT_sensor1="1000" // Timeout will be set to 1 sec
If the read times out, the sensor read will be skipped and the
sensor's functional property will be set to 'false'. Timed out futures
will be placed in a map to prevent their destructor from running and
blocking until the read completes (limiation of std::async).
Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48
Signed-off-by: Brandon Kim <brandonkim@google.com>
---
Makefile.am | 2 ++
mainloop.cpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++---
mainloop.hpp | 3 +++
meson.build | 1 +
sensor.cpp | 75 ++++++++++++++++++++++++++++++++++++++++++++++------
sensor.hpp | 20 ++++++++++++--
6 files changed, 160 insertions(+), 14 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 706a6cc..c620fa4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -46,6 +46,7 @@ libhwmon_la_LIBADD = \
$(SDEVENTPLUS_LIBS) \
$(PHOSPHOR_DBUS_INTERFACES_LIBS) \
$(PHOSPHOR_LOGGING_LIBS) \
+ $(PTHREAD_LIBS) \
$(GPIOPLUS_LIBS) \
$(STDPLUS_LIBS) \
$(CODE_COVERAGE_LIBS) \
@@ -55,6 +56,7 @@ libhwmon_la_CXXFLAGS = \
$(SDEVENTPLUS_CFLAGS) \
$(PHOSPHOR_DBUS_INTERFACES_CFLAGS) \
$(PHOSPHOR_LOGGING_CFLAGS) \
+ $(PTHREAD_CFLAGS) \
$(STDPLUS_CFLAGS) \
$(CODE_COVERAGE_CXXFLAGS)
diff --git a/mainloop.cpp b/mainloop.cpp
index ecceee5..29dc26a 100644
--- a/mainloop.cpp
+++ b/mainloop.cpp
@@ -34,6 +34,7 @@
#include <cassert>
#include <cstdlib>
#include <functional>
+#include <future>
#include <iostream>
#include <memory>
#include <phosphor-logging/elog-errors.hpp>
@@ -242,7 +243,7 @@ std::optional<ObjectStateData>
{
// Add status interface based on _fault file being present
sensorObj->addStatus(info);
- valueInterface = sensorObj->addValue(retryIO, info);
+ valueInterface = sensorObj->addValue(retryIO, info, _timedoutMap);
}
catch (const std::system_error& e)
{
@@ -478,10 +479,74 @@ void MainLoop::read()
// RAII object for GPIO unlock / lock
auto locker = sensor::gpioUnlock(sensor->getGpio());
- // Retry for up to a second if device is busy
- // or has a transient error.
- value = _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
+ // For sensors with attribute ASYNC_READ_TIMEOUT,
+ // spawn a thread with timeout
+ auto asyncRead =
+ env::getEnv("ASYNC_READ_TIMEOUT", sensorSetKey);
+ if (!asyncRead.empty())
+ {
+ // Default async read timeout
+ std::chrono::milliseconds asyncReadTimeout{
+ std::stoi(asyncRead)};
+ bool valueIsValid = false;
+ std::future<int64_t> asyncThread;
+
+ auto asyncIter = _timedoutMap.find(sensorSetKey);
+ if (asyncIter == _timedoutMap.end())
+ {
+ // If sensor not found in timedoutMap, spawn an async
+ // thread
+ asyncThread = std::async(
+ std::launch::async,
+ &hwmonio::HwmonIOInterface::read, _ioAccess,
+ sensorSysfsType, sensorSysfsNum, input,
+ hwmonio::retries, hwmonio::delay);
+ valueIsValid = true;
+ }
+ else
+ {
+ // If we already have the async thread in the
+ // timedoutMap, it means this sensor has already timed
+ // out in the previous reads. No need to wait on
+ // subsequent reads
+ asyncReadTimeout = std::chrono::seconds(0);
+ asyncThread = std::move(asyncIter->second);
+ }
+
+ std::future_status status =
+ asyncThread.wait_for(asyncReadTimeout);
+ switch (status)
+ {
+ // Read has finished
+ case std::future_status::ready:
+ // Read has finished
+ if (valueIsValid)
+ {
+ value = asyncThread.get();
+ break;
+ // Good sensor reads should skip the code below
+ }
+ // Async read thread has completed, erase from
+ // timedoutMap to allow retry then throw
+ _timedoutMap.erase(sensorSetKey);
+ throw sensor::AsyncSensorReadTimeOut();
+ default:
+ // Read timed out so add the thread to the
+ // timedoutMap (if the entry already exists,
+ // operator[] updates it)
+ _timedoutMap[sensorSetKey] = std::move(asyncThread);
+ throw sensor::AsyncSensorReadTimeOut();
+ }
+ }
+ else
+ {
+ // Retry for up to a second if device is busy
+ // or has a transient error.
+ value =
+ _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
hwmonio::retries, hwmonio::delay);
+ }
+
// Set functional property to true if we could read sensor
statusIface->functional(true);
diff --git a/mainloop.hpp b/mainloop.hpp
index b3de022..6803c4b 100644
--- a/mainloop.hpp
+++ b/mainloop.hpp
@@ -9,6 +9,7 @@
#include "types.hpp"
#include <any>
+#include <future>
#include <memory>
#include <optional>
#include <sdbusplus/server.hpp>
@@ -116,6 +117,8 @@ class MainLoop
/** @brief Store the specifications of sensor objects */
std::map<SensorSet::key_type, std::unique_ptr<sensor::Sensor>>
_sensorObjects;
+ /** @brief Store the async futures of timed out sensor objects */
+ std::map<SensorSet::key_type, std::future<int64_t>> _timedoutMap;
/**
* @brief Map of removed sensors
diff --git a/meson.build b/meson.build
index 66e6801..d6a92f8 100644
--- a/meson.build
+++ b/meson.build
@@ -84,6 +84,7 @@ libhwmon_all = static_library(
gpioplus,
phosphor_dbus_interfaces,
phosphor_logging,
+ threads,
],
link_with: [
libaverage,
diff --git a/sensor.cpp b/sensor.cpp
index 09aeca6..ac2f896 100644
--- a/sensor.cpp
+++ b/sensor.cpp
@@ -15,6 +15,7 @@
#include <cmath>
#include <cstring>
#include <filesystem>
+#include <future>
#include <phosphor-logging/elog-errors.hpp>
#include <thread>
#include <xyz/openbmc_project/Common/error.hpp>
@@ -116,8 +117,9 @@ SensorValueType Sensor::adjustValue(SensorValueType value)
return value;
}
-std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
- ObjectInfo& info)
+std::shared_ptr<ValueObject> Sensor::addValue(
+ const RetryIO& retryIO, ObjectInfo& info,
+ std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap)
{
static constexpr bool deferSignals = true;
@@ -144,12 +146,69 @@ std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
// RAII object for GPIO unlock / lock
auto locker = gpioUnlock(getGpio());
- // Retry for up to a second if device is busy
- // or has a transient error.
- val =
- _ioAccess->read(_sensor.first, _sensor.second,
- hwmon::entry::cinput, std::get<size_t>(retryIO),
- std::get<std::chrono::milliseconds>(retryIO));
+ // For sensors with attribute ASYNC_READ_TIMEOUT,
+ // spawn a thread with timeout
+ auto asyncRead = env::getEnv("ASYNC_READ_TIMEOUT", _sensor);
+ if (!asyncRead.empty())
+ {
+ // Default async read timeout
+ std::chrono::milliseconds asyncReadTimeout{
+ std::stoi(asyncRead)};
+ bool valueIsValid = false;
+ std::future<int64_t> asyncThread;
+
+ auto asyncIter = timedoutMap.find(_sensor);
+ if (asyncIter == timedoutMap.end())
+ {
+ // If sensor not found in timedoutMap, spawn an async thread
+ asyncThread = std::async(
+ std::launch::async, &hwmonio::HwmonIOInterface::read,
+ _ioAccess, _sensor.first, _sensor.second,
+ hwmon::entry::cinput, std::get<size_t>(retryIO),
+ std::get<std::chrono::milliseconds>(retryIO));
+ valueIsValid = true;
+ }
+ else
+ {
+ // If we already have the async thread in the timedoutMap,
+ // it means this sensor has already timed out in the
+ // previous reads. No need to wait on subsequent reads
+ asyncReadTimeout = std::chrono::seconds(0);
+ asyncThread = std::move(asyncIter->second);
+ }
+
+ std::future_status status =
+ asyncThread.wait_for(asyncReadTimeout);
+ switch (status)
+ {
+ case std::future_status::ready:
+ // Read has finished
+ if (valueIsValid)
+ {
+ val = asyncThread.get();
+ break;
+ // Good sensor reads should skip the code below
+ }
+ // Async read thread has completed, erase from
+ // timedoutMap to allow retry then throw
+ timedoutMap.erase(_sensor);
+ throw AsyncSensorReadTimeOut();
+ default:
+ // Read timed out so add the thread to the timedoutMap
+ // (if the entry already exists, operator[] updates it)
+ timedoutMap[_sensor] = std::move(asyncThread);
+ throw AsyncSensorReadTimeOut();
+ }
+ }
+ else
+ {
+ // Retry for up to a second if device is busy
+ // or has a transient error.
+ val = _ioAccess->read(
+ _sensor.first, _sensor.second, hwmon::entry::cinput,
+ std::get<size_t>(retryIO),
+ std::get<std::chrono::milliseconds>(retryIO));
+ }
}
#ifdef UPDATE_FUNCTIONAL_ON_FAIL
catch (const std::system_error& e)
diff --git a/sensor.hpp b/sensor.hpp
index 4b2d281..64d6e48 100644
--- a/sensor.hpp
+++ b/sensor.hpp
@@ -4,6 +4,8 @@
#include "sensorset.hpp"
#include "types.hpp"
+#include <cerrno>
+#include <future>
#include <gpioplus/handle.hpp>
#include <memory>
#include <optional>
@@ -20,6 +22,17 @@ struct valueAdjust
std::unordered_set<int> rmRCs;
};
+/** @brief Custom exception for async sensor reading timeout
+ */
+struct AsyncSensorReadTimeOut : public std::system_error
+{
+ AsyncSensorReadTimeOut() :
+ system_error(std::error_code(ETIMEDOUT, std::system_category()),
+ "Async sensor read timed out")
+ {
+ }
+};
+
/** @class Sensor
* @brief Sensor object based on a SensorSet container's key type
* @details Sensor object to create and modify an associated device's sensor
@@ -87,10 +100,13 @@ class Sensor
* (number of and delay between)
* @param[in] info - Sensor object information
*
+ * @param[in] timedoutMap - Map to track timed out threads
+ *
* @return - Shared pointer to the value object
*/
- std::shared_ptr<ValueObject> addValue(const RetryIO& retryIO,
- ObjectInfo& info);
+ std::shared_ptr<ValueObject> addValue(
+ const RetryIO& retryIO, ObjectInfo& info,
+ std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap);
/**
* @brief Add status interface and functional property for sensor
--
2.21.0