sensor: Implement sensor "ASYNC_READ_TIMEOUT"

This change will prevent sensors from blocking all other sensor reads
and D-Bus if they do not report failures quickly enough.

If "ASYNC_READ_TIMEOUT" environment variable is defined in the
sensor's config file for a key_type, the sensor read will be
asynchronous with timeout set in milliseconds.

For example for "sensor1":
ASYNC_READ_TIMEOUT_sensor1="1000"  // Timeout will be set to 1 sec

If the read times out, the sensor read will be skipped and the
sensor's functional property will be set to 'false'. Timed out futures
will be placed in a map to prevent their destructor from running and
blocking until the read completes (limitation of std::async).

Tested: This patch has been running downstream for over a year to
        solve a slow I2C sensor reads causing IPMI slowdown.

Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48
Signed-off-by: Brandon Kim <brandonkim@google.com>
diff --git a/sensor.cpp b/sensor.cpp
index 43af2f6..ddca883 100644
--- a/sensor.cpp
+++ b/sensor.cpp
@@ -15,6 +15,7 @@
 #include <cmath>
 #include <cstring>
 #include <filesystem>
+#include <future>
 #include <phosphor-logging/elog-errors.hpp>
 #include <thread>
 #include <xyz/openbmc_project/Common/error.hpp>
@@ -117,7 +118,8 @@
 }
 
 std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
-                                              ObjectInfo& info)
+                                              ObjectInfo& info,
+                                              TimedoutMap& timedoutMap)
 {
     static constexpr bool deferSignals = true;
 
@@ -144,12 +146,27 @@
             // RAII object for GPIO unlock / lock
             auto locker = gpioUnlock(getGpio());
 
-            // Retry for up to a second if device is busy
-            // or has a transient error.
-            val =
-                _ioAccess->read(_sensor.first, _sensor.second,
+            // For sensors with attribute ASYNC_READ_TIMEOUT,
+            // spawn a thread with timeout
+            auto asyncReadTimeout = env::getEnv("ASYNC_READ_TIMEOUT", _sensor);
+            if (!asyncReadTimeout.empty())
+            {
+                std::chrono::milliseconds asyncTimeout{
+                    std::stoi(asyncReadTimeout)};
+                val = asyncRead(_sensor, _ioAccess, asyncTimeout, timedoutMap,
+                                _sensor.first, _sensor.second,
                                 hwmon::entry::cinput, std::get<size_t>(retryIO),
                                 std::get<std::chrono::milliseconds>(retryIO));
+            }
+            else
+            {
+                // Retry for up to a second if device is busy
+                // or has a transient error.
+                val = _ioAccess->read(
+                    _sensor.first, _sensor.second, hwmon::entry::cinput,
+                    std::get<size_t>(retryIO),
+                    std::get<std::chrono::milliseconds>(retryIO));
+            }
         }
 #if UPDATE_FUNCTIONAL_ON_FAIL
         catch (const std::system_error& e)
@@ -266,4 +283,68 @@
     return GpioLocker(std::move(handle));
 }
 
+SensorValueType asyncRead(const SensorSet::key_type& sensorSetKey,
+                          const hwmonio::HwmonIOInterface* ioAccess,
+                          std::chrono::milliseconds asyncTimeout,
+                          TimedoutMap& timedoutMap, const std::string& type,
+                          const std::string& id, const std::string& sensor,
+                          const size_t retries,
+                          const std::chrono::milliseconds delay)
+{
+    // Default async read timeout
+    bool valueIsValid = false;
+    std::future<int64_t> asyncThread;
+
+    auto asyncIter = timedoutMap.find(sensorSetKey);
+    if (asyncIter == timedoutMap.end())
+    {
+        // If sensor not found in timedoutMap, spawn an async thread
+        asyncThread =
+            std::async(std::launch::async, &hwmonio::HwmonIOInterface::read,
+                       ioAccess, type, id, sensor, retries, delay);
+        valueIsValid = true;
+    }
+    else
+    {
+        // If we already have the async thread in the timedoutMap, it means this
+        // sensor has already timed out in the previous reads. No need to wait
+        // on subsequent reads - proceed to check the future_status to see when
+        // the async thread finishes
+        asyncTimeout = std::chrono::seconds(0);
+        asyncThread = std::move(asyncIter->second);
+    }
+
+    // TODO: This is still not a true asynchronous read as it still blocks the
+    // main thread for asyncTimeout amount of time. To make this completely
+    // asynchronous, schedule a read and register a callback to update the
+    // sensor value
+    std::future_status status = asyncThread.wait_for(asyncTimeout);
+    switch (status)
+    {
+        case std::future_status::ready:
+            // Read has finished
+            if (valueIsValid)
+            {
+                return asyncThread.get();
+                // Good sensor reads should skip the code below
+            }
+            // Async read thread has completed but had previously timed out (was
+            // found in the timedoutMap). Erase from timedoutMap and throw to
+            // allow retry in the next read cycle. Not returning the read value
+            // as the sensor reading may be bad / corrupted if it took so long.
+            timedoutMap.erase(sensorSetKey);
+            throw AsyncSensorReadTimeOut();
+        default:
+            // Read timed out so add the thread to the timedoutMap (if the entry
+            // already exists, operator[] updates it).
+            //
+            // Keeping the timed out futures in a map is required to prevent
+            // their destructor from being called when returning from this
+            // stack. The destructor will otherwise block until the read
+            // completes due to the limitation of std::async.
+            timedoutMap[sensorSetKey] = std::move(asyncThread);
+            throw AsyncSensorReadTimeOut();
+    }
+}
+
 } // namespace sensor