Allow EAGAIN failures

Certain devices are known to fail reads with EAGAIN when they are
read too frequently, so tolerate those failures.

On startup, the code will retry for up to a second to get a good
first reading. After that, in the main loop, a read that fails with
EAGAIN is skipped and the sensor keeps its current value.

Resolves openbmc/openbmc#2038

Change-Id: I7621aa30429c43276239982a03ec3eef02ce9c6e
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
diff --git a/mainloop.cpp b/mainloop.cpp
index 14b570f..a07d558 100644
--- a/mainloop.cpp
+++ b/mainloop.cpp
@@ -156,21 +156,38 @@
     auto& obj = std::get<Object>(info);
     auto& objPath = std::get<std::string>(info);
 
-    int val;
-    try
-    {
-        val = sysfs::readSysfsWithCallout(hwmonRoot,
-                                          instance,
-                                          sensor.first,
-                                          sensor.second,
-                                          hwmon::entry::input);
-    }
-    catch(const std::exception& ioe)
-    {
-        using namespace sdbusplus::xyz::openbmc_project::Sensor::Device::Error;
-        commit<ReadFailure>();
+    int val = 0;
+    bool retry = true;
+    size_t count = 10;
 
-        return static_cast<std::shared_ptr<ValueObject>>(nullptr);
+
+    //Retry for up to a second if device is busy
+
+    while (retry)
+    {
+        try
+        {
+            val = sysfs::readSysfsWithCallout(hwmonRoot,
+                    instance,
+                    sensor.first,
+                    sensor.second,
+                    hwmon::entry::input,
+                    count > 0); //throw DeviceBusy until last attempt
+        }
+        catch (sysfs::DeviceBusyException& e)
+        {
+            count--;
+            std::this_thread::sleep_for(std::chrono::milliseconds{100});
+            continue;
+        }
+        catch(const std::exception& ioe)
+        {
+            using namespace sdbusplus::xyz::openbmc_project::Sensor::Device::Error;
+            commit<ReadFailure>();
+
+            return static_cast<std::shared_ptr<ValueObject>>(nullptr);
+        }
+        retry = false;
     }
 
     auto iface = std::make_shared<ValueObject>(bus, objPath.c_str(), deferSignals);
@@ -341,11 +358,21 @@
                 int value;
                 try
                 {
-                    value = sysfs::readSysfsWithCallout(_hwmonRoot,
-                                                        _instance,
-                                                        i.first.first,
-                                                        i.first.second,
-                                                        hwmon::entry::input);
+                    try
+                    {
+                        value = sysfs::readSysfsWithCallout(_hwmonRoot,
+                                _instance,
+                                i.first.first,
+                                i.first.second,
+                                hwmon::entry::input);
+                    }
+                    catch (sysfs::DeviceBusyException& e)
+                    {
+                        //Just go with the current values and try again later.
+                        //TODO: openbmc/openbmc#2048 could keep an eye on
+                        //how long the device is actually busy.
+                        continue;
+                    }
 
                     auto& objInfo = std::get<ObjectInfo>(i.second);
                     auto& obj = std::get<Object>(objInfo);
diff --git a/sysfs.cpp b/sysfs.cpp
index b26c9ae..efdba92 100644
--- a/sysfs.cpp
+++ b/sysfs.cpp
@@ -153,7 +153,8 @@
                          const std::string& instance,
                          const std::string& type,
                          const std::string& id,
-                         const std::string& sensor)
+                         const std::string& sensor,
+                         bool throwDeviceBusy)
 {
     namespace fs = std::experimental::filesystem;
 
@@ -181,6 +182,11 @@
         // or read system calls that got us here.
         auto rc = errno;
 
+        if ((rc == EAGAIN) && throwDeviceBusy)
+        {
+            throw DeviceBusyException(fullPath);
+        }
+
         // If the directory disappeared then this application should gracefully
         // exit.  There are race conditions between the unloading of a hwmon
         // driver and the stopping of this service by systemd.  To prevent
diff --git a/sysfs.hpp b/sysfs.hpp
index ada73e8..b4709eb 100644
--- a/sysfs.hpp
+++ b/sysfs.hpp
@@ -1,10 +1,29 @@
 #pragma once
 
+#include <exception>
 #include <fstream>
 #include <string>
 
 namespace sysfs {
 
+/**
+ * @class DeviceBusyException
+ *
+ * Internal exception thrown by readSysfsWithCallout() on EAGAIN when
+ * throwDeviceBusy is set; what() is the sysfs path plus " busy".
+ * Callers catch it to retry or keep the current value.
+ * NOTE(review): std::runtime_error lives in <stdexcept>, not <exception>.
+ */
+class DeviceBusyException : public std::runtime_error
+{
+    public:
+
+        DeviceBusyException(const std::string& path) :
+            std::runtime_error(path + " busy")
+        {
+        }
+};
+
 inline std::string make_sysfs_path(const std::string& path,
                                    const std::string& type,
                                    const std::string& id,
@@ -37,6 +56,8 @@
  *  @param[in] type - The hwmon type (ex. temp).
  *  @param[in] id - The hwmon id (ex. 1).
  *  @param[in] sensor - The hwmon sensor (ex. input).
+ *  @param[in] throwDeviceBusy - If true, an EAGAIN errno throws a
+ *             DeviceBusyException instead of an error log exception.
  *
  *  @returns - The read value.
  */
@@ -44,7 +65,8 @@
                          const std::string& instance,
                          const std::string& type,
                          const std::string& id,
-                         const std::string& sensor);
+                         const std::string& sensor,
+                         bool throwDeviceBusy = true);
 
  /** @brief Write a hwmon sysfs value
   *