Retry OCC read operations

OCC communication can fail intermittently. Attempt each read up to
3 times, waiting 100 ms between attempts, before declaring a failure
and exiting out of the openpower-occ-control application.

Resolves openbmc/openbmc#2805

Change-Id: I34dca5bc3c19a1f88975b427bdb6a683b41dbcb7
Signed-off-by: Andrew Geissler <geissonator@yahoo.com>
diff --git a/occ_errors.cpp b/occ_errors.cpp
index ee41db9..f4a5f69 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -129,24 +129,37 @@
 std::string Error::readFile(int len) const
 {
     auto data = std::make_unique<char[]>(len+1);
+    auto retries = 3;
+    auto delay = std::chrono::milliseconds{100};
 
-    // This file get created soon after binding. A value of 0 is
-    // deemed success and anything else is a Failure
-    // Since all the sysfs files would have size of 4096, if we read 0
-    // bytes -or- value '0', then it just means we are fine
-    auto r = read(fd, data.get(), len);
-    if (r < 0)
+    // OCC / FSI have intermittent issues so retry all reads
+    while (true)
     {
-        elog<ReadFailure>(
-            phosphor::logging::org::open_power::OCC::Device::
-                ReadFailure::CALLOUT_ERRNO(errno),
-            phosphor::logging::org::open_power::OCC::Device::
-                ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
+        // This file get created soon after binding. A value of 0 is
+        // deemed success and anything else is a Failure
+        // Since all the sysfs files would have size of 4096, if we read 0
+        // bytes -or- value '0', then it just means we are fine
+        auto r = read(fd, data.get(), len);
+        if (r < 0)
+        {
+            retries--;
+            if (retries == 0)
+            {
+                elog<ReadFailure>(
+                    phosphor::logging::org::open_power::OCC::Device::
+                        ReadFailure::CALLOUT_ERRNO(errno),
+                    phosphor::logging::org::open_power::OCC::Device::
+                        ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
+                break;
+            }
+            std::this_thread::sleep_for(delay);
+            continue;
+        }
+        break;
     }
-
     // Need to seek to START, else the poll returns immediately telling
     // there is data to be read
-    r = lseek(fd, 0, SEEK_SET);
+    auto r = lseek(fd, 0, SEEK_SET);
     if (r < 0)
     {
         log<level::ERR>("Failure seeking error file to START");