Don't exit program on sysfs read failures.

We have an unreliable fan in one of the test systems and at present if
the sysfs entry is unavailable or returns failure, then the program will
exit.  The program could be serving many sensors, and any one failure
will cause it to exit.  This is true not only when initially creating
the sensors, but also if any sensor read fails at run-time.

Testing:  I verified the program logged the failures, which may not
be ideal if there is a buggy sensor, but, I also ran it and managed
to catch it where the sensor wasn't there initially and it cleanly
reported only the sensors that were responsive and didn't just exit.

There is certainly a case to be made for re-scanning periodically if the
sensor returns or there was a timing issue, and there is a separate bug
for that.  This commit means only to make the program more robust on
failure.

Change-Id: I310a3e3c0e0ea86e439341a296b741ded18f46f2
Signed-off-by: Patrick Venture <venture@google.com>
diff --git a/fan_speed.cpp b/fan_speed.cpp
index df6618c..459b9d0 100644
--- a/fan_speed.cpp
+++ b/fan_speed.cpp
@@ -13,12 +13,12 @@
     if (curValue != value)
     {
         //Write target out to sysfs
-        curValue = writeSysfsWithCallout(value,
-                                         sysfsRoot,
-                                         instance,
-                                         type,
-                                         id,
-                                         entry::target);
+        curValue = sysfs::writeSysfsWithCallout(value,
+                                                sysfsRoot,
+                                                instance,
+                                                type,
+                                                id,
+                                                entry::target);
     }
 
     return FanSpeedObject::target(value);
@@ -30,20 +30,20 @@
     namespace fs = std::experimental::filesystem;
 
     auto path = sysfsRoot + "/" + instance;
-    auto fullPath = make_sysfs_path(path,
-                                    type::pwm,
-                                    id,
-                                    entry::enable);
+    auto fullPath = sysfs::make_sysfs_path(path,
+                                           type::pwm,
+                                           id,
+                                           entry::enable);
 
     if (fs::exists(fullPath))
     {
         //This class always uses RPM mode
-        writeSysfsWithCallout(enable::rpmMode,
-                              sysfsRoot,
-                              instance,
-                              type::pwm,
-                              id,
-                              entry::enable);
+        sysfs::writeSysfsWithCallout(enable::rpmMode,
+                                     sysfsRoot,
+                                     instance,
+                                     type::pwm,
+                                     id,
+                                     entry::enable);
     }
 }
 
diff --git a/mainloop.cpp b/mainloop.cpp
index 5976307..14b570f 100644
--- a/mainloop.cpp
+++ b/mainloop.cpp
@@ -17,6 +17,8 @@
 #include <memory>
 #include <cstdlib>
 #include <algorithm>
+
+#include <phosphor-logging/elog-errors.hpp>
 #include "sensorset.hpp"
 #include "hwmon.hpp"
 #include "sysfs.hpp"
@@ -26,6 +28,10 @@
 #include "targets.hpp"
 #include "fan_speed.hpp"
 
+#include <xyz/openbmc_project/Sensor/Device/error.hpp>
+
+using namespace phosphor::logging;
+
 // Initialization for Warning Objects
 decltype(Thresholds<WarningObject>::setLo) Thresholds<WarningObject>::setLo =
     &WarningObject::warningLow;
@@ -150,11 +156,23 @@
     auto& obj = std::get<Object>(info);
     auto& objPath = std::get<std::string>(info);
 
-    int val = readSysfsWithCallout(hwmonRoot,
-                                   instance,
-                                   sensor.first,
-                                   sensor.second,
-                                   hwmon::entry::input);
+    int val;
+    try
+    {
+        val = sysfs::readSysfsWithCallout(hwmonRoot,
+                                          instance,
+                                          sensor.first,
+                                          sensor.second,
+                                          hwmon::entry::input);
+    }
+    catch(const std::exception& ioe)
+    {
+        using namespace sdbusplus::xyz::openbmc_project::Sensor::Device::Error;
+        commit<ReadFailure>();
+
+        return static_cast<std::shared_ptr<ValueObject>>(nullptr);
+    }
+
     auto iface = std::make_shared<ValueObject>(bus, objPath.c_str(), deferSignals);
     iface->value(val);
 
@@ -256,6 +274,10 @@
 
         ObjectInfo info(&_bus, std::move(objectPath), Object());
         auto valueInterface = addValue(i.first, _hwmonRoot, _instance, info);
+        if (!valueInterface)
+        {
+            continue; /* skip adding this sensor for now. */
+        }
         auto sensorValue = valueInterface->value();
         addThreshold<WarningObject>(i.first, sensorValue, info);
         addThreshold<CriticalObject>(i.first, sensorValue, info);
@@ -308,6 +330,7 @@
     // Polling loop.
     while (!_shutdown)
     {
+        std::vector<SensorSet::key_type> destroy;
         // Iterate through all the sensors.
         for (auto& i : state)
         {
@@ -315,40 +338,57 @@
             if (attrs.find(hwmon::entry::input) != attrs.end())
             {
                 // Read value from sensor.
-                int value = readSysfsWithCallout(_hwmonRoot,
-                                                 _instance,
-                                                 i.first.first,
-                                                 i.first.second,
-                                                 hwmon::entry::input);
-                auto& objInfo = std::get<ObjectInfo>(i.second);
-                auto& obj = std::get<Object>(objInfo);
-
-                for (auto& iface : obj)
+                int value;
+                try
                 {
-                    auto valueIface = std::shared_ptr<ValueObject>();
-                    auto warnIface = std::shared_ptr<WarningObject>();
-                    auto critIface = std::shared_ptr<CriticalObject>();
+                    value = sysfs::readSysfsWithCallout(_hwmonRoot,
+                                                        _instance,
+                                                        i.first.first,
+                                                        i.first.second,
+                                                        hwmon::entry::input);
 
-                    switch (iface.first)
+                    auto& objInfo = std::get<ObjectInfo>(i.second);
+                    auto& obj = std::get<Object>(objInfo);
+
+                    for (auto& iface : obj)
                     {
-                        case InterfaceType::VALUE:
-                            valueIface = std::experimental::any_cast<std::shared_ptr<ValueObject>>
-                                         (iface.second);
-                            valueIface->value(value);
-                            break;
-                        case InterfaceType::WARN:
-                            checkThresholds<WarningObject>(iface.second, value);
-                            break;
-                        case InterfaceType::CRIT:
-                            checkThresholds<CriticalObject>(iface.second, value);
-                            break;
-                        default:
-                            break;
+                        auto valueIface = std::shared_ptr<ValueObject>();
+                        auto warnIface = std::shared_ptr<WarningObject>();
+                        auto critIface = std::shared_ptr<CriticalObject>();
+
+                        switch (iface.first)
+                        {
+                            case InterfaceType::VALUE:
+                                valueIface = std::experimental::any_cast<std::shared_ptr<ValueObject>>
+                                            (iface.second);
+                                valueIface->value(value);
+                                break;
+                            case InterfaceType::WARN:
+                                checkThresholds<WarningObject>(iface.second, value);
+                                break;
+                            case InterfaceType::CRIT:
+                                checkThresholds<CriticalObject>(iface.second, value);
+                                break;
+                            default:
+                                break;
+                        }
                     }
                 }
+                catch (const std::exception& e)
+                {
+                    using namespace sdbusplus::xyz::openbmc_project::Sensor::Device::Error;
+                    commit<ReadFailure>();
+
+                    destroy.push_back(i.first);
+                }
             }
         }
 
+        for (auto& i : destroy)
+        {
+            state.erase(i);
+        }
+
         // Respond to DBus
         _bus.process_discard();
 
diff --git a/readd.cpp b/readd.cpp
index 8253d92..95c6887 100644
--- a/readd.cpp
+++ b/readd.cpp
@@ -37,7 +37,7 @@
     auto path = (*options)["of-name"];
     if (path != ArgumentParser::empty_string)
     {
-        path = findHwmon(path);
+        path = sysfs::findHwmon(path);
     }
 
     if (path == ArgumentParser::empty_string)
diff --git a/sysfs.cpp b/sysfs.cpp
index e76c715..b26c9ae 100644
--- a/sysfs.cpp
+++ b/sysfs.cpp
@@ -27,6 +27,8 @@
 using namespace phosphor::logging;
 namespace fs = std::experimental::filesystem;
 
+namespace sysfs {
+
 static constexpr auto ofRoot = "/sys/firmware/devicetree/base";
 
 /**
@@ -193,14 +195,14 @@
         instancePath /= "device";
         auto callOutPath = findCalloutPath(fs::canonical(instancePath));
         using namespace sdbusplus::xyz::openbmc_project::Sensor::Device::Error;
-        report<ReadFailure>(
+
+        // this throws a ReadFailure.
+        elog<ReadFailure>(
             xyz::openbmc_project::Sensor::Device::
                 ReadFailure::CALLOUT_ERRNO(rc),
             xyz::openbmc_project::Sensor::Device::
                 ReadFailure::CALLOUT_DEVICE_PATH(
                     fs::canonical(callOutPath).c_str()));
-
-        exit(EXIT_FAILURE);
     }
 
     return value;
@@ -251,4 +253,5 @@
     return value;
 }
 
+}
 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
diff --git a/sysfs.hpp b/sysfs.hpp
index cc36ef4..ada73e8 100644
--- a/sysfs.hpp
+++ b/sysfs.hpp
@@ -3,6 +3,8 @@
 #include <fstream>
 #include <string>
 
+namespace sysfs {
+
 inline std::string make_sysfs_path(const std::string& path,
                                    const std::string& type,
                                    const std::string& id,
@@ -64,4 +66,6 @@
                                const std::string& id,
                                const std::string& sensor);
 
+}
+
 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
diff --git a/targets.hpp b/targets.hpp
index 768d451..86058ca 100644
--- a/targets.hpp
+++ b/targets.hpp
@@ -54,10 +54,10 @@
 
     // Check if target sysfs file exists
     auto targetPath = hwmonRoot + '/' + instance;
-    auto sysfsFullPath = make_sysfs_path(targetPath,
-                                         sensor.first,
-                                         sensor.second,
-                                         hwmon::entry::target);
+    auto sysfsFullPath = sysfs::make_sysfs_path(targetPath,
+                                                sensor.first,
+                                                sensor.second,
+                                                hwmon::entry::target);
     if (fs::exists(sysfsFullPath))
     {
         target = std::make_shared<T>(hwmonRoot,