Enable OCC error monitoring

Fixes openbmc/openbmc#2165

Change-Id: I93f317a32c910f279003fa0fce6ae2d597f90312
Signed-off-by: Vishwanatha Subbanna <vishwa@linux.vnet.ibm.com>
diff --git a/occ_errors.cpp b/occ_errors.cpp
index f2da49f..fe81f17 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -56,25 +56,37 @@
 // Starts to watch for errors
 void Error::addWatch()
 {
-    // Open the file
-    openFile();
+    if (!watching)
+    {
+        // Open the file
+        openFile();
 
-    // register the callback handler
-    registerCallBack();
+        // register the callback handler
+        registerCallBack();
+
+        // Set we are watching the error
+        watching = true;
+    }
 }
 
 // Stops watching for errors
 void Error::removeWatch()
 {
-    // Close the file
-    if (fd >= 0)
+    if (watching)
     {
-        close(fd);
-    }
+        // Close the file
+        if (fd >= 0)
+        {
+            close(fd);
+        }
 
-    // Reduce the reference count. Since there is only one instances
-    // of add_io, this will result empty loop
-    eventSource.reset();
+        // Reduce the reference count. Since there is only one instances
+        // of add_io, this will result empty loop
+        eventSource.reset();
+
+        // We are no more watching the error
+        watching = false;
+    }
 }
 
 // Callback handler when there is an activity on the FD
diff --git a/occ_errors.hpp b/occ_errors.hpp
index 1e66ffc..e3820d1 100644
--- a/occ_errors.hpp
+++ b/occ_errors.hpp
@@ -70,6 +70,9 @@
         /** @brief File descriptor to watch for errors */
         int fd = -1;
 
+        /** @brief Current state of error watching */
+        bool watching = false;
+
         /** @brief attaches FD to events and sets up callback handler */
         void registerCallBack();
 
diff --git a/occ_manager.hpp b/occ_manager.hpp
index 7b76703..df6f199 100644
--- a/occ_manager.hpp
+++ b/occ_manager.hpp
@@ -5,6 +5,8 @@
 #include <experimental/filesystem>
 #include <functional>
 #include <sdbusplus/bus.hpp>
+#include <phosphor-logging/log.hpp>
+#include <phosphor-logging/elog.hpp>
 #include <powercap.hpp>
 #include "occ_pass_through.hpp"
 #include "occ_status.hpp"
@@ -12,7 +14,6 @@
 #include "config.h"
 
 namespace sdbusRule = sdbusplus::bus::match::rules;
-
 namespace open_power
 {
 namespace occ
@@ -68,6 +69,7 @@
             }
         }
 
+    private:
         /** @brief Callback that responds to cpu creation in the inventory -
          *         by creating the needed objects.
          *
@@ -92,7 +94,6 @@
             return 0;
         }
 
-    private:
         /** @brief Create child OCC objects.
          *
          *  @param[in] occ - the occ name, such as occ0.
@@ -110,7 +111,9 @@
                 std::make_unique<Status>(
                     bus,
                     event,
-                    path.c_str()));
+                    path.c_str(),
+                    std::bind(std::mem_fn(&Manager::statusCallBack),
+                                          this, std::placeholders::_1)));
 
             // Create the power cap monitor object for master occ (0)
             if (!pcap)
@@ -121,6 +124,49 @@
             }
         }
 
+        /** @brief Callback handler invoked by Status object when the OccActive
+         *         property is changed. This is needed to make sure that the
+         *         error detection is started only after all the OCCs are bound.
+         *         Similarly, when one of the OCC gets its OccActive property
+         *         un-set, then the OCC error detection needs to be stopped on
+         *         all the OCCs
+         *
+         *  @param[in] status - OccActive status
+         */
+        void statusCallBack(bool status)
+        {
+            using namespace phosphor::logging;
+            using InternalFailure = sdbusplus::xyz::openbmc_project::Common::
+                                        Error::InternalFailure;
+
+            // At this time, it won't happen but keeping it
+            // here just in case something changes in the future
+            if ((activeCount == 0) && (!status))
+            {
+                log<level::ERR>("Invalid update on OCCActive");
+                elog<InternalFailure>();
+            }
+
+            activeCount += status ? 1 : -1;
+
+            // If all the OCCs are bound, then start error detection
+            if (activeCount == statusObjects.size())
+            {
+                for (const auto& occ: statusObjects)
+                {
+                    occ->addErrorWatch();
+                }
+            }
+            else if (!status)
+            {
+                // If some OCCs are not bound yet, those will be a NO-OP
+                for (const auto& occ: statusObjects)
+                {
+                    occ->removeErrorWatch();
+                }
+            }
+        }
+
         /** @brief reference to the bus */
         sdbusplus::bus::bus& bus;
 
@@ -138,6 +184,9 @@
 
         /** @brief sbdbusplus match objects */
         std::vector<sdbusplus::bus::match_t> cpuMatches;
+
+        /** @brief Number of OCCs that are bound */
+        uint8_t activeCount = 0;
 };
 
 } // namespace occ
diff --git a/occ_status.cpp b/occ_status.cpp
index 94b8c4c..15ffbc8 100644
--- a/occ_status.cpp
+++ b/occ_status.cpp
@@ -28,17 +28,21 @@
             // Bind the device
             device.bind();
 
-            // And watch for errors
-            // Commenting until we solve the occ error monitoring issue
-            // TODO: openbmc/openbmc#2126
-            // device.addErrorWatch();
+            // Call into Manager to let know that we have bound
+            if (this->callBack)
+            {
+                this->callBack(value);
+            }
         }
         else
         {
-            // Stop watching for errors
-            // Commenting until we solve the occ error monitoring issue
-            // TODO: openbmc/openbmc#2126
-            // device.removeErrorWatch();
+            // Call into Manager to let know that we will unbind.
+            // Need to do this before doing un-bind since it will
+            // result in slave error if Master is un-bound
+            if (this->callBack)
+            {
+                this->callBack(value);
+            }
 
             // Do the unbind.
             device.unBind();
diff --git a/occ_status.hpp b/occ_status.hpp
index 74839d0..2e12806 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <functional>
 #include <sdbusplus/bus.hpp>
 #include <sdbusplus/server/object.hpp>
 #include <org/open_power/OCC/Status/server.hpp>
@@ -42,13 +43,20 @@
         /** @brief Constructs the Status object and
          *         the underlying device object
          *
-         *  @param[in] bus  - DBus bus to attach to
-         *  @param[in] path - DBus object path
+         *  @param[in] bus      - DBus bus to attach to
+         *  @param[in] event    - sd_event unique pointer reference
+         *  @param[in] path     - DBus object path
+         *  @param[in] callBack - Callback handler to invoke during
+         *                        property change
          */
-        Status(sdbusplus::bus::bus& bus, EventPtr& event, const char* path)
+        Status(sdbusplus::bus::bus& bus,
+               EventPtr& event,
+               const char* path,
+               std::function<void(bool)> callBack = nullptr)
             : Interface(bus, path),
               bus(bus),
               path(path),
+              callBack(callBack),
               instance(((this->path.back() - '0'))),
               device(event,
                      name + std::to_string(instance + 1),
@@ -81,6 +89,18 @@
          */
         bool occActive(bool value) override;
 
+        /** @brief Starts OCC error detection */
+        inline void addErrorWatch()
+        {
+            return device.addErrorWatch();
+        }
+
+        /** @brief Stops OCC error detection */
+        inline void removeErrorWatch()
+        {
+            return device.removeErrorWatch();
+        }
+
     private:
 
         /** @brief sdbus handle */
@@ -89,6 +109,11 @@
         /** @brief OCC dbus object path */
         std::string path;
 
+        /** @brief Callback handler to be invoked during property change.
+         *         This is a handler in Manager class
+         */
+        std::function<void(bool)> callBack;
+
         /** @brief occ name prefix */
         std::string name = OCC_NAME;