Wait for OCC Active PDR to become available

occ-control was attempting to communicate with the OCCs before the OCC
Active sensor PDR was available. With this commit, occ-control waits
until all PDRs are available before allowing communication with the
OCCs. It also reads the OCC Active sensor to determine the state before
allowing communication.

When Hostboot moves a system to safe mode, it sets the PLDM state of
the OCC Active sensor to
PLDM_STATE_SET_OPERATIONAL_RUNNING_STATUS_DORMANT. This commit also
handles processing of that DORMANT status.

Change-Id: I348cf14455d9373898deec2ac4a04b3752e81c4a
Signed-off-by: Chris Cain <cjcain@us.ibm.com>
diff --git a/occ_manager.cpp b/occ_manager.cpp
index a77d49b..e621a21 100644
--- a/occ_manager.cpp
+++ b/occ_manager.cpp
@@ -67,29 +67,71 @@
 #else
     if (!fs::exists(HOST_ON_FILE))
     {
-        // Create the OCCs based on on the /dev/occX devices
-        auto occs = findOCCsInDev();
-
-        if (occs.empty() || (prevOCCSearch.size() != occs.size()))
+        static bool statusObjCreated = false;
+        if (!statusObjCreated)
         {
-            // Something changed or no OCCs yet, try again in 10s.
-            // Note on the first pass prevOCCSearch will be empty,
-            // so there will be at least one delay to give things
-            // a chance to settle.
-            prevOCCSearch = occs;
+            // Create the OCCs based on the /dev/occX devices
+            auto occs = findOCCsInDev();
 
-            discoverTimer->restartOnce(10s);
-        }
-        else
-        {
-            discoverTimer.reset();
-
-            // createObjects requires OCC0 first.
-            std::sort(occs.begin(), occs.end());
-
-            for (auto id : occs)
+            if (occs.empty() || (prevOCCSearch.size() != occs.size()))
             {
-                createObjects(std::string(OCC_NAME) + std::to_string(id));
+                // Something changed or no OCCs yet, try again in 10s.
+                // Note on the first pass prevOCCSearch will be empty,
+                // so there will be at least one delay to give things
+                // a chance to settle.
+                prevOCCSearch = occs;
+
+                log<level::INFO>(
+                    fmt::format(
+                        "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
+                        occs.size())
+                        .c_str());
+
+                discoverTimer->restartOnce(10s);
+            }
+            else
+            {
+                // All OCCs appear to be available, create status objects
+
+                // createObjects requires OCC0 first.
+                std::sort(occs.begin(), occs.end());
+
+                log<level::INFO>(
+                    fmt::format(
+                        "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
+                        occs.size())
+                        .c_str());
+                for (auto id : occs)
+                {
+                    createObjects(std::string(OCC_NAME) + std::to_string(id));
+                }
+                statusObjCreated = true;
+            }
+        }
+
+        if (statusObjCreated)
+        {
+            static bool tracedHostWait = false;
+            if (utils::isHostRunning())
+            {
+                if (tracedHostWait)
+                {
+                    log<level::INFO>(
+                        "Manager::findAndCreateObjects(): Host is running");
+                    tracedHostWait = false;
+                }
+                waitingForAllOccActiveSensors = true;
+                checkAllActiveSensors();
+            }
+            else
+            {
+                if (!tracedHostWait)
+                {
+                    log<level::INFO>(
+                        "Manager::findAndCreateObjects(): Waiting for host to start");
+                    tracedHostWait = true;
+                }
+                discoverTimer->restartOnce(30s);
             }
         }
     }
@@ -105,6 +147,59 @@
 #endif
 }
 
+#ifdef POWER10
+// Check OCC Active sensors: query the first inactive one or stop waiting
+void Manager::checkAllActiveSensors()
+{
+    static bool allActiveSensorAvailable = false;
+    static bool tracedSensorWait = false;
+
+    // Assume all are active until an inactive OCC is found below
+    allActiveSensorAvailable = true;
+    for (auto& obj : statusObjects)
+    {
+        // Only an OCC not yet marked active needs its sensor queried
+        if (!obj->occActive())
+        {
+            allActiveSensorAvailable = false;
+            if (!tracedSensorWait)
+            {
+                log<level::INFO>(
+                    fmt::format(
+                        "Manager::checkAllActiveSensors(): Waiting on OCC{} Active sensor",
+                        obj->getOccInstanceID())
+                        .c_str());
+                tracedSensorWait = true;
+            }
+            pldmHandle->checkActiveSensor(obj->getOccInstanceID());
+            break;
+        }
+    }
+
+    if (allActiveSensorAvailable)
+    {
+        // No inactive OCC was found: disable the discovery timer, stop waiting
+        discoverTimer.reset();
+        waitingForAllOccActiveSensors = false;
+
+        log<level::INFO>(
+            "Manager::checkAllActiveSensors(): OCC Active sensors are available");
+        tracedSensorWait = false;
+    }
+    else
+    {
+        // Retry in 30s. NOTE(review): trace below looks unreachable - confirm
+        if (!tracedSensorWait)
+        {
+            log<level::INFO>(
+                "Manager::checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
+            tracedSensorWait = true;
+        }
+        discoverTimer->restartOnce(30s);
+    }
+}
+#endif
+
 std::vector<int> Manager::findOCCsInDev()
 {
     std::vector<int> occs;
@@ -287,6 +382,13 @@
         setSensorValueToNonFunctional(instance);
 #endif
     }
+
+#ifdef POWER10
+    if (waitingForAllOccActiveSensors)
+    {
+        checkAllActiveSensors();
+    }
+#endif
 }
 
 #ifdef I2C_OCC
@@ -326,8 +428,9 @@
 
     if (obj != statusObjects.end() && (*obj)->occActive())
     {
-        log<level::INFO>("SBE timeout, requesting HRESET",
-                         entry("SBE=%d", instance));
+        log<level::INFO>(
+            fmt::format("SBE timeout, requesting HRESET (OCC{})", instance)
+                .c_str());
 
         setSBEState(instance, SBE_STATE_NOT_USABLE);
 
@@ -361,7 +464,8 @@
 {
     if (success)
     {
-        log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
+        log<level::INFO>(
+            fmt::format("HRESET succeeded (OCC{})", instance).c_str());
 
         setSBEState(instance, SBE_STATE_BOOTED);
 
@@ -372,8 +476,9 @@
 
     if (sbeCanDump(instance))
     {
-        log<level::INFO>("HRESET failed, triggering SBE dump",
-                         entry("SBE=%d", instance));
+        log<level::INFO>(
+            fmt::format("HRESET failed (OCC{}), triggering SBE dump", instance)
+                .c_str());
 
         auto& bus = utils::getBus();
         uint32_t src6 = instance << 16;
@@ -1066,6 +1171,34 @@
     int masterInstance = -1;
     for (auto& obj : statusObjects)
     {
+#ifdef POWER10
+        if (!obj->occActive())
+        {
+            if (utils::isHostRunning())
+            {
+                // OCC does not appear to be active yet, check active sensor
+                pldmHandle->checkActiveSensor(obj->getOccInstanceID());
+                if (obj->occActive())
+                {
+                    log<level::INFO>(
+                        fmt::format(
+                            "validateOccMaster: OCC{} is ACTIVE after reading sensor",
+                            obj->getOccInstanceID())
+                            .c_str());
+                }
+            }
+            else
+            {
+                log<level::WARNING>(
+                    fmt::format(
+                        "validateOccMaster: HOST is not running (OCC{})",
+                        obj->getOccInstanceID())
+                        .c_str());
+                return;
+            }
+        }
+#endif // POWER10
+
         if (obj->isMasterOcc())
         {
             obj->addPresenceWatchMaster();
@@ -1086,9 +1219,13 @@
             }
         }
     }
+
     if (masterInstance < 0)
     {
-        log<level::ERR>("validateOccMaster: Master OCC not found!");
+        log<level::ERR>(
+            fmt::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
+                        statusObjects.size())
+                .c_str());
         // request reset
         statusObjects.front()->deviceError();
     }