sst: Rediscover profiles after host reboot

In some cases, a host processor reboot may change the static SST-PP
profile information. This commit adds the ability to register callbacks
that run upon hostState changes, and reruns SST discovery whenever the
host exits the power-off state.

Tested:
- Ran the tools/sst-compare-redfish-os.py tool on a platform with an
  SPR host CPU, and observed no mismatches before and after a host
  reboot.
- Confirmed Redfish OperatingConfig properties are still populated when
  the host is off.

Signed-off-by: Jonathan Doman <jonathan.doman@intel.com>
Change-Id: I9e7b0ebb8c5ec7a8464346f3476490b765579428
diff --git a/include/cpuinfo_utils.hpp b/include/cpuinfo_utils.hpp
index 926c2d1..d57203b 100644
--- a/include/cpuinfo_utils.hpp
+++ b/include/cpuinfo_utils.hpp
@@ -47,6 +47,13 @@
  */
 void hostStateSetup(const std::shared_ptr<sdbusplus::asio::connection>& conn);
 
+/**
+ * Callback run on every HostState change, invoked as cb(oldState, newState).
+ * Callbacks added via addHostStateCallback() run in the order they were added.
+ */
+using HostStateHandler = std::function<void(HostState, HostState)>;
+void addHostStateCallback(HostStateHandler cb);
+
 constexpr uint64_t bit(uint8_t index)
 {
     return (1ull << index);
diff --git a/include/speed_select.hpp b/include/speed_select.hpp
index 4b4189f..b7a41fb 100644
--- a/include/speed_select.hpp
+++ b/include/speed_select.hpp
@@ -27,19 +27,13 @@
 {
 
 /**
- * Retrieve all SST configuration info for all discoverable CPUs, and publish
- * the info on new D-Bus objects on the given bus connection.
+ * Initialize SST subsystem.
+ *
+ * This registers a host state callback so that, each time the host leaves the
+ * powered-off state, SST configuration info for all discoverable CPUs is
+ * retrieved and published on new D-Bus objects.
- *
- * This function may block until all discovery is completed (many seconds), or
- * it may schedule the work to be done at a later time (on the given ASIO
- * context) if CPUs are not currently available, and may also schedule periodic
- * work to be done after initial discovery is completed.
- *
- * @param[in,out]   ioc     ASIO IO context/service
- * @param[in,out]   conn    D-Bus ASIO connection.
  */
-void init(boost::asio::io_context& ioc,
-          const std::shared_ptr<sdbusplus::asio::connection>& conn);
+void init();
 
 class PECIError : public std::runtime_error
 {
@@ -61,18 +55,7 @@
  *
  * @return  List of bit indexes.
  */
-static std::vector<uint32_t> convertMaskToList(std::bitset<64> mask)
-{
-    std::vector<uint32_t> bitList;
-    for (size_t i = 0; i < mask.size(); ++i)
-    {
-        if (mask.test(i))
-        {
-            bitList.push_back(i);
-        }
-    }
-    return bitList;
-}
+std::vector<uint32_t> convertMaskToList(std::bitset<64> mask);
 
 using TurboEntry = std::tuple<uint32_t, size_t>;
 
diff --git a/src/cpuinfo_main.cpp b/src/cpuinfo_main.cpp
index 1d91f17..0d9c797 100644
--- a/src/cpuinfo_main.cpp
+++ b/src/cpuinfo_main.cpp
@@ -650,7 +650,7 @@
 
     cpu_info::hostStateSetup(conn);
 
-    cpu_info::sst::init(io, conn);
+    cpu_info::sst::init();
 
     // shared_ptr conn is global for the service
     // const reference of conn is passed to async calls
diff --git a/src/cpuinfo_utils.cpp b/src/cpuinfo_utils.cpp
index 1c69e6a..7265bbb 100644
--- a/src/cpuinfo_utils.cpp
+++ b/src/cpuinfo_utils.cpp
@@ -36,11 +36,18 @@
 static PowerState powerState = PowerState::Off;
 static OsState osState = OsState::Inactive;
 static bool biosDone = false;
+static std::vector<HostStateHandler> hostStateCallbacks;
 
 static std::shared_ptr<sdbusplus::asio::connection> dbusConn;
 
+void addHostStateCallback(HostStateHandler cb)
+{
+    hostStateCallbacks.push_back(std::move(cb)); // sink param: move, not copy
+}
+
 static void updateHostState()
 {
+    HostState prevState = hostState;
     if (powerState == PowerState::Off)
     {
         hostState = HostState::off;
@@ -67,6 +74,14 @@
         hostState = HostState::postComplete;
     }
     DEBUG_PRINT << "new host state: " << static_cast<int>(hostState) << "\n";
+
+    if (prevState != hostState)
+    {
+        for (const auto& cb : hostStateCallbacks)
+        {
+            cb(prevState, hostState);
+        }
+    }
 }
 
 void updatePowerState(const std::string& newState)
diff --git a/src/speed_select.cpp b/src/speed_select.cpp
index 3a2f8f4..8facad5 100644
--- a/src/speed_select.cpp
+++ b/src/speed_select.cpp
@@ -19,6 +19,7 @@
 
 #include <peci.h>
 
+#include <boost/asio/error.hpp>
 #include <boost/asio/steady_timer.hpp>
 #include <xyz/openbmc_project/Common/Device/error.hpp>
 #include <xyz/openbmc_project/Common/error.hpp>
@@ -55,6 +56,19 @@
     return true;
 }
 
+std::vector<uint32_t> convertMaskToList(std::bitset<64> mask)
+{
+    std::vector<uint32_t> setBits; // indexes of set bits, lowest first
+    for (uint32_t pos = 0; pos < mask.size(); ++pos)
+    {
+        if (mask[pos])
+        {
+            setBits.push_back(pos);
+        }
+    }
+    return setBits;
+}
+
 static std::vector<BackendProvider>& getProviders()
 {
     static auto* providers = new std::vector<BackendProvider>;
@@ -369,10 +383,6 @@
  * Retrieve all SST configuration info for all discoverable CPUs, and publish
  * the info on new D-Bus objects on the given bus connection.
  *
- * @param[out]  cpuList     List to append info about discovered CPUs,
- *                          including pointers to D-Bus objects to keep them
- *                          alive. No items may be added to list in case host
- *                          system is powered off and no CPUs are accessible.
  * @param[in,out]   ioc     ASIO context.
  * @param[in,out]   conn    D-Bus ASIO connection.
  *
@@ -381,11 +391,18 @@
  * @throw PECIError     A PECI command failed on a CPU which had previously
  *                      responded to a command.
  */
-static bool
-    discoverCPUsAndConfigs(std::vector<std::unique_ptr<CPUConfig>>& cpuList,
-                           boost::asio::io_context& ioc,
-                           sdbusplus::asio::connection& conn)
+static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc,
+                                   sdbusplus::asio::connection& conn)
 {
+    // Persistent list - only populated after complete/successful discovery
+    static std::vector<std::unique_ptr<CPUConfig>> cpus;
+    cpus.clear();
+
+    // Temporary staging list. In case there is any failure, these temporary
+    // objects will get dropped to avoid presenting incomplete info until the
+    // next discovery attempt.
+    std::vector<std::unique_ptr<CPUConfig>> cpuList;
+
     for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
     {
         // Let the event handler run any waiting tasks. If there is a lot of
@@ -481,25 +498,31 @@
             cpuList.pop_back();
             continue;
         }
-
-        cpu.finalize();
     }
 
+    cpuList.swap(cpus);
+    std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); });
     return true;
 }
 
-void init(boost::asio::io_context& ioc,
-          const std::shared_ptr<sdbusplus::asio::connection>& conn)
+/**
+ * Attempt discovery process, and if it fails, wait for 10 seconds to try again.
+ */
+static void discoverOrWait()
 {
-    static boost::asio::steady_timer peciRetryTimer(ioc);
-    static std::vector<std::unique_ptr<CPUConfig>> cpus;
+    static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext());
     static int peciErrorCount = 0;
-
     bool finished = false;
+
+    // This function may be called from hostStateHandler or by retrying itself.
+    // In case those overlap, cancel any outstanding retry timer.
+    peciRetryTimer.cancel();
+
     try
     {
         DEBUG_PRINT << "Starting discovery\n";
-        finished = discoverCPUsAndConfigs(cpus, ioc, *conn);
+        finished = discoverCPUsAndConfigs(dbus::getIOContext(),
+                                          *dbus::getConnection());
     }
     catch (const PECIError& err)
     {
@@ -522,19 +545,35 @@
     // Retry later if no CPUs were available, or there was a PECI error.
     if (!finished)
     {
-        // Drop any created interfaces to avoid presenting incomplete info
-        cpus.clear();
         peciRetryTimer.expires_after(std::chrono::seconds(10));
-        peciRetryTimer.async_wait([&ioc, conn](boost::system::error_code ec) {
+        peciRetryTimer.async_wait([](boost::system::error_code ec) {
             if (ec)
             {
-                std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
+                if (ec != boost::asio::error::operation_aborted)
+                {
+                    std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
+                }
                 return;
             }
-            init(ioc, conn);
+            discoverOrWait();
         });
     }
 }
 
+static void hostStateHandler(HostState prevState, HostState /*curState*/)
+{
+    // Only a transition out of the powered-off state (re)starts discovery.
+    if (prevState != HostState::off)
+    {
+        return;
+    }
+    discoverOrWait();
+}
+
+void init()
+{
+    addHostStateCallback(hostStateHandler); // (re)discover when host leaves off
+}
+
 } // namespace sst
 } // namespace cpu_info