sst: Rediscover profiles after host reboot
In some cases, a host processor reboot may change the static SST-PP
profile information. This commit adds the ability to register callbacks
that run on hostState changes, and reruns SST discovery whenever the host
exits the power-off state.
Tested:
- Ran the tools/sst-compare-redfish-os.py tool on a platform with an SPR
host CPU, and observed no mismatches before and after a host reboot.
- Confirmed Redfish OperatingConfig properties are still populated when
the host is off.
Signed-off-by: Jonathan Doman <jonathan.doman@intel.com>
Change-Id: I9e7b0ebb8c5ec7a8464346f3476490b765579428
diff --git a/src/cpuinfo_main.cpp b/src/cpuinfo_main.cpp
index 1d91f17..0d9c797 100644
--- a/src/cpuinfo_main.cpp
+++ b/src/cpuinfo_main.cpp
@@ -650,7 +650,7 @@
cpu_info::hostStateSetup(conn);
- cpu_info::sst::init(io, conn);
+ cpu_info::sst::init();
// shared_ptr conn is global for the service
// const reference of conn is passed to async calls
diff --git a/src/cpuinfo_utils.cpp b/src/cpuinfo_utils.cpp
index 1c69e6a..7265bbb 100644
--- a/src/cpuinfo_utils.cpp
+++ b/src/cpuinfo_utils.cpp
@@ -36,11 +36,18 @@
static PowerState powerState = PowerState::Off;
static OsState osState = OsState::Inactive;
static bool biosDone = false;
+static std::vector<HostStateHandler> hostStateCallbacks;
static std::shared_ptr<sdbusplus::asio::connection> dbusConn;
+void addHostStateCallback(HostStateHandler cb) // Register cb to be called by updateHostState() whenever hostState changes.
+{
+ hostStateCallbacks.push_back(cb); // Appended to the file-static list; updateHostState() iterates it in registration order.
+}
+
static void updateHostState()
{
+ HostState prevState = hostState;
if (powerState == PowerState::Off)
{
hostState = HostState::off;
@@ -67,6 +74,14 @@
hostState = HostState::postComplete;
}
DEBUG_PRINT << "new host state: " << static_cast<int>(hostState) << "\n";
+
+ if (prevState != hostState)
+ {
+ for (const auto& cb : hostStateCallbacks)
+ {
+ cb(prevState, hostState);
+ }
+ }
}
void updatePowerState(const std::string& newState)
diff --git a/src/speed_select.cpp b/src/speed_select.cpp
index 3a2f8f4..8facad5 100644
--- a/src/speed_select.cpp
+++ b/src/speed_select.cpp
@@ -19,6 +19,7 @@
#include <peci.h>
+#include <boost/asio/error.hpp>
#include <boost/asio/steady_timer.hpp>
#include <xyz/openbmc_project/Common/Device/error.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
@@ -55,6 +56,19 @@
return true;
}
+std::vector<uint32_t> convertMaskToList(std::bitset<64> mask) // Returns the indices of all set bits in mask, in ascending order.
+{
+ std::vector<uint32_t> bitList;
+ for (size_t i = 0; i < mask.size(); ++i) // mask.size() is 64, so every index fits in uint32_t.
+ {
+ if (mask.test(i))
+ {
+ bitList.push_back(i); // The size_t index is implicitly converted to uint32_t here.
+ }
+ }
+ return bitList; // Empty when no bit is set.
+}
+
static std::vector<BackendProvider>& getProviders()
{
static auto* providers = new std::vector<BackendProvider>;
@@ -369,10 +383,6 @@
* Retrieve all SST configuration info for all discoverable CPUs, and publish
* the info on new D-Bus objects on the given bus connection.
*
- * @param[out] cpuList List to append info about discovered CPUs,
- * including pointers to D-Bus objects to keep them
- * alive. No items may be added to list in case host
- * system is powered off and no CPUs are accessible.
* @param[in,out] ioc ASIO context.
* @param[in,out] conn D-Bus ASIO connection.
*
@@ -381,11 +391,18 @@
* @throw PECIError A PECI command failed on a CPU which had previously
* responded to a command.
*/
-static bool
- discoverCPUsAndConfigs(std::vector<std::unique_ptr<CPUConfig>>& cpuList,
- boost::asio::io_context& ioc,
- sdbusplus::asio::connection& conn)
+static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc,
+ sdbusplus::asio::connection& conn)
{
+ // Persistent list - only populated after complete/successful discovery
+ static std::vector<std::unique_ptr<CPUConfig>> cpus;
+ cpus.clear();
+
+ // Temporary staging list. In case there is any failure, these temporary
+ // objects will get dropped to avoid presenting incomplete info until the
+ // next discovery attempt.
+ std::vector<std::unique_ptr<CPUConfig>> cpuList;
+
for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
{
// Let the event handler run any waiting tasks. If there is a lot of
@@ -481,25 +498,31 @@
cpuList.pop_back();
continue;
}
-
- cpu.finalize();
}
+ cpuList.swap(cpus);
+ std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); });
return true;
}
-void init(boost::asio::io_context& ioc,
- const std::shared_ptr<sdbusplus::asio::connection>& conn)
+/**
+ * Attempt discovery process, and if it fails, wait for 10 seconds to try again.
+ */
+static void discoverOrWait()
{
- static boost::asio::steady_timer peciRetryTimer(ioc);
- static std::vector<std::unique_ptr<CPUConfig>> cpus;
+ static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext());
static int peciErrorCount = 0;
-
bool finished = false;
+
+ // This function may be called from hostStateHandler or by retrying itself.
+ // In case those overlap, cancel any outstanding retry timer.
+ peciRetryTimer.cancel();
+
try
{
DEBUG_PRINT << "Starting discovery\n";
- finished = discoverCPUsAndConfigs(cpus, ioc, *conn);
+ finished = discoverCPUsAndConfigs(dbus::getIOContext(),
+ *dbus::getConnection());
}
catch (const PECIError& err)
{
@@ -522,19 +545,35 @@
// Retry later if no CPUs were available, or there was a PECI error.
if (!finished)
{
- // Drop any created interfaces to avoid presenting incomplete info
- cpus.clear();
peciRetryTimer.expires_after(std::chrono::seconds(10));
- peciRetryTimer.async_wait([&ioc, conn](boost::system::error_code ec) {
+ peciRetryTimer.async_wait([](boost::system::error_code ec) {
if (ec)
{
- std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
+ if (ec != boost::asio::error::operation_aborted)
+ {
+ std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
+ }
return;
}
- init(ioc, conn);
+ discoverOrWait();
});
}
}
+static void hostStateHandler(HostState prevState, HostState) // Host state callback; the second (unnamed) parameter is ignored.
+{
+ if (prevState == HostState::off) // Only the previous state is checked: react when the state being left is "off".
+ {
+ // Start or re-start discovery any time the host moves out of the
+ // powered off state.
+ discoverOrWait();
+ }
+}
+
+void init() // Registers hostStateHandler as a host state callback; no discovery is started directly from here.
+{
+ addHostStateCallback(hostStateHandler);
+}
+
} // namespace sst
} // namespace cpu_info