Stop instructions before collecting dump
The instructions need to be stopped before attempting to collect
the hostboot dump. This commit calls stop instructions chip-op
on each processor SBE to make sure the instructions are stopped
before collecting the dump.
If an SBE is not ready to accept the chip-op, or the chip-op times out,
the hostboot dump will not be collected from that SBE; an SBE dump
will be collected in the case of a timeout.
Tests:
Tested hardware dump and hostboot dump successfully
Signed-off-by: Dhruvaraj Subhashchandran <dhruvaraj@in.ibm.com>
Change-Id: I0b0ff9e6c6d62187a680395931de0a4dfaff579a
diff --git a/dump/create_pel.cpp b/dump/create_pel.cpp
index 916d312..f6b15ee 100644
--- a/dump/create_pel.cpp
+++ b/dump/create_pel.cpp
@@ -31,7 +31,7 @@
constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
uint32_t createSbeErrorPEL(const std::string& event, const sbeError_t& sbeError,
- const FFDCData& ffdcData)
+ const FFDCData& ffdcData, const Severity severity)
{
uint32_t plid = 0;
std::unordered_map<std::string, std::string> additionalData = {
@@ -77,8 +77,7 @@
"CreatePELWithFFDCFiles");
auto level =
sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
- sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
- Error);
+ severity);
method.append(event, level, additionalData, pelFFDCInfo);
auto response = bus.call(method);
diff --git a/dump/create_pel.hpp b/dump/create_pel.hpp
index 13d30ff..a5287ea 100644
--- a/dump/create_pel.hpp
+++ b/dump/create_pel.hpp
@@ -1,5 +1,7 @@
#pragma once
+#include "xyz/openbmc_project/Logging/Entry/server.hpp"
+
#include <phal_exception.H>
#include <nlohmann/json.hpp>
@@ -11,6 +13,8 @@
using FFDCData = std::vector<std::pair<std::string, std::string>>;
+using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
+
using json = nlohmann::json;
using namespace openpower::phal;
@@ -21,10 +25,12 @@
* @param[in] event - the event type
* @param[in] sbeError - SBE error object
* @param[in] ffdcData - failure data to append to PEL
+ * @param[in] severity - severity of the log
* @return Platform log id
*/
uint32_t createSbeErrorPEL(const std::string& event, const sbeError_t& sbeError,
- const FFDCData& ffdcData);
+ const FFDCData& ffdcData,
+ const Severity severity = Severity::Error);
/**
* @class FFDCFile
diff --git a/dump/sbe_consts.hpp b/dump/sbe_consts.hpp
index d59a698..731aecc 100644
--- a/dump/sbe_consts.hpp
+++ b/dump/sbe_consts.hpp
@@ -20,4 +20,10 @@
// Get dump method
constexpr auto SBEFIFO_CMD_GET_DUMP = 0x01;
+
+// Stop instruction command class
+constexpr auto SBEFIFO_CMD_CLASS_INSTRUCTION = 0xA700;
+
+// Stop instruction method
+constexpr auto SBEFIFO_CMD_CONTROL_INSN = 0x01;
} // namespace openpower::dump::SBE
diff --git a/dump/sbe_dump_collector.cpp b/dump/sbe_dump_collector.cpp
index 9e7801c..bdf43c8 100644
--- a/dump/sbe_dump_collector.cpp
+++ b/dump/sbe_dump_collector.cpp
@@ -53,7 +53,16 @@
continue;
}
- targets.push_back(target);
+ bool includeTarget = true;
+ // if the dump type is hostboot then call stop instructions
+ if (type == SBE_DUMP_TYPE_HOSTBOOT)
+ {
+ includeTarget = executeThreadStop(target);
+ }
+ if (includeTarget)
+ {
+ targets.push_back(target);
+ }
}
std::vector<uint8_t> clockStates = {SBE_CLOCK_ON, SBE_CLOCK_OFF};
@@ -289,4 +298,42 @@
}
}
+bool SbeDumpCollector::executeThreadStop(struct pdbg_target* target)
+{
+ try
+ {
+ openpower::phal::sbe::threadStopProc(target);
+ return true; // chip-op did not throw: keep the target
+ }
+ catch (const openpower::phal::sbeError_t& sbeError)
+ {
+ uint64_t chipPos = pdbg_target_index(target);
+ if (sbeError.errType() ==
+ openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
+ {
+ lg2::info("SBE is not ready to accept chip-op: Skipping "
+ "stop instruction on proc-({POSITION}) error({ERROR}) ",
+ "POSITION", chipPos, "ERROR", sbeError);
+ return false; // Do not include the target for dump collection
+ }
+
+ lg2::error("Stop instructions failed on "
+ "proc-({POSITION}) error({ERROR}) ",
+ "POSITION", chipPos, "ERROR", sbeError);
+
+ logErrorAndCreatePEL(sbeError, chipPos, SBETypes::PROC,
+ SBEFIFO_CMD_CLASS_INSTRUCTION,
+ SBEFIFO_CMD_CONTROL_INSN);
+ // The failure has already been reported above; a TIMEOUT additionally
+ // excludes the processor from dump collection
+ if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
+ {
+ return false;
+ }
+ }
+ // Reached only when the error was neither SBE_CHIPOP_NOT_ALLOWED nor
+ // SBE_CMD_TIMEOUT (e.g. SBE_CMD_FAILED): keep the target for collection
+ return true;
+}
+
} // namespace openpower::dump::sbe_chipop
diff --git a/dump/sbe_dump_collector.hpp b/dump/sbe_dump_collector.hpp
index 2e99b53..55147a9 100644
--- a/dump/sbe_dump_collector.hpp
+++ b/dump/sbe_dump_collector.hpp
@@ -190,6 +190,26 @@
{
return SBETypes::PROC;
}
+
+ /**
+ * @brief Executes thread stop on a processor target
+ *
+ * If the Self Boot Engine (SBE) is not ready to accept chip operations
+ * (chip-ops), it logs the condition and excludes the processor from the
+ * dump collection process. For critical errors, such as a timeout during
+ * the stop operation, it logs the error and likewise excludes the
+ * processor. In case of SBE command failure or other non-critical errors,
+ * it logs the error and continues with the dump collection process.
+ *
+ * @param[in] target - Pointer to the pdbg target structure representing
+ * the processor to perform the thread stop on.
+ * @return true If the thread stop was successful or in case of non-critical
+ * errors where dump collection can proceed.
+ * @return false If the SBE is not ready for chip-ops or in case of critical
+ * errors like timeouts, indicating the processor should be
+ * excluded from the dump collection.
+ */
+ bool executeThreadStop(struct pdbg_target* target);
};
} // namespace openpower::dump::sbe_chipop