Support for hardware dump collection from OCMB
This commit introduces the capability to collect hardware dumps from
Odyssey-based OCMB chips in addition to the existing processor dumps.
This commit addresses the need for additional failure data across
both processors and their associated OCMB chips.
Key changes include:
- Enabled hardware dump collection specifically for Odyssey OCMB chips.
- Optimized collection so that OCMB dumps are collected only when the
chip clock is on, minimizing overall dump collection time.
- Dumps are collected sequentially from the OCMB chips associated with
each processor, after that processor's own dump has been handled.
Change-Id: Iea70912f9fef9c337ba5ca7c508728586be25af8
Signed-off-by: Dhruvaraj Subhashchandran <dhruvaraj@in.ibm.com>
diff --git a/dump/dump_utils.cpp b/dump/dump_utils.cpp
index 2689b50..3e0105e 100644
--- a/dump/dump_utils.cpp
+++ b/dump/dump_utils.cpp
@@ -60,8 +60,10 @@
void requestSBEDump(const uint32_t failingUnit, const uint32_t eid,
SBETypes sbeType)
{
- lg2::info("Requesting Dump PEL({EID}) chip position({FAILINGUNIT})", "EID",
- eid, "FAILINGUNIT", failingUnit);
+ lg2::info(
+ "Requesting Dump PEL({EID}) chip({CHIPTYPE}) position({FAILINGUNIT})",
+ "EID", eid, "CHIPTYPE", sbeTypeAttributes.at(sbeType).chipName,
+ "FAILINGUNIT", failingUnit);
auto path = sbeTypeAttributes.at(sbeType).dumpPath.c_str();
constexpr auto interface = "xyz.openbmc_project.Dump.Create";
diff --git a/dump/meson.build b/dump/meson.build
index 586b43e..9667e33 100644
--- a/dump/meson.build
+++ b/dump/meson.build
@@ -17,6 +17,7 @@
'dump_collect_main.cpp',
'dump_utils.cpp',
'create_pel.cpp',
+ 'dump_utils.cpp',
'sbe_type.cpp',
)
diff --git a/dump/sbe_dump_collector.cpp b/dump/sbe_dump_collector.cpp
index 3e74736..16ac9a7 100644
--- a/dump/sbe_dump_collector.cpp
+++ b/dump/sbe_dump_collector.cpp
@@ -42,7 +42,7 @@
initializePdbg();
- std::vector<struct pdbg_target*> targets;
+ TargetMap targets;
struct pdbg_target* target = nullptr;
pdbg_for_each_class_target("proc", target)
@@ -61,7 +61,32 @@
}
if (includeTarget)
{
- targets.push_back(target);
+ targets[target] = std::vector<struct pdbg_target*>();
+
+ // Hardware dump needs OCMB data if present
+ if (type == openpower::dump::SBE::SBE_DUMP_TYPE_HARDWARE)
+ {
+ struct pdbg_target* ocmbTarget;
+ pdbg_for_each_target("ocmb", target, ocmbTarget)
+ {
+ if (!is_ody_ocmb_chip(ocmbTarget))
+ {
+ continue;
+ }
+
+ if (pdbg_target_probe(ocmbTarget) != PDBG_TARGET_ENABLED)
+ {
+ continue;
+ }
+
+ if (!openpower::phal::pdbg::isTgtFunctional(ocmbTarget))
+ {
+ continue;
+ }
+
+ targets[target].push_back(ocmbTarget);
+ }
+ }
}
}
@@ -110,32 +135,46 @@
std::vector<std::future<void>> SbeDumpCollector::spawnDumpCollectionProcesses(
uint8_t type, uint32_t id, const std::filesystem::path& path,
- uint64_t failingUnit, uint8_t cstate,
- const std::vector<struct pdbg_target*>& targets)
+ uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap)
{
std::vector<std::future<void>> futures;
- for (auto target : targets)
+ for (const auto& [procTarget, ocmbTargets] : targetMap)
{
- if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
- !openpower::phal::pdbg::isTgtFunctional(target))
- {
- continue;
- }
-
- auto future =
- std::async(std::launch::async,
- [this, target, path, id, type, cstate, failingUnit]() {
+ auto future = std::async(std::launch::async,
+ [this, procTarget, ocmbTargets, path, id, type,
+ cstate, failingUnit]() {
try
{
- this->collectDumpFromSBE(target, path, id, type, cstate,
+ this->collectDumpFromSBE(procTarget, path, id, type, cstate,
failingUnit);
}
catch (const std::exception& e)
{
lg2::error(
- "Failed to collect dump from SBE on Proc-({PROCINDEX})",
- "PROCINDEX", pdbg_target_index(target));
+ "Failed to collect dump from SBE on Proc-({PROCINDEX}) {ERROR}",
+ "PROCINDEX", pdbg_target_index(procTarget), "ERROR", e);
+ }
+
+ // Collect OCMBs only with clock on
+ if (cstate == SBE_CLOCK_ON)
+ {
+ // Handle OCMBs serially after handling the proc
+ for (auto ocmbTarget : ocmbTargets)
+ {
+ try
+ {
+ this->collectDumpFromSBE(ocmbTarget, path, id, type,
+ cstate, failingUnit);
+ }
+ catch (const std::exception& e)
+ {
+ lg2::error(
+ "Failed to collect dump from OCMB -({OCMBINDEX}) {ERROR}",
+ "OCMBINDEX", pdbg_target_index(ocmbTarget), "ERROR",
+ e);
+ }
+ }
}
});
@@ -149,8 +188,10 @@
const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
+ std::string chipName;
try
{
+ chipName = sbeTypeAttributes.at(sbeType).chipName;
std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
auto dumpIsRequired = false;
@@ -181,9 +222,9 @@
}
catch (const std::exception& e)
{
- lg2::error("SBE Dump request failed, chip position({CHIPPOS}), "
- "Error: {ERROR}",
- "CHIPPOS", chipPos, "ERROR", e);
+ lg2::error("SBE Dump request failed, chip type({CHIPTYPE}) "
+ "position({CHIPPOS}), Error: {ERROR}",
+ "CHIPTYPE", chipName, "CHIPPOS", chipPos, "ERROR", e);
}
}
@@ -197,10 +238,10 @@
SBETypes sbeType = getSBEType(chip);
auto chipName = sbeTypeAttributes.at(sbeType).chipName;
lg2::info(
- "Collecting dump from proc({PROC}): path({PATH}) id({ID}) "
- "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
- "PROC", chipPos, "PATH", path.string(), "ID", id, "TYPE", type,
- "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);
+ "Collecting dump from ({CHIPTYPE}) ({POSITION}): path({PATH}) id({ID}) "
+ "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
+ "CHIPTYPE", chipName, "POSITION", chipPos, "PATH", path.string(), "ID",
+ id, "TYPE", type, "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);
util::DumpDataPtr dataPtr;
uint32_t len = 0;
diff --git a/dump/sbe_dump_collector.hpp b/dump/sbe_dump_collector.hpp
index 55147a9..d4cbc58 100644
--- a/dump/sbe_dump_collector.hpp
+++ b/dump/sbe_dump_collector.hpp
@@ -20,6 +20,9 @@
namespace openpower::dump::sbe_chipop
{
+using TargetMap =
+ std::map<struct pdbg_target*, std::vector<struct pdbg_target*>>;
+
/**
* @class SbeDumpCollector
* @brief Manages the collection of dumps from SBEs on failure.
@@ -105,9 +108,9 @@
* @param cstate The clock state during the dump collection. This parameter
* dictates whether the dump should be collected with the
* clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF).
- * @param targets A vector of `pdbg_target*` representing the targets from
- * which dumps should be collected. Each target corresponds to a physical or
- * logical component in the system, such as a processor or an SBE.
+ * @param targetMap A map of `pdbg_target*` representing the targets from
+ * which dumps should be collected. The key is the proc target with the
+ * list of ocmb targets associated with the proc.
*
* @return A vector of `std::future<void>` objects. Each future represents
* the completion state of an asynchronous dump collection task. The caller
@@ -118,8 +121,7 @@
*/
std::vector<std::future<void>> spawnDumpCollectionProcesses(
uint8_t type, uint32_t id, const std::filesystem::path& path,
- uint64_t failingUnit, uint8_t cstate,
- const std::vector<struct pdbg_target*>& targets);
+ uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap);
/** @brief This function creates the new dump file in dump file name
* format and then writes the contents into it.
@@ -188,6 +190,10 @@
*/
inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip)
{
+ if (is_ody_ocmb_chip(chip))
+ {
+ return SBETypes::OCMB;
+ }
return SBETypes::PROC;
}
diff --git a/dump/sbe_type.cpp b/dump/sbe_type.cpp
index 860dcca..cb355d4 100644
--- a/dump/sbe_type.cpp
+++ b/dump/sbe_type.cpp
@@ -7,6 +7,10 @@
{SBETypes::PROC,
{"proc", "/xyz/openbmc_project/dump/sbe",
"org.open_power.Processor.Error.SbeChipOpTimeout",
- "org.open_power.Processor.Error.SbeChipOpFailure"}}};
+ "org.open_power.Processor.Error.SbeChipOpFailure"}},
+ {SBETypes::OCMB,
+ {"ocmb", "/xyz/openbmc_project/dump/msbe",
+ "org.open_power.OCMB.Error.SbeChipOpTimeout",
+ "org.open_power.OCMB.Error.SbeChipOpFailure"}}};
} // namespace openpower::dump
diff --git a/dump/sbe_type.hpp b/dump/sbe_type.hpp
index e4e2827..23621e2 100644
--- a/dump/sbe_type.hpp
+++ b/dump/sbe_type.hpp
@@ -9,6 +9,7 @@
enum class SBETypes
{
PROC,
+ OCMB
};
struct SBEAttributes