Collect SBE dump during chip-op timeouts
Add support for collecting an SBE dump when a dump chip-op
fails.
- Request an SBE dump when a chip-op times out
- Wait for the dump to complete, up to a fixed timeout
Signed-off-by: Dhruvaraj Subhashchandran <dhruvaraj@in.ibm.com>
Change-Id: I2f6693a3136803f3555d7e5dc00528bb9d004cdf
diff --git a/dump/dump_utils.cpp b/dump/dump_utils.cpp
index 81b1cc3..2689b50 100644
--- a/dump/dump_utils.cpp
+++ b/dump/dump_utils.cpp
@@ -14,6 +14,104 @@
{
using namespace phosphor::logging;
+/**
+ * @brief Wait until a dump entry leaves the InProgress state or a timeout
+ *        expires.
+ *
+ * Subscribes to PropertiesChanged signals of the
+ * xyz.openbmc_project.Common.Progress interface on the given object path and
+ * services the bus until the Status property reports a value other than
+ * InProgress, or until the timeout has elapsed.
+ *
+ * NOTE(review): the match is registered only when this function is entered,
+ * so a status change emitted before that point is not observed and the wait
+ * then runs until the timeout.
+ *
+ * @param path    D-Bus object path of the dump entry to watch
+ * @param timeout Maximum time to wait, in seconds
+ */
+static void monitorDumpCreation(const std::string& path, const uint32_t timeout)
+{
+    bool inProgress = true;
+    auto bus = sdbusplus::bus::new_system();
+    auto match = sdbusplus::bus::match::match(
+        bus,
+        sdbusplus::bus::match::rules::propertiesChanged(
+            path, "xyz.openbmc_project.Common.Progress"),
+        [&](sdbusplus::message::message& msg) {
+            std::string interface;
+            std::map<std::string, std::variant<std::string, uint8_t>> property;
+            msg.read(interface, property);
+
+            const auto dumpStatus = property.find("Status");
+            if (dumpStatus != property.end())
+            {
+                const std::string* status =
+                    std::get_if<std::string>(&(dumpStatus->second));
+                // Any status other than InProgress ends the wait.
+                if (status &&
+                    *status !=
+                        "xyz.openbmc_project.Common.Progress.OperationStatus.InProgress")
+                {
+                    lg2::info("Dump status({STATUS}) : path={PATH}", "STATUS",
+                              status->c_str(), "PATH", path.c_str());
+                    inProgress = false;
+                }
+            }
+        });
+
+    // Timeout management.
+    // Bound the wait by elapsed time instead of counting wake-ups: bus.wait()
+    // returns as soon as bus traffic arrives, so one loop iteration does not
+    // imply that a full second has passed.
+    const auto deadline = std::chrono::steady_clock::now() +
+                          std::chrono::seconds(timeout);
+    while (inProgress && std::chrono::steady_clock::now() < deadline)
+    {
+        bus.wait(std::chrono::seconds(1));
+        bus.process_discard();
+    }
+
+    if (inProgress)
+    {
+        lg2::error("Dump progress timeout; dump may not be complete.");
+    }
+}
+
+/**
+ * Request an SBE dump from the dump manager and wait for it to finish.
+ *
+ * Calls CreateDump on the dump path configured for sbeType, passing the error
+ * log id and failing unit id as create parameters, then waits for the created
+ * dump entry for up to SBE_DUMP_TIMEOUT seconds.
+ *
+ * A D-Bus error named xyz.openbmc_project.Dump.Create.Error.Disabled is logged
+ * and swallowed; every other exception propagates to the caller unchanged.
+ */
+void requestSBEDump(const uint32_t failingUnit, const uint32_t eid,
+                    SBETypes sbeType)
+{
+    lg2::info("Requesting Dump PEL({EID}) chip position({FAILINGUNIT})", "EID",
+              eid, "FAILINGUNIT", failingUnit);
+
+    // at() throws std::out_of_range for an SBE type without attributes.
+    auto path = sbeTypeAttributes.at(sbeType).dumpPath.c_str();
+    constexpr auto interface = "xyz.openbmc_project.Dump.Create";
+    constexpr auto function = "CreateDump";
+
+    try
+    {
+        auto bus = sdbusplus::bus::new_default();
+        auto service = getService(bus, interface, path);
+        auto method = bus.new_method_call(service.c_str(), path, interface,
+                                          function);
+
+        std::unordered_map<std::string, std::variant<std::string, uint64_t>>
+            createParams = {
+                {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
+                 uint64_t(eid)},
+                {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
+                 uint64_t(failingUnit)}};
+
+        method.append(createParams);
+        sdbusplus::message::object_path reply;
+        bus.call(method).read(reply);
+
+        monitorDumpCreation(reply.str, SBE_DUMP_TIMEOUT);
+    }
+    catch (const sdbusplus::exception::exception& e)
+    {
+        lg2::error("D-Bus call createDump exception OBJPATH={OBJPATH}, "
+                   "INTERFACE={INTERFACE}, EXCEPTION={ERROR}",
+                   "OBJPATH", path, "INTERFACE", interface, "ERROR", e);
+        constexpr auto ERROR_DUMP_DISABLED =
+            "xyz.openbmc_project.Dump.Create.Error.Disabled";
+        // name() yields a C string: compare the characters, not the pointers.
+        const char* errorName = e.name();
+        if (errorName != nullptr &&
+            std::string(errorName) == ERROR_DUMP_DISABLED)
+        {
+            // Dump is disabled, Skip the dump collection.
+            lg2::info("Dump is disabled unit({FAILINGUNIT}), "
+                      "skipping dump collection",
+                      "FAILINGUNIT", failingUnit);
+        }
+        else
+        {
+            throw;
+        }
+    }
+    // Any other exception propagates as-is; re-throwing a caught
+    // std::exception by value would slice away its dynamic type.
+}
+
std::string getService(sdbusplus::bus::bus& bus, const std::string& intf,
const std::string& path)
{
diff --git a/dump/dump_utils.hpp b/dump/dump_utils.hpp
index 8918024..105884a 100644
--- a/dump/dump_utils.hpp
+++ b/dump/dump_utils.hpp
@@ -1,5 +1,7 @@
#pragma once
+#include "sbe_type.hpp"
+
#include <sdbusplus/server.hpp>
#include <filesystem>
@@ -9,6 +11,8 @@
namespace openpower::dump::util
{
+// Maximum time, in seconds, to wait for a requested SBE dump to leave the
+// InProgress state (4 minutes).
+constexpr auto SBE_DUMP_TIMEOUT = 4 * 60; // Timeout in seconds
+
using DumpCreateParams =
std::map<std::string, std::variant<std::string, uint64_t>>;
@@ -81,4 +85,17 @@
auto reply = bus.call(method);
}
+/**
+ * Request SBE dump from the dump manager and wait for it to finish
+ *
+ * Calls CreateDump on the dump path configured for the given SBE type, then
+ * blocks until the created dump entry leaves the InProgress state or
+ * SBE_DUMP_TIMEOUT seconds have passed.
+ *
+ * @param failingUnit The id of the proc containing failed SBE
+ * @param eid Error log id associated with dump
+ * @param sbeType Type of the SBE
+ *
+ * @throw std::out_of_range if sbeType has no entry in sbeTypeAttributes
+ * @throw sdbusplus::exception::exception if the D-Bus request fails
+ *        (NOTE(review): a "Disabled" error from the dump manager is intended
+ *        to be logged and skipped rather than thrown - confirm)
+ */
+void requestSBEDump(const uint32_t failingUnit, const uint32_t eid,
+ SBETypes sbeType);
+
} // namespace openpower::dump::util
diff --git a/dump/sbe_dump_collector.cpp b/dump/sbe_dump_collector.cpp
index 58a34e7..9e7801c 100644
--- a/dump/sbe_dump_collector.cpp
+++ b/dump/sbe_dump_collector.cpp
@@ -138,18 +138,39 @@
try
{
std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
+ auto dumpIsRequired = false;
+
+ // A chip-op timeout is reported with its own event and triggers an SBE dump.
+ if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
+ {
+ event = sbeTypeAttributes.at(sbeType).chipOpTimeout;
+ dumpIsRequired = true;
+ }
openpower::dump::pel::FFDCData pelAdditionalData = {
{"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};
- openpower::dump::pel::createSbeErrorPEL(event, sbeError,
- pelAdditionalData);
+ // Create the PEL exactly once; its id ties the SBE dump to this error.
+ auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
+ pelAdditionalData);
+
+ // Request SBE Dump if required
+ if (dumpIsRequired)
+ {
+ util::requestSBEDump(chipPos, logId, sbeType);
+ }
}
catch (const std::out_of_range& e)
{
lg2::error("Unknown SBE Type({SBETYPE}) ErrorMsg({ERROR})", "SBETYPE",
sbeType, "ERROR", e);
}
+ catch (const std::exception& e)
+ {
+ lg2::error("SBE Dump request failed, chip position({CHIPPOS}), "
+ "Error: {ERROR}",
+ "CHIPPOS", chipPos, "ERROR", e);
+ }
}
void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
@@ -192,11 +213,14 @@
}
lg2::error("Error in collecting dump dump type({TYPE}), "
- "clockstate({CLOCKSTATE}), proc position({PROC}), "
+ "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
+ "position({POSITION}), "
"collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
- "TYPE", type, "CLOCKSTATE", clockState, "PROC", chipPos,
- "COLLECTFASTARRAY", collectFastArray, "ERROR", sbeError);
-
+ "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
+ "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
+ "ERROR", sbeError);
+ logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
+ SBEFIFO_CMD_GET_DUMP);
return;
}
writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
diff --git a/dump/sbe_type.cpp b/dump/sbe_type.cpp
index 41ae7cf..860dcca 100644
--- a/dump/sbe_type.cpp
+++ b/dump/sbe_type.cpp
@@ -5,6 +5,8 @@
+// Attributes for each supported SBE type. Entries are initialized
+// positionally, so the values must follow the member order of SBEAttributes:
+// chipName, dumpPath, chipOpTimeout, chipOpFailure.
const std::map<SBETypes, SBEAttributes> sbeTypeAttributes = {
{SBETypes::PROC,
- {"proc", "org.open_power.Processor.Error.SbeChipOpFailure"}}};
+ {"proc", "/xyz/openbmc_project/dump/sbe",
+ "org.open_power.Processor.Error.SbeChipOpTimeout",
+ "org.open_power.Processor.Error.SbeChipOpFailure"}}};
} // namespace openpower::dump
diff --git a/dump/sbe_type.hpp b/dump/sbe_type.hpp
index e46d030..e4e2827 100644
--- a/dump/sbe_type.hpp
+++ b/dump/sbe_type.hpp
@@ -14,6 +14,8 @@
struct SBEAttributes
{
+ // Chip name, e.g. "proc".
std::string chipName;
+ // D-Bus object path on which CreateDump is called for this SBE type.
+ std::string dumpPath;
+ // Error event name used when a chip-op fails with SBE_CMD_TIMEOUT.
+ std::string chipOpTimeout;
+ // Error event name used for chip-op failures other than a timeout.
std::string chipOpFailure;
};