Implement support for POZ FFDC with multiple FFDC packets
This commit implements support for POZ FFDC, where multiple FFDC packets
can be returned after executing an SBE chip-op. This differs from the
processor SBE chip-op, which typically returns only a single FFDC packet
upon a chip-op failure.
Key aspects of the implementation:
- Each FFDC packet is associated with a unique SLID id, allowing for the
identification of separate and unrelated FFDC packets within the
collection.
- Each unique SLID is treated as an independent PEL, ensuring that each
FFDC packet is logged separately.
- FFDC data may be present even if the chip-op completes successfully.
In such cases, PELs are logged, but the chip-op is not considered a
failure.
Tests:
Verified that proc FFDC handling shows no regression
Verified that POZ FFDC results in multiple PELs being logged
Verified FFDC handling when the chip-op completes successfully
Change-Id: I8c70bc8df9249c5b9841baef7b5dbe0a6f22e08d
Signed-off-by: Dhruvaraj Subhashchandran <dhruvaraj@in.ibm.com>
diff --git a/dump/create_pel.cpp b/dump/create_pel.cpp
index f6b15ee..7f79fa5 100644
--- a/dump/create_pel.cpp
+++ b/dump/create_pel.cpp
@@ -1,6 +1,7 @@
#include "create_pel.hpp"
#include "dump_utils.hpp"
+#include "sbe_consts.hpp"
#include <fcntl.h>
#include <libekb.H>
@@ -26,12 +27,14 @@
{
using namespace phosphor::logging;
+using namespace openpower::dump::SBE;
constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
constexpr auto loggingInterface = "xyz.openbmc_project.Logging.Create";
constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
uint32_t createSbeErrorPEL(const std::string& event, const sbeError_t& sbeError,
- const FFDCData& ffdcData, const Severity severity)
+ const FFDCData& ffdcData, const Severity severity,
+ const std::optional<PELFFDCInfo>& pelFFDCInfoOpt)
{
uint32_t plid = 0;
std::unordered_map<std::string, std::string> additionalData = {
@@ -46,27 +49,24 @@
additionalData.emplace(data);
}
- std::vector<std::tuple<
- sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
- uint8_t, uint8_t, sdbusplus::message::unix_fd>>
- pelFFDCInfo;
-
- // get SBE ffdc file descriptor
- auto fd = sbeError.getFd();
+ PELFFDCInfo pelFFDCInfo;
+ if (pelFFDCInfoOpt)
+ {
+ pelFFDCInfo = *pelFFDCInfoOpt;
+ }
// Negative fd value indicates error case or invalid file
// No need of special processing , just log error with additional ffdc.
- if (fd > 0)
+ else if (sbeError.getFd() > 0)
{
// Refer phosphor-logging/extensions/openpower-pels/README.md section
// "Self Boot Engine(SBE) First Failure Data Capture(FFDC) Support"
// for details of related to createPEL with SBE FFDC information
// usin g CreateWithFFDCFiles api.
- pelFFDCInfo.emplace_back(
- std::make_tuple(sdbusplus::xyz::openbmc_project::Logging::server::
- Create::FFDCFormat::Custom,
- static_cast<uint8_t>(0xCB),
- static_cast<uint8_t>(0x01), sbeError.getFd()));
+ pelFFDCInfo.emplace_back(std::make_tuple(
+ sdbusplus::xyz::openbmc_project::Logging::server::Create::
+ FFDCFormat::Custom,
+ FFDC_FORMAT_SUBTYPE, FFDC_FORMAT_VERSION, sbeError.getFd()));
}
try
{
@@ -106,6 +106,48 @@
return plid;
}
+openpower::dump::pel::Severity convertSeverityToEnum(uint8_t severity)
+{
+    using PelSeverity = openpower::dump::pel::Severity;
+    // Unrecoverable and unrecognised codes both map to Error.
+    if (severity == openpower::phal::FAPI2_ERRL_SEV_RECOVERED)
+    {
+        return PelSeverity::Informational;
+    }
+    if (severity == openpower::phal::FAPI2_ERRL_SEV_PREDICTIVE)
+    {
+        return PelSeverity::Warning;
+    }
+    return PelSeverity::Error;
+}
+
+void processFFDCPackets(const openpower::phal::sbeError_t& sbeError,
+                        const std::string& event,
+                        openpower::dump::pel::FFDCData& pelAdditionalData)
+{
+    const auto& ffdcFileList = sbeError.getFfdcFileList();
+    for (const auto& [slid, ffdcTuple] : ffdcFileList)
+    {
+        uint8_t severity;
+        int fd;
+        std::filesystem::path path;
+        std::tie(severity, fd, path) = ffdcTuple;
+
+        Severity logSeverity = convertSeverityToEnum(severity);
+        // Attach only this packet's FFDC file to the PEL for this SLID.
+        PELFFDCInfo pelFFDCInfo;
+        pelFFDCInfo.emplace_back(
+            std::make_tuple(sdbusplus::xyz::openbmc_project::Logging::server::
+                                Create::FFDCFormat::Custom,
+                            FFDC_FORMAT_SUBTYPE, FFDC_FORMAT_VERSION, fd));
+        // Pass the per-packet FFDC; without it the PEL falls back to getFd().
+        auto logId = openpower::dump::pel::createSbeErrorPEL(
+            event, sbeError, pelAdditionalData, logSeverity, pelFFDCInfo);
+        lg2::info("Logged PEL {PELID} for SLID {SLID}", "PELID", logId, "SLID",
+                  slid);
+    }
+}
+
FFDCFile::FFDCFile(const json& pHALCalloutData) :
calloutData(pHALCalloutData.dump()),
calloutFile("/tmp/phalPELCalloutsJson.XXXXXX"), fileFD(-1)
diff --git a/dump/create_pel.hpp b/dump/create_pel.hpp
index a5287ea..5af80f8 100644
--- a/dump/create_pel.hpp
+++ b/dump/create_pel.hpp
@@ -5,9 +5,13 @@
#include <phal_exception.H>
#include <nlohmann/json.hpp>
+#include <xyz/openbmc_project/Logging/Create/server.hpp>
+#include <optional>
#include <string>
+#include <tuple>
#include <vector>
+
namespace openpower::dump::pel
{
@@ -19,6 +23,10 @@
using namespace openpower::phal;
+using PELFFDCInfo = std::vector<std::tuple<
+ sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
+ uint8_t, uint8_t, sdbusplus::message::unix_fd>>;
+
/**
* @brief Create SBE boot error PEL and return id
*
@@ -28,9 +36,30 @@
* @param[in] severity - severity of the log
* @return Platform log id
*/
-uint32_t createSbeErrorPEL(const std::string& event, const sbeError_t& sbeError,
- const FFDCData& ffdcData,
- const Severity severity = Severity::Error);
+uint32_t createSbeErrorPEL(
+ const std::string& event, const sbeError_t& sbeError,
+ const FFDCData& ffdcData, const Severity severity = Severity::Error,
+ const std::optional<PELFFDCInfo>& pelFFDCInfoOpt = std::nullopt);
+
+/**
+ * @brief Convert a FAPI2 severity code to PEL severity.
+ *
+ * @param[in] severity - Severity code from FAPI2 error logs.
+ * @return Severity - The corresponding Severity enumeration value.
+ */
+openpower::dump::pel::Severity convertSeverityToEnum(uint8_t severity);
+
+/**
+ * @brief Process FFDC packets and create PELs for each packet.
+ *
+ * @param[in] sbeError - An SBE error object containing FFDC packet information.
+ * @param[in] event - The event identifier associated with the PELs.
+ * @param[in] pelAdditionalData - Additional PEL data to be included in
+ *            each PEL that is created.
+ */
+void processFFDCPackets(const openpower::phal::sbeError_t& sbeError,
+ const std::string& event,
+ openpower::dump::pel::FFDCData& pelAdditionalData);
/**
* @class FFDCFile
diff --git a/dump/sbe_consts.hpp b/dump/sbe_consts.hpp
index b1d0a47..f7aad0d 100644
--- a/dump/sbe_consts.hpp
+++ b/dump/sbe_consts.hpp
@@ -1,4 +1,6 @@
#pragma once
+#include <cstdint>
+
namespace openpower::dump::SBE
{
// Dump type to the sbe_dump chipop
@@ -28,4 +30,8 @@
// Stop instruction method
constexpr auto SBEFIFO_CMD_CONTROL_INSN = 0x01;
+
+// FFDC Format details
+constexpr uint8_t FFDC_FORMAT_SUBTYPE = 0xCB;
+constexpr uint8_t FFDC_FORMAT_VERSION = 0x01;
} // namespace openpower::dump::SBE
diff --git a/dump/sbe_dump_collector.cpp b/dump/sbe_dump_collector.cpp
index 16ac9a7..11cd927 100644
--- a/dump/sbe_dump_collector.cpp
+++ b/dump/sbe_dump_collector.cpp
@@ -9,6 +9,7 @@
#include "sbe_dump_collector.hpp"
#include "sbe_type.hpp"
+#include <ekb/hwpf/fapi2/include/target_types.H>
#include <libphal.H>
#include <phal_exception.H>
@@ -30,6 +31,7 @@
using namespace phosphor::logging;
using namespace openpower::dump::SBE;
+using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
void SbeDumpCollector::collectDump(uint8_t type, uint32_t id,
uint64_t failingUnit,
@@ -83,7 +85,6 @@
{
continue;
}
-
targets[target].push_back(ocmbTarget);
}
}
@@ -184,34 +185,76 @@
return futures;
}
-void SbeDumpCollector::logErrorAndCreatePEL(
+bool SbeDumpCollector::logErrorAndCreatePEL(
const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
+ namespace fs = std::filesystem;
+
std::string chipName;
+ std::string event;
+ bool dumpIsRequired = false;
+ bool isDumpFailure = true;
try
{
chipName = sbeTypeAttributes.at(sbeType).chipName;
- std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
- auto dumpIsRequired = false;
+ event = sbeTypeAttributes.at(sbeType).chipOpFailure;
+ lg2::info("log error {CHIP} {POSITION}", "CHIP", chipName, "POSITION",
+ chipPos);
+
+ // Common FFDC data
+ openpower::dump::pel::FFDCData pelAdditionalData = {
+ {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};
+
+ if (sbeType == SBETypes::OCMB)
+ {
+ pelAdditionalData.emplace_back(
+ "CHIP_TYPE", std::to_string(fapi2::TARGET_TYPE_OCMB_CHIP));
+ }
+
+ // Check the error type
if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
{
event = sbeTypeAttributes.at(sbeType).chipOpTimeout;
dumpIsRequired = true;
+ // For timeout, we do not expect any FFDC packets
+ }
+ else if (sbeError.errType() ==
+ openpower::phal::exception::SBE_FFDC_NO_DATA)
+ {
+ // We will create a PEL without FFDC with the common information we
+ // added
+ lg2::error("No FFDC data after a chip-op failure {CHIP} {POSITION}",
+ "CHIP", chipName, "POSITION", chipPos);
+ event = sbeTypeAttributes.at(sbeType).noFfdc;
+ }
+ else
+ {
+ if (sbeError.errType() ==
+ openpower::phal::exception::SBE_INTERNAL_FFDC_DATA)
+ {
+ lg2::info(
+ "FFDC Not related to chip-op present {CHIP} {POSITION}",
+ "CHIP", chipName, "POSITION", chipPos);
+ event = sbeTypeAttributes.at(sbeType).sbeInternalFFDCData;
+ isDumpFailure = false;
+ }
+ else
+ {
+ lg2::error("Process FFDC {CHIP} {POSITION}", "CHIP", chipName,
+ "POSITION", chipPos);
+ }
+            // Process the FFDC packets; each SLID is logged as its own PEL
+ openpower::dump::pel::processFFDCPackets(sbeError, event,
+ pelAdditionalData);
}
- openpower::dump::pel::FFDCData pelAdditionalData = {
- {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};
-
- openpower::dump::pel::createSbeErrorPEL(event, sbeError,
- pelAdditionalData);
- auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
- pelAdditionalData);
-
- // Request SBE Dump if required
+ // If dump is required, request it
if (dumpIsRequired)
{
+ auto logId = openpower::dump::pel::createSbeErrorPEL(
+ event, sbeError, pelAdditionalData);
util::requestSBEDump(chipPos, logId, sbeType);
}
}
@@ -226,6 +269,8 @@
"position({CHIPPOS}), Error: {ERROR}",
"CHIPTYPE", chipName, "CHIPPOS", chipPos, "ERROR", e);
}
+
+ return isDumpFailure;
}
void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
@@ -267,16 +312,22 @@
return;
}
- lg2::error("Error in collecting dump dump type({TYPE}), "
- "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
- "position({POSITION}), "
- "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
- "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
- "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
- "ERROR", sbeError);
- logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
- SBEFIFO_CMD_GET_DUMP);
- return;
+        // logErrorAndCreatePEL() returns true when the FFDC comes from an
+        // actual chip-op failure. If the chip-op did not fail but FFDC is
+        // present, the PELs are still created and the dump contents are
+        // written to the file below.
+ if (logErrorAndCreatePEL(sbeError, chipPos, sbeType,
+ SBEFIFO_CMD_CLASS_DUMP, SBEFIFO_CMD_GET_DUMP))
+ {
+ lg2::error("Error in collecting dump dump type({TYPE}), "
+ "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
+ "position({POSITION}), "
+ "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
+ "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE",
+ chipName, "POSITION", chipPos, "COLLECTFASTARRAY",
+ collectFastArray, "ERROR", sbeError);
+ return;
+ }
}
writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
}
diff --git a/dump/sbe_dump_collector.hpp b/dump/sbe_dump_collector.hpp
index d4cbc58..b926203 100644
--- a/dump/sbe_dump_collector.hpp
+++ b/dump/sbe_dump_collector.hpp
@@ -178,7 +178,7 @@
* @param cmdType - The specific type of command within the command class.
*
*/
- void logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
+ bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
uint64_t chipPos, SBETypes sbeType,
uint32_t cmdClass, uint32_t cmdType);
diff --git a/dump/sbe_type.cpp b/dump/sbe_type.cpp
index cb355d4..be09d81 100644
--- a/dump/sbe_type.cpp
+++ b/dump/sbe_type.cpp
@@ -7,10 +7,14 @@
{SBETypes::PROC,
{"proc", "/xyz/openbmc_project/dump/sbe",
"org.open_power.Processor.Error.SbeChipOpTimeout",
- "org.open_power.Processor.Error.SbeChipOpFailure"}},
+ "org.open_power.Processor.Error.SbeChipOpFailure",
+ "org.open_power.Processor.Error.NoFffdc",
+ "org.open_power.Processor.Error.SbeInternalFFDCData"}},
{SBETypes::OCMB,
{"ocmb", "/xyz/openbmc_project/dump/msbe",
"org.open_power.OCMB.Error.SbeChipOpTimeout",
- "org.open_power.OCMB.Error.SbeChipOpFailure"}}};
+ "org.open_power.OCMB.Error.SbeChipOpFailure",
+ "org.open_power.OCMB.Error.NoFffdc",
+ "org.open_power.OCMB.Error.SbeInternalFFDCData"}}};
} // namespace openpower::dump
diff --git a/dump/sbe_type.hpp b/dump/sbe_type.hpp
index 23621e2..fd6a4ac 100644
--- a/dump/sbe_type.hpp
+++ b/dump/sbe_type.hpp
@@ -18,6 +18,8 @@
std::string dumpPath;
std::string chipOpTimeout;
std::string chipOpFailure;
+ std::string noFfdc;
+ std::string sbeInternalFFDCData;
};
extern const std::map<SBETypes, SBEAttributes> sbeTypeAttributes;