PHAL: Redundant mode clock error handling support
Added error handling for clock failure callouts on systems that
support a spare (redundant) clock. In this case only the clock
target is deconfigured.
Tested: Clock test failure, with spare clock supported system
"User Header": {
"Section Version": "1",
"Sub-section type": "0",
"Log Committed by": "0x2000",
"Subsystem": "CEC Hardware",
"Event Scope": "Entire Platform",
"Event Severity": "Informational Event",
"Event Type": "Miscellaneous, Informational Only",
"Action Flags": [
"Service Action Required",
"Report Externally",
"HMC Call Home"
],
"Host Transmission": "Not Sent",
"HMC Transmission": "Not Sent"
},
"Primary SRC": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "0x3000",
"SRC Version": "0x02",
"SRC Format": "0x55",
"Virtual Progress SRC": "False",
"I5/OS Service Event Bit": "False",
"Hypervisor Dump Initiated":"False",
"Backplane CCIN": "2E33",
"Terminate FW Error": "False",
"Deconfigured": "False",
"Guarded": "False",
"Error Details": {
"Message": "Error during Spare clock initilaisation"
},
"Valid Word Count": "0x09",
"Reference Code": "BD503009",
"Hex Word 2": "00080055",
"Hex Word 3": "2E330010",
"Hex Word 4": "00000000",
"Hex Word 5": "00000000",
"Hex Word 6": "00000000",
"Hex Word 7": "00000000",
"Hex Word 8": "00000000",
"Hex Word 9": "00000000",
"Callout Section": {
"Callout Count": "1",
"Callouts": [{
"FRU Type": "Symbolic FRU",
"Priority": "Mandatory, replace all with this type as a unit",
"Part Number": "REFCLK0"
}]
}
"User Data 2": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "0x2000",
"Data": [
{
"Deconfigured": true,
"EntityPath": [
35,
1,
0,
2,
0,
26,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"Priority": "H",
"SymbolicFRU": "REFCLK0"
}
]
}
Signed-off-by: Jayanth Othayoth <ojayanth@in.ibm.com>
Change-Id: I553d5b7095ba6ea65fb9fa2c64188c05dbab936f
diff --git a/extensions/phal/phal_error.cpp b/extensions/phal/phal_error.cpp
index 8879964..009b9c0 100644
--- a/extensions/phal/phal_error.cpp
+++ b/extensions/phal/phal_error.cpp
@@ -328,6 +328,93 @@
reset();
}
+/**
+ * @brief processClockInfoErrorHelper
+ *
+ * Creates informational PEL for spare clock failure
+ *
+ * @param[in] ffdc - FFDC data captured by the HWP
+ * @param[in] ffdc_prefix - prefix string for logging the data.
+ * @throws std::exception - rethrown unchanged, after reset(), on failure
+ */
+void processClockInfoErrorHelper(FFDC* ffdc, const std::string& ffdc_prefix)
+{
+    try
+    {
+        log<level::INFO>(
+            fmt::format("processClockInfoErrorHelper: FFDC Message[{}]",
+                        ffdc->message)
+                .c_str());
+
+        // To store callouts details in json format as per pel expectation.
+        json jsonCalloutDataList = json::array();
+
+        // To store phal trace and other additional data about ffdc.
+        FFDCData pelAdditionalData;
+
+        std::string keyWithPrefix(ffdc_prefix + "RC");
+        // Adding hardware procedures return code details
+        pelAdditionalData.emplace_back(keyWithPrefix, ffdc->hwp_errorinfo.rc);
+        keyWithPrefix = ffdc_prefix + "RC_DESC";
+        pelAdditionalData.emplace_back(keyWithPrefix,
+                                       ffdc->hwp_errorinfo.rc_desc);
+
+        // Adding hardware procedures required ffdc data for debug
+        for_each(ffdc->hwp_errorinfo.ffdcs_data.begin(),
+                 ffdc->hwp_errorinfo.ffdcs_data.end(),
+                 [&pelAdditionalData, &ffdc_prefix](
+                     std::pair<std::string, std::string>& ele) -> void {
+                     std::string keyWithPrefix(ffdc_prefix + "FFDC_");
+                     keyWithPrefix.append(ele.first);
+
+                     pelAdditionalData.emplace_back(keyWithPrefix, ele.second);
+                 });
+        // get clock position from the first reference clock hw callout
+        auto clk_pos = 0xFF; // Invalid position, kept if no clock callout.
+        for (auto& hwCallout : ffdc->hwp_errorinfo.hwcallouts)
+        {
+            if ((hwCallout.hwid == "PROC_REF_CLOCK") ||
+                (hwCallout.hwid == "PCI_REF_CLOCK"))
+            {
+                clk_pos = hwCallout.clkPos;
+                break;
+            }
+        }
+
+        // Adding CDG (Only deconfigure) targets details
+        for_each(ffdc->hwp_errorinfo.cdg_targets.begin(),
+                 ffdc->hwp_errorinfo.cdg_targets.end(),
+                 [&jsonCalloutDataList,
+                  clk_pos](const CDG_Target& cdg_tgt) -> void {
+                     json jsonCalloutData;
+                     std::string pelPriority = "H";
+                     jsonCalloutData["Priority"] = pelPriority; // Not used
+                     jsonCalloutData["SymbolicFRU"] =
+                         "REFCLK" + std::to_string(clk_pos);
+                     jsonCalloutData["Deconfigured"] = cdg_tgt.deconfigure;
+                     jsonCalloutData["EntityPath"] = cdg_tgt.target_entity_path;
+                     jsonCalloutDataList.emplace_back(jsonCalloutData);
+                 });
+
+        // Adding collected phal logs into PEL additional data
+        for_each(traceLog.begin(), traceLog.end(),
+                 [&pelAdditionalData](
+                     std::pair<std::string, std::string>& ele) -> void {
+                     pelAdditionalData.emplace_back(ele.first, ele.second);
+                 });
+
+        openpower::pel::createErrorPEL("org.open_power.PHAL.Error.SpareClock",
+                                       jsonCalloutDataList, pelAdditionalData,
+                                       Severity::Informational);
+    }
+    catch (const std::exception& ex)
+    {
+        reset();
+        throw; // rethrow in-flight exception; "throw ex" would slice it
+    }
+    reset();
+}
+
void processIplErrorCallback(const ipl_error_info& errInfo)
{
log<level::INFO>(
@@ -461,6 +548,12 @@
fmt::format("PHAL FFDC: Return Message[{}]", ffdc->message)
.c_str());
+ // Special handling for spare clock related errors.
+ if (ffdc->ffdc_type == FFDC_TYPE_SPARE_CLOCK_INFO)
+ {
+ processClockInfoErrorHelper(ffdc, ffdc_prefix);
+ return;
+ }
// To store callouts details in json format as per pel expectation.
json jsonCalloutDataList;
jsonCalloutDataList = json::array();