PEL: Add new error message for system health state
Add a new error message that is created when there are guard
records, deconfigured records, and/or unresolved PELs with the
deconfig bit set.
The HMC subscribes to this error SRC; when it is notified, it
requests a NAG dump, which captures the serviceable records in a
JSON file and attaches that file to the dump.
This error message acts as a reminder that faulty hardware in the
system is still pending service.
Tested:
"0x500F05A1": {
"SRC": "BD50F138",
"PLID": "0x500F05A1",
"CreatorID": "BMC",
"Subsystem": "CEC Hardware",
"Commit Time": "04/24/2023 07:47:45",
"Sev": "Predictive Error",
"CompID": "bmc faultlog"
}
"User Header": {
"Section Version": "1",
"Sub-section type": "0",
"Log Committed by": "bmc error logging",
"Subsystem": "CEC Hardware",
"Event Scope": "Entire Platform",
"Event Severity": "Predictive Error",
"Event Type": "Not Applicable",
"Action Flags": [
"Service Action Required",
"Report Externally",
"HMC Call Home"
],
"Host Transmission": "Not Sent",
"HMC Transmission": "Not Sent"
},
"User Data 1": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "bmc error logging",
"DECONFIG_RECORD_COUNT": "0",
"GUARD_WITH_ASSOC_ERROR_COUNT": "1",
"GUARD_WITH_NO_ASSOC_ERROR_COUNT": "0",
"UNRESOLVED_PEL_WITH_DECONFIG_BIT_COUNT": "0"
}
"Primary SRC": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "bmc faultlog",
"Callout Section": {
"Callout Count": "1",
"Callouts": [{
"FRU Type": "Maintenance Procedure Required",
"Priority": "Mandatory, replace all with this type as a unit",
"Procedure": "BMC0008"
}]
}
}
Signed-off-by: Marri Devender Rao <devenrao@in.ibm.com>
Change-Id: I1a046d4416719e479378d7032f32663fc07fd03a
diff --git a/extensions/openpower-pels/pel_values.cpp b/extensions/openpower-pels/pel_values.cpp
index 2e5b32d..a94606c 100644
--- a/extensions/openpower-pels/pel_values.cpp
+++ b/extensions/openpower-pels/pel_values.cpp
@@ -230,6 +230,8 @@
{"find_sue_root_cause", "BMC0006"},
// Correct system backplane VPD mismatch
{"system_vpd_correction", "BMC0007"},
+ // Service reminder about failed parts present in the system
+ {"detected_issue_need_service", "BMC0008"},
};
/**
diff --git a/extensions/openpower-pels/registry/O_component_ids.json b/extensions/openpower-pels/registry/O_component_ids.json
index b3d8b52..3920505 100644
--- a/extensions/openpower-pels/registry/O_component_ids.json
+++ b/extensions/openpower-pels/registry/O_component_ids.json
@@ -15,5 +15,6 @@
"6000": "bmc pldm",
"C100": "bmc system dump collector",
"D100": "bmc hw diags attention handler",
- "E500": "bmc hw diags"
+ "E500": "bmc hw diags",
+ "F100": "bmc faultlog"
}
diff --git a/extensions/openpower-pels/registry/message_registry.json b/extensions/openpower-pels/registry/message_registry.json
index 89ba5d2..37a7ff1 100644
--- a/extensions/openpower-pels/registry/message_registry.json
+++ b/extensions/openpower-pels/registry/message_registry.json
@@ -836,6 +836,54 @@
},
{
+ "Name": "org.open_power.Faultlog.Error.DeconfiguredHW",
+ "Subsystem": "cec_hardware",
+ "Severity": "predictive",
+ "ComponentID": "0xf100",
+ "SRC": {
+ "ReasonCode": "0xf138",
+ "Words6To9": {
+ "6": {
+ "Description": "Number of guard records with error object",
+ "AdditionalDataPropSource": "GUARD_WITH_ASSOC_ERROR_COUNT"
+ },
+ "7": {
+ "Description": "Number of guard records with no error object",
+ "AdditionalDataPropSource": "GUARD_WITH_NO_ASSOC_ERROR_COUNT"
+ },
+ "8": {
+ "Description": "Number of deconfigured records",
+ "AdditionalDataPropSource": "DECONFIG_RECORD_COUNT"
+ },
+ "9": {
+ "Description": "Number of unresolved PELs with deconfig bit set",
+ "AdditionalDataPropSource": "UNRESOLVED_PEL_WITH_DECONFIG_BIT_COUNT"
+ }
+ }
+ },
+ "Callouts": [
+ {
+ "CalloutList": [
+ {
+ "Priority": "high",
+ "Procedure": "detected_issue_need_service"
+ }
+ ]
+ }
+ ],
+ "Documentation": {
+ "Description": "Firmware detected a deconfigured FRU and/or a guard record",
+ "Message": "Firmware detected a deconfigured FRU and/or a guard record",
+ "Notes": [
+ "Either Guard And/Or Deconfiguration records found during periodic check ",
+ "as part of the NAG event. Look into the BMC NAG dump for more details. ",
+ "This is a reminder for servicing faulty hardware pending service present ",
+ "in the system."
+ ]
+ }
+ },
+
+ {
"Name": "xyz.openbmc_project.State.Error.HostNotRunning",
"Subsystem": "cec_sp_hostboot_iface",
"ComponentID": "0x3400",
diff --git a/extensions/openpower-pels/registry/schema/schema.json b/extensions/openpower-pels/registry/schema/schema.json
index 50066ee..0d23cd8 100644
--- a/extensions/openpower-pels/registry/schema/schema.json
+++ b/extensions/openpower-pels/registry/schema/schema.json
@@ -529,7 +529,8 @@
"fsi_path",
"power_overcurrent",
"find_sue_root_cause",
- "system_vpd_correction"
+ "system_vpd_correction",
+ "detected_issue_need_service"
]
},