Policy table lookup enhancements
Adds support for 2 new lookups:
1) Use the called out device path as the search modifier.
2) Use the called out FRU + the severity of the PEL error
extracted from the ESEL metadata in the log as the
search modifier.
If a match isn't found in the policy table with these modifiers,
then the code will search again using the existing modifier checks.
Note:
PEL = Platform Event Log. This is the logging standard used
by OpenPower host firmware. The PEL contents are in
the ESEL data in the AdditionalData property in an
OpenBMC error log entry.
Change-Id: I0e2f3675ece7e792f6b551a5be093351e3585eb6
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
diff --git a/policy_find.cpp b/policy_find.cpp
index 797f7b8..f785823 100644
--- a/policy_find.cpp
+++ b/policy_find.cpp
@@ -25,6 +25,8 @@
namespace policy
{
+static constexpr auto HOST_EVENT = "org.open_power.Host.Error.Event";
+
namespace optional_ns = std::experimental;
/**
@@ -59,17 +61,23 @@
* @param[in] additionalData - the AdditionalData property contents
* @param[in] name - the name of the value to find
*
- * @return optional<std::string> - the data value
+ * @return optional<std::string> - the data value. Will not be empty if found.
*/
optional_ns::optional<std::string>
getAdditionalDataItem(const std::vector<std::string>& additionalData,
const std::string& name)
{
+ std::string value;
+
for (const auto& item : additionalData)
{
if (item.find(name + "=") != std::string::npos)
{
- return item.substr(item.find('=') + 1);
+ value = item.substr(item.find('=') + 1);
+ if (!item.empty())
+ {
+ return value;
+ }
}
}
@@ -77,6 +85,129 @@
}
/**
+ * Returns a string version of the severity from the PEL
+ * log in the extended SEL data from the host, where a PEL stands
+ * for 'Platform Event Log' and is an IBM standard for error logging
+ * that OpenPower host firmware uses.
+ *
+ * The severity is the 11th byte in the 'User Header' section in a PEL
+ * that starts at byte 48. We only need the first nibble, which signifies
+ * the type - 'Recovered', 'Predictive', 'Critical', etc.
+ *
+ * type value | type | returned severity string
+ * ------------------------------------
+ * 1 Recovered Informational
+ * 2 Predictive Warning
+ * everything else na Critical
+ *
+ * @param[in] data - the PEL string in the form of "00 11 22 33 4e ff"
+ *
+ * @return optional<std::string> - the severity string as listed above
+ */
+optional_ns::optional<std::string> getESELSeverity(const std::string& data)
+{
+ // The User Header section starts at byte 48, and take into account
+ // the input data is a space separated string representation of HEX data.
+ static constexpr auto UH_OFFSET = 48 * 4;
+
+ // The eye catcher is "UH"
+ static constexpr auto UH_EYECATCHER = "55 48";
+
+ // The severity is the 11th byte in the section, and take into
+ // account a byte is "BB "
+ static constexpr auto UH_SEV_OFFSET = 10 * 3;
+
+ std::string severity = "Critical";
+
+ // The only values that don't map to "Critical"
+ const std::map<std::string, std::string> sevTypes{{"1", "Informational"},
+ {"2", "Warning"}};
+ if (data.size() <= (UH_OFFSET + UH_SEV_OFFSET))
+ {
+ return {};
+ }
+
+ // Sanity check that the User Header section is there.
+ auto userHeader = data.substr(UH_OFFSET, 5);
+ if (userHeader.compare(UH_EYECATCHER))
+ {
+ return {};
+ }
+
+ // The severity type nibble is a full byte in the string.
+ auto sevType = data.substr(UH_OFFSET + UH_SEV_OFFSET, 1);
+
+ auto sev = sevTypes.find(sevType);
+ if (sev != sevTypes.end())
+ {
+ severity = sev->second;
+ };
+
+ return severity;
+}
+
+/**
+ * Returns the search modifier to use, but if it isn't found
+ * in the table then code should then call getSearchModifier()
+ * and try again.
+ *
+ * This is to be tolerant of the policy table not having
+ * entries for every device path or FRU callout, and trying
+ * again gives code a chance to find the more generic entries
+ * for those classes of errors rather than not being found
+ * at all.
+ *
+ * e.g. If the device path is missing in the table, then it
+ * can still find the generic "Failed to read from an I2C
+ * device" entry.
+ *
+ * @param[in] message- the error message, like xyz.A.Error.B
+ * @param[in] properties - the property map for the error
+ *
+ * @return string - the search modifier
+ * may be empty if none found
+ */
+std::string getSearchModifierFirstTry(const std::string& message,
+ const DbusPropertyMap& properties)
+{
+ auto data =
+ getProperty<std::vector<std::string>>(properties, "AdditionalData");
+
+ if (!data)
+ {
+ return std::string{};
+ }
+
+ // Try the called out device path as the search modifier
+ auto devPath = getAdditionalDataItem(*data, "CALLOUT_DEVICE_PATH");
+ if (devPath)
+ {
+ return *devPath;
+ }
+
+ // For Host.Error.Event errors, try <callout>||<severity string>
+ // as the search modifier.
+ if (message == HOST_EVENT)
+ {
+ auto callout = getAdditionalDataItem(*data, "CALLOUT_INVENTORY_PATH");
+ if (callout)
+ {
+ auto selData = getAdditionalDataItem(*data, "ESEL");
+ if (selData)
+ {
+ auto severity = getESELSeverity(*selData);
+ if (severity)
+ {
+ return *callout + "||" + *severity;
+ }
+ }
+ }
+ }
+
+ return std::string{};
+}
+
+/**
* Returns the search modifier to use.
*
* The modifier is used when the error name itself isn't granular
@@ -113,7 +244,7 @@
for (const auto& field : ADFields)
{
mod = getAdditionalDataItem(*data, field);
- if (mod && !(*mod).empty())
+ if (mod)
{
return *mod;
}
@@ -173,9 +304,24 @@
"Message"); // e.g. xyz.X.Error.Y
if (errorMsg)
{
- auto modifier = getSearchModifier(errorLogProperties);
+ FindResult result;
- auto result = policy.find(*errorMsg, modifier);
+ // Try with the FirstTry modifier first, and then the regular one.
+
+ auto modifier =
+ getSearchModifierFirstTry(*errorMsg, errorLogProperties);
+
+ if (!modifier.empty())
+ {
+ result = policy.find(*errorMsg, modifier);
+ }
+
+ if (!result)
+ {
+ modifier = getSearchModifier(errorLogProperties);
+
+ result = policy.find(*errorMsg, modifier);
+ }
if (result)
{
diff --git a/policy_table.cpp b/policy_table.cpp
index d7fb93c..df32d59 100644
--- a/policy_table.cpp
+++ b/policy_table.cpp
@@ -77,8 +77,8 @@
}
}
-optional_ns::optional<DetailsReference>
- Table::find(const std::string& error, const std::string& modifier) const
+FindResult Table::find(const std::string& error,
+ const std::string& modifier) const
{
// First find the entry based on the error, and then find which
// underlying details object it is with the help of the modifier.
diff --git a/policy_table.hpp b/policy_table.hpp
index fb12659..a2e7346 100644
--- a/policy_table.hpp
+++ b/policy_table.hpp
@@ -26,13 +26,14 @@
std::string ceid;
};
+namespace optional_ns = std::experimental;
+
using DetailsList = std::vector<Details>;
using DetailsReference = std::reference_wrapper<const Details>;
+using FindResult = optional_ns::optional<DetailsReference>;
using PolicyMap = std::map<std::string, DetailsList>;
-namespace optional_ns = std::experimental;
-
/**
* @class Table
*
@@ -78,8 +79,8 @@
*
* @return optional<DetailsReference> - the details entry
*/
- optional_ns::optional<DetailsReference>
- find(const std::string& error, const std::string& modifier) const;
+ FindResult find(const std::string& error,
+ const std::string& modifier) const;
/**
* The default event ID to use when a match in the table
diff --git a/test/test_policy.cpp b/test/test_policy.cpp
index 4828c63..c200066 100644
--- a/test/test_policy.cpp
+++ b/test/test_policy.cpp
@@ -24,6 +24,31 @@
using namespace ibm::logging;
namespace fs = std::experimental::filesystem;
+static constexpr auto HOST_EVENT = "org.open_power.Host.Error.Event";
+
+// ESEL contents all of the way up to right before the severity
+// byte in the UH section
+static const std::string eSELBase =
+ "ESEL="
+ "00 00 df 00 00 00 00 20 00 04 07 5a 04 aa 00 00 50 48 00 30 01 00 e5 00 "
+ "00 00 f6 ca c9 da 5b b7 00 00 f6 ca d1 8a 2d e6 42 00 00 08 00 00 00 00 "
+ "00 00 00 00 00 00 00 00 89 00 03 44 89 00 03 44 55 48 00 18 01 00 e5 00 "
+ "13 03 ";
+
+static const std::string noUHeSEL =
+ "ESEL="
+ "00 00 df 00 00 00 00 20 00 04 07 5a 04 aa 00 00 50 48 00 30 01 00 e5 00 "
+ "00 00 f6 ca c9 da 5b b7 00 00 f6 ca d1 8a 2d e6 42 00 00 08 00 00 00 00 "
+ "00 00 00 00 00 00 00 00 89 00 03 44 89 00 03 44 00 00 00 18 01 00 e5 00 "
+ "13 03 10";
+
+// ESEL Severity bytes
+static const std::string SEV_RECOVERED = "10";
+static const std::string SEV_PREDICTIVE = "20";
+static const std::string SEV_UNRECOV = "40";
+static const std::string SEV_CRITICAL = "50";
+static const std::string SEV_DIAG = "60";
+
static constexpr auto json = R"(
[
{
@@ -93,9 +118,9 @@
"msg":"Error FFFFFFFF"
}
],
-
"err":"xyz.openbmc_project.Error.Test5"
},
+
{
"dtls":[
{
@@ -104,9 +129,9 @@
"msg":"Error GGGGGGGG"
}
],
-
"err":"xyz.openbmc_project.Error.Test6"
},
+
{
"dtls":[
{
@@ -116,6 +141,58 @@
}
],
"err":"xyz.openbmc_project.Error.Test7"
+ },
+
+ {
+ "dtls":[
+ {
+ "CEID":"IIIIIII",
+ "mod":"/match/this/path",
+ "msg":"Error IIIIIII"
+ }
+ ],
+ "err":"xyz.openbmc_project.Error.Test8"
+ },
+
+ {
+ "dtls":[
+ {
+ "CEID":"JJJJJJJJ",
+ "mod":"/inventory/core0||Warning",
+ "msg":"Error JJJJJJJJ"
+ },
+ {
+ "CEID":"KKKKKKKK",
+ "mod":"/inventory/core1||Informational",
+ "msg":"Error KKKKKKKK"
+ },
+ {
+ "CEID":"LLLLLLLL",
+ "mod":"/inventory/core2||Critical",
+ "msg":"Error LLLLLLLL"
+ },
+ {
+ "CEID":"MMMMMMMM",
+ "mod":"/inventory/core3||Critical",
+ "msg":"Error MMMMMMMM"
+ },
+ {
+ "CEID":"NNNNNNNN",
+ "mod":"/inventory/core4||Critical",
+ "msg":"Error NNNNNNNN"
+ },
+ {
+ "CEID":"OOOOOOOO",
+ "mod":"/inventory/core5",
+ "msg":"Error OOOOOOOO"
+ },
+ {
+ "CEID":"PPPPPPPP",
+ "mod":"/inventory/core5||Critical",
+ "msg":"Error PPPPPPPP"
+ }
+ ],
+ "err":"org.open_power.Host.Error.Event"
}
])";
@@ -302,4 +379,120 @@
ASSERT_EQ(std::get<policy::EIDField>(values), policy.defaultEID());
ASSERT_EQ(std::get<policy::MsgField>(values), policy.defaultMsg());
}
+
+ // Test a device path modifier match
+ {
+ std::vector<std::string> ad{"CALLOUT_DEVICE_PATH=/match/this/path"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"xyz.openbmc_project.Error.Test8"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "IIIIIII");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error IIIIIII");
+ }
+
+ // Test a predictive SEL matches on 'callout||Warning'
+ {
+ std::vector<std::string> ad{eSELBase + SEV_PREDICTIVE,
+ "CALLOUT_INVENTORY_PATH=/inventory/core0"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "JJJJJJJJ");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error JJJJJJJJ");
+ }
+
+ // Test a recovered SEL matches on 'callout||Informational'
+ {
+ std::vector<std::string> ad{eSELBase + SEV_RECOVERED,
+ "CALLOUT_INVENTORY_PATH=/inventory/core1"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "KKKKKKKK");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error KKKKKKKK");
+ }
+
+ // Test a critical severity matches on 'callout||Critical'
+ {
+ std::vector<std::string> ad{eSELBase + SEV_CRITICAL,
+ "CALLOUT_INVENTORY_PATH=/inventory/core2"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "LLLLLLLL");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error LLLLLLLL");
+ }
+
+ // Test an unrecoverable SEL matches on 'callout||Critical'
+ {
+ std::vector<std::string> ad{eSELBase + SEV_UNRECOV,
+ "CALLOUT_INVENTORY_PATH=/inventory/core3"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "MMMMMMMM");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error MMMMMMMM");
+ }
+
+ // Test a Diagnostic SEL matches on 'callout||Critical'
+ {
+ std::vector<std::string> ad{eSELBase + SEV_DIAG,
+ "CALLOUT_INVENTORY_PATH=/inventory/core4"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "NNNNNNNN");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error NNNNNNNN");
+ }
+
+ // Test a short eSEL still matches the normal callout
+ {
+ std::vector<std::string> ad{eSELBase,
+ "CALLOUT_INVENTORY_PATH=/inventory/core5"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "OOOOOOOO");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error OOOOOOOO");
+ }
+
+ // Test an eSEL with no UH section still matches a normal callout
+ {
+ std::vector<std::string> ad{noUHeSEL,
+ "CALLOUT_INVENTORY_PATH=/inventory/core5"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "OOOOOOOO");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error OOOOOOOO");
+ }
+
+ // Test a bad severity is still considered critical (by design)
+ {
+ std::vector<std::string> ad{eSELBase + "ZZ",
+ "CALLOUT_INVENTORY_PATH=/inventory/core5"s};
+ DbusPropertyMap testProperties{
+ {"Message"s, Value{"org.open_power.Host.Error.Event"s}},
+ {"AdditionalData"s, ad}};
+
+ auto values = policy::find(policy, testProperties);
+ ASSERT_EQ(std::get<policy::EIDField>(values), "PPPPPPPP");
+ ASSERT_EQ(std::get<policy::MsgField>(values), "Error PPPPPPPP");
+ }
}