Odyssey PLL unlock analysis plugin
Signed-off-by: Zane Shelley <zshelle@us.ibm.com>
Change-Id: Ia53910eecdcdeb836bd836039a509f00121a67f7
diff --git a/analyzer/filter-root-cause.cpp b/analyzer/filter-root-cause.cpp
index d122a32..4efcff0 100644
--- a/analyzer/filter-root-cause.cpp
+++ b/analyzer/filter-root-cause.cpp
@@ -37,15 +37,35 @@
bool __findPllUnlock(const std::vector<libhei::Signature>& i_list,
libhei::Signature& o_rootCause)
{
+ using namespace util::pdbg;
+
// TODO: Consider returning all of them instead of one as root cause.
- auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
- return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() &&
- (0 == t.getBit() || 1 == t.getBit()));
+
+ auto nodeId = libhei::hash<libhei::NodeId_t>("PLL_UNLOCK");
+
+ // First, look for any PLL unlock attentions reported by a processor chip.
+ auto itr1 = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
+ return (nodeId == t.getId() &&
+ TYPE_PROC == getTrgtType(getTrgt(t.getChip())));
});
- if (i_list.end() != itr)
+ if (i_list.end() != itr1)
{
- o_rootCause = *itr;
+ o_rootCause = *itr1;
+ return true;
+ }
+
+ // Then, look for any PLL unlock attentions reported by an OCMB chip. This
+ // is specifically for Odyssey, which are the only OCMBs that would report
+ // PLL unlock attentions.
+ auto itr2 = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
+ return (nodeId == t.getId() &&
+ TYPE_OCMB == getTrgtType(getTrgt(t.getChip())));
+ });
+
+ if (i_list.end() != itr2)
+ {
+ o_rootCause = *itr2;
return true;
}
diff --git a/analyzer/meson.build b/analyzer/meson.build
index 7cee22e..1e6f342 100644
--- a/analyzer/meson.build
+++ b/analyzer/meson.build
@@ -11,6 +11,7 @@
)
plugins_src = files(
+ 'plugins/ody-plugins.cpp',
'plugins/p10-plugins.cpp',
'plugins/p10-tod-plugins.cpp',
)
diff --git a/analyzer/plugins/ody-plugins.cpp b/analyzer/plugins/ody-plugins.cpp
new file mode 100644
index 0000000..e71c7f5
--- /dev/null
+++ b/analyzer/plugins/ody-plugins.cpp
@@ -0,0 +1,83 @@
+
+#include <analyzer/plugins/plugin.hpp>
+#include <hei_util.hpp>
+#include <util/pdbg.hpp>
+#include <util/trace.hpp>
+
+#include <algorithm>
+#include <iterator>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace analyzer
+{
+
+namespace Ody
+{
+
+/**
+ * @brief Calls out the hardware associated with an Odyssey OCMB PLL unlock
+ *        attention.
+ *
+ * An OCMB PLL domain is scoped to just the OCMBs under the same processor chip.
+ * Every PLL unlock signature in the isolation data whose chip shares a parent
+ * processor with the given OCMB is gathered, then:
+ *  - If exactly one signature is found, the reporting OCMB is called out with
+ *    high priority and the connected processor with low priority.
+ *  - Otherwise, the clock source (the processor) is called out with high
+ *    priority and each OCMB reporting a PLL unlock with low priority.
+ *
+ * The first (unnamed) parameter is unused.
+ *
+ * @param i_ocmbChip  The OCMB chip whose parent processor identifies the PLL
+ *                    domain.
+ * @param io_servData Supplies the signature list and collects the callouts.
+ *
+ * @throw std::logic_error If no PLL unlock signature is found in the domain.
+ */
+void pll_unlock(unsigned int, const libhei::Chip& i_ocmbChip,
+                ServiceData& io_servData)
+{
+    using namespace util::pdbg;
+
+    const auto nodeId = libhei::hash<libhei::NodeId_t>("PLL_UNLOCK");
+
+    auto sigList = io_servData.getIsolationData().getSignatureList();
+
+    // The PLL domain is identified by the parent processor of the given OCMB.
+    auto procTrgt = getParentProcessor(getTrgt(i_ocmbChip));
+
+    // Collect all PLL unlock signatures that match the node ID and the parent
+    // processor chip. Appending through a back inserter avoids pre-filling the
+    // list with default-constructed signatures and shrinking it afterwards.
+    std::vector<libhei::Signature> pllList;
+    std::copy_if(sigList.begin(), sigList.end(), std::back_inserter(pllList),
+                 [&nodeId, &procTrgt](const auto& s) {
+        return (nodeId == s.getId() &&
+                procTrgt == getParentProcessor(getTrgt(s.getChip())));
+    });
+
+    // There should be at least one signature in the list.
+    if (pllList.empty())
+    {
+        throw std::logic_error("Expected at least one PLL unlock signature. "
+                               "i_ocmbChip=" +
+                               std::string{getPath(i_ocmbChip)});
+    }
+
+    // The hardware callouts will be all OCMBs with PLL unlock attentions and
+    // the connected processor chip. The callout priorities are dependent on the
+    // number of chips at attention.
+    // NOTE(review): the decision is keyed on the number of signatures, which
+    // assumes each chip contributes at most one PLL unlock signature --
+    // confirm.
+    if (1 == pllList.size())
+    {
+        // There is only one OCMB chip with a PLL unlock. So, the error is
+        // likely in the OCMB.
+        io_servData.calloutTarget(getTrgt(pllList.front().getChip()),
+                                  callout::Priority::HIGH, true);
+        io_servData.calloutTarget(procTrgt, callout::Priority::LOW, false);
+    }
+    else
+    {
+        // There is more than one OCMB chip with a PLL unlock. So, the error is
+        // likely the clock source, which is the processor.
+        io_servData.calloutTarget(procTrgt, callout::Priority::HIGH, true);
+        for (const auto& sig : pllList)
+        {
+            io_servData.calloutTarget(getTrgt(sig.getChip()),
+                                      callout::Priority::LOW, false);
+        }
+    }
+}
+
+} // namespace Ody
+
+PLUGIN_DEFINE_NS(ODYSSEY_10, Ody, pll_unlock);
+
+} // namespace analyzer
diff --git a/test/meson.build b/test/meson.build
index fe2cc03..273087f 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -49,6 +49,7 @@
test_additional_srcs = [
files(
'../analyzer/filter-root-cause.cpp',
+ '../analyzer/plugins/ody-plugins.cpp',
'../analyzer/plugins/p10-plugins.cpp',
'../analyzer/plugins/p10-tod-plugins.cpp',
'../cli.cpp',
diff --git a/test/test-pll-unlock.cpp b/test/test-pll-unlock.cpp
index 5386b46..6c7a138 100644
--- a/test/test-pll-unlock.cpp
+++ b/test/test-pll-unlock.cpp
@@ -8,6 +8,15 @@
#include "gtest/gtest.h"
+namespace analyzer
+{
+// Forward declaration of filterRootCause() so the tests in this file can call
+// it directly.
+// NOTE(review): presumably defined in analyzer/filter-root-cause.cpp -- confirm.
+bool filterRootCause(AnalysisType i_type,
+ const libhei::IsolationData& i_isoData,
+ libhei::Signature& o_rootCause,
+ const RasDataParser& i_rasData);
+} // namespace analyzer
+
using namespace analyzer;
static const auto nodeId =
@@ -146,3 +155,242 @@
])";
EXPECT_EQ(s, j.dump(4));
}
+
+// Sub-test #3 - PLL unlock on a single OCMB.
+// One ODYSSEY_10 OCMB under /proc0 has a signature on the file-level nodeId.
+// Expected callouts: the OCMB at high priority (guarded), then /proc0 at low
+// priority (not guarded).
+TEST(PllUnlock, TestSet3)
+{
+ pdbg_targets_init(nullptr);
+
+ // The only chip at attention.
+ libhei::Chip ocmb0{
+ util::pdbg::getTrgt("/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"),
+ ODYSSEY_10};
+
+ libhei::Signature sig{ocmb0, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+
+ // The isolation data holds just that one signature, which is also the
+ // signature handed to the service data.
+ libhei::IsolationData isoData{};
+ isoData.addSignature(sig);
+ ServiceData sd{sig, AnalysisType::SYSTEM_CHECKSTOP, isoData};
+
+ // Resolve the signature; the callouts checked below are read back from sd.
+ RasDataParser rasData{};
+ rasData.getResolution(sig)->resolve(sd);
+
+ nlohmann::json j{};
+ std::string s{};
+
+ // Callout list
+ j = sd.getCalloutList();
+ s = R"([
+ {
+ "Deconfigured": false,
+ "EntityPath": [],
+ "GuardType": "GARD_Unrecoverable",
+ "Guarded": true,
+ "LocationCode": "/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0",
+ "Priority": "H"
+ },
+ {
+ "Deconfigured": false,
+ "Guarded": false,
+ "LocationCode": "/proc0",
+ "Priority": "L"
+ }
+])";
+ EXPECT_EQ(s, j.dump(4));
+
+ // Callout FFDC
+ j = sd.getCalloutFFDC();
+ s = R"([
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": true,
+ "Priority": "high",
+ "Target": "/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"
+ },
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": false,
+ "Priority": "low",
+ "Target": "/proc0"
+ }
+])";
+ EXPECT_EQ(s, j.dump(4));
+}
+
+// Sub-test #4 - PLL unlock on multiple OCMBs in the same domain.
+// Two ODYSSEY_10 OCMBs under /proc0 (omi0 and omi1) each have a signature on
+// the file-level nodeId. Expected callouts: /proc0 at high priority (guarded),
+// then both OCMBs at low priority (not guarded).
+TEST(PllUnlock, TestSet4)
+{
+ pdbg_targets_init(nullptr);
+
+ // Both chips at attention share the same parent processor (/proc0).
+ libhei::Chip ocmb0{
+ util::pdbg::getTrgt("/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"),
+ ODYSSEY_10};
+
+ libhei::Chip ocmb1{
+ util::pdbg::getTrgt("/proc0/pib/perv12/mc0/mi0/mcc0/omi1/ocmb0"),
+ ODYSSEY_10};
+
+ libhei::Signature sig0{ocmb0, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+ libhei::Signature sig1{ocmb1, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+
+ // Both signatures are in the isolation data; sig0 is the one handed to the
+ // service data and resolved.
+ libhei::IsolationData isoData{};
+ isoData.addSignature(sig0);
+ isoData.addSignature(sig1);
+ ServiceData sd{sig0, AnalysisType::SYSTEM_CHECKSTOP, isoData};
+
+ // Resolve the signature; the callouts checked below are read back from sd.
+ RasDataParser rasData{};
+ rasData.getResolution(sig0)->resolve(sd);
+
+ nlohmann::json j{};
+ std::string s{};
+
+ // Callout list
+ j = sd.getCalloutList();
+ s = R"([
+ {
+ "Deconfigured": false,
+ "EntityPath": [],
+ "GuardType": "GARD_Unrecoverable",
+ "Guarded": true,
+ "LocationCode": "/proc0",
+ "Priority": "H"
+ },
+ {
+ "Deconfigured": false,
+ "Guarded": false,
+ "LocationCode": "/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0",
+ "Priority": "L"
+ },
+ {
+ "Deconfigured": false,
+ "Guarded": false,
+ "LocationCode": "/proc0/pib/perv12/mc0/mi0/mcc0/omi1/ocmb0",
+ "Priority": "L"
+ }
+])";
+ EXPECT_EQ(s, j.dump(4));
+
+ // Callout FFDC
+ j = sd.getCalloutFFDC();
+ s = R"([
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": true,
+ "Priority": "high",
+ "Target": "/proc0"
+ },
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": false,
+ "Priority": "low",
+ "Target": "/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"
+ },
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": false,
+ "Priority": "low",
+ "Target": "/proc0/pib/perv12/mc0/mi0/mcc0/omi1/ocmb0"
+ }
+])";
+ EXPECT_EQ(s, j.dump(4));
+}
+
+// Sub-test #5 - PLL unlock on multiple OCMBs in different domains.
+// One ODYSSEY_10 OCMB under /proc0 and one under /proc1 each have a signature
+// on the file-level nodeId. The signature resolved is the one under /proc0, and
+// the expected callouts contain only that OCMB (high priority, guarded) and
+// /proc0 (low priority, not guarded); nothing under /proc1 is expected.
+TEST(PllUnlock, TestSet5)
+{
+ pdbg_targets_init(nullptr);
+
+ // The two chips at attention have different parent processors.
+ libhei::Chip ocmb0{
+ util::pdbg::getTrgt("/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"),
+ ODYSSEY_10};
+
+ libhei::Chip ocmb1{
+ util::pdbg::getTrgt("/proc1/pib/perv14/mc2/mi0/mcc0/omi0/ocmb0"),
+ ODYSSEY_10};
+
+ libhei::Signature sig0{ocmb0, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+ libhei::Signature sig1{ocmb1, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+
+ // Both signatures are in the isolation data; sig0 is the one handed to the
+ // service data and resolved.
+ libhei::IsolationData isoData{};
+ isoData.addSignature(sig0);
+ isoData.addSignature(sig1);
+ ServiceData sd{sig0, AnalysisType::SYSTEM_CHECKSTOP, isoData};
+
+ // Resolve the signature; the callouts checked below are read back from sd.
+ RasDataParser rasData{};
+ rasData.getResolution(sig0)->resolve(sd);
+
+ nlohmann::json j{};
+ std::string s{};
+
+ // Callout list
+ j = sd.getCalloutList();
+ s = R"([
+ {
+ "Deconfigured": false,
+ "EntityPath": [],
+ "GuardType": "GARD_Unrecoverable",
+ "Guarded": true,
+ "LocationCode": "/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0",
+ "Priority": "H"
+ },
+ {
+ "Deconfigured": false,
+ "Guarded": false,
+ "LocationCode": "/proc0",
+ "Priority": "L"
+ }
+])";
+ EXPECT_EQ(s, j.dump(4));
+
+ // Callout FFDC
+ j = sd.getCalloutFFDC();
+ s = R"([
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": true,
+ "Priority": "high",
+ "Target": "/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"
+ },
+ {
+ "Callout Type": "Hardware Callout",
+ "Guard": false,
+ "Priority": "low",
+ "Target": "/proc0"
+ }
+])";
+ EXPECT_EQ(s, j.dump(4));
+}
+
+// Sub-test #6 - PLL unlock on mixed PROCs and OCMBs.
+// Two P10_20 processors and one ODYSSEY_10 OCMB under each of them all have a
+// signature on the file-level nodeId. filterRootCause() is expected to report
+// that an attention was found and to select the signature on /proc1 (sig1).
+TEST(PllUnlock, TestSet6)
+{
+ pdbg_targets_init(nullptr);
+
+ libhei::Chip proc0{util::pdbg::getTrgt("/proc0"), P10_20};
+ libhei::Chip proc1{util::pdbg::getTrgt("/proc1"), P10_20};
+
+ libhei::Chip ocmb0{
+ util::pdbg::getTrgt("/proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"),
+ ODYSSEY_10};
+
+ libhei::Chip ocmb1{
+ util::pdbg::getTrgt("/proc1/pib/perv14/mc2/mi0/mcc0/omi0/ocmb0"),
+ ODYSSEY_10};
+
+ // Signatures are added in the order OCMB, PROC, OCMB, PROC so that an OCMB
+ // signature precedes every processor signature.
+ libhei::Signature sig0{ocmb1, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+ libhei::Signature sig1{proc1, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+ libhei::Signature sig2{ocmb0, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+ libhei::Signature sig3{proc0, nodeId, 0, 0, libhei::ATTN_TYPE_RECOVERABLE};
+
+ libhei::IsolationData isoData{};
+ isoData.addSignature(sig0);
+ isoData.addSignature(sig1);
+ isoData.addSignature(sig2);
+ isoData.addSignature(sig3);
+
+ // NOTE(review): sig1 is presumably chosen because it is the first
+ // processor-chip signature in the list -- confirm against the list ordering
+ // used by libhei::IsolationData.
+ RasDataParser rasData{};
+ libhei::Signature rootCause;
+ bool attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData,
+ rootCause, rasData);
+ EXPECT_TRUE(attnFound);
+ EXPECT_EQ(sig1.toUint32(), rootCause.toUint32());
+}
diff --git a/util/pdbg.cpp b/util/pdbg.cpp
index 1ed25b0..2734885 100644
--- a/util/pdbg.cpp
+++ b/util/pdbg.cpp
@@ -129,6 +129,31 @@
//------------------------------------------------------------------------------
+pdbg_target* getParentProcessor(pdbg_target* i_target)
+{
+    assert(nullptr != i_target);
+
+    // A processor chip is treated as its own parent processor, so start with
+    // the given target and only search upward for any other target type.
+    pdbg_target* procChip = i_target;
+
+    if (TYPE_PROC != getTrgtType(i_target))
+    {
+        procChip = pdbg_target_parent("proc", i_target);
+
+        // Every non-processor target is expected to sit under a processor
+        // chip. Treat a missing parent as a logic error.
+        if (nullptr == procChip)
+        {
+            throw std::logic_error("No parent chip found: i_target=" +
+                                   std::string{getPath(i_target)});
+        }
+    }
+
+    return procChip;
+}
+
+//------------------------------------------------------------------------------
+
pdbg_target* getChipUnit(pdbg_target* i_parentChip, TargetType_t i_unitType,
uint8_t i_unitPos)
{
diff --git a/util/pdbg.hpp b/util/pdbg.hpp
index 0b46c7d..b62f432 100644
--- a/util/pdbg.hpp
+++ b/util/pdbg.hpp
@@ -73,6 +73,9 @@
/** @return The parent chip target of the given unit target. */
pdbg_target* getParentChip(pdbg_target* i_unitTarget);
+/**
+ * @param i_target Any target; must not be null.
+ * @return The parent processor chip target of the given target. If the given
+ * target is itself a processor chip, it is returned unchanged.
+ * @throw std::logic_error If no parent processor chip is found.
+ */
+pdbg_target* getParentProcessor(pdbg_target* i_target);
+
/** @return The unit target within chip of the given unit type and position
* relative to the chip. */
pdbg_target* getChipUnit(pdbg_target* i_parentChip, TargetType_t i_unitType,