blob: c616f7e3ddac13655bd845e5ffc87a828cd28a9d [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05003#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05004
Zane Shelley4ed4be52021-02-15 17:53:40 -06005#include <analyzer/service_data.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06006#include <hei_main.hpp>
Zane Shelley9fb73932020-09-15 13:34:57 -05007#include <phosphor-logging/log.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06008#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05009#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -060010
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050011#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -050012#include <fstream>
13#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050014#include <map>
15#include <string>
16
Ben Tyner0205f3b2020-02-24 10:24:47 -060017namespace analyzer
18{
19
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060020//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050021
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060022// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050023
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060024/**
25 * @brief Will get the list of active chips and initialize the isolator.
26 * @param o_chips The returned list of active chips.
27 */
Zane Shelley171a2e02020-11-13 13:56:13 -060028void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050029
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060030/**
31 * @brief Will create and submit a PEL using the given data.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060032 * @param i_isoData The data gathered during isolation (for FFDC).
Zane Shelley4ed4be52021-02-15 17:53:40 -060033 * @param i_servData Data regarding service actions gathered during analysis.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060034 */
Zane Shelley8af9e462021-03-11 10:44:28 -060035void createPel(const libhei::IsolationData& i_isoData,
Zane Shelley4ed4be52021-02-15 17:53:40 -060036 const ServiceData& i_servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060037
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050038//------------------------------------------------------------------------------
39
Zane Shelley2f263182020-07-10 21:41:21 -050040const char* __attn(libhei::AttentionType_t i_attnType)
41{
42 const char* str = "";
43 switch (i_attnType)
44 {
45 case libhei::ATTN_TYPE_CHECKSTOP:
46 str = "CHECKSTOP";
47 break;
48 case libhei::ATTN_TYPE_UNIT_CS:
49 str = "UNIT_CS";
50 break;
51 case libhei::ATTN_TYPE_RECOVERABLE:
52 str = "RECOVERABLE";
53 break;
54 case libhei::ATTN_TYPE_SP_ATTN:
55 str = "SP_ATTN";
56 break;
57 case libhei::ATTN_TYPE_HOST_ATTN:
58 str = "HOST_ATTN";
59 break;
60 default:
61 trace::err("Unsupported attention type: %u", i_attnType);
62 assert(0);
63 }
64 return str;
65}
66
Zane Shelley2f263182020-07-10 21:41:21 -050067//------------------------------------------------------------------------------
68
Zane Shelleycb457382020-11-02 20:55:06 -060069bool __filterRootCause(const libhei::IsolationData& i_isoData,
70 libhei::Signature& o_signature)
Zane Shelley097a71a2020-06-08 15:55:29 -050071{
Zane Shelleycb457382020-11-02 20:55:06 -060072 // We'll need to make a copy of the list so that the original list is
73 // maintained for the log.
74 std::vector<libhei::Signature> sigList{i_isoData.getSignatureList()};
75
Zane Shelley2f263182020-07-10 21:41:21 -050076 // For debug, trace out the original list of signatures before filtering.
Zane Shelleycb457382020-11-02 20:55:06 -060077 for (const auto& sig : sigList)
Zane Shelley2f263182020-07-10 21:41:21 -050078 {
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060079 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -060080 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060081 __attn(sig.getAttnType()));
Zane Shelley2f263182020-07-10 21:41:21 -050082 }
83
Zane Shelley097a71a2020-06-08 15:55:29 -050084 // Special and host attentions are not supported by this user application.
85 auto newEndItr =
Zane Shelleycb457382020-11-02 20:55:06 -060086 std::remove_if(sigList.begin(), sigList.end(), [&](const auto& t) {
Zane Shelley097a71a2020-06-08 15:55:29 -050087 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
88 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
89 });
90
91 // Shrink the vector, if needed.
Zane Shelleycb457382020-11-02 20:55:06 -060092 sigList.resize(std::distance(sigList.begin(), newEndItr));
Zane Shelley097a71a2020-06-08 15:55:29 -050093
94 // START WORKAROUND
95 // TODO: Filtering should be determined by the RAS Data Files provided by
96 // the host firmware via the PNOR (similar to the Chip Data Files).
97 // Until that support is available, use a rudimentary filter that
98 // first looks for any recoverable attention, then any unit checkstop,
99 // and then any system checkstop. This is built on the premise that
100 // recoverable errors could be the root cause of an system checkstop
101 // attentions. Fortunately, we just need to sort the list by the
102 // greater attention type value.
Zane Shelleycb457382020-11-02 20:55:06 -0600103 std::sort(sigList.begin(), sigList.end(),
Zane Shelley097a71a2020-06-08 15:55:29 -0500104 [&](const auto& a, const auto& b) {
105 return a.getAttnType() > b.getAttnType();
106 });
107 // END WORKAROUND
Zane Shelleycb457382020-11-02 20:55:06 -0600108
109 // Check if a root cause attention was found.
110 if (!sigList.empty())
111 {
112 // The entry at the front of the list will be the root cause.
113 o_signature = sigList.front();
114 return true;
115 }
116
117 return false; // default, no active attentions found.
Zane Shelley097a71a2020-06-08 15:55:29 -0500118}
119
120//------------------------------------------------------------------------------
121
Zane Shelleycb457382020-11-02 20:55:06 -0600122bool __analyze(const libhei::IsolationData& i_isoData)
Zane Shelley9fb73932020-09-15 13:34:57 -0500123{
124 bool attnFound = false;
125
Zane Shelleycb457382020-11-02 20:55:06 -0600126 libhei::Signature rootCause{};
127 attnFound = __filterRootCause(i_isoData, rootCause);
128
129 if (!attnFound)
Zane Shelley9fb73932020-09-15 13:34:57 -0500130 {
Zane Shelleycb457382020-11-02 20:55:06 -0600131 // NOTE: It is possible for TI handling that there will not be an active
132 // attention. In which case, we will not do anything and let the
133 // caller of this function determine if this is the expected
134 // behavior.
Zane Shelley9fb73932020-09-15 13:34:57 -0500135 trace::inf("No active attentions found");
136 }
137 else
138 {
Zane Shelley9fb73932020-09-15 13:34:57 -0500139 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -0600140 util::pdbg::getPath(rootCause.getChip()),
141 rootCause.toUint32(), __attn(rootCause.getAttnType()));
142
Zane Shelley4ed4be52021-02-15 17:53:40 -0600143 // TODO: Perform service actions based on the root cause. The default
144 // callout if none other exist is level 2 support.
Zane Shelley8af9e462021-03-11 10:44:28 -0600145 ServiceData servData{rootCause};
Zane Shelley4ed4be52021-02-15 17:53:40 -0600146 servData.addCallout(std::make_shared<ProcedureCallout>(
147 ProcedureCallout::NEXTLVL, Callout::Priority::HIGH));
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600148
149 // Create and commit a PEL.
Zane Shelley8af9e462021-03-11 10:44:28 -0600150 createPel(i_isoData, servData);
Zane Shelley9fb73932020-09-15 13:34:57 -0500151 }
152
Zane Shelley9fb73932020-09-15 13:34:57 -0500153 return attnFound;
154}
155
156//------------------------------------------------------------------------------
157
158bool analyzeHardware()
Ben Tyner87eabc62020-05-14 17:56:54 -0500159{
Zane Shelley097a71a2020-06-08 15:55:29 -0500160 bool attnFound = false;
Ben Tyner87eabc62020-05-14 17:56:54 -0500161
Zane Shelley2f263182020-07-10 21:41:21 -0500162 trace::inf(">>> enter analyzeHardware()");
163
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600164 if (util::pdbg::queryHardwareAnalysisSupported())
165 {
166 // Initialize the isolator and get all of the chips to be analyzed.
167 trace::inf("Initializing the isolator...");
168 std::vector<libhei::Chip> chips;
169 initializeIsolator(chips);
Zane Shelley2e994bc2020-06-08 14:38:14 -0500170
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600171 // Isolate attentions.
172 trace::inf("Isolating errors: # of chips=%u", chips.size());
173 libhei::IsolationData isoData{};
174 libhei::isolate(chips, isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500175
Zane Shelleycb457382020-11-02 20:55:06 -0600176 // Analyze the isolation data and perform service actions if needed.
177 attnFound = __analyze(isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500178
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600179 // All done, clean up the isolator.
180 trace::inf("Uninitializing isolator...");
181 libhei::uninitialize();
182 }
183 else
184 {
185 trace::err("Hardware error analysis is not supported on this system");
186 }
Ben Tyner87eabc62020-05-14 17:56:54 -0500187
Zane Shelley2f263182020-07-10 21:41:21 -0500188 trace::inf("<<< exit analyzeHardware()");
189
Zane Shelley097a71a2020-06-08 15:55:29 -0500190 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600191}
192
193} // namespace analyzer