blob: d304ed13bb5e197e5c298ebef5a68031f7e8d021 [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05003#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05004
Ben Tyner0205f3b2020-02-24 10:24:47 -06005#include <hei_main.hpp>
Zane Shelley9fb73932020-09-15 13:34:57 -05006#include <phosphor-logging/log.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06007#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05008#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06009
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050010#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -050011#include <fstream>
12#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050013#include <map>
14#include <string>
15
Ben Tyner0205f3b2020-02-24 10:24:47 -060016namespace analyzer
17{
18
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060019//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050020
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060021// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050022
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060023/**
 * @brief Gets the list of active chips and initializes the isolator.
25 * @param o_chips The returned list of active chips.
26 */
Zane Shelley171a2e02020-11-13 13:56:13 -060027void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050028
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060029/**
30 * @brief Will create and submit a PEL using the given data.
31 * @param i_rootCause A signature defining the attention root cause.
32 * @param i_isoData The data gathered during isolation (for FFDC).
33 */
34void createPel(const libhei::Signature& i_rootCause,
35 const libhei::IsolationData& i_isoData);
36
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050037//------------------------------------------------------------------------------
38
Zane Shelley2f263182020-07-10 21:41:21 -050039const char* __attn(libhei::AttentionType_t i_attnType)
40{
41 const char* str = "";
42 switch (i_attnType)
43 {
44 case libhei::ATTN_TYPE_CHECKSTOP:
45 str = "CHECKSTOP";
46 break;
47 case libhei::ATTN_TYPE_UNIT_CS:
48 str = "UNIT_CS";
49 break;
50 case libhei::ATTN_TYPE_RECOVERABLE:
51 str = "RECOVERABLE";
52 break;
53 case libhei::ATTN_TYPE_SP_ATTN:
54 str = "SP_ATTN";
55 break;
56 case libhei::ATTN_TYPE_HOST_ATTN:
57 str = "HOST_ATTN";
58 break;
59 default:
60 trace::err("Unsupported attention type: %u", i_attnType);
61 assert(0);
62 }
63 return str;
64}
65
Zane Shelley2f263182020-07-10 21:41:21 -050066//------------------------------------------------------------------------------
67
Zane Shelleycb457382020-11-02 20:55:06 -060068bool __filterRootCause(const libhei::IsolationData& i_isoData,
69 libhei::Signature& o_signature)
Zane Shelley097a71a2020-06-08 15:55:29 -050070{
Zane Shelleycb457382020-11-02 20:55:06 -060071 // We'll need to make a copy of the list so that the original list is
72 // maintained for the log.
73 std::vector<libhei::Signature> sigList{i_isoData.getSignatureList()};
74
Zane Shelley2f263182020-07-10 21:41:21 -050075 // For debug, trace out the original list of signatures before filtering.
Zane Shelleycb457382020-11-02 20:55:06 -060076 for (const auto& sig : sigList)
Zane Shelley2f263182020-07-10 21:41:21 -050077 {
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060078 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -060079 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060080 __attn(sig.getAttnType()));
Zane Shelley2f263182020-07-10 21:41:21 -050081 }
82
Zane Shelley097a71a2020-06-08 15:55:29 -050083 // Special and host attentions are not supported by this user application.
84 auto newEndItr =
Zane Shelleycb457382020-11-02 20:55:06 -060085 std::remove_if(sigList.begin(), sigList.end(), [&](const auto& t) {
Zane Shelley097a71a2020-06-08 15:55:29 -050086 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
87 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
88 });
89
90 // Shrink the vector, if needed.
Zane Shelleycb457382020-11-02 20:55:06 -060091 sigList.resize(std::distance(sigList.begin(), newEndItr));
Zane Shelley097a71a2020-06-08 15:55:29 -050092
93 // START WORKAROUND
94 // TODO: Filtering should be determined by the RAS Data Files provided by
95 // the host firmware via the PNOR (similar to the Chip Data Files).
96 // Until that support is available, use a rudimentary filter that
97 // first looks for any recoverable attention, then any unit checkstop,
98 // and then any system checkstop. This is built on the premise that
99 // recoverable errors could be the root cause of an system checkstop
100 // attentions. Fortunately, we just need to sort the list by the
101 // greater attention type value.
Zane Shelleycb457382020-11-02 20:55:06 -0600102 std::sort(sigList.begin(), sigList.end(),
Zane Shelley097a71a2020-06-08 15:55:29 -0500103 [&](const auto& a, const auto& b) {
104 return a.getAttnType() > b.getAttnType();
105 });
106 // END WORKAROUND
Zane Shelleycb457382020-11-02 20:55:06 -0600107
108 // Check if a root cause attention was found.
109 if (!sigList.empty())
110 {
111 // The entry at the front of the list will be the root cause.
112 o_signature = sigList.front();
113 return true;
114 }
115
116 return false; // default, no active attentions found.
Zane Shelley097a71a2020-06-08 15:55:29 -0500117}
118
119//------------------------------------------------------------------------------
120
Zane Shelleycb457382020-11-02 20:55:06 -0600121bool __analyze(const libhei::IsolationData& i_isoData)
Zane Shelley9fb73932020-09-15 13:34:57 -0500122{
123 bool attnFound = false;
124
Zane Shelleycb457382020-11-02 20:55:06 -0600125 libhei::Signature rootCause{};
126 attnFound = __filterRootCause(i_isoData, rootCause);
127
128 if (!attnFound)
Zane Shelley9fb73932020-09-15 13:34:57 -0500129 {
Zane Shelleycb457382020-11-02 20:55:06 -0600130 // NOTE: It is possible for TI handling that there will not be an active
131 // attention. In which case, we will not do anything and let the
132 // caller of this function determine if this is the expected
133 // behavior.
Zane Shelley9fb73932020-09-15 13:34:57 -0500134 trace::inf("No active attentions found");
135 }
136 else
137 {
Zane Shelley9fb73932020-09-15 13:34:57 -0500138 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -0600139 util::pdbg::getPath(rootCause.getChip()),
140 rootCause.toUint32(), __attn(rootCause.getAttnType()));
141
142 // TODO: Perform service actions based on the root cause.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600143
144 // Create and commit a PEL.
Zane Shelleycb457382020-11-02 20:55:06 -0600145 createPel(rootCause, i_isoData);
Zane Shelley9fb73932020-09-15 13:34:57 -0500146 }
147
Zane Shelley9fb73932020-09-15 13:34:57 -0500148 return attnFound;
149}
150
151//------------------------------------------------------------------------------
152
153bool analyzeHardware()
Ben Tyner87eabc62020-05-14 17:56:54 -0500154{
Zane Shelley097a71a2020-06-08 15:55:29 -0500155 bool attnFound = false;
Ben Tyner87eabc62020-05-14 17:56:54 -0500156
Zane Shelley2f263182020-07-10 21:41:21 -0500157 trace::inf(">>> enter analyzeHardware()");
158
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600159 if (util::pdbg::queryHardwareAnalysisSupported())
160 {
161 // Initialize the isolator and get all of the chips to be analyzed.
162 trace::inf("Initializing the isolator...");
163 std::vector<libhei::Chip> chips;
164 initializeIsolator(chips);
Zane Shelley2e994bc2020-06-08 14:38:14 -0500165
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600166 // Isolate attentions.
167 trace::inf("Isolating errors: # of chips=%u", chips.size());
168 libhei::IsolationData isoData{};
169 libhei::isolate(chips, isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500170
Zane Shelleycb457382020-11-02 20:55:06 -0600171 // Analyze the isolation data and perform service actions if needed.
172 attnFound = __analyze(isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500173
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600174 // All done, clean up the isolator.
175 trace::inf("Uninitializing isolator...");
176 libhei::uninitialize();
177 }
178 else
179 {
180 trace::err("Hardware error analysis is not supported on this system");
181 }
Ben Tyner87eabc62020-05-14 17:56:54 -0500182
Zane Shelley2f263182020-07-10 21:41:21 -0500183 trace::inf("<<< exit analyzeHardware()");
184
Zane Shelley097a71a2020-06-08 15:55:29 -0500185 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600186}
187
188} // namespace analyzer