blob: 24853761d85dc71c4c3a68bcd47efb9d78232ff1 [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05003#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05004
Zane Shelleya9b44342021-08-08 17:15:52 -05005#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley4ed4be52021-02-15 17:53:40 -06006#include <analyzer/service_data.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06007#include <hei_main.hpp>
Zane Shelley9fb73932020-09-15 13:34:57 -05008#include <phosphor-logging/log.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06009#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050010#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -060011
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050012#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -050013#include <fstream>
14#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050015#include <map>
16#include <string>
17
Ben Tyner0205f3b2020-02-24 10:24:47 -060018namespace analyzer
19{
20
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060021//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050022
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060023// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050024
/**
 * @brief Will get the list of active chips and initialize the isolator.
 * @param o_chips The returned list of active chips.
 */
Zane Shelley171a2e02020-11-13 13:56:13 -060029void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050030
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060031/**
32 * @brief Will create and submit a PEL using the given data.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060033 * @param i_isoData The data gathered during isolation (for FFDC).
Zane Shelley4ed4be52021-02-15 17:53:40 -060034 * @param i_servData Data regarding service actions gathered during analysis.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060035 */
Zane Shelley8af9e462021-03-11 10:44:28 -060036void createPel(const libhei::IsolationData& i_isoData,
Zane Shelley4ed4be52021-02-15 17:53:40 -060037 const ServiceData& i_servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060038
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050039//------------------------------------------------------------------------------
40
Zane Shelley2f263182020-07-10 21:41:21 -050041const char* __attn(libhei::AttentionType_t i_attnType)
42{
43 const char* str = "";
44 switch (i_attnType)
45 {
46 case libhei::ATTN_TYPE_CHECKSTOP:
47 str = "CHECKSTOP";
48 break;
49 case libhei::ATTN_TYPE_UNIT_CS:
50 str = "UNIT_CS";
51 break;
52 case libhei::ATTN_TYPE_RECOVERABLE:
53 str = "RECOVERABLE";
54 break;
55 case libhei::ATTN_TYPE_SP_ATTN:
56 str = "SP_ATTN";
57 break;
58 case libhei::ATTN_TYPE_HOST_ATTN:
59 str = "HOST_ATTN";
60 break;
61 default:
62 trace::err("Unsupported attention type: %u", i_attnType);
63 assert(0);
64 }
65 return str;
66}
67
Zane Shelley2f263182020-07-10 21:41:21 -050068//------------------------------------------------------------------------------
69
Zane Shelleycb457382020-11-02 20:55:06 -060070bool __filterRootCause(const libhei::IsolationData& i_isoData,
71 libhei::Signature& o_signature)
Zane Shelley097a71a2020-06-08 15:55:29 -050072{
Zane Shelleycb457382020-11-02 20:55:06 -060073 // We'll need to make a copy of the list so that the original list is
74 // maintained for the log.
75 std::vector<libhei::Signature> sigList{i_isoData.getSignatureList()};
76
Zane Shelley2f263182020-07-10 21:41:21 -050077 // For debug, trace out the original list of signatures before filtering.
Zane Shelleycb457382020-11-02 20:55:06 -060078 for (const auto& sig : sigList)
Zane Shelley2f263182020-07-10 21:41:21 -050079 {
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060080 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -060081 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060082 __attn(sig.getAttnType()));
Zane Shelley2f263182020-07-10 21:41:21 -050083 }
84
Zane Shelley097a71a2020-06-08 15:55:29 -050085 // Special and host attentions are not supported by this user application.
86 auto newEndItr =
Zane Shelleycb457382020-11-02 20:55:06 -060087 std::remove_if(sigList.begin(), sigList.end(), [&](const auto& t) {
Zane Shelley097a71a2020-06-08 15:55:29 -050088 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
89 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
90 });
91
92 // Shrink the vector, if needed.
Zane Shelleycb457382020-11-02 20:55:06 -060093 sigList.resize(std::distance(sigList.begin(), newEndItr));
Zane Shelley097a71a2020-06-08 15:55:29 -050094
95 // START WORKAROUND
96 // TODO: Filtering should be determined by the RAS Data Files provided by
97 // the host firmware via the PNOR (similar to the Chip Data Files).
98 // Until that support is available, use a rudimentary filter that
99 // first looks for any recoverable attention, then any unit checkstop,
100 // and then any system checkstop. This is built on the premise that
101 // recoverable errors could be the root cause of an system checkstop
102 // attentions. Fortunately, we just need to sort the list by the
103 // greater attention type value.
Zane Shelleycb457382020-11-02 20:55:06 -0600104 std::sort(sigList.begin(), sigList.end(),
Zane Shelley097a71a2020-06-08 15:55:29 -0500105 [&](const auto& a, const auto& b) {
106 return a.getAttnType() > b.getAttnType();
107 });
108 // END WORKAROUND
Zane Shelleycb457382020-11-02 20:55:06 -0600109
110 // Check if a root cause attention was found.
111 if (!sigList.empty())
112 {
113 // The entry at the front of the list will be the root cause.
114 o_signature = sigList.front();
115 return true;
116 }
117
118 return false; // default, no active attentions found.
Zane Shelley097a71a2020-06-08 15:55:29 -0500119}
120
121//------------------------------------------------------------------------------
122
Zane Shelleye5411f02021-08-04 22:41:35 -0500123bool analyzeHardware()
Zane Shelley9fb73932020-09-15 13:34:57 -0500124{
125 bool attnFound = false;
126
Zane Shelleye5411f02021-08-04 22:41:35 -0500127 if (!util::pdbg::queryHardwareAnalysisSupported())
128 {
129 trace::err("Hardware error analysis is not supported on this system");
130 return attnFound;
131 }
132
133 trace::inf(">>> enter analyzeHardware()");
134
135 // Initialize the isolator and get all of the chips to be analyzed.
136 trace::inf("Initializing the isolator...");
137 std::vector<libhei::Chip> chips;
138 initializeIsolator(chips);
139
140 // Isolate attentions.
141 trace::inf("Isolating errors: # of chips=%u", chips.size());
142 libhei::IsolationData isoData{};
143 libhei::isolate(chips, isoData);
144
145 // Filter for root cause attention.
Zane Shelleycb457382020-11-02 20:55:06 -0600146 libhei::Signature rootCause{};
Zane Shelleye5411f02021-08-04 22:41:35 -0500147 attnFound = __filterRootCause(isoData, rootCause);
Zane Shelleycb457382020-11-02 20:55:06 -0600148
149 if (!attnFound)
Zane Shelley9fb73932020-09-15 13:34:57 -0500150 {
Zane Shelleye5411f02021-08-04 22:41:35 -0500151 // It is possible for TI handling, or manually initiated analysis via
152 // the command line, that there will not be an active attention. In
153 // which case, we will do nothing and let the caller of this function
154 // determine if this is the expected behavior.
Zane Shelley9fb73932020-09-15 13:34:57 -0500155 trace::inf("No active attentions found");
156 }
157 else
158 {
Zane Shelley9fb73932020-09-15 13:34:57 -0500159 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -0600160 util::pdbg::getPath(rootCause.getChip()),
161 rootCause.toUint32(), __attn(rootCause.getAttnType()));
162
Zane Shelleye5411f02021-08-04 22:41:35 -0500163 // Perform service actions based on the root cause.
Zane Shelleya9b44342021-08-08 17:15:52 -0500164 RasDataParser rasData{};
Zane Shelley8af9e462021-03-11 10:44:28 -0600165 ServiceData servData{rootCause};
Zane Shelleya9b44342021-08-08 17:15:52 -0500166 rasData.getResolution(rootCause)->resolve(servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600167
168 // Create and commit a PEL.
Zane Shelleye5411f02021-08-04 22:41:35 -0500169 createPel(isoData, servData);
Zane Shelley9fb73932020-09-15 13:34:57 -0500170 }
171
Zane Shelleye5411f02021-08-04 22:41:35 -0500172 // All done, clean up the isolator.
173 trace::inf("Uninitializing isolator...");
174 libhei::uninitialize();
Ben Tyner87eabc62020-05-14 17:56:54 -0500175
Zane Shelley2f263182020-07-10 21:41:21 -0500176 trace::inf("<<< exit analyzeHardware()");
177
Zane Shelley097a71a2020-06-08 15:55:29 -0500178 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600179}
180
Ben Tynereea45422021-04-15 10:54:14 -0500181//------------------------------------------------------------------------------
182
183/**
184 * @brief Get error isolator build information
185 *
186 * @return Pointer to build information
187 */
188const char* getBuildInfo()
189{
190 return libhei::getBuildInfo();
191}
192
Ben Tyner0205f3b2020-02-24 10:24:47 -0600193} // namespace analyzer