blob: a41425e79b12495d0802247b48dac72c767d5725 [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05003#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05004
Ben Tyner0205f3b2020-02-24 10:24:47 -06005#include <hei_main.hpp>
Zane Shelley9fb73932020-09-15 13:34:57 -05006#include <phosphor-logging/log.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06007#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05008#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06009
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050010#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -050011#include <fstream>
12#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050013#include <map>
14#include <string>
15
Ben Tyner0205f3b2020-02-24 10:24:47 -060016namespace analyzer
17{
18
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060019//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050020
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060021// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050022
/**
 * @brief Gets the list of active chips and initializes the isolator.
 * @param o_chips The returned list of active chips.
 */
Zane Shelley171a2e02020-11-13 13:56:13 -060027void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050028
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060029/**
30 * @brief Will create and submit a PEL using the given data.
31 * @param i_rootCause A signature defining the attention root cause.
32 * @param i_isoData The data gathered during isolation (for FFDC).
33 */
34void createPel(const libhei::Signature& i_rootCause,
35 const libhei::IsolationData& i_isoData);
36
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050037//------------------------------------------------------------------------------
38
Zane Shelley2f263182020-07-10 21:41:21 -050039const char* __attn(libhei::AttentionType_t i_attnType)
40{
41 const char* str = "";
42 switch (i_attnType)
43 {
44 case libhei::ATTN_TYPE_CHECKSTOP:
45 str = "CHECKSTOP";
46 break;
47 case libhei::ATTN_TYPE_UNIT_CS:
48 str = "UNIT_CS";
49 break;
50 case libhei::ATTN_TYPE_RECOVERABLE:
51 str = "RECOVERABLE";
52 break;
53 case libhei::ATTN_TYPE_SP_ATTN:
54 str = "SP_ATTN";
55 break;
56 case libhei::ATTN_TYPE_HOST_ATTN:
57 str = "HOST_ATTN";
58 break;
59 default:
60 trace::err("Unsupported attention type: %u", i_attnType);
61 assert(0);
62 }
63 return str;
64}
65
66uint32_t __trgt(const libhei::Signature& i_sig)
67{
Zane Shelleya0299852020-11-13 13:38:04 -060068 uint8_t type = util::pdbg::getTrgtType(i_sig.getChip());
69 uint32_t pos = util::pdbg::getChipPos(i_sig.getChip());
Zane Shelley2f263182020-07-10 21:41:21 -050070
71 // Technically, the FapiPos attribute is 32-bit, but not likely to ever go
72 // over 24-bit.
73
74 return type << 24 | (pos & 0xffffff);
75}
76
77uint32_t __sig(const libhei::Signature& i_sig)
78{
79 return i_sig.getId() << 16 | i_sig.getInstance() << 8 | i_sig.getBit();
80}
81
82//------------------------------------------------------------------------------
83
Zane Shelley097a71a2020-06-08 15:55:29 -050084// Takes a signature list that will be filtered and sorted. The first entry in
85// the returned list will be the root cause. If the returned list is empty,
86// analysis failed.
87void __filterRootCause(std::vector<libhei::Signature>& io_list)
88{
Zane Shelley2f263182020-07-10 21:41:21 -050089 // For debug, trace out the original list of signatures before filtering.
90 for (const auto& sig : io_list)
91 {
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060092 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
93 util::pdbg::getPath(sig.getChip()), __sig(sig),
94 __attn(sig.getAttnType()));
Zane Shelley2f263182020-07-10 21:41:21 -050095 }
96
Zane Shelley097a71a2020-06-08 15:55:29 -050097 // Special and host attentions are not supported by this user application.
98 auto newEndItr =
99 std::remove_if(io_list.begin(), io_list.end(), [&](const auto& t) {
100 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
101 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
102 });
103
104 // Shrink the vector, if needed.
105 io_list.resize(std::distance(io_list.begin(), newEndItr));
106
107 // START WORKAROUND
108 // TODO: Filtering should be determined by the RAS Data Files provided by
109 // the host firmware via the PNOR (similar to the Chip Data Files).
110 // Until that support is available, use a rudimentary filter that
111 // first looks for any recoverable attention, then any unit checkstop,
112 // and then any system checkstop. This is built on the premise that
113 // recoverable errors could be the root cause of an system checkstop
114 // attentions. Fortunately, we just need to sort the list by the
115 // greater attention type value.
116 std::sort(io_list.begin(), io_list.end(),
117 [&](const auto& a, const auto& b) {
118 return a.getAttnType() > b.getAttnType();
119 });
120 // END WORKAROUND
121}
122
123//------------------------------------------------------------------------------
124
Zane Shelley9fb73932020-09-15 13:34:57 -0500125bool __logError(const std::vector<libhei::Signature>& i_sigList,
126 const libhei::IsolationData& i_isoData)
127{
128 bool attnFound = false;
129
Zane Shelley9fb73932020-09-15 13:34:57 -0500130 if (i_sigList.empty())
131 {
132 trace::inf("No active attentions found");
133 }
134 else
135 {
136 attnFound = true;
137
138 // The root cause attention is the first in the filtered list.
139 libhei::Signature root = i_sigList.front();
140
Zane Shelley9fb73932020-09-15 13:34:57 -0500141 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600142 util::pdbg::getPath(root.getChip()), root.toUint32(),
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -0600143 __attn(root.getAttnType()));
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600144
145 // Create and commit a PEL.
146 createPel(root, i_isoData);
Zane Shelley9fb73932020-09-15 13:34:57 -0500147 }
148
Zane Shelley9fb73932020-09-15 13:34:57 -0500149 return attnFound;
150}
151
152//------------------------------------------------------------------------------
153
154bool analyzeHardware()
Ben Tyner87eabc62020-05-14 17:56:54 -0500155{
Zane Shelley097a71a2020-06-08 15:55:29 -0500156 bool attnFound = false;
Ben Tyner87eabc62020-05-14 17:56:54 -0500157
Zane Shelley2f263182020-07-10 21:41:21 -0500158 trace::inf(">>> enter analyzeHardware()");
159
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600160 if (util::pdbg::queryHardwareAnalysisSupported())
161 {
162 // Initialize the isolator and get all of the chips to be analyzed.
163 trace::inf("Initializing the isolator...");
164 std::vector<libhei::Chip> chips;
165 initializeIsolator(chips);
Zane Shelley2e994bc2020-06-08 14:38:14 -0500166
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600167 // Isolate attentions.
168 trace::inf("Isolating errors: # of chips=%u", chips.size());
169 libhei::IsolationData isoData{};
170 libhei::isolate(chips, isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500171
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600172 // Filter signatures to determine root cause. We'll need to make a copy
173 // of the list so that the original list is maintained for the log.
174 std::vector<libhei::Signature> sigList{isoData.getSignatureList()};
175 __filterRootCause(sigList);
Zane Shelley097a71a2020-06-08 15:55:29 -0500176
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600177 // Create and commit a log.
178 attnFound = __logError(sigList, isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500179
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600180 // All done, clean up the isolator.
181 trace::inf("Uninitializing isolator...");
182 libhei::uninitialize();
183 }
184 else
185 {
186 trace::err("Hardware error analysis is not supported on this system");
187 }
Ben Tyner87eabc62020-05-14 17:56:54 -0500188
Zane Shelley2f263182020-07-10 21:41:21 -0500189 trace::inf("<<< exit analyzeHardware()");
190
Zane Shelley097a71a2020-06-08 15:55:29 -0500191 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600192}
193
194} // namespace analyzer