blob: 3cc1d658a6954dd39951c7853fb0bcbc0e56620f [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05003#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05004
Zane Shelley4ed4be52021-02-15 17:53:40 -06005#include <analyzer/service_data.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06006#include <hei_main.hpp>
Zane Shelley9fb73932020-09-15 13:34:57 -05007#include <phosphor-logging/log.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06008#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05009#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -060010
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050011#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -050012#include <fstream>
13#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050014#include <map>
15#include <string>
16
Ben Tyner0205f3b2020-02-24 10:24:47 -060017namespace analyzer
18{
19
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060020//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050021
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060022// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050023
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060024/**
25 * @brief Will get the list of active chip and initialize the isolator.
26 * @param o_chips The returned list of active chips.
27 */
Zane Shelley171a2e02020-11-13 13:56:13 -060028void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050029
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060030/**
31 * @brief Will create and submit a PEL using the given data.
32 * @param i_rootCause A signature defining the attention root cause.
33 * @param i_isoData The data gathered during isolation (for FFDC).
Zane Shelley4ed4be52021-02-15 17:53:40 -060034 * @param i_servData Data regarding service actions gathered during analysis.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060035 */
36void createPel(const libhei::Signature& i_rootCause,
Zane Shelley4ed4be52021-02-15 17:53:40 -060037 const libhei::IsolationData& i_isoData,
38 const ServiceData& i_servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060039
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050040//------------------------------------------------------------------------------
41
Zane Shelley2f263182020-07-10 21:41:21 -050042const char* __attn(libhei::AttentionType_t i_attnType)
43{
44 const char* str = "";
45 switch (i_attnType)
46 {
47 case libhei::ATTN_TYPE_CHECKSTOP:
48 str = "CHECKSTOP";
49 break;
50 case libhei::ATTN_TYPE_UNIT_CS:
51 str = "UNIT_CS";
52 break;
53 case libhei::ATTN_TYPE_RECOVERABLE:
54 str = "RECOVERABLE";
55 break;
56 case libhei::ATTN_TYPE_SP_ATTN:
57 str = "SP_ATTN";
58 break;
59 case libhei::ATTN_TYPE_HOST_ATTN:
60 str = "HOST_ATTN";
61 break;
62 default:
63 trace::err("Unsupported attention type: %u", i_attnType);
64 assert(0);
65 }
66 return str;
67}
68
Zane Shelley2f263182020-07-10 21:41:21 -050069//------------------------------------------------------------------------------
70
Zane Shelleycb457382020-11-02 20:55:06 -060071bool __filterRootCause(const libhei::IsolationData& i_isoData,
72 libhei::Signature& o_signature)
Zane Shelley097a71a2020-06-08 15:55:29 -050073{
Zane Shelleycb457382020-11-02 20:55:06 -060074 // We'll need to make a copy of the list so that the original list is
75 // maintained for the log.
76 std::vector<libhei::Signature> sigList{i_isoData.getSignatureList()};
77
Zane Shelley2f263182020-07-10 21:41:21 -050078 // For debug, trace out the original list of signatures before filtering.
Zane Shelleycb457382020-11-02 20:55:06 -060079 for (const auto& sig : sigList)
Zane Shelley2f263182020-07-10 21:41:21 -050080 {
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060081 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -060082 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060083 __attn(sig.getAttnType()));
Zane Shelley2f263182020-07-10 21:41:21 -050084 }
85
Zane Shelley097a71a2020-06-08 15:55:29 -050086 // Special and host attentions are not supported by this user application.
87 auto newEndItr =
Zane Shelleycb457382020-11-02 20:55:06 -060088 std::remove_if(sigList.begin(), sigList.end(), [&](const auto& t) {
Zane Shelley097a71a2020-06-08 15:55:29 -050089 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
90 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
91 });
92
93 // Shrink the vector, if needed.
Zane Shelleycb457382020-11-02 20:55:06 -060094 sigList.resize(std::distance(sigList.begin(), newEndItr));
Zane Shelley097a71a2020-06-08 15:55:29 -050095
96 // START WORKAROUND
97 // TODO: Filtering should be determined by the RAS Data Files provided by
98 // the host firmware via the PNOR (similar to the Chip Data Files).
99 // Until that support is available, use a rudimentary filter that
100 // first looks for any recoverable attention, then any unit checkstop,
101 // and then any system checkstop. This is built on the premise that
102 // recoverable errors could be the root cause of an system checkstop
103 // attentions. Fortunately, we just need to sort the list by the
104 // greater attention type value.
Zane Shelleycb457382020-11-02 20:55:06 -0600105 std::sort(sigList.begin(), sigList.end(),
Zane Shelley097a71a2020-06-08 15:55:29 -0500106 [&](const auto& a, const auto& b) {
107 return a.getAttnType() > b.getAttnType();
108 });
109 // END WORKAROUND
Zane Shelleycb457382020-11-02 20:55:06 -0600110
111 // Check if a root cause attention was found.
112 if (!sigList.empty())
113 {
114 // The entry at the front of the list will be the root cause.
115 o_signature = sigList.front();
116 return true;
117 }
118
119 return false; // default, no active attentions found.
Zane Shelley097a71a2020-06-08 15:55:29 -0500120}
121
122//------------------------------------------------------------------------------
123
Zane Shelleycb457382020-11-02 20:55:06 -0600124bool __analyze(const libhei::IsolationData& i_isoData)
Zane Shelley9fb73932020-09-15 13:34:57 -0500125{
126 bool attnFound = false;
127
Zane Shelleycb457382020-11-02 20:55:06 -0600128 libhei::Signature rootCause{};
129 attnFound = __filterRootCause(i_isoData, rootCause);
130
131 if (!attnFound)
Zane Shelley9fb73932020-09-15 13:34:57 -0500132 {
Zane Shelleycb457382020-11-02 20:55:06 -0600133 // NOTE: It is possible for TI handling that there will not be an active
134 // attention. In which case, we will not do anything and let the
135 // caller of this function determine if this is the expected
136 // behavior.
Zane Shelley9fb73932020-09-15 13:34:57 -0500137 trace::inf("No active attentions found");
138 }
139 else
140 {
Zane Shelley9fb73932020-09-15 13:34:57 -0500141 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -0600142 util::pdbg::getPath(rootCause.getChip()),
143 rootCause.toUint32(), __attn(rootCause.getAttnType()));
144
Zane Shelley4ed4be52021-02-15 17:53:40 -0600145 // TODO: Perform service actions based on the root cause. The default
146 // callout if none other exist is level 2 support.
147 ServiceData servData{};
148 servData.addCallout(std::make_shared<ProcedureCallout>(
149 ProcedureCallout::NEXTLVL, Callout::Priority::HIGH));
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600150
151 // Create and commit a PEL.
Zane Shelley4ed4be52021-02-15 17:53:40 -0600152 createPel(rootCause, i_isoData, servData);
Zane Shelley9fb73932020-09-15 13:34:57 -0500153 }
154
Zane Shelley9fb73932020-09-15 13:34:57 -0500155 return attnFound;
156}
157
158//------------------------------------------------------------------------------
159
160bool analyzeHardware()
Ben Tyner87eabc62020-05-14 17:56:54 -0500161{
Zane Shelley097a71a2020-06-08 15:55:29 -0500162 bool attnFound = false;
Ben Tyner87eabc62020-05-14 17:56:54 -0500163
Zane Shelley2f263182020-07-10 21:41:21 -0500164 trace::inf(">>> enter analyzeHardware()");
165
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600166 if (util::pdbg::queryHardwareAnalysisSupported())
167 {
168 // Initialize the isolator and get all of the chips to be analyzed.
169 trace::inf("Initializing the isolator...");
170 std::vector<libhei::Chip> chips;
171 initializeIsolator(chips);
Zane Shelley2e994bc2020-06-08 14:38:14 -0500172
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600173 // Isolate attentions.
174 trace::inf("Isolating errors: # of chips=%u", chips.size());
175 libhei::IsolationData isoData{};
176 libhei::isolate(chips, isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500177
Zane Shelleycb457382020-11-02 20:55:06 -0600178 // Analyze the isolation data and perform service actions if needed.
179 attnFound = __analyze(isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500180
Zane Shelley7ae9c8c2020-12-02 20:10:31 -0600181 // All done, clean up the isolator.
182 trace::inf("Uninitializing isolator...");
183 libhei::uninitialize();
184 }
185 else
186 {
187 trace::err("Hardware error analysis is not supported on this system");
188 }
Ben Tyner87eabc62020-05-14 17:56:54 -0500189
Zane Shelley2f263182020-07-10 21:41:21 -0500190 trace::inf("<<< exit analyzeHardware()");
191
Zane Shelley097a71a2020-06-08 15:55:29 -0500192 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600193}
194
195} // namespace analyzer