blob: 05d9f4212288b95f4bce597769a3ef2b269ad1ef [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05003#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05004
Zane Shelleya9b44342021-08-08 17:15:52 -05005#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley4ed4be52021-02-15 17:53:40 -06006#include <analyzer/service_data.hpp>
Ben Tyner7029e522021-08-09 19:18:24 -05007#include <attn/attn_dump.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06008#include <hei_main.hpp>
Zane Shelley9fb73932020-09-15 13:34:57 -05009#include <phosphor-logging/log.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060010#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050011#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -060012
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050013#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -050014#include <fstream>
15#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050016#include <map>
17#include <string>
18
Ben Tyner0205f3b2020-02-24 10:24:47 -060019namespace analyzer
20{
21
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060022//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050023
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060024// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050025
/**
 * @brief Gets the list of active chips and initializes the isolator.
 * @param o_chips The returned list of active chips.
 */
Zane Shelley171a2e02020-11-13 13:56:13 -060030void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050031
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060032/**
33 * @brief Will create and submit a PEL using the given data.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060034 * @param i_isoData The data gathered during isolation (for FFDC).
Zane Shelley4ed4be52021-02-15 17:53:40 -060035 * @param i_servData Data regarding service actions gathered during analysis.
Ben Tyner7029e522021-08-09 19:18:24 -050036 * @return Tuple of BMC log id, platform log id
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060037 */
Ben Tyner7029e522021-08-09 19:18:24 -050038std::tuple<uint32_t, uint32_t> createPel(const libhei::IsolationData& i_isoData,
39 const ServiceData& i_servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060040
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050041//------------------------------------------------------------------------------
42
Zane Shelley2f263182020-07-10 21:41:21 -050043const char* __attn(libhei::AttentionType_t i_attnType)
44{
45 const char* str = "";
46 switch (i_attnType)
47 {
48 case libhei::ATTN_TYPE_CHECKSTOP:
49 str = "CHECKSTOP";
50 break;
51 case libhei::ATTN_TYPE_UNIT_CS:
52 str = "UNIT_CS";
53 break;
54 case libhei::ATTN_TYPE_RECOVERABLE:
55 str = "RECOVERABLE";
56 break;
57 case libhei::ATTN_TYPE_SP_ATTN:
58 str = "SP_ATTN";
59 break;
60 case libhei::ATTN_TYPE_HOST_ATTN:
61 str = "HOST_ATTN";
62 break;
63 default:
64 trace::err("Unsupported attention type: %u", i_attnType);
65 assert(0);
66 }
67 return str;
68}
69
Zane Shelley2f263182020-07-10 21:41:21 -050070//------------------------------------------------------------------------------
71
Zane Shelleycb457382020-11-02 20:55:06 -060072bool __filterRootCause(const libhei::IsolationData& i_isoData,
73 libhei::Signature& o_signature)
Zane Shelley097a71a2020-06-08 15:55:29 -050074{
Zane Shelleycb457382020-11-02 20:55:06 -060075 // We'll need to make a copy of the list so that the original list is
76 // maintained for the log.
77 std::vector<libhei::Signature> sigList{i_isoData.getSignatureList()};
78
Zane Shelley2f263182020-07-10 21:41:21 -050079 // For debug, trace out the original list of signatures before filtering.
Zane Shelleycb457382020-11-02 20:55:06 -060080 for (const auto& sig : sigList)
Zane Shelley2f263182020-07-10 21:41:21 -050081 {
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060082 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -060083 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060084 __attn(sig.getAttnType()));
Zane Shelley2f263182020-07-10 21:41:21 -050085 }
86
Zane Shelley097a71a2020-06-08 15:55:29 -050087 // Special and host attentions are not supported by this user application.
88 auto newEndItr =
Zane Shelleycb457382020-11-02 20:55:06 -060089 std::remove_if(sigList.begin(), sigList.end(), [&](const auto& t) {
Zane Shelley097a71a2020-06-08 15:55:29 -050090 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
91 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
92 });
93
94 // Shrink the vector, if needed.
Zane Shelleycb457382020-11-02 20:55:06 -060095 sigList.resize(std::distance(sigList.begin(), newEndItr));
Zane Shelley097a71a2020-06-08 15:55:29 -050096
97 // START WORKAROUND
98 // TODO: Filtering should be determined by the RAS Data Files provided by
99 // the host firmware via the PNOR (similar to the Chip Data Files).
100 // Until that support is available, use a rudimentary filter that
101 // first looks for any recoverable attention, then any unit checkstop,
102 // and then any system checkstop. This is built on the premise that
103 // recoverable errors could be the root cause of an system checkstop
104 // attentions. Fortunately, we just need to sort the list by the
105 // greater attention type value.
Zane Shelleycb457382020-11-02 20:55:06 -0600106 std::sort(sigList.begin(), sigList.end(),
Zane Shelley097a71a2020-06-08 15:55:29 -0500107 [&](const auto& a, const auto& b) {
108 return a.getAttnType() > b.getAttnType();
109 });
110 // END WORKAROUND
Zane Shelleycb457382020-11-02 20:55:06 -0600111
112 // Check if a root cause attention was found.
113 if (!sigList.empty())
114 {
115 // The entry at the front of the list will be the root cause.
116 o_signature = sigList.front();
117 return true;
118 }
119
120 return false; // default, no active attentions found.
Zane Shelley097a71a2020-06-08 15:55:29 -0500121}
122
123//------------------------------------------------------------------------------
124
Ben Tyner7029e522021-08-09 19:18:24 -0500125bool analyzeHardware(attn::DumpParameters& o_dumpParameters)
Zane Shelley9fb73932020-09-15 13:34:57 -0500126{
127 bool attnFound = false;
128
Zane Shelleye5411f02021-08-04 22:41:35 -0500129 if (!util::pdbg::queryHardwareAnalysisSupported())
130 {
131 trace::err("Hardware error analysis is not supported on this system");
132 return attnFound;
133 }
134
135 trace::inf(">>> enter analyzeHardware()");
136
137 // Initialize the isolator and get all of the chips to be analyzed.
138 trace::inf("Initializing the isolator...");
139 std::vector<libhei::Chip> chips;
140 initializeIsolator(chips);
141
142 // Isolate attentions.
143 trace::inf("Isolating errors: # of chips=%u", chips.size());
144 libhei::IsolationData isoData{};
145 libhei::isolate(chips, isoData);
146
147 // Filter for root cause attention.
Zane Shelleycb457382020-11-02 20:55:06 -0600148 libhei::Signature rootCause{};
Zane Shelleye5411f02021-08-04 22:41:35 -0500149 attnFound = __filterRootCause(isoData, rootCause);
Zane Shelleycb457382020-11-02 20:55:06 -0600150
151 if (!attnFound)
Zane Shelley9fb73932020-09-15 13:34:57 -0500152 {
Zane Shelleye5411f02021-08-04 22:41:35 -0500153 // It is possible for TI handling, or manually initiated analysis via
154 // the command line, that there will not be an active attention. In
155 // which case, we will do nothing and let the caller of this function
156 // determine if this is the expected behavior.
Zane Shelley9fb73932020-09-15 13:34:57 -0500157 trace::inf("No active attentions found");
158 }
159 else
160 {
Zane Shelley9fb73932020-09-15 13:34:57 -0500161 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -0600162 util::pdbg::getPath(rootCause.getChip()),
163 rootCause.toUint32(), __attn(rootCause.getAttnType()));
164
Zane Shelley95135822021-08-23 09:00:05 -0500165 // Resolve any service actions required by the root cause.
Zane Shelleya9b44342021-08-08 17:15:52 -0500166 RasDataParser rasData{};
Zane Shelleyca496192021-08-09 12:05:52 -0500167 ServiceData servData{rootCause, isoData.queryCheckstop()};
Zane Shelleya9b44342021-08-08 17:15:52 -0500168 rasData.getResolution(rootCause)->resolve(servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600169
170 // Create and commit a PEL.
Ben Tyner7029e522021-08-09 19:18:24 -0500171 uint32_t logId = std::get<1>(createPel(isoData, servData));
172
Zane Shelley95135822021-08-23 09:00:05 -0500173 // Write guard records to persistent storage.
174 // TODO: The PEL ID will be required, but interface is still unknown.
175 for (const auto& guard : servData.getGuardList())
176 {
177 guard.apply();
178 }
179
180 // Gather/return information needed for dump.
181 // TODO: Need ID from root cause. At the moment, HUID does not exist in
182 // devtree. Will need a better ID definition.
183 // TODO: HW dump is default, but some attentions may require something
184 // different. Will need to investigate adding that information to
185 // the RAS data files.
Ben Tyner7029e522021-08-09 19:18:24 -0500186 o_dumpParameters.logId = logId;
187 o_dumpParameters.unitId = 0;
188 o_dumpParameters.dumpType = attn::DumpType::Hardware;
Zane Shelley9fb73932020-09-15 13:34:57 -0500189 }
190
Zane Shelleye5411f02021-08-04 22:41:35 -0500191 // All done, clean up the isolator.
192 trace::inf("Uninitializing isolator...");
193 libhei::uninitialize();
Ben Tyner87eabc62020-05-14 17:56:54 -0500194
Zane Shelley2f263182020-07-10 21:41:21 -0500195 trace::inf("<<< exit analyzeHardware()");
196
Zane Shelley097a71a2020-06-08 15:55:29 -0500197 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600198}
199
Ben Tynereea45422021-04-15 10:54:14 -0500200//------------------------------------------------------------------------------
201
202/**
203 * @brief Get error isolator build information
204 *
205 * @return Pointer to build information
206 */
207const char* getBuildInfo()
208{
209 return libhei::getBuildInfo();
210}
211
Ben Tyner0205f3b2020-02-24 10:24:47 -0600212} // namespace analyzer