blob: accf275c5c7feb5647e99b8379568ccc8a6d1518 [file] [log] [blame]
Zane Shelley65fefb22021-10-18 15:35:26 -05001#include <assert.h>
2
Caleb Palmer1a4f0e72022-11-07 15:08:01 -06003#include <analyzer/analyzer_main.hpp>
4#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley65fefb22021-10-18 15:35:26 -05005#include <hei_main.hpp>
Zane Shelley19df3702021-12-16 22:32:54 -06006#include <hei_util.hpp>
Zane Shelleyf4792d62021-10-28 18:08:22 -05007#include <util/pdbg.hpp>
Zane Shelley65fefb22021-10-18 15:35:26 -05008
9#include <algorithm>
10#include <limits>
11#include <string>
12
13namespace analyzer
14{
Zane Shelley65fefb22021-10-18 15:35:26 -050015//------------------------------------------------------------------------------
16
Zane Shelleya7369f82021-10-18 16:52:21 -050017bool __findRcsOscError(const std::vector<libhei::Signature>& i_list,
18 libhei::Signature& o_rootCause)
19{
20 // TODO: Consider returning all of them instead of one as root cause.
21 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
Zane Shelley19df3702021-12-16 22:32:54 -060022 return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() &&
Zane Shelleya7369f82021-10-18 16:52:21 -050023 (42 == t.getBit() || 43 == t.getBit()));
24 });
25
26 if (i_list.end() != itr)
27 {
28 o_rootCause = *itr;
29 return true;
30 }
31
32 return false;
33}
34
35//------------------------------------------------------------------------------
36
37bool __findPllUnlock(const std::vector<libhei::Signature>& i_list,
38 libhei::Signature& o_rootCause)
39{
40 // TODO: Consider returning all of them instead of one as root cause.
41 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
Zane Shelley19df3702021-12-16 22:32:54 -060042 return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() &&
Zane Shelleya7369f82021-10-18 16:52:21 -050043 (0 == t.getBit() || 1 == t.getBit()));
44 });
45
46 if (i_list.end() != itr)
47 {
48 o_rootCause = *itr;
49 return true;
50 }
51
52 return false;
53}
54
55//------------------------------------------------------------------------------
56
Caleb Palmer329dbbd2022-10-03 15:05:43 -050057bool __findIueTh(const std::vector<libhei::Signature>& i_list,
58 libhei::Signature& o_rootCause)
59{
60 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
61 return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() &&
62 (17 == t.getBit() || 37 == t.getBit()));
63 });
64
65 if (i_list.end() != itr)
66 {
67 o_rootCause = *itr;
68 return true;
69 }
70
71 return false;
72}
73
74//------------------------------------------------------------------------------
75
Zane Shelleyf4792d62021-10-28 18:08:22 -050076bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060077 libhei::Signature& o_rootCause,
78 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -050079{
80 using namespace util::pdbg;
81
Zane Shelley19df3702021-12-16 22:32:54 -060082 using func = libhei::NodeId_t (*)(const std::string& i_str);
83 func __hash = libhei::hash<libhei::NodeId_t>;
84
85 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR");
86 static const auto mc_ustl_fir = __hash("MC_USTL_FIR");
87 static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT");
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060088 static const auto srqfir = __hash("SRQFIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -050089
90 for (const auto s : i_list)
91 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060092 // Version 1 of the RAS data files
93 if (1 == i_rasData.getVersion(s))
Zane Shelleyf4792d62021-10-28 18:08:22 -050094 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060095 const auto targetType = getTrgtType(getTrgt(s.getChip()));
96 const auto id = s.getId();
97 const auto bit = s.getBit();
98 const auto attnType = s.getAttnType();
Caleb Palmer329dbbd2022-10-03 15:05:43 -050099
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600100 // Look for any unit checkstop attentions from OCMBs.
101 if (TYPE_OCMB == targetType)
102 {
103 // Any unit checkstop attentions will trigger a channel failure.
104 if (libhei::ATTN_TYPE_UNIT_CS == attnType)
Caleb Palmer329dbbd2022-10-03 15:05:43 -0500105 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600106 // If the channel was specifically a firmware initiated
107 // channel fail (SRQFIR[25]) check for any IUE bits that are
108 // on that would have caused that (RDFFIR[17,37]).
109 if ((srqfir == id && 25 == bit) &&
110 __findIueTh(i_list, o_rootCause))
111 {
112 return true;
113 }
114
115 o_rootCause = s;
Caleb Palmer329dbbd2022-10-03 15:05:43 -0500116 return true;
117 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500118 }
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600119 // Look for channel failure attentions on processors.
120 else if (TYPE_PROC == targetType)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500121 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600122 // TODO: All of these channel failure bits are configurable.
123 // Eventually, we will need some mechanism to check that
124 // config registers for a more accurate analysis. For now,
125 // simply check for all bits that could potentially be
126 // configured to channel failure.
127
128 // Any unit checkstop bit in the MC_DSTL_FIR or MC_USTL_FIR
129 // could be a channel failure.
130 if (libhei::ATTN_TYPE_UNIT_CS == attnType)
131 {
132 // Ignore bits MC_DSTL_FIR[0:7] because they simply indicate
133 // attentions occurred on the attached OCMBs.
134 if ((mc_dstl_fir == id && 8 <= bit) || (mc_ustl_fir == id))
135 {
136 o_rootCause = s;
137 return true;
138 }
139 }
140
141 // All bits in MC_OMI_DL_ERR_RPT eventually feed into
142 // MC_OMI_DL_FIR[0,20] which are configurable to channel
143 // failure.
144 if (mc_omi_dl_err_rpt == id)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500145 {
146 o_rootCause = s;
147 return true;
148 }
149 }
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600150 }
151 // Version 2 and above of the RAS data files
152 else if (2 <= i_rasData.getVersion(s))
153 {
154 if (libhei::ATTN_TYPE_UNIT_CS == s.getAttnType() &&
155 i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::SUE_SOURCE))
156 {
157 // Special Cases:
158 // If the channel fail was specifically a firmware initiated
159 // channel fail (SRQFIR[25]) check for any IUE bits that are on
160 // that would have caused that (RDFFIR[17,37]).
161 if ((srqfir == s.getId() && 25 == s.getBit()) &&
162 __findIueTh(i_list, o_rootCause))
163 {
164 return true;
165 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500166
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600167 // TODO: The proc side channel failure bits are configurable.
168 // Eventually, we will need some mechanism to check the
169 // config registers for a more accurate analysis. For now,
170 // simply check for all bits that could potentially be
171 // configured to channel failure.
172
173 o_rootCause = s;
174 }
175 // The bits in the MC_OMI_DL_ERR_RPT register are a special case.
176 // They are possible channel fail bits but the MC_OMI_DL_FIR they
177 // feed into can't be set up to report UNIT_CS attentions, so they
178 // report as recoverable instead.
179 else if (mc_omi_dl_err_rpt == s.getId())
Zane Shelleyf4792d62021-10-28 18:08:22 -0500180 {
181 o_rootCause = s;
182 return true;
183 }
184 }
185 }
186
187 return false; // default, nothing found
188}
189
190//------------------------------------------------------------------------------
191
192// Will query if a signature is a potential system checkstop root cause.
193// attention. Note that this function excludes memory channel failure attentions
Zane Shelleyed3ab8f2022-05-24 21:08:21 -0500194// which are checked in __findMemoryChannelFailure().
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600195bool __findCsRootCause(const libhei::Signature& i_signature,
196 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500197{
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600198 // Version 1 of the RAS data files.
199 if (1 == i_rasData.getVersion(i_signature))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500200 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600201 using namespace util::pdbg;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500202
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600203 using func = libhei::NodeId_t (*)(const std::string& i_str);
204 func __hash = libhei::hash<libhei::NodeId_t>;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500205
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600206 // PROC registers
207 static const auto eq_core_fir = __hash("EQ_CORE_FIR");
208 static const auto eq_l2_fir = __hash("EQ_L2_FIR");
209 static const auto eq_l3_fir = __hash("EQ_L3_FIR");
210 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR");
211 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC");
212 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP");
213 static const auto nx_cq_fir = __hash("NX_CQ_FIR");
214 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR");
215 static const auto pau_fir_0 = __hash("PAU_FIR_0");
216 static const auto pau_fir_1 = __hash("PAU_FIR_1");
217 static const auto pau_fir_2 = __hash("PAU_FIR_2");
218 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500219
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600220 // OCMB registers
221 static const auto rdffir = __hash("RDFFIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500222
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600223 const auto targetType = getTrgtType(getTrgt(i_signature.getChip()));
224 const auto id = i_signature.getId();
225 const auto bit = i_signature.getBit();
Zane Shelleyf4792d62021-10-28 18:08:22 -0500226
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600227 if (TYPE_PROC == targetType)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500228 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600229 if (eq_core_fir == id &&
230 (0 == bit || 2 == bit || 3 == bit || 4 == bit || 5 == bit ||
231 7 == bit || 8 == bit || 9 == bit || 11 == bit || 12 == bit ||
232 13 == bit || 18 == bit || 21 == bit || 22 == bit ||
233 24 == bit || 25 == bit || 29 == bit || 31 == bit ||
234 32 == bit || 36 == bit || 37 == bit || 38 == bit ||
235 43 == bit || 46 == bit || 47 == bit))
236 {
237 return true;
238 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500239
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600240 if (eq_l2_fir == id &&
241 (1 == bit || 12 == bit || 13 == bit || 17 == bit || 18 == bit ||
242 20 == bit || 27 == bit))
243 {
244 return true;
245 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500246
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600247 if (eq_l3_fir == id &&
248 (2 == bit || 5 == bit || 8 == bit || 11 == bit || 17 == bit))
249 {
250 return true;
251 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500252
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600253 if (eq_ncu_fir == id &&
254 (3 == bit || 4 == bit || 5 == bit || 7 == bit || 8 == bit ||
255 10 == bit || 17 == bit))
256 {
257 return true;
258 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500259
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600260 if (iohs_dlp_fir_oc == id && (54 <= bit && bit <= 61))
261 {
262 return true;
263 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500264
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600265 if (iohs_dlp_fir_smp == id && (54 <= bit && bit <= 61))
266 {
267 return true;
268 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500269
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600270 if (nx_cq_fir == id && (7 == bit || 16 == bit || 21 == bit))
271 {
272 return true;
273 }
274
275 if (nx_dma_eng_fir == id && (0 == bit))
276 {
277 return true;
278 }
279
280 if (pau_fir_0 == id &&
281 (15 == bit || 18 == bit || 19 == bit || 25 == bit ||
282 26 == bit || 29 == bit || 33 == bit || 34 == bit ||
283 35 == bit || 40 == bit || 42 == bit || 44 == bit || 45 == bit))
284 {
285 return true;
286 }
287
288 if (pau_fir_1 == id &&
289 (13 == bit || 14 == bit || 15 == bit || 37 == bit ||
290 39 == bit || 40 == bit || 41 == bit || 42 == bit))
291 {
292 return true;
293 }
294
295 if (pau_fir_2 == id &&
296 ((4 <= bit && bit <= 18) || (20 <= bit && bit <= 31) ||
297 (36 <= bit && bit <= 41) || 45 == bit || 47 == bit ||
298 48 == bit || 50 == bit || 51 == bit || 52 == bit))
299 {
300 return true;
301 }
302
303 if (pau_ptl_fir == id && (4 == bit || 8 == bit))
304 {
305 return true;
306 }
307 }
308 else if (TYPE_OCMB == targetType)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500309 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600310 if (rdffir == id &&
311 (14 == bit || 15 == bit || 17 == bit || 37 == bit))
312 {
313 return true;
314 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500315 }
316 }
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600317 // Version 2 of the RAS data files. Check if the input signature has the
318 // CS_POSSIBLE or SUE_SOURCE flag set.
319 else if (i_rasData.isFlagSet(i_signature,
320 RasDataParser::RasDataFlags::CS_POSSIBLE) ||
321 i_rasData.isFlagSet(i_signature,
322 RasDataParser::RasDataFlags::SUE_SOURCE))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500323 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600324 return true;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500325 }
326
327 return false; // default, nothing found
328}
329
330//------------------------------------------------------------------------------
331
332bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600333 libhei::Signature& o_rootCause,
334 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500335{
336 for (const auto s : i_list)
337 {
338 // Only looking for recoverable attentions.
339 if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType())
340 {
341 continue;
342 }
343
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600344 if (__findCsRootCause(s, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500345 {
346 o_rootCause = s;
347 return true;
348 }
349 }
350
351 return false; // default, nothing found
352}
353
354//------------------------------------------------------------------------------
355
356bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600357 libhei::Signature& o_rootCause,
358 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500359{
360 for (const auto s : i_list)
361 {
362 // Only looking for unit checkstop attentions.
363 if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType())
364 {
365 continue;
366 }
367
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600368 if (__findCsRootCause(s, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500369 {
370 o_rootCause = s;
371 return true;
372 }
373 }
374
375 return false; // default, nothing found
376}
377
378//------------------------------------------------------------------------------
379
Caleb Palmer51f82022023-02-22 16:09:09 -0600380bool __findOcmbAttnBits(const std::vector<libhei::Signature>& i_list,
381 libhei::Signature& o_rootCause,
382 const RasDataParser& i_rasData)
383{
384 using namespace util::pdbg;
385
386 // If we have any attentions from an OCMB, assume isolation to the OCMBs
387 // was successful and the ATTN_FROM_OCMB flag does not need to be checked.
388 for (const auto s : i_list)
389 {
390 if (TYPE_OCMB == getTrgtType(getTrgt(s.getChip())))
391 {
392 return false;
393 }
394 }
395
396 for (const auto s : i_list)
397 {
398 if (1 < i_rasData.getVersion(s) &&
399 i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::ATTN_FROM_OCMB))
400 {
401 o_rootCause = s;
402 return true;
403 }
404 }
405
406 return false; // default, nothing found
407}
408
409//------------------------------------------------------------------------------
410
Zane Shelleyf4792d62021-10-28 18:08:22 -0500411bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list,
412 libhei::Signature& o_rootCause)
413{
414 using namespace util::pdbg;
415
Zane Shelley19df3702021-12-16 22:32:54 -0600416 static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500417
418 for (const auto s : i_list)
419 {
420 const auto targetType = getTrgtType(getTrgt(s.getChip()));
421 const auto id = s.getId();
422 const auto attnType = s.getAttnType();
423
424 // Find any processor with system checkstop attention that did not
425 // originate from the PB_EXT_FIR.
426 if ((TYPE_PROC == targetType) &&
427 (libhei::ATTN_TYPE_CHECKSTOP == attnType) && (pb_ext_fir != id))
428 {
429 o_rootCause = s;
430 return true;
431 }
432 }
433
434 return false; // default, nothing found
435}
436
437//------------------------------------------------------------------------------
438
Zane Shelleybaec7c02022-03-17 11:05:20 -0500439bool __findTiRootCause(const std::vector<libhei::Signature>& i_list,
440 libhei::Signature& o_rootCause)
441{
442 using namespace util::pdbg;
443
444 using func = libhei::NodeId_t (*)(const std::string& i_str);
445 func __hash = libhei::hash<libhei::NodeId_t>;
446
447 // PROC registers
448 static const auto tp_local_fir = __hash("TP_LOCAL_FIR");
449 static const auto occ_fir = __hash("OCC_FIR");
450 static const auto pbao_fir = __hash("PBAO_FIR");
451 static const auto n0_local_fir = __hash("N0_LOCAL_FIR");
452 static const auto int_cq_fir = __hash("INT_CQ_FIR");
453 static const auto nx_cq_fir = __hash("NX_CQ_FIR");
454 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR");
455 static const auto vas_fir = __hash("VAS_FIR");
456 static const auto n1_local_fir = __hash("N1_LOCAL_FIR");
457 static const auto mcd_fir = __hash("MCD_FIR");
458 static const auto pb_station_fir_en_1 = __hash("PB_STATION_FIR_EN_1");
459 static const auto pb_station_fir_en_2 = __hash("PB_STATION_FIR_EN_2");
460 static const auto pb_station_fir_en_3 = __hash("PB_STATION_FIR_EN_3");
461 static const auto pb_station_fir_en_4 = __hash("PB_STATION_FIR_EN_4");
462 static const auto pb_station_fir_es_1 = __hash("PB_STATION_FIR_ES_1");
463 static const auto pb_station_fir_es_2 = __hash("PB_STATION_FIR_ES_2");
464 static const auto pb_station_fir_es_3 = __hash("PB_STATION_FIR_ES_3");
465 static const auto pb_station_fir_es_4 = __hash("PB_STATION_FIR_ES_4");
466 static const auto pb_station_fir_eq = __hash("PB_STATION_FIR_EQ");
467 static const auto psihb_fir = __hash("PSIHB_FIR");
468 static const auto pbaf_fir = __hash("PBAF_FIR");
469 static const auto lpc_fir = __hash("LPC_FIR");
470 static const auto eq_core_fir = __hash("EQ_CORE_FIR");
471 static const auto eq_l2_fir = __hash("EQ_L2_FIR");
472 static const auto eq_l3_fir = __hash("EQ_L3_FIR");
473 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR");
474 static const auto eq_local_fir = __hash("EQ_LOCAL_FIR");
475 static const auto eq_qme_fir = __hash("EQ_QME_FIR");
476 static const auto iohs_local_fir = __hash("IOHS_LOCAL_FIR");
477 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC");
478 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP");
479 static const auto mc_local_fir = __hash("MC_LOCAL_FIR");
480 static const auto mc_fir = __hash("MC_FIR");
481 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR");
482 static const auto mc_ustl_fir = __hash("MC_USTL_FIR");
483 static const auto nmmu_cq_fir = __hash("NMMU_CQ_FIR");
484 static const auto nmmu_fir = __hash("NMMU_FIR");
485 static const auto mc_omi_dl = __hash("MC_OMI_DL");
486 static const auto pau_local_fir = __hash("PAU_LOCAL_FIR");
487 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR");
488 static const auto pau_phy_fir = __hash("PAU_PHY_FIR");
489 static const auto pau_fir_0 = __hash("PAU_FIR_0");
490 static const auto pau_fir_2 = __hash("PAU_FIR_2");
491 static const auto pci_local_fir = __hash("PCI_LOCAL_FIR");
492 static const auto pci_iop_fir = __hash("PCI_IOP_FIR");
493 static const auto pci_nest_fir = __hash("PCI_NEST_FIR");
494
495 // OCMB registers
496 static const auto ocmb_lfir = __hash("OCMB_LFIR");
497 static const auto mmiofir = __hash("MMIOFIR");
498 static const auto srqfir = __hash("SRQFIR");
499 static const auto rdffir = __hash("RDFFIR");
500 static const auto tlxfir = __hash("TLXFIR");
501 static const auto omi_dl = __hash("OMI_DL");
502
503 for (const auto& signature : i_list)
504 {
505 const auto targetType = getTrgtType(getTrgt(signature.getChip()));
506 const auto attnType = signature.getAttnType();
507 const auto id = signature.getId();
508 const auto bit = signature.getBit();
509
510 // Only looking for recoverable or unit checkstop attentions.
511 if (libhei::ATTN_TYPE_RECOVERABLE != attnType &&
512 libhei::ATTN_TYPE_UNIT_CS != attnType)
513 {
514 continue;
515 }
516
517 // Ignore attentions that should not be blamed as root cause of a TI.
518 // This would include informational only FIRs or correctable errors.
519 if (TYPE_PROC == targetType)
520 {
521 if (tp_local_fir == id &&
522 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
523 5 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
524 11 == bit || 20 == bit || 22 == bit || 23 == bit ||
525 24 == bit || 38 == bit || 40 == bit || 41 == bit ||
526 46 == bit || 47 == bit || 48 == bit || 55 == bit ||
527 56 == bit || 57 == bit || 58 == bit || 59 == bit))
528 {
529 continue;
530 }
531
532 if (occ_fir == id &&
533 (9 == bit || 10 == bit || 15 == bit || 20 == bit || 21 == bit ||
534 22 == bit || 23 == bit || 32 == bit || 33 == bit ||
535 34 == bit || 36 == bit || 42 == bit || 43 == bit ||
536 46 == bit || 47 == bit || 48 == bit || 51 == bit ||
537 52 == bit || 53 == bit || 54 == bit || 57 == bit))
538 {
539 continue;
540 }
541
542 if (pbao_fir == id &&
543 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 11 == bit ||
544 13 == bit || 15 == bit || 16 == bit || 17 == bit))
545 {
546 continue;
547 }
548
549 if ((n0_local_fir == id || n1_local_fir == id ||
550 iohs_local_fir == id || mc_local_fir == id ||
551 pau_local_fir == id || pci_local_fir == id) &&
552 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
553 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit ||
554 10 == bit || 11 == bit || 20 == bit || 21 == bit))
555 {
556 continue;
557 }
558
559 if (int_cq_fir == id &&
560 (0 == bit || 3 == bit || 5 == bit || 7 == bit || 36 == bit ||
Caleb Palmerecde53f2022-12-13 15:11:47 -0600561 47 == bit || 48 == bit || 49 == bit || 50 == bit ||
Zane Shelleybaec7c02022-03-17 11:05:20 -0500562 58 == bit || 59 == bit || 60 == bit))
563 {
564 continue;
565 }
566
567 if (nx_cq_fir == id &&
568 (1 == bit || 4 == bit || 18 == bit || 32 == bit || 33 == bit))
569 {
570 continue;
571 }
572
573 if (nx_dma_eng_fir == id &&
574 (4 == bit || 6 == bit || 9 == bit || 10 == bit || 11 == bit ||
575 34 == bit || 35 == bit || 36 == bit || 37 == bit || 39 == bit))
576 {
577 continue;
578 }
579
580 if (vas_fir == id &&
581 (8 == bit || 9 == bit || 11 == bit || 12 == bit || 13 == bit))
582 {
583 continue;
584 }
585
586 if (mcd_fir == id && (0 == bit))
587 {
588 continue;
589 }
590
591 if ((pb_station_fir_en_1 == id || pb_station_fir_en_2 == id ||
592 pb_station_fir_en_3 == id || pb_station_fir_en_4 == id ||
593 pb_station_fir_es_1 == id || pb_station_fir_es_2 == id ||
594 pb_station_fir_es_3 == id || pb_station_fir_es_4 == id ||
595 pb_station_fir_eq == id) &&
596 (9 == bit))
597 {
598 continue;
599 }
600
601 if (psihb_fir == id && (0 == bit || 23 == bit))
602 {
603 continue;
604 }
605
606 if (pbaf_fir == id &&
607 (0 == bit || 1 == bit || 3 == bit || 4 == bit || 5 == bit ||
608 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
609 11 == bit || 19 == bit || 20 == bit || 21 == bit ||
610 28 == bit || 29 == bit || 30 == bit || 31 == bit ||
611 32 == bit || 33 == bit || 34 == bit || 35 == bit || 36 == bit))
612 {
613 continue;
614 }
615
616 if (lpc_fir == id && (5 == bit))
617 {
618 continue;
619 }
620
621 if (eq_core_fir == id &&
622 (0 == bit || 2 == bit || 4 == bit || 7 == bit || 9 == bit ||
623 11 == bit || 13 == bit || 18 == bit || 21 == bit ||
624 24 == bit || 29 == bit || 31 == bit || 37 == bit ||
625 43 == bit || 56 == bit || 57 == bit))
626 {
627 continue;
628 }
629
630 if (eq_l2_fir == id &&
631 (0 == bit || 6 == bit || 11 == bit || 19 == bit || 36 == bit))
632 {
633 continue;
634 }
635
636 if (eq_l3_fir == id &&
637 (3 == bit || 4 == bit || 7 == bit || 10 == bit || 13 == bit))
638 {
639 continue;
640 }
641
642 if (eq_ncu_fir == id && (9 == bit))
643 {
644 continue;
645 }
646
647 if (eq_local_fir == id &&
648 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 5 == bit ||
649 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
650 11 == bit || 12 == bit || 13 == bit || 14 == bit ||
651 15 == bit || 16 == bit || 20 == bit || 21 == bit ||
652 22 == bit || 23 == bit || 24 == bit || 25 == bit ||
653 26 == bit || 27 == bit || 28 == bit || 29 == bit ||
654 30 == bit || 31 == bit || 32 == bit || 33 == bit ||
655 34 == bit || 35 == bit || 36 == bit || 37 == bit ||
656 38 == bit || 39 == bit))
657 {
658 continue;
659 }
660
661 if (eq_qme_fir == id && (7 == bit || 25 == bit))
662 {
663 continue;
664 }
665
666 if (iohs_dlp_fir_oc == id &&
667 (6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
668 48 == bit || 49 == bit || 52 == bit || 53 == bit))
669 {
670 continue;
671 }
672
673 if (iohs_dlp_fir_smp == id &&
674 (6 == bit || 7 == bit || 14 == bit || 15 == bit || 16 == bit ||
675 17 == bit || 38 == bit || 39 == bit || 44 == bit ||
676 45 == bit || 50 == bit || 51 == bit))
677 {
678 continue;
679 }
680
681 if (mc_fir == id &&
682 (5 == bit || 8 == bit || 15 == bit || 16 == bit))
683 {
684 continue;
685 }
686
687 if (mc_dstl_fir == id &&
688 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
689 5 == bit || 6 == bit || 7 == bit || 14 == bit || 15 == bit))
690 {
691 continue;
692 }
693
694 if (mc_ustl_fir == id &&
695 (6 == bit || 20 == bit || 33 == bit || 34 == bit))
696 {
697 continue;
698 }
699
700 if (nmmu_cq_fir == id && (8 == bit || 11 == bit || 14 == bit))
701 {
702 continue;
703 }
704
705 if (nmmu_fir == id &&
706 (0 == bit || 3 == bit || 8 == bit || 9 == bit || 10 == bit ||
707 11 == bit || 12 == bit || 13 == bit || 14 == bit ||
708 15 == bit || 30 == bit || 31 == bit || 41 == bit))
709 {
710 continue;
711 }
712
713 if (mc_omi_dl == id && (2 == bit || 3 == bit || 6 == bit ||
714 7 == bit || 9 == bit || 10 == bit))
715 {
716 continue;
717 }
718
719 if (pau_ptl_fir == id && (5 == bit || 9 == bit))
720 {
721 continue;
722 }
723
724 if (pau_phy_fir == id &&
725 (2 == bit || 3 == bit || 6 == bit || 7 == bit || 15 == bit))
726 {
727 continue;
728 }
729
730 if (pau_fir_0 == id && (13 == bit || 30 == bit || 41 == bit))
731 {
732 continue;
733 }
734
735 if (pau_fir_2 == id && (19 == bit || 46 == bit || 49 == bit))
736 {
737 continue;
738 }
739
740 if (pci_iop_fir == id &&
741 (0 == bit || 2 == bit || 4 == bit || 6 == bit || 7 == bit ||
742 8 == bit || 10 == bit))
743 {
744 continue;
745 }
746
747 if (pci_nest_fir == id && (2 == bit || 5 == bit))
748 {
749 continue;
750 }
751 }
752 else if (TYPE_OCMB == targetType)
753 {
754 if (ocmb_lfir == id &&
755 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 23 == bit ||
756 37 == bit || 63 == bit))
757 {
758 continue;
759 }
760
761 if (mmiofir == id && (2 == bit))
762 {
763 continue;
764 }
765
766 if (srqfir == id &&
767 (2 == bit || 4 == bit || 14 == bit || 15 == bit || 23 == bit ||
768 25 == bit || 28 == bit))
769 {
770 continue;
771 }
772
773 if (rdffir == id &&
774 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
775 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit ||
776 18 == bit || 38 == bit || 40 == bit || 41 == bit ||
777 45 == bit || 46 == bit))
778 {
779 continue;
780 }
781
782 if (tlxfir == id && (0 == bit || 9 == bit || 26 == bit))
783 {
784 continue;
785 }
786
787 if (omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 7 == bit ||
788 9 == bit || 10 == bit))
789 {
790 continue;
791 }
792 }
793
794 // At this point, the attention has not been explicitly ignored. So
795 // return this signature and exit.
796 o_rootCause = signature;
797 return true;
798 }
799
800 return false; // default, nothing found
801}
802
803//------------------------------------------------------------------------------
804
Zane Shelleyec227c22021-12-09 15:54:40 -0600805bool filterRootCause(AnalysisType i_type,
806 const libhei::IsolationData& i_isoData,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600807 libhei::Signature& o_rootCause,
808 const RasDataParser& i_rasData)
Zane Shelley65fefb22021-10-18 15:35:26 -0500809{
810 // We'll need to make a copy of the list so that the original list is
Zane Shelleyec227c22021-12-09 15:54:40 -0600811 // maintained for the PEL.
Zane Shelley65fefb22021-10-18 15:35:26 -0500812 std::vector<libhei::Signature> list{i_isoData.getSignatureList()};
813
814 // START WORKAROUND
815 // TODO: Filtering should be data driven. Until that support is available,
816 // use the following isolation rules.
817
Zane Shelleyec227c22021-12-09 15:54:40 -0600818 // Ensure the list is not empty before continuing.
Zane Shelleyf4792d62021-10-28 18:08:22 -0500819 if (list.empty())
820 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600821 return false; // nothing more to do
Zane Shelleyf4792d62021-10-28 18:08:22 -0500822 }
823
824 // First, look for any RCS OSC errors. This must always be first because
825 // they can cause downstream PLL unlock attentions.
826 if (__findRcsOscError(list, o_rootCause))
Zane Shelleya7369f82021-10-18 16:52:21 -0500827 {
828 return true;
829 }
830
Zane Shelleyf4792d62021-10-28 18:08:22 -0500831 // Second, look for any PLL unlock attentions. This must always be second
832 // because PLL unlock attentions can cause any number of downstream
833 // attentions, including a system checkstop.
834 if (__findPllUnlock(list, o_rootCause))
835 {
836 return true;
837 }
838
Zane Shelleyec227c22021-12-09 15:54:40 -0600839 // Regardless of the analysis type, always look for anything that could be
840 // blamed as the root cause of a system checkstop.
841
Zane Shelleyf4792d62021-10-28 18:08:22 -0500842 // Memory channel failure attentions will produce SUEs and likely cause
843 // downstream attentions, including a system checkstop.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600844 if (__findMemoryChannelFailure(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500845 {
846 return true;
847 }
848
849 // Look for any recoverable attentions that have been identified as a
850 // potential root cause of a system checkstop attention. These would include
851 // any attention that would generate an SUE. Note that is it possible for
852 // recoverables to generate unit checkstop attentions so we must check them
853 // first.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600854 if (__findCsRootCause_RE(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500855 {
856 return true;
857 }
858
859 // Look for any unit checkstop attentions (other than memory channel
860 // failures) that have been identified as a potential root cause of a
861 // system checkstop attention. These would include any attention that would
862 // generate an SUE.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600863 if (__findCsRootCause_UCS(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500864 {
865 return true;
866 }
867
Caleb Palmer51f82022023-02-22 16:09:09 -0600868 // Version 2 of the RAS data files. If no other viable root cause has
869 // been found, check for any signatures with the ATTN_FROM_OCMB flag in
870 // case there was an attention from an inaccessible OCMB.
871 if (__findOcmbAttnBits(list, o_rootCause, i_rasData))
872 {
873 return true;
874 }
875
Zane Shelleyf4792d62021-10-28 18:08:22 -0500876 // Look for any system checkstop attentions that originated from within the
877 // chip that reported the attention. In other words, no external checkstop
878 // attentions.
879 if (__findNonExternalCs(list, o_rootCause))
880 {
881 return true;
882 }
883
Zane Shelleyec227c22021-12-09 15:54:40 -0600884 if (AnalysisType::SYSTEM_CHECKSTOP != i_type)
Zane Shelley65fefb22021-10-18 15:35:26 -0500885 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600886 // No system checkstop root cause attentions were found. Next, look for
887 // any recoverable or unit checkstop attentions that could be associated
Zane Shelleybaec7c02022-03-17 11:05:20 -0500888 // with a TI.
889 if (__findTiRootCause(list, o_rootCause))
Zane Shelleyec227c22021-12-09 15:54:40 -0600890 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600891 return true;
892 }
893
894 if (AnalysisType::TERMINATE_IMMEDIATE != i_type)
895 {
896 // No attentions associated with a system checkstop or TI were
897 // found. Simply, return the first entry in the list.
898 o_rootCause = list.front();
899 return true;
900 }
Zane Shelley65fefb22021-10-18 15:35:26 -0500901 }
902
903 // END WORKAROUND
904
905 return false; // default, no active attentions found.
906}
907
908//------------------------------------------------------------------------------
909
910} // namespace analyzer