blob: 5e1ce8210387282178343dd4dd4312fc85cbd083 [file] [log] [blame]
Zane Shelley65fefb22021-10-18 15:35:26 -05001#include <assert.h>
2
Caleb Palmer1a4f0e72022-11-07 15:08:01 -06003#include <analyzer/analyzer_main.hpp>
4#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley65fefb22021-10-18 15:35:26 -05005#include <hei_main.hpp>
Zane Shelley19df3702021-12-16 22:32:54 -06006#include <hei_util.hpp>
Zane Shelleyf4792d62021-10-28 18:08:22 -05007#include <util/pdbg.hpp>
Zane Shelley65fefb22021-10-18 15:35:26 -05008
9#include <algorithm>
10#include <limits>
11#include <string>
12
13namespace analyzer
14{
Zane Shelley65fefb22021-10-18 15:35:26 -050015//------------------------------------------------------------------------------
16
Zane Shelleya7369f82021-10-18 16:52:21 -050017bool __findRcsOscError(const std::vector<libhei::Signature>& i_list,
18 libhei::Signature& o_rootCause)
19{
20 // TODO: Consider returning all of them instead of one as root cause.
21 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
Zane Shelley19df3702021-12-16 22:32:54 -060022 return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() &&
Zane Shelleya7369f82021-10-18 16:52:21 -050023 (42 == t.getBit() || 43 == t.getBit()));
24 });
25
26 if (i_list.end() != itr)
27 {
28 o_rootCause = *itr;
29 return true;
30 }
31
32 return false;
33}
34
35//------------------------------------------------------------------------------
36
37bool __findPllUnlock(const std::vector<libhei::Signature>& i_list,
38 libhei::Signature& o_rootCause)
39{
40 // TODO: Consider returning all of them instead of one as root cause.
41 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
Zane Shelley19df3702021-12-16 22:32:54 -060042 return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() &&
Zane Shelleya7369f82021-10-18 16:52:21 -050043 (0 == t.getBit() || 1 == t.getBit()));
44 });
45
46 if (i_list.end() != itr)
47 {
48 o_rootCause = *itr;
49 return true;
50 }
51
52 return false;
53}
54
55//------------------------------------------------------------------------------
56
Caleb Palmer329dbbd2022-10-03 15:05:43 -050057bool __findIueTh(const std::vector<libhei::Signature>& i_list,
58 libhei::Signature& o_rootCause)
59{
60 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
61 return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() &&
62 (17 == t.getBit() || 37 == t.getBit()));
63 });
64
65 if (i_list.end() != itr)
66 {
67 o_rootCause = *itr;
68 return true;
69 }
70
71 return false;
72}
73
74//------------------------------------------------------------------------------
75
Zane Shelleyf4792d62021-10-28 18:08:22 -050076bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060077 libhei::Signature& o_rootCause,
78 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -050079{
80 using namespace util::pdbg;
81
Zane Shelley19df3702021-12-16 22:32:54 -060082 using func = libhei::NodeId_t (*)(const std::string& i_str);
83 func __hash = libhei::hash<libhei::NodeId_t>;
84
Zane Shelley19df3702021-12-16 22:32:54 -060085 static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT");
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060086 static const auto srqfir = __hash("SRQFIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -050087
88 for (const auto s : i_list)
89 {
Zane Shelley93b001c2023-03-24 17:45:04 -050090 if (libhei::ATTN_TYPE_UNIT_CS == s.getAttnType() &&
91 i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::SUE_SOURCE))
Zane Shelleyf4792d62021-10-28 18:08:22 -050092 {
Zane Shelley93b001c2023-03-24 17:45:04 -050093 // Special Cases:
94 // If the channel fail was specifically a firmware initiated
95 // channel fail (SRQFIR[25]) check for any IUE bits that are on
96 // that would have caused that (RDFFIR[17,37]).
97 if ((srqfir == s.getId() && 25 == s.getBit()) &&
98 __findIueTh(i_list, o_rootCause))
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060099 {
Zane Shelleyf4792d62021-10-28 18:08:22 -0500100 return true;
101 }
Zane Shelley93b001c2023-03-24 17:45:04 -0500102
103 // TODO: The proc side channel failure bits are configurable.
104 // Eventually, we will need some mechanism to check the
105 // config registers for a more accurate analysis. For now,
106 // simply check for all bits that could potentially be
107 // configured to channel failure.
108
109 o_rootCause = s;
110 }
111 // The bits in the MC_OMI_DL_ERR_RPT register are a special case.
112 // They are possible channel fail bits but the MC_OMI_DL_FIR they
113 // feed into can't be set up to report UNIT_CS attentions, so they
114 // report as recoverable instead.
115 else if (mc_omi_dl_err_rpt == s.getId())
116 {
117 o_rootCause = s;
118 return true;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500119 }
120 }
121
122 return false; // default, nothing found
123}
124
125//------------------------------------------------------------------------------
126
127// Will query if a signature is a potential system checkstop root cause.
128// attention. Note that this function excludes memory channel failure attentions
Zane Shelleyed3ab8f2022-05-24 21:08:21 -0500129// which are checked in __findMemoryChannelFailure().
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600130bool __findCsRootCause(const libhei::Signature& i_signature,
131 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500132{
Zane Shelley93b001c2023-03-24 17:45:04 -0500133 // Check if the input signature has the CS_POSSIBLE or SUE_SOURCE flag set.
134 if (i_rasData.isFlagSet(i_signature,
135 RasDataParser::RasDataFlags::CS_POSSIBLE) ||
136 i_rasData.isFlagSet(i_signature,
137 RasDataParser::RasDataFlags::SUE_SOURCE))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500138 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600139 return true;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500140 }
141
142 return false; // default, nothing found
143}
144
145//------------------------------------------------------------------------------
146
147bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600148 libhei::Signature& o_rootCause,
149 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500150{
151 for (const auto s : i_list)
152 {
153 // Only looking for recoverable attentions.
154 if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType())
155 {
156 continue;
157 }
158
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600159 if (__findCsRootCause(s, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500160 {
161 o_rootCause = s;
162 return true;
163 }
164 }
165
166 return false; // default, nothing found
167}
168
169//------------------------------------------------------------------------------
170
171bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600172 libhei::Signature& o_rootCause,
173 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500174{
175 for (const auto s : i_list)
176 {
177 // Only looking for unit checkstop attentions.
178 if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType())
179 {
180 continue;
181 }
182
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600183 if (__findCsRootCause(s, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500184 {
185 o_rootCause = s;
186 return true;
187 }
188 }
189
190 return false; // default, nothing found
191}
192
193//------------------------------------------------------------------------------
194
Caleb Palmer51f82022023-02-22 16:09:09 -0600195bool __findOcmbAttnBits(const std::vector<libhei::Signature>& i_list,
196 libhei::Signature& o_rootCause,
197 const RasDataParser& i_rasData)
198{
199 using namespace util::pdbg;
200
201 // If we have any attentions from an OCMB, assume isolation to the OCMBs
202 // was successful and the ATTN_FROM_OCMB flag does not need to be checked.
203 for (const auto s : i_list)
204 {
205 if (TYPE_OCMB == getTrgtType(getTrgt(s.getChip())))
206 {
207 return false;
208 }
209 }
210
211 for (const auto s : i_list)
212 {
Zane Shelley93b001c2023-03-24 17:45:04 -0500213 if (i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::ATTN_FROM_OCMB))
Caleb Palmer51f82022023-02-22 16:09:09 -0600214 {
215 o_rootCause = s;
216 return true;
217 }
218 }
219
220 return false; // default, nothing found
221}
222
223//------------------------------------------------------------------------------
224
Zane Shelleyf4792d62021-10-28 18:08:22 -0500225bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list,
226 libhei::Signature& o_rootCause)
227{
228 using namespace util::pdbg;
229
Zane Shelley19df3702021-12-16 22:32:54 -0600230 static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500231
232 for (const auto s : i_list)
233 {
234 const auto targetType = getTrgtType(getTrgt(s.getChip()));
235 const auto id = s.getId();
236 const auto attnType = s.getAttnType();
237
238 // Find any processor with system checkstop attention that did not
239 // originate from the PB_EXT_FIR.
240 if ((TYPE_PROC == targetType) &&
241 (libhei::ATTN_TYPE_CHECKSTOP == attnType) && (pb_ext_fir != id))
242 {
243 o_rootCause = s;
244 return true;
245 }
246 }
247
248 return false; // default, nothing found
249}
250
251//------------------------------------------------------------------------------
252
Zane Shelleybaec7c02022-03-17 11:05:20 -0500253bool __findTiRootCause(const std::vector<libhei::Signature>& i_list,
254 libhei::Signature& o_rootCause)
255{
256 using namespace util::pdbg;
257
258 using func = libhei::NodeId_t (*)(const std::string& i_str);
259 func __hash = libhei::hash<libhei::NodeId_t>;
260
261 // PROC registers
262 static const auto tp_local_fir = __hash("TP_LOCAL_FIR");
263 static const auto occ_fir = __hash("OCC_FIR");
264 static const auto pbao_fir = __hash("PBAO_FIR");
265 static const auto n0_local_fir = __hash("N0_LOCAL_FIR");
266 static const auto int_cq_fir = __hash("INT_CQ_FIR");
267 static const auto nx_cq_fir = __hash("NX_CQ_FIR");
268 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR");
269 static const auto vas_fir = __hash("VAS_FIR");
270 static const auto n1_local_fir = __hash("N1_LOCAL_FIR");
271 static const auto mcd_fir = __hash("MCD_FIR");
272 static const auto pb_station_fir_en_1 = __hash("PB_STATION_FIR_EN_1");
273 static const auto pb_station_fir_en_2 = __hash("PB_STATION_FIR_EN_2");
274 static const auto pb_station_fir_en_3 = __hash("PB_STATION_FIR_EN_3");
275 static const auto pb_station_fir_en_4 = __hash("PB_STATION_FIR_EN_4");
276 static const auto pb_station_fir_es_1 = __hash("PB_STATION_FIR_ES_1");
277 static const auto pb_station_fir_es_2 = __hash("PB_STATION_FIR_ES_2");
278 static const auto pb_station_fir_es_3 = __hash("PB_STATION_FIR_ES_3");
279 static const auto pb_station_fir_es_4 = __hash("PB_STATION_FIR_ES_4");
280 static const auto pb_station_fir_eq = __hash("PB_STATION_FIR_EQ");
281 static const auto psihb_fir = __hash("PSIHB_FIR");
282 static const auto pbaf_fir = __hash("PBAF_FIR");
283 static const auto lpc_fir = __hash("LPC_FIR");
284 static const auto eq_core_fir = __hash("EQ_CORE_FIR");
285 static const auto eq_l2_fir = __hash("EQ_L2_FIR");
286 static const auto eq_l3_fir = __hash("EQ_L3_FIR");
287 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR");
288 static const auto eq_local_fir = __hash("EQ_LOCAL_FIR");
289 static const auto eq_qme_fir = __hash("EQ_QME_FIR");
290 static const auto iohs_local_fir = __hash("IOHS_LOCAL_FIR");
291 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC");
292 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP");
293 static const auto mc_local_fir = __hash("MC_LOCAL_FIR");
294 static const auto mc_fir = __hash("MC_FIR");
295 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR");
296 static const auto mc_ustl_fir = __hash("MC_USTL_FIR");
297 static const auto nmmu_cq_fir = __hash("NMMU_CQ_FIR");
298 static const auto nmmu_fir = __hash("NMMU_FIR");
299 static const auto mc_omi_dl = __hash("MC_OMI_DL");
300 static const auto pau_local_fir = __hash("PAU_LOCAL_FIR");
301 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR");
302 static const auto pau_phy_fir = __hash("PAU_PHY_FIR");
303 static const auto pau_fir_0 = __hash("PAU_FIR_0");
304 static const auto pau_fir_2 = __hash("PAU_FIR_2");
305 static const auto pci_local_fir = __hash("PCI_LOCAL_FIR");
306 static const auto pci_iop_fir = __hash("PCI_IOP_FIR");
307 static const auto pci_nest_fir = __hash("PCI_NEST_FIR");
308
309 // OCMB registers
310 static const auto ocmb_lfir = __hash("OCMB_LFIR");
311 static const auto mmiofir = __hash("MMIOFIR");
312 static const auto srqfir = __hash("SRQFIR");
313 static const auto rdffir = __hash("RDFFIR");
314 static const auto tlxfir = __hash("TLXFIR");
315 static const auto omi_dl = __hash("OMI_DL");
316
317 for (const auto& signature : i_list)
318 {
319 const auto targetType = getTrgtType(getTrgt(signature.getChip()));
320 const auto attnType = signature.getAttnType();
321 const auto id = signature.getId();
322 const auto bit = signature.getBit();
323
324 // Only looking for recoverable or unit checkstop attentions.
325 if (libhei::ATTN_TYPE_RECOVERABLE != attnType &&
326 libhei::ATTN_TYPE_UNIT_CS != attnType)
327 {
328 continue;
329 }
330
331 // Ignore attentions that should not be blamed as root cause of a TI.
332 // This would include informational only FIRs or correctable errors.
333 if (TYPE_PROC == targetType)
334 {
335 if (tp_local_fir == id &&
336 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
337 5 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
338 11 == bit || 20 == bit || 22 == bit || 23 == bit ||
339 24 == bit || 38 == bit || 40 == bit || 41 == bit ||
340 46 == bit || 47 == bit || 48 == bit || 55 == bit ||
341 56 == bit || 57 == bit || 58 == bit || 59 == bit))
342 {
343 continue;
344 }
345
346 if (occ_fir == id &&
347 (9 == bit || 10 == bit || 15 == bit || 20 == bit || 21 == bit ||
348 22 == bit || 23 == bit || 32 == bit || 33 == bit ||
349 34 == bit || 36 == bit || 42 == bit || 43 == bit ||
350 46 == bit || 47 == bit || 48 == bit || 51 == bit ||
351 52 == bit || 53 == bit || 54 == bit || 57 == bit))
352 {
353 continue;
354 }
355
356 if (pbao_fir == id &&
357 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 11 == bit ||
358 13 == bit || 15 == bit || 16 == bit || 17 == bit))
359 {
360 continue;
361 }
362
363 if ((n0_local_fir == id || n1_local_fir == id ||
364 iohs_local_fir == id || mc_local_fir == id ||
365 pau_local_fir == id || pci_local_fir == id) &&
366 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
367 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit ||
368 10 == bit || 11 == bit || 20 == bit || 21 == bit))
369 {
370 continue;
371 }
372
373 if (int_cq_fir == id &&
374 (0 == bit || 3 == bit || 5 == bit || 7 == bit || 36 == bit ||
Caleb Palmerecde53f2022-12-13 15:11:47 -0600375 47 == bit || 48 == bit || 49 == bit || 50 == bit ||
Zane Shelleybaec7c02022-03-17 11:05:20 -0500376 58 == bit || 59 == bit || 60 == bit))
377 {
378 continue;
379 }
380
381 if (nx_cq_fir == id &&
382 (1 == bit || 4 == bit || 18 == bit || 32 == bit || 33 == bit))
383 {
384 continue;
385 }
386
387 if (nx_dma_eng_fir == id &&
388 (4 == bit || 6 == bit || 9 == bit || 10 == bit || 11 == bit ||
389 34 == bit || 35 == bit || 36 == bit || 37 == bit || 39 == bit))
390 {
391 continue;
392 }
393
394 if (vas_fir == id &&
395 (8 == bit || 9 == bit || 11 == bit || 12 == bit || 13 == bit))
396 {
397 continue;
398 }
399
400 if (mcd_fir == id && (0 == bit))
401 {
402 continue;
403 }
404
405 if ((pb_station_fir_en_1 == id || pb_station_fir_en_2 == id ||
406 pb_station_fir_en_3 == id || pb_station_fir_en_4 == id ||
407 pb_station_fir_es_1 == id || pb_station_fir_es_2 == id ||
408 pb_station_fir_es_3 == id || pb_station_fir_es_4 == id ||
409 pb_station_fir_eq == id) &&
410 (9 == bit))
411 {
412 continue;
413 }
414
415 if (psihb_fir == id && (0 == bit || 23 == bit))
416 {
417 continue;
418 }
419
420 if (pbaf_fir == id &&
421 (0 == bit || 1 == bit || 3 == bit || 4 == bit || 5 == bit ||
422 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
423 11 == bit || 19 == bit || 20 == bit || 21 == bit ||
424 28 == bit || 29 == bit || 30 == bit || 31 == bit ||
425 32 == bit || 33 == bit || 34 == bit || 35 == bit || 36 == bit))
426 {
427 continue;
428 }
429
430 if (lpc_fir == id && (5 == bit))
431 {
432 continue;
433 }
434
435 if (eq_core_fir == id &&
436 (0 == bit || 2 == bit || 4 == bit || 7 == bit || 9 == bit ||
437 11 == bit || 13 == bit || 18 == bit || 21 == bit ||
438 24 == bit || 29 == bit || 31 == bit || 37 == bit ||
439 43 == bit || 56 == bit || 57 == bit))
440 {
441 continue;
442 }
443
444 if (eq_l2_fir == id &&
445 (0 == bit || 6 == bit || 11 == bit || 19 == bit || 36 == bit))
446 {
447 continue;
448 }
449
450 if (eq_l3_fir == id &&
451 (3 == bit || 4 == bit || 7 == bit || 10 == bit || 13 == bit))
452 {
453 continue;
454 }
455
456 if (eq_ncu_fir == id && (9 == bit))
457 {
458 continue;
459 }
460
461 if (eq_local_fir == id &&
462 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 5 == bit ||
463 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
464 11 == bit || 12 == bit || 13 == bit || 14 == bit ||
465 15 == bit || 16 == bit || 20 == bit || 21 == bit ||
466 22 == bit || 23 == bit || 24 == bit || 25 == bit ||
467 26 == bit || 27 == bit || 28 == bit || 29 == bit ||
468 30 == bit || 31 == bit || 32 == bit || 33 == bit ||
469 34 == bit || 35 == bit || 36 == bit || 37 == bit ||
470 38 == bit || 39 == bit))
471 {
472 continue;
473 }
474
475 if (eq_qme_fir == id && (7 == bit || 25 == bit))
476 {
477 continue;
478 }
479
480 if (iohs_dlp_fir_oc == id &&
481 (6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
482 48 == bit || 49 == bit || 52 == bit || 53 == bit))
483 {
484 continue;
485 }
486
487 if (iohs_dlp_fir_smp == id &&
488 (6 == bit || 7 == bit || 14 == bit || 15 == bit || 16 == bit ||
489 17 == bit || 38 == bit || 39 == bit || 44 == bit ||
490 45 == bit || 50 == bit || 51 == bit))
491 {
492 continue;
493 }
494
495 if (mc_fir == id &&
496 (5 == bit || 8 == bit || 15 == bit || 16 == bit))
497 {
498 continue;
499 }
500
501 if (mc_dstl_fir == id &&
502 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
503 5 == bit || 6 == bit || 7 == bit || 14 == bit || 15 == bit))
504 {
505 continue;
506 }
507
508 if (mc_ustl_fir == id &&
509 (6 == bit || 20 == bit || 33 == bit || 34 == bit))
510 {
511 continue;
512 }
513
514 if (nmmu_cq_fir == id && (8 == bit || 11 == bit || 14 == bit))
515 {
516 continue;
517 }
518
519 if (nmmu_fir == id &&
520 (0 == bit || 3 == bit || 8 == bit || 9 == bit || 10 == bit ||
521 11 == bit || 12 == bit || 13 == bit || 14 == bit ||
522 15 == bit || 30 == bit || 31 == bit || 41 == bit))
523 {
524 continue;
525 }
526
527 if (mc_omi_dl == id && (2 == bit || 3 == bit || 6 == bit ||
528 7 == bit || 9 == bit || 10 == bit))
529 {
530 continue;
531 }
532
533 if (pau_ptl_fir == id && (5 == bit || 9 == bit))
534 {
535 continue;
536 }
537
538 if (pau_phy_fir == id &&
539 (2 == bit || 3 == bit || 6 == bit || 7 == bit || 15 == bit))
540 {
541 continue;
542 }
543
544 if (pau_fir_0 == id && (13 == bit || 30 == bit || 41 == bit))
545 {
546 continue;
547 }
548
549 if (pau_fir_2 == id && (19 == bit || 46 == bit || 49 == bit))
550 {
551 continue;
552 }
553
554 if (pci_iop_fir == id &&
555 (0 == bit || 2 == bit || 4 == bit || 6 == bit || 7 == bit ||
556 8 == bit || 10 == bit))
557 {
558 continue;
559 }
560
561 if (pci_nest_fir == id && (2 == bit || 5 == bit))
562 {
563 continue;
564 }
565 }
566 else if (TYPE_OCMB == targetType)
567 {
568 if (ocmb_lfir == id &&
569 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 23 == bit ||
570 37 == bit || 63 == bit))
571 {
572 continue;
573 }
574
575 if (mmiofir == id && (2 == bit))
576 {
577 continue;
578 }
579
580 if (srqfir == id &&
581 (2 == bit || 4 == bit || 14 == bit || 15 == bit || 23 == bit ||
582 25 == bit || 28 == bit))
583 {
584 continue;
585 }
586
587 if (rdffir == id &&
588 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
589 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit ||
590 18 == bit || 38 == bit || 40 == bit || 41 == bit ||
591 45 == bit || 46 == bit))
592 {
593 continue;
594 }
595
596 if (tlxfir == id && (0 == bit || 9 == bit || 26 == bit))
597 {
598 continue;
599 }
600
601 if (omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 7 == bit ||
602 9 == bit || 10 == bit))
603 {
604 continue;
605 }
606 }
607
608 // At this point, the attention has not been explicitly ignored. So
609 // return this signature and exit.
610 o_rootCause = signature;
611 return true;
612 }
613
614 return false; // default, nothing found
615}
616
617//------------------------------------------------------------------------------
618
Zane Shelleyec227c22021-12-09 15:54:40 -0600619bool filterRootCause(AnalysisType i_type,
620 const libhei::IsolationData& i_isoData,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600621 libhei::Signature& o_rootCause,
622 const RasDataParser& i_rasData)
Zane Shelley65fefb22021-10-18 15:35:26 -0500623{
624 // We'll need to make a copy of the list so that the original list is
Zane Shelleyec227c22021-12-09 15:54:40 -0600625 // maintained for the PEL.
Zane Shelley65fefb22021-10-18 15:35:26 -0500626 std::vector<libhei::Signature> list{i_isoData.getSignatureList()};
627
628 // START WORKAROUND
629 // TODO: Filtering should be data driven. Until that support is available,
630 // use the following isolation rules.
631
Zane Shelleyec227c22021-12-09 15:54:40 -0600632 // Ensure the list is not empty before continuing.
Zane Shelleyf4792d62021-10-28 18:08:22 -0500633 if (list.empty())
634 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600635 return false; // nothing more to do
Zane Shelleyf4792d62021-10-28 18:08:22 -0500636 }
637
638 // First, look for any RCS OSC errors. This must always be first because
639 // they can cause downstream PLL unlock attentions.
640 if (__findRcsOscError(list, o_rootCause))
Zane Shelleya7369f82021-10-18 16:52:21 -0500641 {
642 return true;
643 }
644
Zane Shelleyf4792d62021-10-28 18:08:22 -0500645 // Second, look for any PLL unlock attentions. This must always be second
646 // because PLL unlock attentions can cause any number of downstream
647 // attentions, including a system checkstop.
648 if (__findPllUnlock(list, o_rootCause))
649 {
650 return true;
651 }
652
Zane Shelleyec227c22021-12-09 15:54:40 -0600653 // Regardless of the analysis type, always look for anything that could be
654 // blamed as the root cause of a system checkstop.
655
Zane Shelleyf4792d62021-10-28 18:08:22 -0500656 // Memory channel failure attentions will produce SUEs and likely cause
657 // downstream attentions, including a system checkstop.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600658 if (__findMemoryChannelFailure(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500659 {
660 return true;
661 }
662
663 // Look for any recoverable attentions that have been identified as a
664 // potential root cause of a system checkstop attention. These would include
665 // any attention that would generate an SUE. Note that is it possible for
666 // recoverables to generate unit checkstop attentions so we must check them
667 // first.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600668 if (__findCsRootCause_RE(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500669 {
670 return true;
671 }
672
673 // Look for any unit checkstop attentions (other than memory channel
674 // failures) that have been identified as a potential root cause of a
675 // system checkstop attention. These would include any attention that would
676 // generate an SUE.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600677 if (__findCsRootCause_UCS(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500678 {
679 return true;
680 }
681
Zane Shelley93b001c2023-03-24 17:45:04 -0500682 // If no other viable root cause has been found, check for any signatures
683 // with the ATTN_FROM_OCMB flag in case there was an attention from an
684 // inaccessible OCMB.
Caleb Palmer51f82022023-02-22 16:09:09 -0600685 if (__findOcmbAttnBits(list, o_rootCause, i_rasData))
686 {
687 return true;
688 }
689
Zane Shelleyf4792d62021-10-28 18:08:22 -0500690 // Look for any system checkstop attentions that originated from within the
691 // chip that reported the attention. In other words, no external checkstop
692 // attentions.
693 if (__findNonExternalCs(list, o_rootCause))
694 {
695 return true;
696 }
697
Zane Shelleyec227c22021-12-09 15:54:40 -0600698 if (AnalysisType::SYSTEM_CHECKSTOP != i_type)
Zane Shelley65fefb22021-10-18 15:35:26 -0500699 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600700 // No system checkstop root cause attentions were found. Next, look for
701 // any recoverable or unit checkstop attentions that could be associated
Zane Shelleybaec7c02022-03-17 11:05:20 -0500702 // with a TI.
703 if (__findTiRootCause(list, o_rootCause))
Zane Shelleyec227c22021-12-09 15:54:40 -0600704 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600705 return true;
706 }
707
708 if (AnalysisType::TERMINATE_IMMEDIATE != i_type)
709 {
710 // No attentions associated with a system checkstop or TI were
711 // found. Simply, return the first entry in the list.
712 o_rootCause = list.front();
713 return true;
714 }
Zane Shelley65fefb22021-10-18 15:35:26 -0500715 }
716
717 // END WORKAROUND
718
719 return false; // default, no active attentions found.
720}
721
722//------------------------------------------------------------------------------
723
724} // namespace analyzer