blob: 1a594b7fb406b3a7f753671211948d45830ab004 [file] [log] [blame]
Zane Shelley65fefb22021-10-18 15:35:26 -05001#include <assert.h>
2
Caleb Palmer1a4f0e72022-11-07 15:08:01 -06003#include <analyzer/analyzer_main.hpp>
4#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley65fefb22021-10-18 15:35:26 -05005#include <hei_main.hpp>
Zane Shelley19df3702021-12-16 22:32:54 -06006#include <hei_util.hpp>
Zane Shelleyf4792d62021-10-28 18:08:22 -05007#include <util/pdbg.hpp>
Zane Shelley65fefb22021-10-18 15:35:26 -05008
9#include <algorithm>
10#include <limits>
11#include <string>
12
13namespace analyzer
14{
Zane Shelley65fefb22021-10-18 15:35:26 -050015//------------------------------------------------------------------------------
16
Zane Shelleya7369f82021-10-18 16:52:21 -050017bool __findRcsOscError(const std::vector<libhei::Signature>& i_list,
18 libhei::Signature& o_rootCause)
19{
20 // TODO: Consider returning all of them instead of one as root cause.
21 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
Zane Shelley19df3702021-12-16 22:32:54 -060022 return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() &&
Zane Shelleya7369f82021-10-18 16:52:21 -050023 (42 == t.getBit() || 43 == t.getBit()));
24 });
25
26 if (i_list.end() != itr)
27 {
28 o_rootCause = *itr;
29 return true;
30 }
31
32 return false;
33}
34
35//------------------------------------------------------------------------------
36
37bool __findPllUnlock(const std::vector<libhei::Signature>& i_list,
38 libhei::Signature& o_rootCause)
39{
40 // TODO: Consider returning all of them instead of one as root cause.
41 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
Zane Shelley19df3702021-12-16 22:32:54 -060042 return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() &&
Zane Shelleya7369f82021-10-18 16:52:21 -050043 (0 == t.getBit() || 1 == t.getBit()));
44 });
45
46 if (i_list.end() != itr)
47 {
48 o_rootCause = *itr;
49 return true;
50 }
51
52 return false;
53}
54
55//------------------------------------------------------------------------------
56
Caleb Palmer329dbbd2022-10-03 15:05:43 -050057bool __findIueTh(const std::vector<libhei::Signature>& i_list,
58 libhei::Signature& o_rootCause)
59{
60 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
61 return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() &&
62 (17 == t.getBit() || 37 == t.getBit()));
63 });
64
65 if (i_list.end() != itr)
66 {
67 o_rootCause = *itr;
68 return true;
69 }
70
71 return false;
72}
73
74//------------------------------------------------------------------------------
75
Zane Shelleyf4792d62021-10-28 18:08:22 -050076bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060077 libhei::Signature& o_rootCause,
78 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -050079{
80 using namespace util::pdbg;
81
Zane Shelley19df3702021-12-16 22:32:54 -060082 using func = libhei::NodeId_t (*)(const std::string& i_str);
83 func __hash = libhei::hash<libhei::NodeId_t>;
84
85 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR");
86 static const auto mc_ustl_fir = __hash("MC_USTL_FIR");
87 static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT");
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060088 static const auto srqfir = __hash("SRQFIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -050089
90 for (const auto s : i_list)
91 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060092 // Version 1 of the RAS data files
93 if (1 == i_rasData.getVersion(s))
Zane Shelleyf4792d62021-10-28 18:08:22 -050094 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -060095 const auto targetType = getTrgtType(getTrgt(s.getChip()));
96 const auto id = s.getId();
97 const auto bit = s.getBit();
98 const auto attnType = s.getAttnType();
Caleb Palmer329dbbd2022-10-03 15:05:43 -050099
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600100 // Look for any unit checkstop attentions from OCMBs.
101 if (TYPE_OCMB == targetType)
102 {
103 // Any unit checkstop attentions will trigger a channel failure.
104 if (libhei::ATTN_TYPE_UNIT_CS == attnType)
Caleb Palmer329dbbd2022-10-03 15:05:43 -0500105 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600106 // If the channel was specifically a firmware initiated
107 // channel fail (SRQFIR[25]) check for any IUE bits that are
108 // on that would have caused that (RDFFIR[17,37]).
109 if ((srqfir == id && 25 == bit) &&
110 __findIueTh(i_list, o_rootCause))
111 {
112 return true;
113 }
114
115 o_rootCause = s;
Caleb Palmer329dbbd2022-10-03 15:05:43 -0500116 return true;
117 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500118 }
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600119 // Look for channel failure attentions on processors.
120 else if (TYPE_PROC == targetType)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500121 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600122 // TODO: All of these channel failure bits are configurable.
123 // Eventually, we will need some mechanism to check that
124 // config registers for a more accurate analysis. For now,
125 // simply check for all bits that could potentially be
126 // configured to channel failure.
127
128 // Any unit checkstop bit in the MC_DSTL_FIR or MC_USTL_FIR
129 // could be a channel failure.
130 if (libhei::ATTN_TYPE_UNIT_CS == attnType)
131 {
132 // Ignore bits MC_DSTL_FIR[0:7] because they simply indicate
133 // attentions occurred on the attached OCMBs.
134 if ((mc_dstl_fir == id && 8 <= bit) || (mc_ustl_fir == id))
135 {
136 o_rootCause = s;
137 return true;
138 }
139 }
140
141 // All bits in MC_OMI_DL_ERR_RPT eventually feed into
142 // MC_OMI_DL_FIR[0,20] which are configurable to channel
143 // failure.
144 if (mc_omi_dl_err_rpt == id)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500145 {
146 o_rootCause = s;
147 return true;
148 }
149 }
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600150 }
151 // Version 2 and above of the RAS data files
152 else if (2 <= i_rasData.getVersion(s))
153 {
154 if (libhei::ATTN_TYPE_UNIT_CS == s.getAttnType() &&
155 i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::SUE_SOURCE))
156 {
157 // Special Cases:
158 // If the channel fail was specifically a firmware initiated
159 // channel fail (SRQFIR[25]) check for any IUE bits that are on
160 // that would have caused that (RDFFIR[17,37]).
161 if ((srqfir == s.getId() && 25 == s.getBit()) &&
162 __findIueTh(i_list, o_rootCause))
163 {
164 return true;
165 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500166
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600167 // TODO: The proc side channel failure bits are configurable.
168 // Eventually, we will need some mechanism to check the
169 // config registers for a more accurate analysis. For now,
170 // simply check for all bits that could potentially be
171 // configured to channel failure.
172
173 o_rootCause = s;
174 }
175 // The bits in the MC_OMI_DL_ERR_RPT register are a special case.
176 // They are possible channel fail bits but the MC_OMI_DL_FIR they
177 // feed into can't be set up to report UNIT_CS attentions, so they
178 // report as recoverable instead.
179 else if (mc_omi_dl_err_rpt == s.getId())
Zane Shelleyf4792d62021-10-28 18:08:22 -0500180 {
181 o_rootCause = s;
182 return true;
183 }
184 }
185 }
186
187 return false; // default, nothing found
188}
189
190//------------------------------------------------------------------------------
191
192// Will query if a signature is a potential system checkstop root cause.
193// attention. Note that this function excludes memory channel failure attentions
Zane Shelleyed3ab8f2022-05-24 21:08:21 -0500194// which are checked in __findMemoryChannelFailure().
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600195bool __findCsRootCause(const libhei::Signature& i_signature,
196 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500197{
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600198 // Version 1 of the RAS data files.
199 if (1 == i_rasData.getVersion(i_signature))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500200 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600201 using namespace util::pdbg;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500202
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600203 using func = libhei::NodeId_t (*)(const std::string& i_str);
204 func __hash = libhei::hash<libhei::NodeId_t>;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500205
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600206 // PROC registers
207 static const auto eq_core_fir = __hash("EQ_CORE_FIR");
208 static const auto eq_l2_fir = __hash("EQ_L2_FIR");
209 static const auto eq_l3_fir = __hash("EQ_L3_FIR");
210 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR");
211 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC");
212 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP");
213 static const auto nx_cq_fir = __hash("NX_CQ_FIR");
214 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR");
215 static const auto pau_fir_0 = __hash("PAU_FIR_0");
216 static const auto pau_fir_1 = __hash("PAU_FIR_1");
217 static const auto pau_fir_2 = __hash("PAU_FIR_2");
218 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500219
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600220 // OCMB registers
221 static const auto rdffir = __hash("RDFFIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500222
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600223 const auto targetType = getTrgtType(getTrgt(i_signature.getChip()));
224 const auto id = i_signature.getId();
225 const auto bit = i_signature.getBit();
Zane Shelleyf4792d62021-10-28 18:08:22 -0500226
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600227 if (TYPE_PROC == targetType)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500228 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600229 if (eq_core_fir == id &&
230 (0 == bit || 2 == bit || 3 == bit || 4 == bit || 5 == bit ||
231 7 == bit || 8 == bit || 9 == bit || 11 == bit || 12 == bit ||
232 13 == bit || 18 == bit || 21 == bit || 22 == bit ||
233 24 == bit || 25 == bit || 29 == bit || 31 == bit ||
234 32 == bit || 36 == bit || 37 == bit || 38 == bit ||
235 43 == bit || 46 == bit || 47 == bit))
236 {
237 return true;
238 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500239
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600240 if (eq_l2_fir == id &&
241 (1 == bit || 12 == bit || 13 == bit || 17 == bit || 18 == bit ||
242 20 == bit || 27 == bit))
243 {
244 return true;
245 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500246
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600247 if (eq_l3_fir == id &&
248 (2 == bit || 5 == bit || 8 == bit || 11 == bit || 17 == bit))
249 {
250 return true;
251 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500252
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600253 if (eq_ncu_fir == id &&
254 (3 == bit || 4 == bit || 5 == bit || 7 == bit || 8 == bit ||
255 10 == bit || 17 == bit))
256 {
257 return true;
258 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500259
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600260 if (iohs_dlp_fir_oc == id && (54 <= bit && bit <= 61))
261 {
262 return true;
263 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500264
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600265 if (iohs_dlp_fir_smp == id && (54 <= bit && bit <= 61))
266 {
267 return true;
268 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500269
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600270 if (nx_cq_fir == id && (7 == bit || 16 == bit || 21 == bit))
271 {
272 return true;
273 }
274
275 if (nx_dma_eng_fir == id && (0 == bit))
276 {
277 return true;
278 }
279
280 if (pau_fir_0 == id &&
281 (15 == bit || 18 == bit || 19 == bit || 25 == bit ||
282 26 == bit || 29 == bit || 33 == bit || 34 == bit ||
283 35 == bit || 40 == bit || 42 == bit || 44 == bit || 45 == bit))
284 {
285 return true;
286 }
287
288 if (pau_fir_1 == id &&
289 (13 == bit || 14 == bit || 15 == bit || 37 == bit ||
290 39 == bit || 40 == bit || 41 == bit || 42 == bit))
291 {
292 return true;
293 }
294
295 if (pau_fir_2 == id &&
296 ((4 <= bit && bit <= 18) || (20 <= bit && bit <= 31) ||
297 (36 <= bit && bit <= 41) || 45 == bit || 47 == bit ||
298 48 == bit || 50 == bit || 51 == bit || 52 == bit))
299 {
300 return true;
301 }
302
303 if (pau_ptl_fir == id && (4 == bit || 8 == bit))
304 {
305 return true;
306 }
307 }
308 else if (TYPE_OCMB == targetType)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500309 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600310 if (rdffir == id &&
311 (14 == bit || 15 == bit || 17 == bit || 37 == bit))
312 {
313 return true;
314 }
Zane Shelleyf4792d62021-10-28 18:08:22 -0500315 }
316 }
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600317 // Version 2 of the RAS data files. Check if the input signature has the
318 // CS_POSSIBLE or SUE_SOURCE flag set.
319 else if (i_rasData.isFlagSet(i_signature,
320 RasDataParser::RasDataFlags::CS_POSSIBLE) ||
321 i_rasData.isFlagSet(i_signature,
322 RasDataParser::RasDataFlags::SUE_SOURCE))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500323 {
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600324 return true;
Zane Shelleyf4792d62021-10-28 18:08:22 -0500325 }
326
327 return false; // default, nothing found
328}
329
330//------------------------------------------------------------------------------
331
332bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600333 libhei::Signature& o_rootCause,
334 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500335{
336 for (const auto s : i_list)
337 {
338 // Only looking for recoverable attentions.
339 if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType())
340 {
341 continue;
342 }
343
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600344 if (__findCsRootCause(s, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500345 {
346 o_rootCause = s;
347 return true;
348 }
349 }
350
351 return false; // default, nothing found
352}
353
354//------------------------------------------------------------------------------
355
356bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600357 libhei::Signature& o_rootCause,
358 const RasDataParser& i_rasData)
Zane Shelleyf4792d62021-10-28 18:08:22 -0500359{
360 for (const auto s : i_list)
361 {
362 // Only looking for unit checkstop attentions.
363 if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType())
364 {
365 continue;
366 }
367
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600368 if (__findCsRootCause(s, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500369 {
370 o_rootCause = s;
371 return true;
372 }
373 }
374
375 return false; // default, nothing found
376}
377
378//------------------------------------------------------------------------------
379
380bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list,
381 libhei::Signature& o_rootCause)
382{
383 using namespace util::pdbg;
384
Zane Shelley19df3702021-12-16 22:32:54 -0600385 static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR");
Zane Shelleyf4792d62021-10-28 18:08:22 -0500386
387 for (const auto s : i_list)
388 {
389 const auto targetType = getTrgtType(getTrgt(s.getChip()));
390 const auto id = s.getId();
391 const auto attnType = s.getAttnType();
392
393 // Find any processor with system checkstop attention that did not
394 // originate from the PB_EXT_FIR.
395 if ((TYPE_PROC == targetType) &&
396 (libhei::ATTN_TYPE_CHECKSTOP == attnType) && (pb_ext_fir != id))
397 {
398 o_rootCause = s;
399 return true;
400 }
401 }
402
403 return false; // default, nothing found
404}
405
406//------------------------------------------------------------------------------
407
Zane Shelleybaec7c02022-03-17 11:05:20 -0500408bool __findTiRootCause(const std::vector<libhei::Signature>& i_list,
409 libhei::Signature& o_rootCause)
410{
411 using namespace util::pdbg;
412
413 using func = libhei::NodeId_t (*)(const std::string& i_str);
414 func __hash = libhei::hash<libhei::NodeId_t>;
415
416 // PROC registers
417 static const auto tp_local_fir = __hash("TP_LOCAL_FIR");
418 static const auto occ_fir = __hash("OCC_FIR");
419 static const auto pbao_fir = __hash("PBAO_FIR");
420 static const auto n0_local_fir = __hash("N0_LOCAL_FIR");
421 static const auto int_cq_fir = __hash("INT_CQ_FIR");
422 static const auto nx_cq_fir = __hash("NX_CQ_FIR");
423 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR");
424 static const auto vas_fir = __hash("VAS_FIR");
425 static const auto n1_local_fir = __hash("N1_LOCAL_FIR");
426 static const auto mcd_fir = __hash("MCD_FIR");
427 static const auto pb_station_fir_en_1 = __hash("PB_STATION_FIR_EN_1");
428 static const auto pb_station_fir_en_2 = __hash("PB_STATION_FIR_EN_2");
429 static const auto pb_station_fir_en_3 = __hash("PB_STATION_FIR_EN_3");
430 static const auto pb_station_fir_en_4 = __hash("PB_STATION_FIR_EN_4");
431 static const auto pb_station_fir_es_1 = __hash("PB_STATION_FIR_ES_1");
432 static const auto pb_station_fir_es_2 = __hash("PB_STATION_FIR_ES_2");
433 static const auto pb_station_fir_es_3 = __hash("PB_STATION_FIR_ES_3");
434 static const auto pb_station_fir_es_4 = __hash("PB_STATION_FIR_ES_4");
435 static const auto pb_station_fir_eq = __hash("PB_STATION_FIR_EQ");
436 static const auto psihb_fir = __hash("PSIHB_FIR");
437 static const auto pbaf_fir = __hash("PBAF_FIR");
438 static const auto lpc_fir = __hash("LPC_FIR");
439 static const auto eq_core_fir = __hash("EQ_CORE_FIR");
440 static const auto eq_l2_fir = __hash("EQ_L2_FIR");
441 static const auto eq_l3_fir = __hash("EQ_L3_FIR");
442 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR");
443 static const auto eq_local_fir = __hash("EQ_LOCAL_FIR");
444 static const auto eq_qme_fir = __hash("EQ_QME_FIR");
445 static const auto iohs_local_fir = __hash("IOHS_LOCAL_FIR");
446 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC");
447 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP");
448 static const auto mc_local_fir = __hash("MC_LOCAL_FIR");
449 static const auto mc_fir = __hash("MC_FIR");
450 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR");
451 static const auto mc_ustl_fir = __hash("MC_USTL_FIR");
452 static const auto nmmu_cq_fir = __hash("NMMU_CQ_FIR");
453 static const auto nmmu_fir = __hash("NMMU_FIR");
454 static const auto mc_omi_dl = __hash("MC_OMI_DL");
455 static const auto pau_local_fir = __hash("PAU_LOCAL_FIR");
456 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR");
457 static const auto pau_phy_fir = __hash("PAU_PHY_FIR");
458 static const auto pau_fir_0 = __hash("PAU_FIR_0");
459 static const auto pau_fir_2 = __hash("PAU_FIR_2");
460 static const auto pci_local_fir = __hash("PCI_LOCAL_FIR");
461 static const auto pci_iop_fir = __hash("PCI_IOP_FIR");
462 static const auto pci_nest_fir = __hash("PCI_NEST_FIR");
463
464 // OCMB registers
465 static const auto ocmb_lfir = __hash("OCMB_LFIR");
466 static const auto mmiofir = __hash("MMIOFIR");
467 static const auto srqfir = __hash("SRQFIR");
468 static const auto rdffir = __hash("RDFFIR");
469 static const auto tlxfir = __hash("TLXFIR");
470 static const auto omi_dl = __hash("OMI_DL");
471
472 for (const auto& signature : i_list)
473 {
474 const auto targetType = getTrgtType(getTrgt(signature.getChip()));
475 const auto attnType = signature.getAttnType();
476 const auto id = signature.getId();
477 const auto bit = signature.getBit();
478
479 // Only looking for recoverable or unit checkstop attentions.
480 if (libhei::ATTN_TYPE_RECOVERABLE != attnType &&
481 libhei::ATTN_TYPE_UNIT_CS != attnType)
482 {
483 continue;
484 }
485
486 // Ignore attentions that should not be blamed as root cause of a TI.
487 // This would include informational only FIRs or correctable errors.
488 if (TYPE_PROC == targetType)
489 {
490 if (tp_local_fir == id &&
491 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
492 5 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
493 11 == bit || 20 == bit || 22 == bit || 23 == bit ||
494 24 == bit || 38 == bit || 40 == bit || 41 == bit ||
495 46 == bit || 47 == bit || 48 == bit || 55 == bit ||
496 56 == bit || 57 == bit || 58 == bit || 59 == bit))
497 {
498 continue;
499 }
500
501 if (occ_fir == id &&
502 (9 == bit || 10 == bit || 15 == bit || 20 == bit || 21 == bit ||
503 22 == bit || 23 == bit || 32 == bit || 33 == bit ||
504 34 == bit || 36 == bit || 42 == bit || 43 == bit ||
505 46 == bit || 47 == bit || 48 == bit || 51 == bit ||
506 52 == bit || 53 == bit || 54 == bit || 57 == bit))
507 {
508 continue;
509 }
510
511 if (pbao_fir == id &&
512 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 11 == bit ||
513 13 == bit || 15 == bit || 16 == bit || 17 == bit))
514 {
515 continue;
516 }
517
518 if ((n0_local_fir == id || n1_local_fir == id ||
519 iohs_local_fir == id || mc_local_fir == id ||
520 pau_local_fir == id || pci_local_fir == id) &&
521 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
522 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit ||
523 10 == bit || 11 == bit || 20 == bit || 21 == bit))
524 {
525 continue;
526 }
527
528 if (int_cq_fir == id &&
529 (0 == bit || 3 == bit || 5 == bit || 7 == bit || 36 == bit ||
Caleb Palmerecde53f2022-12-13 15:11:47 -0600530 47 == bit || 48 == bit || 49 == bit || 50 == bit ||
Zane Shelleybaec7c02022-03-17 11:05:20 -0500531 58 == bit || 59 == bit || 60 == bit))
532 {
533 continue;
534 }
535
536 if (nx_cq_fir == id &&
537 (1 == bit || 4 == bit || 18 == bit || 32 == bit || 33 == bit))
538 {
539 continue;
540 }
541
542 if (nx_dma_eng_fir == id &&
543 (4 == bit || 6 == bit || 9 == bit || 10 == bit || 11 == bit ||
544 34 == bit || 35 == bit || 36 == bit || 37 == bit || 39 == bit))
545 {
546 continue;
547 }
548
549 if (vas_fir == id &&
550 (8 == bit || 9 == bit || 11 == bit || 12 == bit || 13 == bit))
551 {
552 continue;
553 }
554
555 if (mcd_fir == id && (0 == bit))
556 {
557 continue;
558 }
559
560 if ((pb_station_fir_en_1 == id || pb_station_fir_en_2 == id ||
561 pb_station_fir_en_3 == id || pb_station_fir_en_4 == id ||
562 pb_station_fir_es_1 == id || pb_station_fir_es_2 == id ||
563 pb_station_fir_es_3 == id || pb_station_fir_es_4 == id ||
564 pb_station_fir_eq == id) &&
565 (9 == bit))
566 {
567 continue;
568 }
569
570 if (psihb_fir == id && (0 == bit || 23 == bit))
571 {
572 continue;
573 }
574
575 if (pbaf_fir == id &&
576 (0 == bit || 1 == bit || 3 == bit || 4 == bit || 5 == bit ||
577 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
578 11 == bit || 19 == bit || 20 == bit || 21 == bit ||
579 28 == bit || 29 == bit || 30 == bit || 31 == bit ||
580 32 == bit || 33 == bit || 34 == bit || 35 == bit || 36 == bit))
581 {
582 continue;
583 }
584
585 if (lpc_fir == id && (5 == bit))
586 {
587 continue;
588 }
589
590 if (eq_core_fir == id &&
591 (0 == bit || 2 == bit || 4 == bit || 7 == bit || 9 == bit ||
592 11 == bit || 13 == bit || 18 == bit || 21 == bit ||
593 24 == bit || 29 == bit || 31 == bit || 37 == bit ||
594 43 == bit || 56 == bit || 57 == bit))
595 {
596 continue;
597 }
598
599 if (eq_l2_fir == id &&
600 (0 == bit || 6 == bit || 11 == bit || 19 == bit || 36 == bit))
601 {
602 continue;
603 }
604
605 if (eq_l3_fir == id &&
606 (3 == bit || 4 == bit || 7 == bit || 10 == bit || 13 == bit))
607 {
608 continue;
609 }
610
611 if (eq_ncu_fir == id && (9 == bit))
612 {
613 continue;
614 }
615
616 if (eq_local_fir == id &&
617 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 5 == bit ||
618 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
619 11 == bit || 12 == bit || 13 == bit || 14 == bit ||
620 15 == bit || 16 == bit || 20 == bit || 21 == bit ||
621 22 == bit || 23 == bit || 24 == bit || 25 == bit ||
622 26 == bit || 27 == bit || 28 == bit || 29 == bit ||
623 30 == bit || 31 == bit || 32 == bit || 33 == bit ||
624 34 == bit || 35 == bit || 36 == bit || 37 == bit ||
625 38 == bit || 39 == bit))
626 {
627 continue;
628 }
629
630 if (eq_qme_fir == id && (7 == bit || 25 == bit))
631 {
632 continue;
633 }
634
635 if (iohs_dlp_fir_oc == id &&
636 (6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit ||
637 48 == bit || 49 == bit || 52 == bit || 53 == bit))
638 {
639 continue;
640 }
641
642 if (iohs_dlp_fir_smp == id &&
643 (6 == bit || 7 == bit || 14 == bit || 15 == bit || 16 == bit ||
644 17 == bit || 38 == bit || 39 == bit || 44 == bit ||
645 45 == bit || 50 == bit || 51 == bit))
646 {
647 continue;
648 }
649
650 if (mc_fir == id &&
651 (5 == bit || 8 == bit || 15 == bit || 16 == bit))
652 {
653 continue;
654 }
655
656 if (mc_dstl_fir == id &&
657 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
658 5 == bit || 6 == bit || 7 == bit || 14 == bit || 15 == bit))
659 {
660 continue;
661 }
662
663 if (mc_ustl_fir == id &&
664 (6 == bit || 20 == bit || 33 == bit || 34 == bit))
665 {
666 continue;
667 }
668
669 if (nmmu_cq_fir == id && (8 == bit || 11 == bit || 14 == bit))
670 {
671 continue;
672 }
673
674 if (nmmu_fir == id &&
675 (0 == bit || 3 == bit || 8 == bit || 9 == bit || 10 == bit ||
676 11 == bit || 12 == bit || 13 == bit || 14 == bit ||
677 15 == bit || 30 == bit || 31 == bit || 41 == bit))
678 {
679 continue;
680 }
681
682 if (mc_omi_dl == id && (2 == bit || 3 == bit || 6 == bit ||
683 7 == bit || 9 == bit || 10 == bit))
684 {
685 continue;
686 }
687
688 if (pau_ptl_fir == id && (5 == bit || 9 == bit))
689 {
690 continue;
691 }
692
693 if (pau_phy_fir == id &&
694 (2 == bit || 3 == bit || 6 == bit || 7 == bit || 15 == bit))
695 {
696 continue;
697 }
698
699 if (pau_fir_0 == id && (13 == bit || 30 == bit || 41 == bit))
700 {
701 continue;
702 }
703
704 if (pau_fir_2 == id && (19 == bit || 46 == bit || 49 == bit))
705 {
706 continue;
707 }
708
709 if (pci_iop_fir == id &&
710 (0 == bit || 2 == bit || 4 == bit || 6 == bit || 7 == bit ||
711 8 == bit || 10 == bit))
712 {
713 continue;
714 }
715
716 if (pci_nest_fir == id && (2 == bit || 5 == bit))
717 {
718 continue;
719 }
720 }
721 else if (TYPE_OCMB == targetType)
722 {
723 if (ocmb_lfir == id &&
724 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 23 == bit ||
725 37 == bit || 63 == bit))
726 {
727 continue;
728 }
729
730 if (mmiofir == id && (2 == bit))
731 {
732 continue;
733 }
734
735 if (srqfir == id &&
736 (2 == bit || 4 == bit || 14 == bit || 15 == bit || 23 == bit ||
737 25 == bit || 28 == bit))
738 {
739 continue;
740 }
741
742 if (rdffir == id &&
743 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit ||
744 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit ||
745 18 == bit || 38 == bit || 40 == bit || 41 == bit ||
746 45 == bit || 46 == bit))
747 {
748 continue;
749 }
750
751 if (tlxfir == id && (0 == bit || 9 == bit || 26 == bit))
752 {
753 continue;
754 }
755
756 if (omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 7 == bit ||
757 9 == bit || 10 == bit))
758 {
759 continue;
760 }
761 }
762
763 // At this point, the attention has not been explicitly ignored. So
764 // return this signature and exit.
765 o_rootCause = signature;
766 return true;
767 }
768
769 return false; // default, nothing found
770}
771
772//------------------------------------------------------------------------------
773
Zane Shelleyec227c22021-12-09 15:54:40 -0600774bool filterRootCause(AnalysisType i_type,
775 const libhei::IsolationData& i_isoData,
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600776 libhei::Signature& o_rootCause,
777 const RasDataParser& i_rasData)
Zane Shelley65fefb22021-10-18 15:35:26 -0500778{
779 // We'll need to make a copy of the list so that the original list is
Zane Shelleyec227c22021-12-09 15:54:40 -0600780 // maintained for the PEL.
Zane Shelley65fefb22021-10-18 15:35:26 -0500781 std::vector<libhei::Signature> list{i_isoData.getSignatureList()};
782
783 // START WORKAROUND
784 // TODO: Filtering should be data driven. Until that support is available,
785 // use the following isolation rules.
786
Zane Shelleyec227c22021-12-09 15:54:40 -0600787 // Ensure the list is not empty before continuing.
Zane Shelleyf4792d62021-10-28 18:08:22 -0500788 if (list.empty())
789 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600790 return false; // nothing more to do
Zane Shelleyf4792d62021-10-28 18:08:22 -0500791 }
792
793 // First, look for any RCS OSC errors. This must always be first because
794 // they can cause downstream PLL unlock attentions.
795 if (__findRcsOscError(list, o_rootCause))
Zane Shelleya7369f82021-10-18 16:52:21 -0500796 {
797 return true;
798 }
799
Zane Shelleyf4792d62021-10-28 18:08:22 -0500800 // Second, look for any PLL unlock attentions. This must always be second
801 // because PLL unlock attentions can cause any number of downstream
802 // attentions, including a system checkstop.
803 if (__findPllUnlock(list, o_rootCause))
804 {
805 return true;
806 }
807
Zane Shelleyec227c22021-12-09 15:54:40 -0600808 // Regardless of the analysis type, always look for anything that could be
809 // blamed as the root cause of a system checkstop.
810
Zane Shelleyf4792d62021-10-28 18:08:22 -0500811 // Memory channel failure attentions will produce SUEs and likely cause
812 // downstream attentions, including a system checkstop.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600813 if (__findMemoryChannelFailure(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500814 {
815 return true;
816 }
817
818 // Look for any recoverable attentions that have been identified as a
819 // potential root cause of a system checkstop attention. These would include
820 // any attention that would generate an SUE. Note that is it possible for
821 // recoverables to generate unit checkstop attentions so we must check them
822 // first.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600823 if (__findCsRootCause_RE(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500824 {
825 return true;
826 }
827
828 // Look for any unit checkstop attentions (other than memory channel
829 // failures) that have been identified as a potential root cause of a
830 // system checkstop attention. These would include any attention that would
831 // generate an SUE.
Caleb Palmer1a4f0e72022-11-07 15:08:01 -0600832 if (__findCsRootCause_UCS(list, o_rootCause, i_rasData))
Zane Shelleyf4792d62021-10-28 18:08:22 -0500833 {
834 return true;
835 }
836
837 // Look for any system checkstop attentions that originated from within the
838 // chip that reported the attention. In other words, no external checkstop
839 // attentions.
840 if (__findNonExternalCs(list, o_rootCause))
841 {
842 return true;
843 }
844
Zane Shelleyec227c22021-12-09 15:54:40 -0600845 if (AnalysisType::SYSTEM_CHECKSTOP != i_type)
Zane Shelley65fefb22021-10-18 15:35:26 -0500846 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600847 // No system checkstop root cause attentions were found. Next, look for
848 // any recoverable or unit checkstop attentions that could be associated
Zane Shelleybaec7c02022-03-17 11:05:20 -0500849 // with a TI.
850 if (__findTiRootCause(list, o_rootCause))
Zane Shelleyec227c22021-12-09 15:54:40 -0600851 {
Zane Shelleyec227c22021-12-09 15:54:40 -0600852 return true;
853 }
854
855 if (AnalysisType::TERMINATE_IMMEDIATE != i_type)
856 {
857 // No attentions associated with a system checkstop or TI were
858 // found. Simply, return the first entry in the list.
859 o_rootCause = list.front();
860 return true;
861 }
Zane Shelley65fefb22021-10-18 15:35:26 -0500862 }
863
864 // END WORKAROUND
865
866 return false; // default, no active attentions found.
867}
868
869//------------------------------------------------------------------------------
870
871} // namespace analyzer