blob: 5df6295d7c7a74f3cedb698ba381b0a9ac4de391 [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05002#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05003
Zane Shelleya9b44342021-08-08 17:15:52 -05004#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley4ed4be52021-02-15 17:53:40 -06005#include <analyzer/service_data.hpp>
Ben Tyner7029e522021-08-09 19:18:24 -05006#include <attn/attn_dump.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06007#include <hei_main.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06008#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05009#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -060010
11namespace analyzer
12{
13
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060014//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050015
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060016// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050017
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060018/**
19 * @brief Will get the list of active chips and initialize the isolator.
20 * @param o_chips The returned list of active chips.
21 */
Zane Shelley171a2e02020-11-13 13:56:13 -060022void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050023
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060024/**
Zane Shelley65fefb22021-10-18 15:35:26 -050025 * @brief Will filter the isolation data for the root cause attention.
26 * @param i_isoData The data gathered during isolation (for FFDC).
27 * @param o_rootCause The returned root cause signature.
28 * @return True, if root cause has been found. False, otherwise.
29 */
30bool filterRootCause(const libhei::IsolationData& i_isoData,
31 libhei::Signature& o_rootCause);
32
33/**
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060034 * @brief Will create and submit a PEL using the given data.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060035 * @param i_isoData The data gathered during isolation (for FFDC).
Zane Shelley4ed4be52021-02-15 17:53:40 -060036 * @param i_servData Data regarding service actions gathered during analysis.
Ben Tyner7029e522021-08-09 19:18:24 -050037 * @return Tuple containing the BMC log ID, then the platform log ID.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060038 */
Ben Tyner7029e522021-08-09 19:18:24 -050039std::tuple<uint32_t, uint32_t> createPel(const libhei::IsolationData& i_isoData,
40 const ServiceData& i_servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060041
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050042//------------------------------------------------------------------------------
43
Zane Shelley2f263182020-07-10 21:41:21 -050044const char* __attn(libhei::AttentionType_t i_attnType)
45{
46 const char* str = "";
47 switch (i_attnType)
48 {
49 case libhei::ATTN_TYPE_CHECKSTOP:
50 str = "CHECKSTOP";
51 break;
52 case libhei::ATTN_TYPE_UNIT_CS:
53 str = "UNIT_CS";
54 break;
55 case libhei::ATTN_TYPE_RECOVERABLE:
56 str = "RECOVERABLE";
57 break;
58 case libhei::ATTN_TYPE_SP_ATTN:
59 str = "SP_ATTN";
60 break;
61 case libhei::ATTN_TYPE_HOST_ATTN:
62 str = "HOST_ATTN";
63 break;
64 default:
65 trace::err("Unsupported attention type: %u", i_attnType);
66 assert(0);
67 }
68 return str;
69}
70
Zane Shelley2f263182020-07-10 21:41:21 -050071//------------------------------------------------------------------------------
72
Ben Tyner7029e522021-08-09 19:18:24 -050073bool analyzeHardware(attn::DumpParameters& o_dumpParameters)
Zane Shelley9fb73932020-09-15 13:34:57 -050074{
75 bool attnFound = false;
76
Zane Shelleye5411f02021-08-04 22:41:35 -050077 if (!util::pdbg::queryHardwareAnalysisSupported())
78 {
79 trace::err("Hardware error analysis is not supported on this system");
80 return attnFound;
81 }
82
83 trace::inf(">>> enter analyzeHardware()");
84
85 // Initialize the isolator and get all of the chips to be analyzed.
86 trace::inf("Initializing the isolator...");
87 std::vector<libhei::Chip> chips;
88 initializeIsolator(chips);
89
90 // Isolate attentions.
91 trace::inf("Isolating errors: # of chips=%u", chips.size());
92 libhei::IsolationData isoData{};
93 libhei::isolate(chips, isoData);
94
Zane Shelley65fefb22021-10-18 15:35:26 -050095 // For debug, trace out the original list of signatures before filtering.
96 for (const auto& sig : isoData.getSignatureList())
97 {
98 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
99 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
100 __attn(sig.getAttnType()));
101 }
102
Zane Shelleye5411f02021-08-04 22:41:35 -0500103 // Filter for root cause attention.
Zane Shelleycb457382020-11-02 20:55:06 -0600104 libhei::Signature rootCause{};
Zane Shelley65fefb22021-10-18 15:35:26 -0500105 attnFound = filterRootCause(isoData, rootCause);
Zane Shelleycb457382020-11-02 20:55:06 -0600106
107 if (!attnFound)
Zane Shelley9fb73932020-09-15 13:34:57 -0500108 {
Zane Shelleye5411f02021-08-04 22:41:35 -0500109 // It is possible for TI handling, or manually initiated analysis via
110 // the command line, that there will not be an active attention. In
111 // which case, we will do nothing and let the caller of this function
112 // determine if this is the expected behavior.
Zane Shelley9fb73932020-09-15 13:34:57 -0500113 trace::inf("No active attentions found");
114 }
115 else
116 {
Zane Shelley9fb73932020-09-15 13:34:57 -0500117 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
Zane Shelleycb457382020-11-02 20:55:06 -0600118 util::pdbg::getPath(rootCause.getChip()),
119 rootCause.toUint32(), __attn(rootCause.getAttnType()));
120
Zane Shelley95135822021-08-23 09:00:05 -0500121 // Resolve any service actions required by the root cause.
Zane Shelleya9b44342021-08-08 17:15:52 -0500122 RasDataParser rasData{};
Zane Shelleyca496192021-08-09 12:05:52 -0500123 ServiceData servData{rootCause, isoData.queryCheckstop()};
Zane Shelleya9b44342021-08-08 17:15:52 -0500124 rasData.getResolution(rootCause)->resolve(servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600125
126 // Create and commit a PEL.
Ben Tyner7029e522021-08-09 19:18:24 -0500127 uint32_t logId = std::get<1>(createPel(isoData, servData));
128
Zane Shelley95135822021-08-23 09:00:05 -0500129 // Write guard records to persistent storage.
130 // TODO: The PEL ID will be required, but interface is still unknown.
131 for (const auto& guard : servData.getGuardList())
132 {
133 guard.apply();
134 }
135
136 // Gather/return information needed for dump.
137 // TODO: Need ID from root cause. At the moment, HUID does not exist in
138 // devtree. Will need a better ID definition.
139 // TODO: HW dump is default, but some attentions may require something
140 // different. Will need to investigate adding that information to
141 // the RAS data files.
Ben Tyner7029e522021-08-09 19:18:24 -0500142 o_dumpParameters.logId = logId;
143 o_dumpParameters.unitId = 0;
144 o_dumpParameters.dumpType = attn::DumpType::Hardware;
Zane Shelley9fb73932020-09-15 13:34:57 -0500145 }
146
Zane Shelleye5411f02021-08-04 22:41:35 -0500147 // All done, clean up the isolator.
148 trace::inf("Uninitializing isolator...");
149 libhei::uninitialize();
Ben Tyner87eabc62020-05-14 17:56:54 -0500150
Zane Shelley2f263182020-07-10 21:41:21 -0500151 trace::inf("<<< exit analyzeHardware()");
152
Zane Shelley097a71a2020-06-08 15:55:29 -0500153 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600154}
155
Ben Tynereea45422021-04-15 10:54:14 -0500156//------------------------------------------------------------------------------
157
158/**
159 * @brief Get error isolator build information
160 *
161 * @return Pointer to build information
162 */
163const char* getBuildInfo()
164{
165 return libhei::getBuildInfo();
166}
167
Ben Tyner0205f3b2020-02-24 10:24:47 -0600168} // namespace analyzer