blob: 3be26bc9f40ce143469d1de96eeec9c384a01dc1 [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Zane Shelley9fb73932020-09-15 13:34:57 -05002#include <unistd.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05003
Zane Shelleyebff0d32021-11-21 10:52:07 -06004#include <analyzer/analyzer_main.hpp>
Zane Shelleya9b44342021-08-08 17:15:52 -05005#include <analyzer/ras-data/ras-data-parser.hpp>
Zane Shelley4ed4be52021-02-15 17:53:40 -06006#include <analyzer/service_data.hpp>
Ben Tyner7029e522021-08-09 19:18:24 -05007#include <attn/attn_dump.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06008#include <hei_main.hpp>
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -06009#include <util/pdbg.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050010#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -060011
12namespace analyzer
13{
14
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060015//------------------------------------------------------------------------------
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050016
Zane Shelleyf4bd5ff2020-11-05 22:26:04 -060017// Forward references for externally defined functions.
Ben Tyner87eabc62020-05-14 17:56:54 -050018
/**
 * @brief Gets the list of active chips and initializes the isolator.
 * @param o_chips The returned list of active chips.
 */
Zane Shelley171a2e02020-11-13 13:56:13 -060023void initializeIsolator(std::vector<libhei::Chip>& o_chips);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050024
/**
 * @brief Filters the isolation data to determine the root cause signature.
 * @param i_isoData The data gathered during isolation (for FFDC).
 * @param o_rootCause The returned root cause signature.
 * @return True, if root cause has been found. False, otherwise.
 */
31bool filterRootCause(const libhei::IsolationData& i_isoData,
32 libhei::Signature& o_rootCause);
33
34/**
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060035 * @brief Will create and submit a PEL using the given data.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060036 * @param i_isoData The data gathered during isolation (for FFDC).
Zane Shelley4ed4be52021-02-15 17:53:40 -060037 * @param i_servData Data regarding service actions gathered during analysis.
Zane Shelley611b3442021-11-19 16:02:01 -060038 * @return The platform log ID. Will return zero if no PEL is generated.
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060039 */
Zane Shelley611b3442021-11-19 16:02:01 -060040uint32_t createPel(const libhei::IsolationData& i_isoData,
41 const ServiceData& i_servData);
Zane Shelleyd3b9bac2020-11-17 21:59:12 -060042
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050043//------------------------------------------------------------------------------
44
Zane Shelleyebff0d32021-11-21 10:52:07 -060045const char* __attn(libhei::AttentionType_t i_type)
Zane Shelley2f263182020-07-10 21:41:21 -050046{
47 const char* str = "";
Zane Shelleyebff0d32021-11-21 10:52:07 -060048 switch (i_type)
Zane Shelley2f263182020-07-10 21:41:21 -050049 {
50 case libhei::ATTN_TYPE_CHECKSTOP:
51 str = "CHECKSTOP";
52 break;
53 case libhei::ATTN_TYPE_UNIT_CS:
54 str = "UNIT_CS";
55 break;
56 case libhei::ATTN_TYPE_RECOVERABLE:
57 str = "RECOVERABLE";
58 break;
59 case libhei::ATTN_TYPE_SP_ATTN:
60 str = "SP_ATTN";
61 break;
62 case libhei::ATTN_TYPE_HOST_ATTN:
63 str = "HOST_ATTN";
64 break;
65 default:
Zane Shelleyebff0d32021-11-21 10:52:07 -060066 trace::err("Unsupported attention type: %u", i_type);
Zane Shelley2f263182020-07-10 21:41:21 -050067 assert(0);
68 }
69 return str;
70}
71
Zane Shelley2f263182020-07-10 21:41:21 -050072//------------------------------------------------------------------------------
73
Zane Shelleyebff0d32021-11-21 10:52:07 -060074const char* __analysisType(AnalysisType i_type)
75{
76 const char* str = "";
77 switch (i_type)
78 {
79 case AnalysisType::SYSTEM_CHECKSTOP:
80 str = "SYSTEM_CHECKSTOP";
81 break;
82 case AnalysisType::TERMINATE_IMMEDIATE:
83 str = "TERMINATE_IMMEDIATE";
84 break;
85 case AnalysisType::MANUAL:
86 str = "MANUAL";
87 break;
88 default:
89 trace::err("Unsupported analysis type: %u", i_type);
90 assert(0);
91 }
92 return str;
93}
94
95//------------------------------------------------------------------------------
96
97uint32_t analyzeHardware(AnalysisType i_type, attn::DumpParameters& o_dump)
Zane Shelley9fb73932020-09-15 13:34:57 -050098{
Zane Shelley611b3442021-11-19 16:02:01 -060099 uint32_t o_plid = 0; // default, zero indicates PEL was not created
Zane Shelley9fb73932020-09-15 13:34:57 -0500100
Zane Shelleye5411f02021-08-04 22:41:35 -0500101 if (!util::pdbg::queryHardwareAnalysisSupported())
102 {
103 trace::err("Hardware error analysis is not supported on this system");
Zane Shelley611b3442021-11-19 16:02:01 -0600104 return o_plid;
Zane Shelleye5411f02021-08-04 22:41:35 -0500105 }
106
Zane Shelleyebff0d32021-11-21 10:52:07 -0600107 trace::inf(">>> enter analyzeHardware(%s)", __analysisType(i_type));
Zane Shelleye5411f02021-08-04 22:41:35 -0500108
109 // Initialize the isolator and get all of the chips to be analyzed.
110 trace::inf("Initializing the isolator...");
111 std::vector<libhei::Chip> chips;
112 initializeIsolator(chips);
113
114 // Isolate attentions.
115 trace::inf("Isolating errors: # of chips=%u", chips.size());
116 libhei::IsolationData isoData{};
117 libhei::isolate(chips, isoData);
118
Zane Shelley65fefb22021-10-18 15:35:26 -0500119 // For debug, trace out the original list of signatures before filtering.
120 for (const auto& sig : isoData.getSignatureList())
121 {
122 trace::inf("Signature: %s 0x%0" PRIx32 " %s",
123 util::pdbg::getPath(sig.getChip()), sig.toUint32(),
124 __attn(sig.getAttnType()));
125 }
126
Zane Shelleye5411f02021-08-04 22:41:35 -0500127 // Filter for root cause attention.
Zane Shelleycb457382020-11-02 20:55:06 -0600128 libhei::Signature rootCause{};
Zane Shelley611b3442021-11-19 16:02:01 -0600129 bool attnFound = filterRootCause(isoData, rootCause);
Zane Shelleycb457382020-11-02 20:55:06 -0600130
Zane Shelleyb7879d32021-12-06 18:02:03 -0600131 // If a root cause attention was found, or if this was a system checkstop,
132 // generate a PEL.
133 if (attnFound || AnalysisType::SYSTEM_CHECKSTOP == i_type)
Zane Shelley9fb73932020-09-15 13:34:57 -0500134 {
Zane Shelleyb7879d32021-12-06 18:02:03 -0600135 if (attnFound)
136 {
137 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
138 util::pdbg::getPath(rootCause.getChip()),
139 rootCause.toUint32(), __attn(rootCause.getAttnType()));
140 }
141 else
142 {
143 // This is bad. Analysis should have found a root cause attention
144 // for a system checkstop. Issues could range from code bugs to SCOM
145 // errors. Regardless, generate a PEL with FFDC to assist with
146 // debug.
147 trace::err("System checkstop with no root cause attention");
148 rootCause = libhei::Signature{}; // just in case
149 }
Zane Shelleycb457382020-11-02 20:55:06 -0600150
Zane Shelleyb7879d32021-12-06 18:02:03 -0600151 // Start building the service data.
Zane Shelleyca496192021-08-09 12:05:52 -0500152 ServiceData servData{rootCause, isoData.queryCheckstop()};
Zane Shelleyb7879d32021-12-06 18:02:03 -0600153
154 // Apply any service actions, if needed. Note that there are no
155 // resolutions for manual analysis.
156 if (AnalysisType::MANUAL != i_type)
157 {
158 if (attnFound)
159 {
160 // Resolve the root cause attention.
161 RasDataParser rasData{};
162 rasData.getResolution(rootCause)->resolve(servData);
163 }
164 else
165 {
166 // Analysis failed so apply the Level 2 Support resolution.
167 ProcedureCalloutResolution res{callout::Procedure::NEXTLVL,
168 callout::Priority::HIGH};
169 res.resolve(servData);
170 }
171 }
Zane Shelleyd3b9bac2020-11-17 21:59:12 -0600172
173 // Create and commit a PEL.
Zane Shelley611b3442021-11-19 16:02:01 -0600174 o_plid = createPel(isoData, servData);
Ben Tyner7029e522021-08-09 19:18:24 -0500175
Zane Shelley611b3442021-11-19 16:02:01 -0600176 if (0 == o_plid)
177 {
178 trace::err("Failed to create PEL");
179 }
180 else
181 {
182 trace::inf("PEL created: PLID=0x%0" PRIx32, o_plid);
Zane Shelleybf3326f2021-11-12 13:41:39 -0600183
Zane Shelley611b3442021-11-19 16:02:01 -0600184 // Gather/return information needed for dump. A hardware dump will
185 // always be used for system checkstop attenions. Software dumps
186 // will be reserved for MP-IPLs during TI analysis.
187 // TODO: Need ID from root cause. At the moment, HUID does not exist
188 // in devtree. Will need a better ID definition.
Zane Shelleyebff0d32021-11-21 10:52:07 -0600189 o_dump.unitId = 0;
190 o_dump.dumpType = attn::DumpType::Hardware;
Zane Shelley611b3442021-11-19 16:02:01 -0600191 }
Zane Shelley9fb73932020-09-15 13:34:57 -0500192 }
Zane Shelleyb7879d32021-12-06 18:02:03 -0600193 else
194 {
195 // It is possible for TI handling, or manually initiated analysis via
196 // the command line, that there will not be an active attention. In
197 // which case, we will do nothing and let the caller of this function
198 // determine if this is the expected behavior.
199 trace::inf("No active attentions found");
200 }
Zane Shelley9fb73932020-09-15 13:34:57 -0500201
Zane Shelleye5411f02021-08-04 22:41:35 -0500202 // All done, clean up the isolator.
203 trace::inf("Uninitializing isolator...");
204 libhei::uninitialize();
Ben Tyner87eabc62020-05-14 17:56:54 -0500205
Zane Shelley2f263182020-07-10 21:41:21 -0500206 trace::inf("<<< exit analyzeHardware()");
207
Zane Shelley611b3442021-11-19 16:02:01 -0600208 return o_plid;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600209}
210
Ben Tynereea45422021-04-15 10:54:14 -0500211//------------------------------------------------------------------------------
212
213/**
214 * @brief Get error isolator build information
215 *
216 * @return Pointer to build information
217 */
218const char* getBuildInfo()
219{
220 return libhei::getBuildInfo();
221}
222
Ben Tyner0205f3b2020-02-24 10:24:47 -0600223} // namespace analyzer