// blob: 16ac9a7bae1e223d04c1625601ec3aa775d268ee
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -05001extern "C"
2{
3#include <libpdbg.h>
4#include <libpdbg_sbe.h>
5}
6
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -05007#include "create_pel.hpp"
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -05008#include "sbe_consts.hpp"
9#include "sbe_dump_collector.hpp"
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -050010#include "sbe_type.hpp"
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050011
12#include <libphal.H>
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -050013#include <phal_exception.H>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050014
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050015#include <phosphor-logging/elog-errors.hpp>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050016#include <phosphor-logging/lg2.hpp>
17#include <phosphor-logging/log.hpp>
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050018#include <sbe_consts.hpp>
19#include <xyz/openbmc_project/Common/File/error.hpp>
20#include <xyz/openbmc_project/Common/error.hpp>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050021
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050022#include <cstdint>
23#include <filesystem>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050024#include <format>
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050025#include <fstream>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050026#include <stdexcept>
27
28namespace openpower::dump::sbe_chipop
29{
30
31using namespace phosphor::logging;
32using namespace openpower::dump::SBE;
33
34void SbeDumpCollector::collectDump(uint8_t type, uint32_t id,
35 uint64_t failingUnit,
36 const std::filesystem::path& path)
37{
38 lg2::error("Starting dump collection: type:{TYPE} id:{ID} "
39 "failingUnit:{FAILINGUNIT}, path:{PATH}",
40 "TYPE", type, "ID", id, "FAILINGUNIT", failingUnit, "PATH",
41 path.string());
42
43 initializePdbg();
44
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -050045 TargetMap targets;
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050046
47 struct pdbg_target* target = nullptr;
48 pdbg_for_each_class_target("proc", target)
49 {
50 if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
51 !openpower::phal::pdbg::isTgtFunctional(target))
52 {
53 continue;
54 }
55
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -050056 bool includeTarget = true;
57 // if the dump type is hostboot then call stop instructions
58 if (type == SBE_DUMP_TYPE_HOSTBOOT)
59 {
60 includeTarget = executeThreadStop(target);
61 }
62 if (includeTarget)
63 {
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -050064 targets[target] = std::vector<struct pdbg_target*>();
65
66 // Hardware dump needs OCMB data if present
67 if (type == openpower::dump::SBE::SBE_DUMP_TYPE_HARDWARE)
68 {
69 struct pdbg_target* ocmbTarget;
70 pdbg_for_each_target("ocmb", target, ocmbTarget)
71 {
72 if (!is_ody_ocmb_chip(ocmbTarget))
73 {
74 continue;
75 }
76
77 if (pdbg_target_probe(ocmbTarget) != PDBG_TARGET_ENABLED)
78 {
79 continue;
80 }
81
82 if (!openpower::phal::pdbg::isTgtFunctional(ocmbTarget))
83 {
84 continue;
85 }
86
87 targets[target].push_back(ocmbTarget);
88 }
89 }
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -050090 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050091 }
92
93 std::vector<uint8_t> clockStates = {SBE_CLOCK_ON, SBE_CLOCK_OFF};
94 for (auto cstate : clockStates)
95 {
Dhruvaraj Subhashchandran9098d8c2022-12-01 00:40:20 -060096 // Skip collection for performance dump if clock state is not ON
97 if (type == SBE_DUMP_TYPE_PERFORMANCE && cstate != SBE_CLOCK_ON)
98 {
99 continue;
100 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500101 auto futures = spawnDumpCollectionProcesses(type, id, path, failingUnit,
102 cstate, targets);
103
104 // Wait for all asynchronous tasks to complete
105 for (auto& future : futures)
106 {
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500107 try
108 {
109 future.wait();
110 }
111 catch (const std::exception& e)
112 {
113 lg2::error("Failed to collect dump from SBE ErrorMsg({ERROR})",
114 "ERROR", e);
115 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500116 }
117 lg2::info(
118 "Dump collection completed for clock state({CSTATE}): type({TYPE}) "
119 "id({ID}) failingUnit({FAILINGUNIT}), path({PATH})",
120 "CSTATE", cstate, "TYPE", type, "ID", id, "FAILINGUNIT",
121 failingUnit, "PATH", path.string());
122 }
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500123 if (std::filesystem::is_empty(path))
124 {
125 lg2::error("Failed to collect the dump");
126 throw std::runtime_error("Failed to collect the dump");
127 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500128 lg2::info("Dump collection completed");
129}
130
131void SbeDumpCollector::initializePdbg()
132{
133 openpower::phal::pdbg::init();
134}
135
136std::vector<std::future<void>> SbeDumpCollector::spawnDumpCollectionProcesses(
137 uint8_t type, uint32_t id, const std::filesystem::path& path,
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500138 uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap)
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500139{
140 std::vector<std::future<void>> futures;
141
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500142 for (const auto& [procTarget, ocmbTargets] : targetMap)
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500143 {
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500144 auto future = std::async(std::launch::async,
145 [this, procTarget, ocmbTargets, path, id, type,
146 cstate, failingUnit]() {
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500147 try
148 {
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500149 this->collectDumpFromSBE(procTarget, path, id, type, cstate,
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500150 failingUnit);
151 }
152 catch (const std::exception& e)
153 {
154 lg2::error(
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500155 "Failed to collect dump from SBE on Proc-({PROCINDEX}) {ERROR}",
156 "PROCINDEX", pdbg_target_index(procTarget), "ERROR", e);
157 }
158
159 // Collect OCMBs only with clock on
160 if (cstate == SBE_CLOCK_ON)
161 {
162 // Handle OCMBs serially after handling the proc
163 for (auto ocmbTarget : ocmbTargets)
164 {
165 try
166 {
167 this->collectDumpFromSBE(ocmbTarget, path, id, type,
168 cstate, failingUnit);
169 }
170 catch (const std::exception& e)
171 {
172 lg2::error(
173 "Failed to collect dump from OCMB -({OCMBINDEX}) {ERROR}",
174 "OCMBINDEX", pdbg_target_index(ocmbTarget), "ERROR",
175 e);
176 }
177 }
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500178 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500179 });
180
181 futures.push_back(std::move(future));
182 }
183
184 return futures;
185}
186
/**
 * @brief Create a PEL for a failed SBE chip-op and request an SBE dump on
 *        timeout.
 *
 * The event name is taken from sbeTypeAttributes for the given SBE type:
 * the chip-op failure event by default, the chip-op timeout event when the
 * error type is SBE_CMD_TIMEOUT. Exactly one PEL is created per call, and
 * its log id is passed to the SBE dump request in the timeout case.
 *
 * No exception derived from std::exception leaves this function; failures
 * are logged instead.
 *
 * @param[in] sbeError - error reported by the SBE chip-op
 * @param[in] chipPos  - position of the chip the chip-op was issued to
 * @param[in] sbeType  - type of SBE, used to look up chip name and events
 * @param[in] cmdClass - chip-op command class, OR-ed with cmdType in SRC6
 * @param[in] cmdType  - chip-op command type, OR-ed with cmdClass in SRC6
 */
void SbeDumpCollector::logErrorAndCreatePEL(
    const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
    SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
    std::string chipName;
    try
    {
        // Throws std::out_of_range for an SBE type without attributes
        const auto& attributes = sbeTypeAttributes.at(sbeType);
        chipName = attributes.chipName;

        std::string event = attributes.chipOpFailure;
        auto dumpIsRequired = false;

        if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
        {
            event = attributes.chipOpTimeout;
            dumpIsRequired = true;
        }

        openpower::dump::pel::FFDCData pelAdditionalData = {
            {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};

        // Create the PEL once; the returned id links the dump request to it.
        // (A second, identical call here used to log every failure twice.)
        auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
                                                             pelAdditionalData);

        // Request SBE Dump if required
        if (dumpIsRequired)
        {
            util::requestSBEDump(chipPos, logId, sbeType);
        }
    }
    catch (const std::out_of_range& e)
    {
        lg2::error("Unknown SBE Type({SBETYPE}) ErrorMsg({ERROR})", "SBETYPE",
                   sbeType, "ERROR", e);
    }
    catch (const std::exception& e)
    {
        lg2::error("SBE Dump request failed, chip type({CHIPTYPE}) "
                   "position({CHIPPOS}), Error: {ERROR}",
                   "CHIPTYPE", chipName, "CHIPPOS", chipPos, "ERROR", e);
    }
}
230
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500231void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
232 const std::filesystem::path& path,
233 uint32_t id, uint8_t type,
234 uint8_t clockState,
235 uint64_t failingUnit)
236{
237 auto chipPos = pdbg_target_index(chip);
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500238 SBETypes sbeType = getSBEType(chip);
239 auto chipName = sbeTypeAttributes.at(sbeType).chipName;
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500240 lg2::info(
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500241 "Collecting dump from ({CHIPTYPE}) ({POSITION}): path({PATH}) id({ID}) "
242 "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
243 "CHIPTYPE", chipName, "POSITION", chipPos, "PATH", path.string(), "ID",
244 id, "TYPE", type, "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500245
246 util::DumpDataPtr dataPtr;
247 uint32_t len = 0;
248 uint8_t collectFastArray =
249 checkFastarrayCollectionNeeded(clockState, type, failingUnit, chipPos);
250
251 try
252 {
253 openpower::phal::sbe::getDump(chip, type, clockState, collectFastArray,
254 dataPtr.getPtr(), &len);
255 }
256 catch (const openpower::phal::sbeError_t& sbeError)
257 {
258 if (sbeError.errType() ==
259 openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
260 {
261 // SBE is not ready to accept chip-ops,
262 // Skip the request, no additional error handling required.
263 lg2::info("Collect dump: Skipping ({ERROR}) dump({TYPE}) "
264 "on proc({PROC}) clock state({CLOCKSTATE})",
265 "ERROR", sbeError, "TYPE", type, "PROC", chipPos,
266 "CLOCKSTATE", clockState);
267 return;
268 }
269
270 lg2::error("Error in collecting dump dump type({TYPE}), "
Dhruvaraj Subhashchandran5f5c94d2021-10-19 07:18:30 -0500271 "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
272 "position({POSITION}), "
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500273 "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
Dhruvaraj Subhashchandran5f5c94d2021-10-19 07:18:30 -0500274 "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
275 "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
276 "ERROR", sbeError);
277 logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
278 SBEFIFO_CMD_GET_DUMP);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500279 return;
280 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500281 writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500282}
283
284void SbeDumpCollector::writeDumpFile(
285 const std::filesystem::path& path, const uint32_t id,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500286 const uint8_t clockState, const uint8_t nodeNum,
287 const std::string& chipName, const uint8_t chipPos,
288 util::DumpDataPtr& dataPtr, const uint32_t len)
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500289{
290 using namespace sdbusplus::xyz::openbmc_project::Common::Error;
291 namespace fileError = sdbusplus::xyz::openbmc_project::Common::File::Error;
292
293 // Construct the filename
294 std::ostringstream filenameBuilder;
295 filenameBuilder << std::setw(8) << std::setfill('0') << id
296 << ".SbeDataClocks"
297 << (clockState == SBE_CLOCK_ON ? "On" : "Off") << ".node"
298 << static_cast<int>(nodeNum) << "." << chipName
299 << static_cast<int>(chipPos);
300
301 auto dumpPath = path / filenameBuilder.str();
302
303 // Attempt to open the file
304 std::ofstream outfile(dumpPath, std::ios::out | std::ios::binary);
305 if (!outfile)
306 {
307 using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
308 using metadata = xyz::openbmc_project::Common::File::Open;
309 // Unable to open the file for writing
310 auto err = errno;
311 lg2::error("Error opening file to write dump, "
312 "errno({ERRNO}), filepath({FILEPATH})",
313 "ERRNO", err, "FILEPATH", dumpPath.string());
314
315 report<Open>(metadata::ERRNO(err), metadata::PATH(dumpPath.c_str()));
316 // Just return here, so that the dumps collected from other
317 // SBEs can be packaged.
318 return;
319 }
320
321 // Write to the file
322 try
323 {
324 outfile.write(reinterpret_cast<const char*>(dataPtr.getData()), len);
325
326 lg2::info("Successfully wrote dump file "
327 "path=({PATH}) size=({SIZE})",
328 "PATH", dumpPath.string(), "SIZE", len);
329 }
330 catch (const std::ofstream::failure& oe)
331 {
332 using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
333 using metadata = xyz::openbmc_project::Common::File::Write;
334
335 lg2::error(
336 "Failed to write to dump file, "
337 "errorMsg({ERROR}), error({ERRORCODE}), filepath({FILEPATH})",
338 "ERROR", oe, "ERRORCODE", oe.code().value(), "FILEPATH",
339 dumpPath.string());
340 report<Write>(metadata::ERRNO(oe.code().value()),
341 metadata::PATH(dumpPath.c_str()));
342 // Just return here so dumps collected from other SBEs can be
343 // packaged.
344 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500345}
346
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -0500347bool SbeDumpCollector::executeThreadStop(struct pdbg_target* target)
348{
349 try
350 {
351 openpower::phal::sbe::threadStopProc(target);
352 return true;
353 }
354 catch (const openpower::phal::sbeError_t& sbeError)
355 {
356 uint64_t chipPos = pdbg_target_index(target);
357 if (sbeError.errType() ==
358 openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
359 {
360 lg2::info("SBE is not ready to accept chip-op: Skipping "
361 "stop instruction on proc-({POSITION}) error({ERROR}) ",
362 "POSITION", chipPos, "ERROR", sbeError);
363 return false; // Do not include the target for dump collection
364 }
365
366 lg2::error("Stop instructions failed on "
367 "proc-({POSITION}) error({ERROR}) ",
368 "POSITION", chipPos, "ERROR", sbeError);
369
370 logErrorAndCreatePEL(sbeError, chipPos, SBETypes::PROC,
371 SBEFIFO_CMD_CLASS_INSTRUCTION,
372 SBEFIFO_CMD_CONTROL_INSN);
373 // For TIMEOUT, log the error and skip adding the processor for dump
374 // collection
375 if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
376 {
377 return false;
378 }
379 }
380 // Include the target for dump collection for SBE_CMD_FAILED or any other
381 // non-critical errors
382 return true;
383}
384
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500385} // namespace openpower::dump::sbe_chipop