blob: 3e7473684b7a53b1ce0080e0304cfdce430c8dee [file] [log] [blame]
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -05001extern "C"
2{
3#include <libpdbg.h>
4#include <libpdbg_sbe.h>
5}
6
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -05007#include "create_pel.hpp"
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -05008#include "sbe_consts.hpp"
9#include "sbe_dump_collector.hpp"
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -050010#include "sbe_type.hpp"
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050011
12#include <libphal.H>
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -050013#include <phal_exception.H>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050014
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050015#include <phosphor-logging/elog-errors.hpp>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050016#include <phosphor-logging/lg2.hpp>
17#include <phosphor-logging/log.hpp>
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050018#include <sbe_consts.hpp>
19#include <xyz/openbmc_project/Common/File/error.hpp>
20#include <xyz/openbmc_project/Common/error.hpp>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050021
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050022#include <cstdint>
23#include <filesystem>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050024#include <format>
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050025#include <fstream>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050026#include <stdexcept>
27
28namespace openpower::dump::sbe_chipop
29{
30
31using namespace phosphor::logging;
32using namespace openpower::dump::SBE;
33
34void SbeDumpCollector::collectDump(uint8_t type, uint32_t id,
35 uint64_t failingUnit,
36 const std::filesystem::path& path)
37{
38 lg2::error("Starting dump collection: type:{TYPE} id:{ID} "
39 "failingUnit:{FAILINGUNIT}, path:{PATH}",
40 "TYPE", type, "ID", id, "FAILINGUNIT", failingUnit, "PATH",
41 path.string());
42
43 initializePdbg();
44
45 std::vector<struct pdbg_target*> targets;
46
47 struct pdbg_target* target = nullptr;
48 pdbg_for_each_class_target("proc", target)
49 {
50 if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
51 !openpower::phal::pdbg::isTgtFunctional(target))
52 {
53 continue;
54 }
55
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -050056 bool includeTarget = true;
57 // if the dump type is hostboot then call stop instructions
58 if (type == SBE_DUMP_TYPE_HOSTBOOT)
59 {
60 includeTarget = executeThreadStop(target);
61 }
62 if (includeTarget)
63 {
64 targets.push_back(target);
65 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050066 }
67
68 std::vector<uint8_t> clockStates = {SBE_CLOCK_ON, SBE_CLOCK_OFF};
69 for (auto cstate : clockStates)
70 {
Dhruvaraj Subhashchandran9098d8c2022-12-01 00:40:20 -060071 // Skip collection for performance dump if clock state is not ON
72 if (type == SBE_DUMP_TYPE_PERFORMANCE && cstate != SBE_CLOCK_ON)
73 {
74 continue;
75 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050076 auto futures = spawnDumpCollectionProcesses(type, id, path, failingUnit,
77 cstate, targets);
78
79 // Wait for all asynchronous tasks to complete
80 for (auto& future : futures)
81 {
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050082 try
83 {
84 future.wait();
85 }
86 catch (const std::exception& e)
87 {
88 lg2::error("Failed to collect dump from SBE ErrorMsg({ERROR})",
89 "ERROR", e);
90 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050091 }
92 lg2::info(
93 "Dump collection completed for clock state({CSTATE}): type({TYPE}) "
94 "id({ID}) failingUnit({FAILINGUNIT}), path({PATH})",
95 "CSTATE", cstate, "TYPE", type, "ID", id, "FAILINGUNIT",
96 failingUnit, "PATH", path.string());
97 }
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050098 if (std::filesystem::is_empty(path))
99 {
100 lg2::error("Failed to collect the dump");
101 throw std::runtime_error("Failed to collect the dump");
102 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500103 lg2::info("Dump collection completed");
104}
105
106void SbeDumpCollector::initializePdbg()
107{
108 openpower::phal::pdbg::init();
109}
110
111std::vector<std::future<void>> SbeDumpCollector::spawnDumpCollectionProcesses(
112 uint8_t type, uint32_t id, const std::filesystem::path& path,
113 uint64_t failingUnit, uint8_t cstate,
114 const std::vector<struct pdbg_target*>& targets)
115{
116 std::vector<std::future<void>> futures;
117
118 for (auto target : targets)
119 {
120 if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
121 !openpower::phal::pdbg::isTgtFunctional(target))
122 {
123 continue;
124 }
125
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500126 auto future =
127 std::async(std::launch::async,
128 [this, target, path, id, type, cstate, failingUnit]() {
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500129 try
130 {
131 this->collectDumpFromSBE(target, path, id, type, cstate,
132 failingUnit);
133 }
134 catch (const std::exception& e)
135 {
136 lg2::error(
137 "Failed to collect dump from SBE on Proc-({PROCINDEX})",
138 "PROCINDEX", pdbg_target_index(target));
139 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500140 });
141
142 futures.push_back(std::move(future));
143 }
144
145 return futures;
146}
147
/**
 * @brief Create a PEL for a failed SBE chip-op and request an SBE dump when
 *        the failure was a timeout.
 *
 * The event name is looked up in sbeTypeAttributes for the given SBE type:
 * the chip-op failure event by default, the chip-op timeout event when the
 * error type is SBE_CMD_TIMEOUT. Exactly one PEL is created per call.
 *
 * Exceptions are logged and swallowed here; nothing propagates to the
 * caller.
 *
 * @param[in] sbeError - Error reported by the SBE chip-op.
 * @param[in] chipPos  - Position of the chip the chip-op was issued on.
 * @param[in] sbeType  - Type of the SBE, used to select the event names.
 * @param[in] cmdClass - Chip-op command class, folded into SRC6.
 * @param[in] cmdType  - Chip-op command type, folded into SRC6.
 */
void SbeDumpCollector::logErrorAndCreatePEL(
    const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
    SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
    try
    {
        std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
        auto dumpIsRequired = false;

        if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
        {
            event = sbeTypeAttributes.at(sbeType).chipOpTimeout;
            dumpIsRequired = true;
        }

        openpower::dump::pel::FFDCData pelAdditionalData = {
            {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};

        // Create the PEL once and keep its id; a second call would log a
        // duplicate error for the same failure.
        auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
                                                             pelAdditionalData);

        // Request SBE Dump if required
        if (dumpIsRequired)
        {
            util::requestSBEDump(chipPos, logId, sbeType);
        }
    }
    catch (const std::out_of_range& e)
    {
        // sbeTypeAttributes.at() has no entry for this SBE type.
        lg2::error("Unknown SBE Type({SBETYPE}) ErrorMsg({ERROR})", "SBETYPE",
                   sbeType, "ERROR", e);
    }
    catch (const std::exception& e)
    {
        lg2::error("SBE Dump request failed, chip position({CHIPPOS}), "
                   "Error: {ERROR}",
                   "CHIPPOS", chipPos, "ERROR", e);
    }
}
189
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500190void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
191 const std::filesystem::path& path,
192 uint32_t id, uint8_t type,
193 uint8_t clockState,
194 uint64_t failingUnit)
195{
196 auto chipPos = pdbg_target_index(chip);
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500197 SBETypes sbeType = getSBEType(chip);
198 auto chipName = sbeTypeAttributes.at(sbeType).chipName;
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500199 lg2::info(
200 "Collecting dump from proc({PROC}): path({PATH}) id({ID}) "
201 "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
202 "PROC", chipPos, "PATH", path.string(), "ID", id, "TYPE", type,
203 "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500204
205 util::DumpDataPtr dataPtr;
206 uint32_t len = 0;
207 uint8_t collectFastArray =
208 checkFastarrayCollectionNeeded(clockState, type, failingUnit, chipPos);
209
210 try
211 {
212 openpower::phal::sbe::getDump(chip, type, clockState, collectFastArray,
213 dataPtr.getPtr(), &len);
214 }
215 catch (const openpower::phal::sbeError_t& sbeError)
216 {
217 if (sbeError.errType() ==
218 openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
219 {
220 // SBE is not ready to accept chip-ops,
221 // Skip the request, no additional error handling required.
222 lg2::info("Collect dump: Skipping ({ERROR}) dump({TYPE}) "
223 "on proc({PROC}) clock state({CLOCKSTATE})",
224 "ERROR", sbeError, "TYPE", type, "PROC", chipPos,
225 "CLOCKSTATE", clockState);
226 return;
227 }
228
229 lg2::error("Error in collecting dump dump type({TYPE}), "
Dhruvaraj Subhashchandran5f5c94d2021-10-19 07:18:30 -0500230 "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
231 "position({POSITION}), "
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500232 "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
Dhruvaraj Subhashchandran5f5c94d2021-10-19 07:18:30 -0500233 "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
234 "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
235 "ERROR", sbeError);
236 logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
237 SBEFIFO_CMD_GET_DUMP);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500238 return;
239 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500240 writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500241}
242
243void SbeDumpCollector::writeDumpFile(
244 const std::filesystem::path& path, const uint32_t id,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500245 const uint8_t clockState, const uint8_t nodeNum,
246 const std::string& chipName, const uint8_t chipPos,
247 util::DumpDataPtr& dataPtr, const uint32_t len)
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500248{
249 using namespace sdbusplus::xyz::openbmc_project::Common::Error;
250 namespace fileError = sdbusplus::xyz::openbmc_project::Common::File::Error;
251
252 // Construct the filename
253 std::ostringstream filenameBuilder;
254 filenameBuilder << std::setw(8) << std::setfill('0') << id
255 << ".SbeDataClocks"
256 << (clockState == SBE_CLOCK_ON ? "On" : "Off") << ".node"
257 << static_cast<int>(nodeNum) << "." << chipName
258 << static_cast<int>(chipPos);
259
260 auto dumpPath = path / filenameBuilder.str();
261
262 // Attempt to open the file
263 std::ofstream outfile(dumpPath, std::ios::out | std::ios::binary);
264 if (!outfile)
265 {
266 using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
267 using metadata = xyz::openbmc_project::Common::File::Open;
268 // Unable to open the file for writing
269 auto err = errno;
270 lg2::error("Error opening file to write dump, "
271 "errno({ERRNO}), filepath({FILEPATH})",
272 "ERRNO", err, "FILEPATH", dumpPath.string());
273
274 report<Open>(metadata::ERRNO(err), metadata::PATH(dumpPath.c_str()));
275 // Just return here, so that the dumps collected from other
276 // SBEs can be packaged.
277 return;
278 }
279
280 // Write to the file
281 try
282 {
283 outfile.write(reinterpret_cast<const char*>(dataPtr.getData()), len);
284
285 lg2::info("Successfully wrote dump file "
286 "path=({PATH}) size=({SIZE})",
287 "PATH", dumpPath.string(), "SIZE", len);
288 }
289 catch (const std::ofstream::failure& oe)
290 {
291 using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
292 using metadata = xyz::openbmc_project::Common::File::Write;
293
294 lg2::error(
295 "Failed to write to dump file, "
296 "errorMsg({ERROR}), error({ERRORCODE}), filepath({FILEPATH})",
297 "ERROR", oe, "ERRORCODE", oe.code().value(), "FILEPATH",
298 dumpPath.string());
299 report<Write>(metadata::ERRNO(oe.code().value()),
300 metadata::PATH(dumpPath.c_str()));
301 // Just return here so dumps collected from other SBEs can be
302 // packaged.
303 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500304}
305
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -0500306bool SbeDumpCollector::executeThreadStop(struct pdbg_target* target)
307{
308 try
309 {
310 openpower::phal::sbe::threadStopProc(target);
311 return true;
312 }
313 catch (const openpower::phal::sbeError_t& sbeError)
314 {
315 uint64_t chipPos = pdbg_target_index(target);
316 if (sbeError.errType() ==
317 openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
318 {
319 lg2::info("SBE is not ready to accept chip-op: Skipping "
320 "stop instruction on proc-({POSITION}) error({ERROR}) ",
321 "POSITION", chipPos, "ERROR", sbeError);
322 return false; // Do not include the target for dump collection
323 }
324
325 lg2::error("Stop instructions failed on "
326 "proc-({POSITION}) error({ERROR}) ",
327 "POSITION", chipPos, "ERROR", sbeError);
328
329 logErrorAndCreatePEL(sbeError, chipPos, SBETypes::PROC,
330 SBEFIFO_CMD_CLASS_INSTRUCTION,
331 SBEFIFO_CMD_CONTROL_INSN);
332 // For TIMEOUT, log the error and skip adding the processor for dump
333 // collection
334 if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
335 {
336 return false;
337 }
338 }
339 // Include the target for dump collection for SBE_CMD_FAILED or any other
340 // non-critical errors
341 return true;
342}
343
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500344} // namespace openpower::dump::sbe_chipop