blob: bdf43c8f7f4510f557b810caa53bc9cbfd33e679 [file] [log] [blame]
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -05001extern "C"
2{
3#include <libpdbg.h>
4#include <libpdbg_sbe.h>
5}
6
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -05007#include "create_pel.hpp"
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -05008#include "sbe_consts.hpp"
9#include "sbe_dump_collector.hpp"
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -050010#include "sbe_type.hpp"
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050011
12#include <libphal.H>
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -050013#include <phal_exception.H>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050014
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050015#include <phosphor-logging/elog-errors.hpp>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050016#include <phosphor-logging/lg2.hpp>
17#include <phosphor-logging/log.hpp>
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050018#include <sbe_consts.hpp>
19#include <xyz/openbmc_project/Common/File/error.hpp>
20#include <xyz/openbmc_project/Common/error.hpp>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050021
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050022#include <cstdint>
23#include <filesystem>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050024#include <format>
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050025#include <fstream>
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050026#include <stdexcept>
27
28namespace openpower::dump::sbe_chipop
29{
30
31using namespace phosphor::logging;
32using namespace openpower::dump::SBE;
33
34void SbeDumpCollector::collectDump(uint8_t type, uint32_t id,
35 uint64_t failingUnit,
36 const std::filesystem::path& path)
37{
38 lg2::error("Starting dump collection: type:{TYPE} id:{ID} "
39 "failingUnit:{FAILINGUNIT}, path:{PATH}",
40 "TYPE", type, "ID", id, "FAILINGUNIT", failingUnit, "PATH",
41 path.string());
42
43 initializePdbg();
44
45 std::vector<struct pdbg_target*> targets;
46
47 struct pdbg_target* target = nullptr;
48 pdbg_for_each_class_target("proc", target)
49 {
50 if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
51 !openpower::phal::pdbg::isTgtFunctional(target))
52 {
53 continue;
54 }
55
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -050056 bool includeTarget = true;
57 // if the dump type is hostboot then call stop instructions
58 if (type == SBE_DUMP_TYPE_HOSTBOOT)
59 {
60 includeTarget = executeThreadStop(target);
61 }
62 if (includeTarget)
63 {
64 targets.push_back(target);
65 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050066 }
67
68 std::vector<uint8_t> clockStates = {SBE_CLOCK_ON, SBE_CLOCK_OFF};
69 for (auto cstate : clockStates)
70 {
71 auto futures = spawnDumpCollectionProcesses(type, id, path, failingUnit,
72 cstate, targets);
73
74 // Wait for all asynchronous tasks to complete
75 for (auto& future : futures)
76 {
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050077 try
78 {
79 future.wait();
80 }
81 catch (const std::exception& e)
82 {
83 lg2::error("Failed to collect dump from SBE ErrorMsg({ERROR})",
84 "ERROR", e);
85 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050086 }
87 lg2::info(
88 "Dump collection completed for clock state({CSTATE}): type({TYPE}) "
89 "id({ID}) failingUnit({FAILINGUNIT}), path({PATH})",
90 "CSTATE", cstate, "TYPE", type, "ID", id, "FAILINGUNIT",
91 failingUnit, "PATH", path.string());
92 }
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -050093 if (std::filesystem::is_empty(path))
94 {
95 lg2::error("Failed to collect the dump");
96 throw std::runtime_error("Failed to collect the dump");
97 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050098 lg2::info("Dump collection completed");
99}
100
101void SbeDumpCollector::initializePdbg()
102{
103 openpower::phal::pdbg::init();
104}
105
106std::vector<std::future<void>> SbeDumpCollector::spawnDumpCollectionProcesses(
107 uint8_t type, uint32_t id, const std::filesystem::path& path,
108 uint64_t failingUnit, uint8_t cstate,
109 const std::vector<struct pdbg_target*>& targets)
110{
111 std::vector<std::future<void>> futures;
112
113 for (auto target : targets)
114 {
115 if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
116 !openpower::phal::pdbg::isTgtFunctional(target))
117 {
118 continue;
119 }
120
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500121 auto future =
122 std::async(std::launch::async,
123 [this, target, path, id, type, cstate, failingUnit]() {
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500124 try
125 {
126 this->collectDumpFromSBE(target, path, id, type, cstate,
127 failingUnit);
128 }
129 catch (const std::exception& e)
130 {
131 lg2::error(
132 "Failed to collect dump from SBE on Proc-({PROCINDEX})",
133 "PROCINDEX", pdbg_target_index(target));
134 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500135 });
136
137 futures.push_back(std::move(future));
138 }
139
140 return futures;
141}
142
/**
 * @brief Create a PEL for a failed SBE chip-op and request an SBE dump when
 *        the failure was a timeout.
 *
 * The event name is taken from sbeTypeAttributes for @p sbeType: the
 * chipOpFailure entry by default, the chipOpTimeout entry when the error
 * type is SBE_CMD_TIMEOUT. Exactly one PEL is created per call, and its log
 * id is the one passed along with the dump request.
 *
 * Exceptions derived from std::exception are logged here and not
 * propagated.
 *
 * @param[in] sbeError - error reported by the failed chip-op
 * @param[in] chipPos - position of the chip the chip-op was issued to
 * @param[in] sbeType - SBE type, used to look up the event names
 * @param[in] cmdClass - chip-op command class
 * @param[in] cmdType - chip-op command type
 */
void SbeDumpCollector::logErrorAndCreatePEL(
    const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
    SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
    try
    {
        std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
        bool dumpIsRequired = false;

        if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
        {
            event = sbeTypeAttributes.at(sbeType).chipOpTimeout;
            dumpIsRequired = true;
        }

        // SRC6 is the chip position followed by the OR of command class and
        // command type, both in upper-case hex.
        openpower::dump::pel::FFDCData pelAdditionalData = {
            {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};

        // Create the PEL once; a second call would log a duplicate entry
        // for the same failure.
        auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
                                                             pelAdditionalData);

        // Request SBE Dump if required
        if (dumpIsRequired)
        {
            util::requestSBEDump(chipPos, logId, sbeType);
        }
    }
    catch (const std::out_of_range& e)
    {
        // sbeTypeAttributes has no entry for this SBE type.
        lg2::error("Unknown SBE Type({SBETYPE}) ErrorMsg({ERROR})", "SBETYPE",
                   sbeType, "ERROR", e);
    }
    catch (const std::exception& e)
    {
        lg2::error("SBE Dump request failed, chip position({CHIPPOS}), "
                   "Error: {ERROR}",
                   "CHIPPOS", chipPos, "ERROR", e);
    }
}
184
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500185void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
186 const std::filesystem::path& path,
187 uint32_t id, uint8_t type,
188 uint8_t clockState,
189 uint64_t failingUnit)
190{
191 auto chipPos = pdbg_target_index(chip);
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500192 SBETypes sbeType = getSBEType(chip);
193 auto chipName = sbeTypeAttributes.at(sbeType).chipName;
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500194 lg2::info(
195 "Collecting dump from proc({PROC}): path({PATH}) id({ID}) "
196 "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
197 "PROC", chipPos, "PATH", path.string(), "ID", id, "TYPE", type,
198 "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500199
200 util::DumpDataPtr dataPtr;
201 uint32_t len = 0;
202 uint8_t collectFastArray =
203 checkFastarrayCollectionNeeded(clockState, type, failingUnit, chipPos);
204
205 try
206 {
207 openpower::phal::sbe::getDump(chip, type, clockState, collectFastArray,
208 dataPtr.getPtr(), &len);
209 }
210 catch (const openpower::phal::sbeError_t& sbeError)
211 {
212 if (sbeError.errType() ==
213 openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
214 {
215 // SBE is not ready to accept chip-ops,
216 // Skip the request, no additional error handling required.
217 lg2::info("Collect dump: Skipping ({ERROR}) dump({TYPE}) "
218 "on proc({PROC}) clock state({CLOCKSTATE})",
219 "ERROR", sbeError, "TYPE", type, "PROC", chipPos,
220 "CLOCKSTATE", clockState);
221 return;
222 }
223
224 lg2::error("Error in collecting dump dump type({TYPE}), "
Dhruvaraj Subhashchandran5f5c94d2021-10-19 07:18:30 -0500225 "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
226 "position({POSITION}), "
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500227 "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
Dhruvaraj Subhashchandran5f5c94d2021-10-19 07:18:30 -0500228 "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
229 "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
230 "ERROR", sbeError);
231 logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
232 SBEFIFO_CMD_GET_DUMP);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500233 return;
234 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500235 writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500236}
237
238void SbeDumpCollector::writeDumpFile(
239 const std::filesystem::path& path, const uint32_t id,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500240 const uint8_t clockState, const uint8_t nodeNum,
241 const std::string& chipName, const uint8_t chipPos,
242 util::DumpDataPtr& dataPtr, const uint32_t len)
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500243{
244 using namespace sdbusplus::xyz::openbmc_project::Common::Error;
245 namespace fileError = sdbusplus::xyz::openbmc_project::Common::File::Error;
246
247 // Construct the filename
248 std::ostringstream filenameBuilder;
249 filenameBuilder << std::setw(8) << std::setfill('0') << id
250 << ".SbeDataClocks"
251 << (clockState == SBE_CLOCK_ON ? "On" : "Off") << ".node"
252 << static_cast<int>(nodeNum) << "." << chipName
253 << static_cast<int>(chipPos);
254
255 auto dumpPath = path / filenameBuilder.str();
256
257 // Attempt to open the file
258 std::ofstream outfile(dumpPath, std::ios::out | std::ios::binary);
259 if (!outfile)
260 {
261 using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
262 using metadata = xyz::openbmc_project::Common::File::Open;
263 // Unable to open the file for writing
264 auto err = errno;
265 lg2::error("Error opening file to write dump, "
266 "errno({ERRNO}), filepath({FILEPATH})",
267 "ERRNO", err, "FILEPATH", dumpPath.string());
268
269 report<Open>(metadata::ERRNO(err), metadata::PATH(dumpPath.c_str()));
270 // Just return here, so that the dumps collected from other
271 // SBEs can be packaged.
272 return;
273 }
274
275 // Write to the file
276 try
277 {
278 outfile.write(reinterpret_cast<const char*>(dataPtr.getData()), len);
279
280 lg2::info("Successfully wrote dump file "
281 "path=({PATH}) size=({SIZE})",
282 "PATH", dumpPath.string(), "SIZE", len);
283 }
284 catch (const std::ofstream::failure& oe)
285 {
286 using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
287 using metadata = xyz::openbmc_project::Common::File::Write;
288
289 lg2::error(
290 "Failed to write to dump file, "
291 "errorMsg({ERROR}), error({ERRORCODE}), filepath({FILEPATH})",
292 "ERROR", oe, "ERRORCODE", oe.code().value(), "FILEPATH",
293 dumpPath.string());
294 report<Write>(metadata::ERRNO(oe.code().value()),
295 metadata::PATH(dumpPath.c_str()));
296 // Just return here so dumps collected from other SBEs can be
297 // packaged.
298 }
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500299}
300
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -0500301bool SbeDumpCollector::executeThreadStop(struct pdbg_target* target)
302{
303 try
304 {
305 openpower::phal::sbe::threadStopProc(target);
306 return true;
307 }
308 catch (const openpower::phal::sbeError_t& sbeError)
309 {
310 uint64_t chipPos = pdbg_target_index(target);
311 if (sbeError.errType() ==
312 openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
313 {
314 lg2::info("SBE is not ready to accept chip-op: Skipping "
315 "stop instruction on proc-({POSITION}) error({ERROR}) ",
316 "POSITION", chipPos, "ERROR", sbeError);
317 return false; // Do not include the target for dump collection
318 }
319
320 lg2::error("Stop instructions failed on "
321 "proc-({POSITION}) error({ERROR}) ",
322 "POSITION", chipPos, "ERROR", sbeError);
323
324 logErrorAndCreatePEL(sbeError, chipPos, SBETypes::PROC,
325 SBEFIFO_CMD_CLASS_INSTRUCTION,
326 SBEFIFO_CMD_CONTROL_INSN);
327 // For TIMEOUT, log the error and skip adding the processor for dump
328 // collection
329 if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
330 {
331 return false;
332 }
333 }
334 // Include the target for dump collection for SBE_CMD_FAILED or any other
335 // non-critical errors
336 return true;
337}
338
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500339} // namespace openpower::dump::sbe_chipop