blob: a912c573d970e9a70b8b76fa2ee20d6bd8bd9230 [file] [log] [blame]
#pragma once

extern "C"
{
#include <libpdbg.h>
#include <libpdbg_sbe.h>
}

#include "dump_utils.hpp"
#include "sbe_consts.hpp"
#include "sbe_type.hpp"

#include <phal_exception.H>

#include <cstdint>
#include <filesystem>
#include <future>
#include <map>
#include <string>
#include <vector>
19
20namespace openpower::dump::sbe_chipop
21{
22
/**
 * @brief Maps a processor target to the OCMB targets associated with it.
 *
 * The key is the proc target; the value is the list of ocmb targets
 * associated with that proc.
 */
using TargetMap =
    std::map<struct pdbg_target*, std::vector<struct pdbg_target*>>;
25
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050026/**
27 * @class SbeDumpCollector
28 * @brief Manages the collection of dumps from SBEs on failure.
29 *
30 * This class provides functionalities to orchestrate the collection of
31 * diagnostic dumps from Self Boot Engines across multiple processors
32 * in response to failures or for diagnostic purposes.
33 */
34class SbeDumpCollector
35{
36 public:
37 /**
38 * @brief Constructs a new SbeDumpCollector object.
39 */
40 SbeDumpCollector() = default;
41
42 /**
43 * @brief Destroys the SbeDumpCollector object.
44 */
45 ~SbeDumpCollector() = default;
46
47 /**
Nabil Ananthamangalathfc4f2232025-08-07 00:14:51 -050048 * @brief Drives all type of dump collection process from SBEs.
49 *
50 * Triggers SBE, Hardware/Hostboot dump collection process from SBEs.
51 * Internally calls private method collectHWHBDump(for Hardware/Hostboot
52 * dump) or collectSBEDump(for SBE dump) based on the parameter type's value
53 *
54 * @param type The type of dump which needs to be collected.
55 * @param id ID of the collected dump.
56 * @param failingUnit ID of the failing unit from which the dump is
57 * collected.
58 * @param path Path where the collected dump will be stored.
59 */
60 void collectDump(uint8_t type, uint32_t id, uint32_t failingUnit,
61 const std::filesystem::path& path);
62
63 private:
64 /**
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050065 * @brief Orchestrates the collection of dumps from all available SBEs.
66 *
67 * Initiates the process of collecting diagnostic dumps from SBEs. This
68 * involves identifying available processors, initiating the dump
69 * collection process, and managing the collected dump files.
70 *
71 * @param type The type of dump to collect.
72 * @param id A unique identifier for the dump collection operation.
73 * @param failingUnit The identifier of the failing unit prompting the dump
74 * collection.
75 * @param path The filesystem path where collected dumps should be stored.
76 */
Nabil Ananthamangalathfc4f2232025-08-07 00:14:51 -050077 void collectHWHBDump(uint8_t type, uint32_t id, uint64_t failingUnit,
78 const std::filesystem::path& path);
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050079
Nabil Ananthamangalathfc4f2232025-08-07 00:14:51 -050080 /**
81 * @brief Execute HWPs to collect SBE dump.
82 *
83 * @param[in] id Id of the dump.
84 * @param[in] failingUnit Id of proc containing failing SBE.
85 * @param[in] dumpPath Path to stored the dump files.
86 * @param[in] sbeTypeId ID for SBE type i.e.; Odyssey or normal memory chip
87 * 0xA-->Normal SBE type,
88 * 0xB-->Odyssey SBE type Exceptions: PDBG_INIT_FAIL for any pdbg init
89 * related failure.
90 */
91 void collectSBEDump(uint32_t id, uint32_t failingUnit,
92 const std::filesystem::path& dumpPath,
93 const int sbeTypeId);
94
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050095 /**
96 * @brief Collects a dump from a single SBE.
97 *
98 * Executes the low-level operations required to collect a diagnostic
99 * dump from the specified SBE.
100 *
101 * @param chip A pointer to the pdbg_target structure representing the SBE.
102 * @param path The filesystem path where the dump should be stored.
103 * @param id The unique identifier for this dump collection operation.
104 * @param type The type of dump to collect.
105 * @param clockState The clock state of the SBE during dump collection.
106 * @param failingUnit The identifier of the failing unit.
107 */
108 void collectDumpFromSBE(struct pdbg_target* chip,
109 const std::filesystem::path& path, uint32_t id,
110 uint8_t type, uint8_t clockState,
111 uint64_t failingUnit);
112
113 /**
114 * @brief Initializes the PDBG library.
115 *
116 * Prepares the PDBG library for interacting with processor targets. This
117 * must be called before any PDBG-related operations are performed.
118 */
119 void initializePdbg();
120
121 /**
122 * @brief Launches asynchronous dump collection tasks for a set of targets.
123 *
124 * This method initiates the dump collection process asynchronously for each
125 * target provided in the `targets` vector. It launches a separate
126 * asynchronous task for each target, where each task calls
127 * `collectDumpFromSBE` with the specified parameters, including the clock
128 * state.
129 *
130 * @param type The type of the dump to collect. This could be a hardware
131 * dump, software dump, etc., as defined by the SBE dump type enumeration.
132 * @param id A unique identifier for the dump collection operation. This ID
133 * is used to tag the collected dump for identification.
134 * @param path The filesystem path where the collected dumps should be
135 * stored. Each dump file will be stored under this directory.
136 * @param failingUnit The identifier of the unit or component that is
137 * failing or suspected to be the cause of the issue prompting the dump
138 * collection. This is used for diagnostic purposes.
139 * @param cstate The clock state during the dump collection. This parameter
140 * dictates whether the dump should be collected with the
141 * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF).
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500142 * @param targetMap A map of `pdbg_target*` representing the targets from
143 * which dumps should be collected. The key is the proc target with the
144 * list of ocmb targets associated with the proc.
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500145 *
146 * @return A vector of `std::future<void>` objects. Each future represents
147 * the completion state of an asynchronous dump collection task. The caller
148 * can wait on these futures to determine when all dump collection
149 * tasks have completed. Exceptions thrown by the asynchronous tasks are
150 * captured by the futures and can be rethrown when the futures are
151 * accessed.
152 */
153 std::vector<std::future<void>> spawnDumpCollectionProcesses(
154 uint8_t type, uint32_t id, const std::filesystem::path& path,
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500155 uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500156
157 /** @brief This function creates the new dump file in dump file name
158 * format and then writes the contents into it.
159 * @param path - Path to dump file
160 * @param id - A unique id assigned to dump to be collected
161 * @param clockState - Clock state, ON or Off
162 * @param nodeNum - Node containing the chip
163 * @param chipName - Name of the chip
164 * @param chipPos - Chip position of the failing unit
165 * @param dataPtr - Content to write to file
166 * @param len - Length of the content
167 */
168 void writeDumpFile(const std::filesystem::path& path, const uint32_t id,
169 const uint8_t clockState, const uint8_t nodeNum,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500170 const std::string& chipName, const uint8_t chipPos,
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500171 util::DumpDataPtr& dataPtr, const uint32_t len);
172
173 /**
174 * @brief Determines if fastarray collection is needed based on dump type
175 * and unit.
176 *
177 * @param clockState The current state of the clock.
178 * @param type The type of the dump being collected.
179 * @param failingUnit The ID of the failing unit.
180 * @param chipPos The position of the chip for which the dump is being
181 * collected.
182 *
183 * @return uint8_t - Returns 1 if fastarray collection is needed, 0
184 * otherwise.
185 */
Patrick Williams540521e2024-08-16 15:20:03 -0400186 inline uint8_t checkFastarrayCollectionNeeded(
187 const uint8_t clockState, const uint8_t type, uint64_t failingUnit,
188 const uint8_t chipPos) const
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500189 {
190 using namespace openpower::dump::SBE;
191
192 return (clockState == SBE_CLOCK_OFF &&
193 (type == SBE_DUMP_TYPE_HOSTBOOT ||
194 (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit)))
195 ? 1
196 : 0;
197 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500198
199 /**
200 * Logs an error and creates a PEL for SBE chip-op failures.
201 *
202 * @param sbeError - An error object encapsulating details about the SBE
203 * error.
204 * @param chipPos - The position of the chip where the error occurred.
205 * @param sbeType - The type of SBE, used to determine the event log
206 * message.
207 * @param cmdClass - The command class associated with the SBE operation.
208 * @param cmdType - The specific type of command within the command class.
SwethaParasaed53dc72025-01-24 01:07:01 -0600209 * @param path - Dump collection path.
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500210 *
211 */
Dhruvaraj Subhashchandranf2298892024-04-21 04:42:55 -0500212 bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500213 uint64_t chipPos, SBETypes sbeType,
SwethaParasaed53dc72025-01-24 01:07:01 -0600214 uint32_t cmdClass, uint32_t cmdType,
215 const std::filesystem::path& path);
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500216
217 /**
218 * Determines the type of SBE for a given chip target.
219 *
220 * @param chip - A pointer to a pdbg_target structure representing the chip.
221 * @return The SBE type for the given chip target.
222 */
223 inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip)
224 {
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500225 if (is_ody_ocmb_chip(chip))
226 {
227 return SBETypes::OCMB;
228 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500229 return SBETypes::PROC;
230 }
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -0500231
232 /**
233 * @brief Executes thread stop on a processor target
234 *
235 * If the Self Boot Engine (SBE) is not ready to accept chip operations
236 * (chip-ops), it logs the condition and excludes the processor from the
237 * dump collection process. For critical errors, such as a timeout during
238 * the stop operation, it logs the error and again excludes the processor.
239 * In case of SBE command failure or non-critical errors, it continues with
240 * the dump collection process.
241 *
242 * @param target Pointer to the pdbg target structure representing the
243 * processor to perform the thread stop on.
SwethaParasaed53dc72025-01-24 01:07:01 -0600244 * @param path Dump collection path
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -0500245 * @return true If the thread stop was successful or in case of non-critical
246 * errors where dump collection can proceed.
247 * @return false If the SBE is not ready for chip-ops or in case of critical
248 * errors like timeouts, indicating the processor should be
249 * excluded from the dump collection.
250 */
SwethaParasaed53dc72025-01-24 01:07:01 -0600251 bool executeThreadStop(struct pdbg_target* target,
252 const std::filesystem::path& path);
253
254 /**
255 * @brief Add Failure log information to info.yaml file
256 * @param logId - Error Log Id
257 * @param src - Reason Code of PEL
258 * @param chipName - Resource Name
259 * @param chipPos - Resource number
260 * @param path - Dump collection path
261 */
262 void addLogDataToDump(uint32_t logId, std::string src, std::string chipName,
263 uint64_t chipPos, const std::filesystem::path& path);
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500264};
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500265} // namespace openpower::dump::sbe_chipop