#pragma once

extern "C"
{
#include <libpdbg.h>
#include <libpdbg_sbe.h>
}

#include "dump_utils.hpp"
#include "sbe_consts.hpp"
#include "sbe_type.hpp"

#include <phal_exception.H>

#include <cstdint>
#include <filesystem>
#include <future>
#include <map>
#include <string>
#include <vector>

| 20 | namespace openpower::dump::sbe_chipop |
| 21 | { |
| 22 | |
Dhruvaraj Subhashchandran | e74e916 | 2024-04-01 09:53:13 -0500 | [diff] [blame] | 23 | using TargetMap = |
| 24 | std::map<struct pdbg_target*, std::vector<struct pdbg_target*>>; |
| 25 | |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 26 | /** |
| 27 | * @class SbeDumpCollector |
| 28 | * @brief Manages the collection of dumps from SBEs on failure. |
| 29 | * |
| 30 | * This class provides functionalities to orchestrate the collection of |
| 31 | * diagnostic dumps from Self Boot Engines across multiple processors |
| 32 | * in response to failures or for diagnostic purposes. |
| 33 | */ |
| 34 | class SbeDumpCollector |
| 35 | { |
| 36 | public: |
| 37 | /** |
| 38 | * @brief Constructs a new SbeDumpCollector object. |
| 39 | */ |
| 40 | SbeDumpCollector() = default; |
| 41 | |
| 42 | /** |
| 43 | * @brief Destroys the SbeDumpCollector object. |
| 44 | */ |
| 45 | ~SbeDumpCollector() = default; |
| 46 | |
| 47 | /** |
Nabil Ananthamangalath | fc4f223 | 2025-08-07 00:14:51 -0500 | [diff] [blame^] | 48 | * @brief Drives all type of dump collection process from SBEs. |
| 49 | * |
| 50 | * Triggers SBE, Hardware/Hostboot dump collection process from SBEs. |
| 51 | * Internally calls private method collectHWHBDump(for Hardware/Hostboot |
| 52 | * dump) or collectSBEDump(for SBE dump) based on the parameter type's value |
| 53 | * |
| 54 | * @param type The type of dump which needs to be collected. |
| 55 | * @param id ID of the collected dump. |
| 56 | * @param failingUnit ID of the failing unit from which the dump is |
| 57 | * collected. |
| 58 | * @param path Path where the collected dump will be stored. |
| 59 | */ |
| 60 | void collectDump(uint8_t type, uint32_t id, uint32_t failingUnit, |
| 61 | const std::filesystem::path& path); |
| 62 | |
| 63 | private: |
| 64 | /** |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 65 | * @brief Orchestrates the collection of dumps from all available SBEs. |
| 66 | * |
| 67 | * Initiates the process of collecting diagnostic dumps from SBEs. This |
| 68 | * involves identifying available processors, initiating the dump |
| 69 | * collection process, and managing the collected dump files. |
| 70 | * |
| 71 | * @param type The type of dump to collect. |
| 72 | * @param id A unique identifier for the dump collection operation. |
| 73 | * @param failingUnit The identifier of the failing unit prompting the dump |
| 74 | * collection. |
| 75 | * @param path The filesystem path where collected dumps should be stored. |
| 76 | */ |
Nabil Ananthamangalath | fc4f223 | 2025-08-07 00:14:51 -0500 | [diff] [blame^] | 77 | void collectHWHBDump(uint8_t type, uint32_t id, uint64_t failingUnit, |
| 78 | const std::filesystem::path& path); |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 79 | |
Nabil Ananthamangalath | fc4f223 | 2025-08-07 00:14:51 -0500 | [diff] [blame^] | 80 | /** |
| 81 | * @brief Execute HWPs to collect SBE dump. |
| 82 | * |
| 83 | * @param[in] id Id of the dump. |
| 84 | * @param[in] failingUnit Id of proc containing failing SBE. |
| 85 | * @param[in] dumpPath Path to stored the dump files. |
| 86 | * @param[in] sbeTypeId ID for SBE type i.e.; Odyssey or normal memory chip |
| 87 | * 0xA-->Normal SBE type, |
| 88 | * 0xB-->Odyssey SBE type Exceptions: PDBG_INIT_FAIL for any pdbg init |
| 89 | * related failure. |
| 90 | */ |
| 91 | void collectSBEDump(uint32_t id, uint32_t failingUnit, |
| 92 | const std::filesystem::path& dumpPath, |
| 93 | const int sbeTypeId); |
| 94 | |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 95 | /** |
| 96 | * @brief Collects a dump from a single SBE. |
| 97 | * |
| 98 | * Executes the low-level operations required to collect a diagnostic |
| 99 | * dump from the specified SBE. |
| 100 | * |
| 101 | * @param chip A pointer to the pdbg_target structure representing the SBE. |
| 102 | * @param path The filesystem path where the dump should be stored. |
| 103 | * @param id The unique identifier for this dump collection operation. |
| 104 | * @param type The type of dump to collect. |
| 105 | * @param clockState The clock state of the SBE during dump collection. |
| 106 | * @param failingUnit The identifier of the failing unit. |
| 107 | */ |
| 108 | void collectDumpFromSBE(struct pdbg_target* chip, |
| 109 | const std::filesystem::path& path, uint32_t id, |
| 110 | uint8_t type, uint8_t clockState, |
| 111 | uint64_t failingUnit); |
| 112 | |
| 113 | /** |
| 114 | * @brief Initializes the PDBG library. |
| 115 | * |
| 116 | * Prepares the PDBG library for interacting with processor targets. This |
| 117 | * must be called before any PDBG-related operations are performed. |
| 118 | */ |
| 119 | void initializePdbg(); |
| 120 | |
| 121 | /** |
| 122 | * @brief Launches asynchronous dump collection tasks for a set of targets. |
| 123 | * |
| 124 | * This method initiates the dump collection process asynchronously for each |
| 125 | * target provided in the `targets` vector. It launches a separate |
| 126 | * asynchronous task for each target, where each task calls |
| 127 | * `collectDumpFromSBE` with the specified parameters, including the clock |
| 128 | * state. |
| 129 | * |
| 130 | * @param type The type of the dump to collect. This could be a hardware |
| 131 | * dump, software dump, etc., as defined by the SBE dump type enumeration. |
| 132 | * @param id A unique identifier for the dump collection operation. This ID |
| 133 | * is used to tag the collected dump for identification. |
| 134 | * @param path The filesystem path where the collected dumps should be |
| 135 | * stored. Each dump file will be stored under this directory. |
| 136 | * @param failingUnit The identifier of the unit or component that is |
| 137 | * failing or suspected to be the cause of the issue prompting the dump |
| 138 | * collection. This is used for diagnostic purposes. |
| 139 | * @param cstate The clock state during the dump collection. This parameter |
| 140 | * dictates whether the dump should be collected with the |
| 141 | * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF). |
Dhruvaraj Subhashchandran | e74e916 | 2024-04-01 09:53:13 -0500 | [diff] [blame] | 142 | * @param targetMap A map of `pdbg_target*` representing the targets from |
| 143 | * which dumps should be collected. The key is the proc target with the |
| 144 | * list of ocmb targets associated with the proc. |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 145 | * |
| 146 | * @return A vector of `std::future<void>` objects. Each future represents |
| 147 | * the completion state of an asynchronous dump collection task. The caller |
| 148 | * can wait on these futures to determine when all dump collection |
| 149 | * tasks have completed. Exceptions thrown by the asynchronous tasks are |
| 150 | * captured by the futures and can be rethrown when the futures are |
| 151 | * accessed. |
| 152 | */ |
| 153 | std::vector<std::future<void>> spawnDumpCollectionProcesses( |
| 154 | uint8_t type, uint32_t id, const std::filesystem::path& path, |
Dhruvaraj Subhashchandran | e74e916 | 2024-04-01 09:53:13 -0500 | [diff] [blame] | 155 | uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap); |
Dhruvaraj Subhashchandran | a699e31 | 2021-10-27 07:20:34 -0500 | [diff] [blame] | 156 | |
| 157 | /** @brief This function creates the new dump file in dump file name |
| 158 | * format and then writes the contents into it. |
| 159 | * @param path - Path to dump file |
| 160 | * @param id - A unique id assigned to dump to be collected |
| 161 | * @param clockState - Clock state, ON or Off |
| 162 | * @param nodeNum - Node containing the chip |
| 163 | * @param chipName - Name of the chip |
| 164 | * @param chipPos - Chip position of the failing unit |
| 165 | * @param dataPtr - Content to write to file |
| 166 | * @param len - Length of the content |
| 167 | */ |
| 168 | void writeDumpFile(const std::filesystem::path& path, const uint32_t id, |
| 169 | const uint8_t clockState, const uint8_t nodeNum, |
Dhruvaraj Subhashchandran | 6feeebd | 2021-10-19 05:03:59 -0500 | [diff] [blame] | 170 | const std::string& chipName, const uint8_t chipPos, |
Dhruvaraj Subhashchandran | a699e31 | 2021-10-27 07:20:34 -0500 | [diff] [blame] | 171 | util::DumpDataPtr& dataPtr, const uint32_t len); |
| 172 | |
| 173 | /** |
| 174 | * @brief Determines if fastarray collection is needed based on dump type |
| 175 | * and unit. |
| 176 | * |
| 177 | * @param clockState The current state of the clock. |
| 178 | * @param type The type of the dump being collected. |
| 179 | * @param failingUnit The ID of the failing unit. |
| 180 | * @param chipPos The position of the chip for which the dump is being |
| 181 | * collected. |
| 182 | * |
| 183 | * @return uint8_t - Returns 1 if fastarray collection is needed, 0 |
| 184 | * otherwise. |
| 185 | */ |
Patrick Williams | 540521e | 2024-08-16 15:20:03 -0400 | [diff] [blame] | 186 | inline uint8_t checkFastarrayCollectionNeeded( |
| 187 | const uint8_t clockState, const uint8_t type, uint64_t failingUnit, |
| 188 | const uint8_t chipPos) const |
Dhruvaraj Subhashchandran | a699e31 | 2021-10-27 07:20:34 -0500 | [diff] [blame] | 189 | { |
| 190 | using namespace openpower::dump::SBE; |
| 191 | |
| 192 | return (clockState == SBE_CLOCK_OFF && |
| 193 | (type == SBE_DUMP_TYPE_HOSTBOOT || |
| 194 | (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit))) |
| 195 | ? 1 |
| 196 | : 0; |
| 197 | } |
Dhruvaraj Subhashchandran | 6feeebd | 2021-10-19 05:03:59 -0500 | [diff] [blame] | 198 | |
| 199 | /** |
| 200 | * Logs an error and creates a PEL for SBE chip-op failures. |
| 201 | * |
| 202 | * @param sbeError - An error object encapsulating details about the SBE |
| 203 | * error. |
| 204 | * @param chipPos - The position of the chip where the error occurred. |
| 205 | * @param sbeType - The type of SBE, used to determine the event log |
| 206 | * message. |
| 207 | * @param cmdClass - The command class associated with the SBE operation. |
| 208 | * @param cmdType - The specific type of command within the command class. |
SwethaParasa | ed53dc7 | 2025-01-24 01:07:01 -0600 | [diff] [blame] | 209 | * @param path - Dump collection path. |
Dhruvaraj Subhashchandran | 6feeebd | 2021-10-19 05:03:59 -0500 | [diff] [blame] | 210 | * |
| 211 | */ |
Dhruvaraj Subhashchandran | f229889 | 2024-04-21 04:42:55 -0500 | [diff] [blame] | 212 | bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError, |
Dhruvaraj Subhashchandran | 6feeebd | 2021-10-19 05:03:59 -0500 | [diff] [blame] | 213 | uint64_t chipPos, SBETypes sbeType, |
SwethaParasa | ed53dc7 | 2025-01-24 01:07:01 -0600 | [diff] [blame] | 214 | uint32_t cmdClass, uint32_t cmdType, |
| 215 | const std::filesystem::path& path); |
Dhruvaraj Subhashchandran | 6feeebd | 2021-10-19 05:03:59 -0500 | [diff] [blame] | 216 | |
| 217 | /** |
| 218 | * Determines the type of SBE for a given chip target. |
| 219 | * |
| 220 | * @param chip - A pointer to a pdbg_target structure representing the chip. |
| 221 | * @return The SBE type for the given chip target. |
| 222 | */ |
| 223 | inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip) |
| 224 | { |
Dhruvaraj Subhashchandran | e74e916 | 2024-04-01 09:53:13 -0500 | [diff] [blame] | 225 | if (is_ody_ocmb_chip(chip)) |
| 226 | { |
| 227 | return SBETypes::OCMB; |
| 228 | } |
Dhruvaraj Subhashchandran | 6feeebd | 2021-10-19 05:03:59 -0500 | [diff] [blame] | 229 | return SBETypes::PROC; |
| 230 | } |
Dhruvaraj Subhashchandran | f9f65b8 | 2022-10-13 06:46:43 -0500 | [diff] [blame] | 231 | |
| 232 | /** |
| 233 | * @brief Executes thread stop on a processor target |
| 234 | * |
| 235 | * If the Self Boot Engine (SBE) is not ready to accept chip operations |
| 236 | * (chip-ops), it logs the condition and excludes the processor from the |
| 237 | * dump collection process. For critical errors, such as a timeout during |
| 238 | * the stop operation, it logs the error and again excludes the processor. |
| 239 | * In case of SBE command failure or non-critical errors, it continues with |
| 240 | * the dump collection process. |
| 241 | * |
| 242 | * @param target Pointer to the pdbg target structure representing the |
| 243 | * processor to perform the thread stop on. |
SwethaParasa | ed53dc7 | 2025-01-24 01:07:01 -0600 | [diff] [blame] | 244 | * @param path Dump collection path |
Dhruvaraj Subhashchandran | f9f65b8 | 2022-10-13 06:46:43 -0500 | [diff] [blame] | 245 | * @return true If the thread stop was successful or in case of non-critical |
| 246 | * errors where dump collection can proceed. |
| 247 | * @return false If the SBE is not ready for chip-ops or in case of critical |
| 248 | * errors like timeouts, indicating the processor should be |
| 249 | * excluded from the dump collection. |
| 250 | */ |
SwethaParasa | ed53dc7 | 2025-01-24 01:07:01 -0600 | [diff] [blame] | 251 | bool executeThreadStop(struct pdbg_target* target, |
| 252 | const std::filesystem::path& path); |
| 253 | |
| 254 | /** |
| 255 | * @brief Add Failure log information to info.yaml file |
| 256 | * @param logId - Error Log Id |
| 257 | * @param src - Reason Code of PEL |
| 258 | * @param chipName - Resource Name |
| 259 | * @param chipPos - Resource number |
| 260 | * @param path - Dump collection path |
| 261 | */ |
| 262 | void addLogDataToDump(uint32_t logId, std::string src, std::string chipName, |
| 263 | uint64_t chipPos, const std::filesystem::path& path); |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 264 | }; |
Dhruvaraj Subhashchandran | 858d1aa | 2021-10-27 03:26:06 -0500 | [diff] [blame] | 265 | } // namespace openpower::dump::sbe_chipop |