blob: 1cb2348d9f54d723e80cada111e716e5b253d4e2 [file] [log] [blame]
#pragma once

extern "C"
{
#include <libpdbg.h>
#include <libpdbg_sbe.h>
}

#include "dump_utils.hpp"
#include "sbe_consts.hpp"
#include "sbe_type.hpp"

#include <phal_exception.H>

#include <cstdint>
#include <filesystem>
#include <future>
#include <map>
#include <string>
#include <vector>
19
20namespace openpower::dump::sbe_chipop
21{
22
/**
 * @brief Associates a target with the list of targets that belong to it.
 *
 * As used by SbeDumpCollector::spawnDumpCollectionProcesses, the key is a
 * processor target and the mapped vector holds the OCMB targets associated
 * with that processor. The map stores non-owning `pdbg_target` pointers.
 */
using TargetMap =
    std::map<struct pdbg_target*, std::vector<struct pdbg_target*>>;
25
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -050026/**
27 * @class SbeDumpCollector
28 * @brief Manages the collection of dumps from SBEs on failure.
29 *
30 * This class provides functionalities to orchestrate the collection of
31 * diagnostic dumps from Self Boot Engines across multiple processors
32 * in response to failures or for diagnostic purposes.
33 */
34class SbeDumpCollector
35{
36 public:
37 /**
38 * @brief Constructs a new SbeDumpCollector object.
39 */
40 SbeDumpCollector() = default;
41
42 /**
43 * @brief Destroys the SbeDumpCollector object.
44 */
45 ~SbeDumpCollector() = default;
46
47 /**
48 * @brief Orchestrates the collection of dumps from all available SBEs.
49 *
50 * Initiates the process of collecting diagnostic dumps from SBEs. This
51 * involves identifying available processors, initiating the dump
52 * collection process, and managing the collected dump files.
53 *
54 * @param type The type of dump to collect.
55 * @param id A unique identifier for the dump collection operation.
56 * @param failingUnit The identifier of the failing unit prompting the dump
57 * collection.
58 * @param path The filesystem path where collected dumps should be stored.
59 */
60 void collectDump(uint8_t type, uint32_t id, uint64_t failingUnit,
61 const std::filesystem::path& path);
62
63 private:
64 /**
65 * @brief Collects a dump from a single SBE.
66 *
67 * Executes the low-level operations required to collect a diagnostic
68 * dump from the specified SBE.
69 *
70 * @param chip A pointer to the pdbg_target structure representing the SBE.
71 * @param path The filesystem path where the dump should be stored.
72 * @param id The unique identifier for this dump collection operation.
73 * @param type The type of dump to collect.
74 * @param clockState The clock state of the SBE during dump collection.
75 * @param failingUnit The identifier of the failing unit.
76 */
77 void collectDumpFromSBE(struct pdbg_target* chip,
78 const std::filesystem::path& path, uint32_t id,
79 uint8_t type, uint8_t clockState,
80 uint64_t failingUnit);
81
82 /**
83 * @brief Initializes the PDBG library.
84 *
85 * Prepares the PDBG library for interacting with processor targets. This
86 * must be called before any PDBG-related operations are performed.
87 */
88 void initializePdbg();
89
90 /**
91 * @brief Launches asynchronous dump collection tasks for a set of targets.
92 *
93 * This method initiates the dump collection process asynchronously for each
94 * target provided in the `targets` vector. It launches a separate
95 * asynchronous task for each target, where each task calls
96 * `collectDumpFromSBE` with the specified parameters, including the clock
97 * state.
98 *
99 * @param type The type of the dump to collect. This could be a hardware
100 * dump, software dump, etc., as defined by the SBE dump type enumeration.
101 * @param id A unique identifier for the dump collection operation. This ID
102 * is used to tag the collected dump for identification.
103 * @param path The filesystem path where the collected dumps should be
104 * stored. Each dump file will be stored under this directory.
105 * @param failingUnit The identifier of the unit or component that is
106 * failing or suspected to be the cause of the issue prompting the dump
107 * collection. This is used for diagnostic purposes.
108 * @param cstate The clock state during the dump collection. This parameter
109 * dictates whether the dump should be collected with the
110 * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF).
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500111 * @param targetMap A map of `pdbg_target*` representing the targets from
112 * which dumps should be collected. The key is the proc target with the
113 * list of ocmb targets associated with the proc.
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500114 *
115 * @return A vector of `std::future<void>` objects. Each future represents
116 * the completion state of an asynchronous dump collection task. The caller
117 * can wait on these futures to determine when all dump collection
118 * tasks have completed. Exceptions thrown by the asynchronous tasks are
119 * captured by the futures and can be rethrown when the futures are
120 * accessed.
121 */
122 std::vector<std::future<void>> spawnDumpCollectionProcesses(
123 uint8_t type, uint32_t id, const std::filesystem::path& path,
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500124 uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap);
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500125
126 /** @brief This function creates the new dump file in dump file name
127 * format and then writes the contents into it.
128 * @param path - Path to dump file
129 * @param id - A unique id assigned to dump to be collected
130 * @param clockState - Clock state, ON or Off
131 * @param nodeNum - Node containing the chip
132 * @param chipName - Name of the chip
133 * @param chipPos - Chip position of the failing unit
134 * @param dataPtr - Content to write to file
135 * @param len - Length of the content
136 */
137 void writeDumpFile(const std::filesystem::path& path, const uint32_t id,
138 const uint8_t clockState, const uint8_t nodeNum,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500139 const std::string& chipName, const uint8_t chipPos,
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500140 util::DumpDataPtr& dataPtr, const uint32_t len);
141
142 /**
143 * @brief Determines if fastarray collection is needed based on dump type
144 * and unit.
145 *
146 * @param clockState The current state of the clock.
147 * @param type The type of the dump being collected.
148 * @param failingUnit The ID of the failing unit.
149 * @param chipPos The position of the chip for which the dump is being
150 * collected.
151 *
152 * @return uint8_t - Returns 1 if fastarray collection is needed, 0
153 * otherwise.
154 */
Patrick Williams540521e2024-08-16 15:20:03 -0400155 inline uint8_t checkFastarrayCollectionNeeded(
156 const uint8_t clockState, const uint8_t type, uint64_t failingUnit,
157 const uint8_t chipPos) const
Dhruvaraj Subhashchandrana699e312021-10-27 07:20:34 -0500158 {
159 using namespace openpower::dump::SBE;
160
161 return (clockState == SBE_CLOCK_OFF &&
162 (type == SBE_DUMP_TYPE_HOSTBOOT ||
163 (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit)))
164 ? 1
165 : 0;
166 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500167
168 /**
169 * Logs an error and creates a PEL for SBE chip-op failures.
170 *
171 * @param sbeError - An error object encapsulating details about the SBE
172 * error.
173 * @param chipPos - The position of the chip where the error occurred.
174 * @param sbeType - The type of SBE, used to determine the event log
175 * message.
176 * @param cmdClass - The command class associated with the SBE operation.
177 * @param cmdType - The specific type of command within the command class.
178 *
179 */
Dhruvaraj Subhashchandranf2298892024-04-21 04:42:55 -0500180 bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500181 uint64_t chipPos, SBETypes sbeType,
182 uint32_t cmdClass, uint32_t cmdType);
183
184 /**
185 * Determines the type of SBE for a given chip target.
186 *
187 * @param chip - A pointer to a pdbg_target structure representing the chip.
188 * @return The SBE type for the given chip target.
189 */
190 inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip)
191 {
Dhruvaraj Subhashchandrane74e9162024-04-01 09:53:13 -0500192 if (is_ody_ocmb_chip(chip))
193 {
194 return SBETypes::OCMB;
195 }
Dhruvaraj Subhashchandran6feeebd2021-10-19 05:03:59 -0500196 return SBETypes::PROC;
197 }
Dhruvaraj Subhashchandranf9f65b82022-10-13 06:46:43 -0500198
199 /**
200 * @brief Executes thread stop on a processor target
201 *
202 * If the Self Boot Engine (SBE) is not ready to accept chip operations
203 * (chip-ops), it logs the condition and excludes the processor from the
204 * dump collection process. For critical errors, such as a timeout during
205 * the stop operation, it logs the error and again excludes the processor.
206 * In case of SBE command failure or non-critical errors, it continues with
207 * the dump collection process.
208 *
209 * @param target Pointer to the pdbg target structure representing the
210 * processor to perform the thread stop on.
211 * @return true If the thread stop was successful or in case of non-critical
212 * errors where dump collection can proceed.
213 * @return false If the SBE is not ready for chip-ops or in case of critical
214 * errors like timeouts, indicating the processor should be
215 * excluded from the dump collection.
216 */
217 bool executeThreadStop(struct pdbg_target* target);
Dhruvaraj Subhashchandran858d1aa2021-10-27 03:26:06 -0500218};
219
220} // namespace openpower::dump::sbe_chipop