blob: acade3f54876817d56652264d13608134347c608 [file] [log] [blame]
Chau Lya743e382024-10-26 11:12:22 +00001#pragma once
2
3#include "libpldm/pldm.h"
4
5#include "common/instance_id.hpp"
6#include "common/types.hpp"
7#include "oem_event_manager.hpp"
8#include "platform-mc/manager.hpp"
9#include "requester/handler.hpp"
10#include "requester/request.hpp"
11
12namespace pldm
13{
14namespace oem_ampere
15{
16using namespace pldm::pdr;
17
/** @brief Lookup table from an 8-bit code to its message string. */
using EventToMsgMap_t = std::unordered_map<uint8_t, std::string>;
19
/**
 * @brief Fixed sensor IDs used by the Ampere OEM event handling.
 *
 * NOTE(review): the numeric values are presumably assigned by the platform
 * firmware - confirm against the firmware specification before changing.
 */
enum sensor_ids
{
    DDR_STATUS = 51,
    PCP_VR_STATE = 75,
    SOC_VR_STATE = 80,
    DPHY_VR1_STATE = 85,
    DPHY_VR2_STATE = 90,
    D2D_VR_STATE = 95,
    IOC_VR1_STATE = 100,
    IOC_VR2_STATE = 105,
    PCI_D_VR_STATE = 110,
    PCI_A_VR_STATE = 115,
    PCIE_HOT_PLUG = 169,
    SOC_HEALTH_AVAILABILITY = 170,
    BOOT_OVERALL = 175,
    WATCH_DOG = 179,
    CORE_UE = 192,
    MCU_UE = 194,
    PCIE_UE = 196,
    SOC_UE = 198,
    SOC_BERT = 200,
};
42
namespace boot
{
namespace status
{
/** @brief Boot status codes. */
enum boot_status
{
    BOOT_STATUS_SUCCESS = 0x80,
    BOOT_STATUS_FAILURE = 0x81,
};
} // namespace status
namespace stage
{
/**
 * @brief Boot stage codes.
 *
 * UEFI_STATUS_CLASS_CODE_MIN and UEFI_STATUS_CLASS_CODE_MAX bracket the
 * 0x00 - 0x7f range; the remaining enumerators are individual stage codes.
 */
enum boot_stage
{
    UEFI_STATUS_CLASS_CODE_MIN = 0x00,
    UEFI_STATUS_CLASS_CODE_MAX = 0x7f,
    SECPRO = 0x90,
    MPRO = 0x91,
    ATF_BL1 = 0x92,
    ATF_BL2 = 0x93,
    DDR_INITIALIZATION = 0x94,
    DDR_TRAINING = 0x95,
    S0_DDR_TRAINING_FAILURE = 0x96,
    ATF_BL31 = 0x97,
    ATF_BL32 = 0x98,
    S1_DDR_TRAINING_FAILURE = 0x99,
};
} // namespace stage
} // namespace boot
72
/**
 * @brief Severity levels accepted by the logging helpers.
 *
 * The enumerators take consecutive values starting at 0 in declaration
 * order, so reordering them changes their numeric values.
 */
enum class log_level : int
{
    OK,
    WARNING,
    CRITICAL,
    BIOSFWPANIC,
};
80
/*
 * PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31      |   Reserved
 * Bit 30:24   |   Media slot number. This field can be used by UEFI
 *             |   to indicate the media slot number (such as NVMe/SSD slot)
 *             |   (7 bits)
 *             |   NOTE(review): the range was documented as 0 - 63, but a
 *             |   7-bit field can hold 0 - 127 - confirm the intended range.
 * Bit 23      |   Operation status: 1 = operation failed
 *             |   0 = operation successful
 * Bit 22      |   Action: 0 - Insertion 1 - Removal
 * Bit 21:18   |   Function (4 bits)
 * Bit 17:13   |   Device (5 bits)
 * Bit 12:5    |   Bus (8 bits)
 * Bit 4:0     |   Segment (5 bits)
 *
 * The bit-fields are declared from bit 0 upwards, so the table above matches
 * `bits` only on ABIs that allocate bit-fields starting at the least
 * significant bit.
 */
typedef union
{
    uint32_t value;
    struct
    {
        uint32_t segment:5;
        uint32_t bus:8;
        uint32_t device:5;
        uint32_t function:4;
        uint32_t action:1;
        uint32_t opStatus:1;
        uint32_t mediaSlot:7;
        uint32_t reserved:1;
    } __attribute__((packed)) bits;
} PCIeHotPlugEventRecord_t;

// The bit-field view must overlay the raw 32-bit reading exactly.
static_assert(sizeof(PCIeHotPlugEventRecord_t) == sizeof(uint32_t),
              "PCIeHotPlugEventRecord_t must be exactly 32 bits");
111
/*
 * DIMM training failure value format, derived from the bit-field order below
 * (fields are declared from bit 0 upwards, as in the other records here)
 * FIELD       |                   COMMENT
 * Bit 31:24   |   Reserved (8 bits)
 * Bit 23:20   |   Reserved (4 bits)
 * Bit 19:16   |   Syndrome (4 bits)
 * Bit 15:14   |   Reserved (2 bits)
 * Bit 13      |   Lower nibble status error (1 bit)
 * Bit 12      |   Upper nibble status error (1 bit)
 * Bit 11:8    |   Slice number (4 bits)
 * Bit 7:5     |   Reserved (3 bits)
 * Bit 4:2     |   MCU rank index (3 bits)
 * Bit 1:0     |   Failure type (2 bits)
 *
 * NOTE(review): `type` and `syndrome` presumably carry the values of the
 * dimm::training_failure enums - confirm against the implementation.
 */
typedef union
{
    uint32_t value;
    struct
    {
        uint32_t type:2;
        uint32_t mcuRankIdx:3;
        uint32_t reserved_1:3; // byte0
        uint32_t sliceNum:4;
        uint32_t upperNibbStatErr:1;
        uint32_t lowerNibbStatErr:1;
        uint32_t reserved_2:2; // byte1
        uint32_t syndrome:4;
        uint32_t reserved_3:4; // byte2
        uint32_t reserved_byte:8;
    } __attribute__((packed)) bits;
} DIMMTrainingFailure_t;

// The bit-field view must overlay the raw 32-bit value exactly.
static_assert(sizeof(DIMMTrainingFailure_t) == sizeof(uint32_t),
              "DIMMTrainingFailure_t must be exactly 32 bits");
129
namespace ddr
{
namespace status
{
/**
 * @brief DDR status codes.
 *
 * The values are not contiguous: 0x02 and 0x03 are not defined here.
 */
enum ddr_status
{
    NO_SYSTEM_LEVEL_ERROR = 0x01,
    ECC_INITIALIZATION_FAILURE = 0x04,
    CONFIGURATION_FAILURE = 0x05,
    TRAINING_FAILURE = 0x06,
    OTHER_FAILURE = 0x07,
    BOOT_FAILURE_NO_VALID_CONFIG = 0x08,
    FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS = 0x09,
};
} // namespace status
} // namespace ddr
146
namespace dimm
{
namespace status
{
/** @brief DIMM status codes. */
enum dimm_status
{
    INSTALLED_NO_ERROR = 0x01,
    NOT_INSTALLED = 0x02,
    OTHER_FAILURE = 0x07,
    INSTALLED_BUT_DISABLED = 0x10,
    TRAINING_FAILURE = 0x12,
    PMIC_HIGH_TEMP = 0x13,
    TSx_HIGH_TEMP = 0x14,
    SPD_HUB_HIGH_TEMP = 0x15,
    PMIC_TEMP_ALERT = 0x16,
};
} // namespace status

namespace training_failure
{
/** @brief Training failure categories. */
enum dimm_training_failure_type
{
    PHY_TRAINING_FAILURE_TYPE = 0x01,
    DIMM_TRAINING_FAILURE_TYPE = 0x02,
};

namespace phy_syndrome
{
/** @brief Syndrome codes for PHY training failures. */
enum phy_training_failure_syndrome
{
    NA = 0x00,
    PHY_TRAINING_SETUP_FAILURE = 0x01,
    CA_LEVELING = 0x02,
    PHY_WRITE_LEVEL_FAILURE = 0x03,
    PHY_READ_GATE_LEVELING_FAILURE = 0x04,
    PHY_READ_LEVEL_FAILURE = 0x05,
    WRITE_DQ_LEVELING = 0x06,
    PHY_SW_TRAINING_FAILURE = 0x07,
};
} // namespace phy_syndrome

namespace dimm_syndrome
{
/** @brief Syndrome codes for DIMM training failures. */
enum dimm_training_failure_syndrome
{
    NA = 0x00,
    DRAM_VREFDQ_TRAINING_FAILURE = 0x01,
    LRDIMM_DB_TRAINING_FAILURE = 0x02,
    // NOTE(review): "LRDRIMM" looks like a typo for "LRDIMM"; the name is
    // kept because code outside this header may reference it.
    LRDRIMM_DB_SW_TRAINING_FAILURE = 0x03,
};
} // namespace dimm_syndrome
} // namespace training_failure
} // namespace dimm
200
/*
 * PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31:30   |   Reserved (2 bits)
 * Bit 29      |   A VR Critical condition observed (1 bit)
 * Bit 28      |   A VR Warning condition observed (1 bit)
 * Bit 27:16   |   Reserved (12 bits)
 * Bit 15:8    |   VR status byte high - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (upper byte) (8 bits)
 * Bit 7:0     |   VR status byte low - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (lower byte) (8 bits)
 *
 * The bit-fields are declared from bit 0 upwards, so the table above matches
 * `bits` only on ABIs that allocate bit-fields starting at the least
 * significant bit.
 */
typedef union
{
    uint32_t value;
    struct
    {
        uint32_t vr_status_byte_low:8;
        uint32_t vr_status_byte_high:8;
        uint32_t reserved_1:12;
        uint32_t warning:1;
        uint32_t critical:1;
        uint32_t reserved_2:2;
    } __attribute__((packed)) bits;
} VRDStatus_t;

// The bit-field view must overlay the raw 32-bit reading exactly.
static_assert(sizeof(VRDStatus_t) == sizeof(uint32_t),
              "VRDStatus_t must be exactly 32 bits");
226
Chau Lya743e382024-10-26 11:12:22 +0000227/**
228 * @brief OemEventManager
229 *
230 *
231 */
232class OemEventManager
233{
234 public:
235 OemEventManager() = delete;
236 OemEventManager(const OemEventManager&) = delete;
237 OemEventManager(OemEventManager&&) = delete;
238 OemEventManager& operator=(const OemEventManager&) = delete;
239 OemEventManager& operator=(OemEventManager&&) = delete;
240 virtual ~OemEventManager() = default;
241
242 explicit OemEventManager(
243 sdeventplus::Event& event,
244 requester::Handler<requester::Request>* /* handler */,
245 pldm::InstanceIdDb& /* instanceIdDb */) : event(event) {};
246
247 /** @brief Decode sensor event messages and handle correspondingly.
248 *
249 * @param[in] request - the request message of sensor event
250 * @param[in] payloadLength - the payload length of sensor event
251 * @param[in] formatVersion - the format version of sensor event
252 * @param[in] tid - TID
253 * @param[in] eventDataOffset - the event data offset of sensor event
254 *
255 * @return int - returned error code
256 */
257 int handleSensorEvent(const pldm_msg* request, size_t payloadLength,
258 uint8_t /* formatVersion */, pldm_tid_t tid,
259 size_t eventDataOffset);
260
Dung Cao72c8aa02023-11-22 02:31:41 +0000261 /** @brief Handle the polled CPER (0x07, 0xFA) event class.
262 *
263 * @param[in] tid - terminus ID
264 * @param[out] eventId - Event ID
265 * @param[in] eventData - event data
266 * @param[in] eventDataSize - size of event data
267 *
268 * @return int - PLDM completion code
269 */
270 int processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
271 const uint8_t* eventData, size_t eventDataSize);
272
Thu Nguyen79f9ff62024-11-22 03:36:27 +0000273 /** @brief Decode sensor event messages and handle correspondingly.
274 *
275 * @param[in] request - the request message of sensor event
276 * @param[in] payloadLength - the payload length of sensor event
277 * @param[in] formatVersion - the format version of sensor event
278 * @param[in] tid - TID
279 * @param[in] eventDataOffset - the event data offset of sensor event
280 *
281 * @return int - returned error code
282 */
283 int handlepldmMessagePollEvent(
284 const pldm_msg* request, size_t payloadLength,
285 uint8_t /* formatVersion */, pldm_tid_t tid, size_t eventDataOffset);
286
Chau Lya743e382024-10-26 11:12:22 +0000287 protected:
288 /** @brief Create prefix string for logging message.
289 *
290 * @param[in] tid - TID
291 * @param[in] sensorId - Sensor ID
292 *
293 * @return std::string - the prefeix string
294 */
295 std::string prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId);
296
297 /** @brief Log the message into Redfish SEL.
298 *
299 * @param[in] description - the logging message
300 * @param[in] logLevel - the logging level
301 */
302 void sendJournalRedfish(const std::string& description,
303 log_level& logLevel);
304
305 /** @brief Convert the one-hot DIMM index byte into a string of DIMM
306 * indexes.
307 *
308 * @param[in] dimmIdxs - the one-hot DIMM index byte
309 *
310 * @return std::string - the string of DIMM indexes
311 */
312 std::string dimmIdxsToString(uint32_t dimmIdxs);
313
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000314 /** @brief Convert sensor ID to DIMM index. Return maxDIMMInstantNum
315 * in failure.
316 *
317 * @param[in] sensorId - sensorID
318 *
319 * @return uint8_t - DIMM index
320 */
321 uint8_t sensorIdToDIMMIdx(const uint16_t& sensorId);
322
Chau Lycebf4762024-10-03 09:02:54 +0000323 /** @brief Convert the DIMM training failure into logging string.
324 *
325 * @param[in] failureInfo - the one-hot DIMM index byte
326 *
327 * @return std::string - the returned logging string
328 */
329 std::string dimmTrainingFailureToMsg(uint32_t failureInfo);
330
Chau Ly3de0d942024-10-03 08:57:11 +0000331 /** @brief Handle numeric sensor event message from PCIe hot-plug sensor.
332 *
333 * @param[in] tid - TID
334 * @param[in] sensorId - Sensor ID
335 * @param[in] presentReading - the present reading of the sensor
336 */
337 void handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
338 uint32_t presentReading);
339
Chau Lya743e382024-10-26 11:12:22 +0000340 /** @brief Handle numeric sensor event message from boot overall sensor.
341 *
342 * @param[in] tid - TID
343 * @param[in] sensorId - Sensor ID
344 * @param[in] presentReading - the present reading of the sensor
345 */
346 void handleBootOverallEvent(pldm_tid_t /*tid*/, uint16_t /*sensorId*/,
347 uint32_t presentReading);
348
Chau Lycebf4762024-10-03 09:02:54 +0000349 /** @brief Handle numeric sensor event message from DIMM status sensor.
350 *
351 * @param[in] tid - TID
352 * @param[in] sensorId - Sensor ID
353 * @param[in] presentReading - the present reading of the sensor
354 */
355 void handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
356 uint32_t presentReading);
357
358 /** @brief Handle numeric sensor event message from DDR status sensor.
359 *
360 * @param[in] tid - TID
361 * @param[in] sensorId - Sensor ID
362 * @param[in] presentReading - the present reading of the sensor
363 */
364 void handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
365 uint32_t presentReading);
366
Chau Ly4cca3dc2024-10-03 09:07:09 +0000367 /** @brief Handle numeric sensor event message from VRD status sensor.
368 *
369 * @param[in] tid - TID
370 * @param[in] sensorId - Sensor ID
371 * @param[in] presentReading - the present reading of the sensor
372 */
373 void handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
374 uint32_t presentReading);
375
Chau Lyb01357f2024-10-17 09:18:01 +0000376 /** @brief Handle numeric sensor event message from Watchdog status sensor.
377 *
378 * @param[in] tid - TID
379 * @param[in] sensorId - Sensor ID
380 * @param[in] presentReading - the present reading of the sensor
381 */
382 void handleNumericWatchdogEvent(pldm_tid_t tid, uint16_t sensorId,
383 uint32_t presentReading);
384
Chau Lya743e382024-10-26 11:12:22 +0000385 /** @brief Handle numeric sensor event messages.
386 *
387 * @param[in] tid - TID
388 * @param[in] sensorId - Sensor ID
389 * @param[in] sensorData - the sensor data
390 * @param[in] sensorDataLength - the length of sensor data
391 *
392 * @return int - returned error code
393 */
394 int processNumericSensorEvent(pldm_tid_t tid, uint16_t sensorId,
395 const uint8_t* sensorData,
396 size_t sensorDataLength);
397
398 /** @brief Handle state sensor event messages.
399 *
400 * @param[in] tid - TID
401 * @param[in] sensorId - Sensor ID
402 * @param[in] sensorData - the sensor data
403 * @param[in] sensorDataLength - the length of sensor data
404 *
405 * @return int - returned error code
406 */
407 int processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
408 const uint8_t* sensorData,
409 size_t sensorDataLength);
410
411 /** @brief Handle op state sensor event messages.
412 *
413 * @param[in] tid - TID
414 * @param[in] sensorId - Sensor ID
415 * @param[in] sensorData - the sensor data
416 * @param[in] sensorDataLength - the length of sensor data
417 *
418 * @return int - returned error code
419 */
420 int processSensorOpStateEvent(pldm_tid_t tid, uint16_t sensorId,
421 const uint8_t* sensorData,
422 size_t sensorDataLength);
423
424 /** @brief reference of main event loop of pldmd, primarily used to schedule
425 * work
426 */
427 sdeventplus::Event& event;
428};
429} // namespace oem_ampere
430} // namespace pldm