blob: 2f76909db22d26f81dbcccebe4b021118e25a26e [file] [log] [blame]
Chau Lya743e382024-10-26 11:12:22 +00001#pragma once
2
3#include "libpldm/pldm.h"
4
5#include "common/instance_id.hpp"
6#include "common/types.hpp"
7#include "oem_event_manager.hpp"
8#include "platform-mc/manager.hpp"
9#include "requester/handler.hpp"
10#include "requester/request.hpp"
11
12namespace pldm
13{
14namespace oem_ampere
15{
16using namespace pldm::pdr;
/* Polling interval used for normal OEM event polling.
 * NOTE(review): the trailing unit note says "ms", but 5000000 ms is roughly
 * 83 minutes; presumably the value is in microseconds (5 s) - confirm against
 * the code that consumes this macro before relying on the unit.
 */
#define NORMAL_EVENT_POLLING_TIME 5000000 // ms

/** @brief Lookup table from a one-byte code to its message string. */
using EventToMsgMap_t = std::unordered_map<uint8_t, std::string>;
20
/** @brief Numeric IDs of the Ampere OEM sensors named in this header. */
enum sensor_ids
{
    DDR_STATUS = 51,

    // *_VR_STATE sensors; the IDs are spaced 5 apart.
    PCP_VR_STATE = 75,
    SOC_VR_STATE = 80,
    DPHY_VR1_STATE = 85,
    DPHY_VR2_STATE = 90,
    D2D_VR_STATE = 95,
    IOC_VR1_STATE = 100,
    IOC_VR2_STATE = 105,
    PCI_D_VR_STATE = 110,
    PCI_A_VR_STATE = 115,

    PCIE_HOT_PLUG = 169,
    SOC_HEALTH_AVAILABILITY = 170,
    BOOT_OVERALL = 175,
    WATCH_DOG = 179,

    // *_UE sensors and SOC_BERT; the IDs are spaced 2 apart.
    CORE_UE = 192,
    MCU_UE = 194,
    PCIE_UE = 196,
    SOC_UE = 198,
    SOC_BERT = 200,
};
43
namespace boot
{
namespace status
{
/** @brief Boot status codes (success / failure). */
enum boot_status
{
    BOOT_STATUS_SUCCESS = 0x80,
    BOOT_STATUS_FAILURE = 0x81,
};
} // namespace status
namespace stage
{
/** @brief Boot stage codes.
 *
 *  UEFI_STATUS_CLASS_CODE_MIN and UEFI_STATUS_CLASS_CODE_MAX delimit the
 *  range 0x00 - 0x7f; the remaining enumerators are discrete stage codes
 *  in 0x90 - 0x99.
 */
enum boot_stage
{
    UEFI_STATUS_CLASS_CODE_MIN = 0x00,
    UEFI_STATUS_CLASS_CODE_MAX = 0x7f,
    SECPRO = 0x90,
    MPRO = 0x91,
    ATF_BL1 = 0x92,
    ATF_BL2 = 0x93,
    DDR_INITIALIZATION = 0x94,
    DDR_TRAINING = 0x95,
    S0_DDR_TRAINING_FAILURE = 0x96,
    ATF_BL31 = 0x97,
    ATF_BL32 = 0x98,
    S1_DDR_TRAINING_FAILURE = 0x99,
};
} // namespace stage
} // namespace boot
73
/** @brief Logging levels accepted by the OEM event logging helpers.
 *
 *  Enumerators take the implicit values 0..3 in declaration order.
 */
enum class log_level : int
{
    OK,
    WARNING,
    CRITICAL,
    BIOSFWPANIC,
};
81
/*
 * PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31      |   Reserved
 * Bit 30:24   |   Media slot number (0 - 63) This field can be used by UEFI
 *             |   to indicate the media slot number (such as NVMe/SSD slot)
 *             |   (7 bits)
 * Bit 23      |   Operation status: 1 = operation failed
 *             |   0 = operation successful
 * Bit 22      |   Action: 0 - Insertion 1 - Removal
 * Bit 21:18   |   Function (4 bits)
 * Bit 17:13   |   Device (5 bits)
 * Bit 12:5    |   Bus (8 bits)
 * Bit 4:0     |   Segment (5 bits)
 *
 * NOTE(review): a 7-bit field can encode 0 - 127; confirm the documented
 * 0 - 63 media slot range against the firmware specification.
 *
 * The bit-fields below are declared from bit 0 upwards, so they line up with
 * the table only on compilers that allocate bit-fields LSB-first.
 */
typedef union
{
    uint32_t value; // raw 32-bit present reading
    struct
    {
        uint32_t segment:5;
        uint32_t bus:8;
        uint32_t device:5;
        uint32_t function:4;
        uint32_t action:1;
        uint32_t opStatus:1;
        uint32_t mediaSlot:7;
        uint32_t reserved:1;
    } __attribute__((packed)) bits;
} PCIeHotPlugEventRecord_t;

// The bit-field view must overlay the raw reading exactly.
static_assert(sizeof(PCIeHotPlugEventRecord_t) == sizeof(uint32_t),
              "PCIeHotPlugEventRecord_t must be exactly 32 bits wide");
112
/*
 * DIMM training failure value format, derived from the bit-field
 * declaration order below (assumes LSB-first bit-field allocation):
 * FIELD       |                   COMMENT
 * Bit 31:24   |   Reserved byte (8 bits)
 * Bit 23:20   |   Reserved (4 bits)
 * Bit 19:16   |   Syndrome (4 bits)
 * Bit 15:14   |   Reserved (2 bits)
 * Bit 13      |   lowerNibbStatErr (1 bit)
 * Bit 12      |   upperNibbStatErr (1 bit)
 * Bit 11:8    |   sliceNum (4 bits)
 * Bit 7:5     |   Reserved (3 bits)
 * Bit 4:2     |   mcuRankIdx (3 bits)
 * Bit 1:0     |   type (2 bits)
 */
typedef union
{
    uint32_t value; // raw 32-bit failure information
    struct
    {
        uint32_t type:2;
        uint32_t mcuRankIdx:3;
        uint32_t reserved_1:3; // byte0
        uint32_t sliceNum:4;
        uint32_t upperNibbStatErr:1;
        uint32_t lowerNibbStatErr:1;
        uint32_t reserved_2:2; // byte1
        uint32_t syndrome:4;
        uint32_t reserved_3:4; // byte2
        uint32_t reserved_byte:8;
    } __attribute__((packed)) bits;
} DIMMTrainingFailure_t;

// The bit-field view must overlay the raw value exactly.
static_assert(sizeof(DIMMTrainingFailure_t) == sizeof(uint32_t),
              "DIMMTrainingFailure_t must be exactly 32 bits wide");
130
namespace ddr
{
namespace status
{
/** @brief DDR status codes. Values 0x02 and 0x03 are not defined here. */
enum ddr_status
{
    NO_SYSTEM_LEVEL_ERROR = 0x01,
    ECC_INITIALIZATION_FAILURE = 0x04,
    CONFIGURATION_FAILURE = 0x05,
    TRAINING_FAILURE = 0x06,
    OTHER_FAILURE = 0x07,
    BOOT_FAILURE_NO_VALID_CONFIG = 0x08,
    FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS = 0x09,
};
} // namespace status
} // namespace ddr
147
namespace dimm
{
namespace status
{
/** @brief DIMM status codes. */
enum dimm_status
{
    INSTALLED_NO_ERROR = 0x01,
    NOT_INSTALLED = 0x02,
    OTHER_FAILURE = 0x07,
    INSTALLED_BUT_DISABLED = 0x10,
    TRAINING_FAILURE = 0x12,
    PMIC_HIGH_TEMP = 0x13,
    TSx_HIGH_TEMP = 0x14,
    SPD_HUB_HIGH_TEMP = 0x15,
    PMIC_TEMP_ALERT = 0x16,
};
} // namespace status

namespace training_failure
{
/** @brief Kind of training failure: PHY or DIMM. */
enum dimm_training_failure_type
{
    PHY_TRAINING_FAILURE_TYPE = 0x01,
    DIMM_TRAINING_FAILURE_TYPE = 0x02,
};

namespace phy_syndrome
{
/** @brief Syndrome codes for a PHY training failure. */
enum phy_training_failure_syndrome
{
    NA = 0x00,
    PHY_TRAINING_SETUP_FAILURE = 0x01,
    CA_LEVELING = 0x02,
    PHY_WRITE_LEVEL_FAILURE = 0x03,
    PHY_READ_GATE_LEVELING_FAILURE = 0x04,
    PHY_READ_LEVEL_FAILURE = 0x05,
    WRITE_DQ_LEVELING = 0x06,
    PHY_SW_TRAINING_FAILURE = 0x07,
};
} // namespace phy_syndrome

namespace dimm_syndrome
{
/** @brief Syndrome codes for a DIMM training failure. */
enum dimm_training_failure_syndrome
{
    NA = 0x00,
    DRAM_VREFDQ_TRAINING_FAILURE = 0x01,
    LRDIMM_DB_TRAINING_FAILURE = 0x02,
    // NOTE(review): "LRDRIMM" looks like a misspelling of "LRDIMM"; the
    // identifier is kept as-is because other code may reference it.
    LRDRIMM_DB_SW_TRAINING_FAILURE = 0x03,
};
} // namespace dimm_syndrome
} // namespace training_failure
} // namespace dimm
201
/*
 * PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31:30   |   Reserved (2 bits)
 * Bit 29      |   A VR Critical condition observed (1 bit)
 * Bit 28      |   A VR Warning condition observed (1 bit)
 * Bit 27:16   |   Reserved (12 bits)
 * Bit 15:8    |   VR status byte high - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (upper byte) (8 bits)
 * Bit 7:0     |   VR status byte low - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (lower byte) (8 bits)
 *
 * The bit-fields below are declared from bit 0 upwards, so they line up with
 * the table only on compilers that allocate bit-fields LSB-first.
 */
typedef union
{
    uint32_t value; // raw 32-bit present reading
    struct
    {
        uint32_t vr_status_byte_low:8;
        uint32_t vr_status_byte_high:8;
        uint32_t reserved_1:12;
        uint32_t warning:1;
        uint32_t critical:1;
        uint32_t reserved_2:2;
    } __attribute__((packed)) bits;
} VRDStatus_t;

// The bit-field view must overlay the raw reading exactly.
static_assert(sizeof(VRDStatus_t) == sizeof(uint32_t),
              "VRDStatus_t must be exactly 32 bits wide");
227
Chau Lya743e382024-10-26 11:12:22 +0000228/**
229 * @brief OemEventManager
230 *
231 *
232 */
233class OemEventManager
234{
235 public:
236 OemEventManager() = delete;
237 OemEventManager(const OemEventManager&) = delete;
238 OemEventManager(OemEventManager&&) = delete;
239 OemEventManager& operator=(const OemEventManager&) = delete;
240 OemEventManager& operator=(OemEventManager&&) = delete;
241 virtual ~OemEventManager() = default;
242
243 explicit OemEventManager(
244 sdeventplus::Event& event,
245 requester::Handler<requester::Request>* /* handler */,
Dung Cao4a503832025-01-08 03:45:17 +0000246 pldm::InstanceIdDb& /* instanceIdDb */,
247 platform_mc::Manager* platformManager) :
248 event(event), manager(platformManager) {};
Chau Lya743e382024-10-26 11:12:22 +0000249
250 /** @brief Decode sensor event messages and handle correspondingly.
251 *
252 * @param[in] request - the request message of sensor event
253 * @param[in] payloadLength - the payload length of sensor event
254 * @param[in] formatVersion - the format version of sensor event
255 * @param[in] tid - TID
256 * @param[in] eventDataOffset - the event data offset of sensor event
257 *
258 * @return int - returned error code
259 */
260 int handleSensorEvent(const pldm_msg* request, size_t payloadLength,
261 uint8_t /* formatVersion */, pldm_tid_t tid,
262 size_t eventDataOffset);
263
Dung Cao72c8aa02023-11-22 02:31:41 +0000264 /** @brief Handle the polled CPER (0x07, 0xFA) event class.
265 *
266 * @param[in] tid - terminus ID
267 * @param[out] eventId - Event ID
268 * @param[in] eventData - event data
269 * @param[in] eventDataSize - size of event data
270 *
271 * @return int - PLDM completion code
272 */
273 int processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
274 const uint8_t* eventData, size_t eventDataSize);
275
Thu Nguyen79f9ff62024-11-22 03:36:27 +0000276 /** @brief Decode sensor event messages and handle correspondingly.
277 *
278 * @param[in] request - the request message of sensor event
279 * @param[in] payloadLength - the payload length of sensor event
280 * @param[in] formatVersion - the format version of sensor event
281 * @param[in] tid - TID
282 * @param[in] eventDataOffset - the event data offset of sensor event
283 *
284 * @return int - returned error code
285 */
286 int handlepldmMessagePollEvent(
287 const pldm_msg* request, size_t payloadLength,
288 uint8_t /* formatVersion */, pldm_tid_t tid, size_t eventDataOffset);
289
Dung Cao4a503832025-01-08 03:45:17 +0000290 /** @brief A Coroutine to do OEM PollForPlatformEvent action
291 *
292 * @param[in] tid - the destination TID
293 * @return coroutine return_value - PLDM completion code
294 */
295 exec::task<int> oemPollForPlatformEvent(pldm_tid_t tid);
296
Chau Lya743e382024-10-26 11:12:22 +0000297 protected:
298 /** @brief Create prefix string for logging message.
299 *
300 * @param[in] tid - TID
301 * @param[in] sensorId - Sensor ID
302 *
303 * @return std::string - the prefeix string
304 */
305 std::string prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId);
306
307 /** @brief Log the message into Redfish SEL.
308 *
309 * @param[in] description - the logging message
310 * @param[in] logLevel - the logging level
311 */
312 void sendJournalRedfish(const std::string& description,
313 log_level& logLevel);
314
315 /** @brief Convert the one-hot DIMM index byte into a string of DIMM
316 * indexes.
317 *
318 * @param[in] dimmIdxs - the one-hot DIMM index byte
319 *
320 * @return std::string - the string of DIMM indexes
321 */
322 std::string dimmIdxsToString(uint32_t dimmIdxs);
323
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000324 /** @brief Convert sensor ID to DIMM index. Return maxDIMMInstantNum
325 * in failure.
326 *
327 * @param[in] sensorId - sensorID
328 *
329 * @return uint8_t - DIMM index
330 */
331 uint8_t sensorIdToDIMMIdx(const uint16_t& sensorId);
332
Chau Lycebf4762024-10-03 09:02:54 +0000333 /** @brief Convert the DIMM training failure into logging string.
334 *
335 * @param[in] failureInfo - the one-hot DIMM index byte
336 *
337 * @return std::string - the returned logging string
338 */
339 std::string dimmTrainingFailureToMsg(uint32_t failureInfo);
340
Chau Ly3de0d942024-10-03 08:57:11 +0000341 /** @brief Handle numeric sensor event message from PCIe hot-plug sensor.
342 *
343 * @param[in] tid - TID
344 * @param[in] sensorId - Sensor ID
345 * @param[in] presentReading - the present reading of the sensor
346 */
347 void handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
348 uint32_t presentReading);
349
Chau Lya743e382024-10-26 11:12:22 +0000350 /** @brief Handle numeric sensor event message from boot overall sensor.
351 *
352 * @param[in] tid - TID
353 * @param[in] sensorId - Sensor ID
354 * @param[in] presentReading - the present reading of the sensor
355 */
356 void handleBootOverallEvent(pldm_tid_t /*tid*/, uint16_t /*sensorId*/,
357 uint32_t presentReading);
358
Chau Lycebf4762024-10-03 09:02:54 +0000359 /** @brief Handle numeric sensor event message from DIMM status sensor.
360 *
361 * @param[in] tid - TID
362 * @param[in] sensorId - Sensor ID
363 * @param[in] presentReading - the present reading of the sensor
364 */
365 void handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
366 uint32_t presentReading);
367
368 /** @brief Handle numeric sensor event message from DDR status sensor.
369 *
370 * @param[in] tid - TID
371 * @param[in] sensorId - Sensor ID
372 * @param[in] presentReading - the present reading of the sensor
373 */
374 void handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
375 uint32_t presentReading);
376
Chau Ly4cca3dc2024-10-03 09:07:09 +0000377 /** @brief Handle numeric sensor event message from VRD status sensor.
378 *
379 * @param[in] tid - TID
380 * @param[in] sensorId - Sensor ID
381 * @param[in] presentReading - the present reading of the sensor
382 */
383 void handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
384 uint32_t presentReading);
385
Chau Lyb01357f2024-10-17 09:18:01 +0000386 /** @brief Handle numeric sensor event message from Watchdog status sensor.
387 *
388 * @param[in] tid - TID
389 * @param[in] sensorId - Sensor ID
390 * @param[in] presentReading - the present reading of the sensor
391 */
392 void handleNumericWatchdogEvent(pldm_tid_t tid, uint16_t sensorId,
393 uint32_t presentReading);
394
Chau Lya743e382024-10-26 11:12:22 +0000395 /** @brief Handle numeric sensor event messages.
396 *
397 * @param[in] tid - TID
398 * @param[in] sensorId - Sensor ID
399 * @param[in] sensorData - the sensor data
400 * @param[in] sensorDataLength - the length of sensor data
401 *
402 * @return int - returned error code
403 */
404 int processNumericSensorEvent(pldm_tid_t tid, uint16_t sensorId,
405 const uint8_t* sensorData,
406 size_t sensorDataLength);
407
408 /** @brief Handle state sensor event messages.
409 *
410 * @param[in] tid - TID
411 * @param[in] sensorId - Sensor ID
412 * @param[in] sensorData - the sensor data
413 * @param[in] sensorDataLength - the length of sensor data
414 *
415 * @return int - returned error code
416 */
417 int processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
418 const uint8_t* sensorData,
419 size_t sensorDataLength);
420
421 /** @brief Handle op state sensor event messages.
422 *
423 * @param[in] tid - TID
424 * @param[in] sensorId - Sensor ID
425 * @param[in] sensorData - the sensor data
426 * @param[in] sensorDataLength - the length of sensor data
427 *
428 * @return int - returned error code
429 */
430 int processSensorOpStateEvent(pldm_tid_t tid, uint16_t sensorId,
431 const uint8_t* sensorData,
432 size_t sensorDataLength);
433
434 /** @brief reference of main event loop of pldmd, primarily used to schedule
435 * work
436 */
437 sdeventplus::Event& event;
Dung Cao4a503832025-01-08 03:45:17 +0000438
439 /** @brief Latest OEM PollForPlatformEvent message timeStamp. */
440 std::map<pldm_tid_t, uint64_t> timeStampMap;
441
442 /** @brief A Manager interface for calling the hook functions */
443 platform_mc::Manager* manager;
Chau Lya743e382024-10-26 11:12:22 +0000444};
445} // namespace oem_ampere
446} // namespace pldm