blob: 79266f56574563298276f02023bb14b8abd7ab79 [file] [log] [blame]
Chau Lya743e382024-10-26 11:12:22 +00001#include "oem_event_manager.hpp"
2
3#include "requester/handler.hpp"
4#include "requester/request.hpp"
5
6#include <config.h>
7#include <libpldm/pldm.h>
8#include <libpldm/utils.h>
9#include <systemd/sd-journal.h>
10
11#include <phosphor-logging/lg2.hpp>
12#include <xyz/openbmc_project/Logging/Entry/server.hpp>
13
14#include <algorithm>
15#include <map>
16#include <sstream>
17#include <string>
18#include <unordered_map>
19
20namespace pldm
21{
22namespace oem_ampere
23{
24namespace boot_stage = boot::stage;
Chau Lycebf4762024-10-03 09:02:54 +000025namespace ddr_status = ddr::status;
26namespace dimm_status = dimm::status;
27namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
28namespace phy_syndrome = dimm::training_failure::phy_syndrome;
29namespace training_failure = dimm::training_failure;
Chau Lya743e382024-10-26 11:12:22 +000030
Chau Ly3de0d942024-10-03 08:57:11 +000031constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK";
32constexpr const char* ampereWarningRegistry =
33 "OpenBMC.0.1.AmpereWarning.Warning";
34constexpr const char* ampereCriticalRegistry =
35 "OpenBMC.0.1.AmpereCritical.Critical";
Chau Lya743e382024-10-26 11:12:22 +000036constexpr const char* BIOSFWPanicRegistry =
37 "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning";
38constexpr auto maxDIMMIdxBitNum = 24;
Chau Lycebf4762024-10-03 09:02:54 +000039constexpr auto maxDIMMInstantNum = 24;
Chau Lya743e382024-10-26 11:12:22 +000040
/*
   Status text of a generic boot stage.
   Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> bootStatMsg{" booting", " completed", " failed"};

/*
   Status text of the DDR training stage.
   Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> ddrTrainingMsg{
    " progress started", " in-progress", " progress completed"};

/*
   PMIC temperature alert text.
   Indexed by byte 0 of a DIMM status reading that reports PMIC_TEMP_ALERT.
*/
std::array<std::string, 8> pmicTempAlertMsg{
    "Below 85°C", "85°C",  "95°C",  "105°C",
    "115°C",      "125°C", "135°C", "Equal or greater than 140°C"};
60
61/*
Chau Lya743e382024-10-26 11:12:22 +000062 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC
63 EPs through SMBus and PCIe. When host boots up, SMBUS interface
64 comes up first. In this interface, BMC is bus owner.
65
66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).
68*/
69EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
70
71/*
72 A map between sensor IDs and their names in string.
73 Using pldm::oem::sensor_ids
74*/
Chau Ly4cca3dc2024-10-03 09:07:09 +000075EventToMsgMap_t sensorIdToStrMap = {
Chau Lyef214b52024-10-16 09:40:38 +000076 {DDR_STATUS, "DDR_STATUS"},
77 {PCP_VR_STATE, "PCP_VR_STATE"},
78 {SOC_VR_STATE, "SOC_VR_STATE"},
79 {DPHY_VR1_STATE, "DPHY_VR1_STATE"},
80 {DPHY_VR2_STATE, "DPHY_VR2_STATE"},
81 {D2D_VR_STATE, "D2D_VR_STATE"},
82 {IOC_VR1_STATE, "IOC_VR1_STATE"},
83 {IOC_VR2_STATE, "IOC_VR2_STATE"},
84 {PCI_D_VR_STATE, "PCI_D_VR_STATE"},
85 {PCI_A_VR_STATE, "PCI_A_VR_STATE"},
86 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"},
87 {BOOT_OVERALL, "BOOT_OVERALL"},
Chau Lyb01357f2024-10-17 09:18:01 +000088 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"},
89 {WATCH_DOG, "WATCH_DOG"}};
Chau Lya743e382024-10-26 11:12:22 +000090
91/*
92 A map between the boot stages and logging strings.
93 Using pldm::oem::boot::stage::boot_stage
94*/
95EventToMsgMap_t bootStageToMsgMap = {
96 {boot_stage::SECPRO, "SECpro"},
97 {boot_stage::MPRO, "Mpro"},
98 {boot_stage::ATF_BL1, "ATF BL1"},
99 {boot_stage::ATF_BL2, "ATF BL2"},
100 {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
101 {boot_stage::DDR_TRAINING, "DDR training"},
102 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
103 {boot_stage::ATF_BL31, "ATF BL31"},
104 {boot_stage::ATF_BL32, "ATF BL32"},
105 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
106 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
107 "ATF BL33 (UEFI) booting status = "}};
108
109/*
Chau Lycebf4762024-10-03 09:02:54 +0000110 A map between DDR status and logging strings.
111 Using pldm::oem::ddr::status::ddr_status
112*/
113EventToMsgMap_t ddrStatusToMsgMap = {
114 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
115 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
116 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
117 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
118 {ddr_status::OTHER_FAILURE, "has other failure"},
119 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
120 "has boot failure due to no configuration"},
121 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
122 "failsafe activated but boot success with the next valid configuration"}};
123
124/*
125 A map between DIMM status and logging strings.
126 Using pldm::oem::dimm::status::dimm_status
127*/
128EventToMsgMap_t dimmStatusToMsgMap = {
129 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
130 {dimm_status::NOT_INSTALLED, "is not installed"},
131 {dimm_status::OTHER_FAILURE, "has other failure"},
132 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
133 {dimm_status::TRAINING_FAILURE, "has training failure; "},
134 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
135
136/*
137 A map between PHY training failure syndrome and logging strings.
138 Using
139 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome
140*/
141EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
142 {phy_syndrome::NA, "(N/A)"},
143 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
144 {phy_syndrome::CA_LEVELING, "(CA leveling)"},
145 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
146 "(PHY write level failure - see syndrome 1)"},
147 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
148 "(PHY read gate leveling failure)"},
149 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
150 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
151 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
152
153/*
154 A map between DIMM training failure syndrome and logging strings.
155 Using
156 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome
157*/
158EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
159 {dimm_syndrome::NA, "(N/A)"},
160 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
161 "(DRAM VREFDQ training failure)"},
162 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
163 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
164 "(LRDRIMM DB SW training failure)"}};
165
166/*
167 A map between DIMM training failure type and a pair of <logging strings -
168 syndrome map>. Using
169 pldm::oem::dimm::training_faillure::dimm_training_failure_type
170*/
171std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
172 dimmTrainingFailureTypeMap = {
173 {training_failure::PHY_TRAINING_FAILURE_TYPE,
174 std::make_pair("PHY training failure",
175 phyTrainingFailureSyndromeToMsgMap)},
176 {training_failure::DIMM_TRAINING_FAILURE_TYPE,
177 std::make_pair("DIMM training failure",
178 dimmTrainingFailureSyndromeToMsgMap)}};
179
180/*
Chau Lya743e382024-10-26 11:12:22 +0000181 A map between log level and the registry used for Redfish SEL log
182 Using pldm::oem::log_level
183*/
184std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
Chau Ly3de0d942024-10-03 08:57:11 +0000185 {log_level::OK, ampereEventRegistry},
186 {log_level::WARNING, ampereWarningRegistry},
187 {log_level::CRITICAL, ampereCriticalRegistry},
Chau Lya743e382024-10-26 11:12:22 +0000188 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
189
Chau Lyef214b52024-10-16 09:40:38 +0000190std::unordered_map<
191 uint16_t,
192 std::vector<std::pair<
193 std::string,
194 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>>
195 stateSensorToMsgMap = {
196 {SOC_HEALTH_AVAILABILITY,
197 {{"SoC Health",
198 {{1, {log_level::OK, "Normal"}},
199 {2, {log_level::WARNING, "Non-Critical"}},
200 {3, {log_level::CRITICAL, "Critical"}},
201 {4, {log_level::CRITICAL, "Fatal"}}}},
202 {"SoC Availability",
203 {{1, {log_level::OK, "Enabled"}},
204 {2, {log_level::WARNING, "Disabled"}},
Chau Lyb01357f2024-10-17 09:18:01 +0000205 {3, {log_level::CRITICAL, "Shutdown"}}}}}},
206 {WATCH_DOG,
207 {{"Global Watch Dog",
208 {{1, {log_level::OK, "Normal"}},
209 {2, {log_level::CRITICAL, "Timer Expired"}}}},
210 {"Secure Watch Dog",
211 {{1, {log_level::OK, "Normal"}},
212 {2, {log_level::CRITICAL, "Timer Expired"}}}},
213 {"Non-secure Watch Dog",
214 {{1, {log_level::OK, "Normal"}},
215 {2, {log_level::CRITICAL, "Timer Expired"}}}}}}};
Chau Lyef214b52024-10-16 09:40:38 +0000216
Chau Lya743e382024-10-26 11:12:22 +0000217std::string
218 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId)
219{
220 std::string description;
221 if (!tidToSocketNameMap.contains(tid))
222 {
223 description += "TID " + std::to_string(tid) + ": ";
224 }
225 else
226 {
227 description += tidToSocketNameMap[tid] + ": ";
228 }
229
230 if (!sensorIdToStrMap.contains(sensorId))
231 {
232 description += "Sensor ID " + std::to_string(sensorId) + ": ";
233 }
234 else
235 {
236 description += sensorIdToStrMap[sensorId] + ": ";
237 }
238
239 return description;
240}
241
242void OemEventManager::sendJournalRedfish(const std::string& description,
243 log_level& logLevel)
244{
245 if (description.empty())
246 {
247 return;
248 }
249
250 if (!logLevelToRedfishMsgIdMap.contains(logLevel))
251 {
252 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
253 "DES", description);
254 return;
255 }
256 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
257 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
258 redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
259}
260
261std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
262{
263 std::string description;
264 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
265 {
266 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
267 {
268 description += " #" + std::to_string(bitIdx);
269 }
270 }
271 return description;
272}
273
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000274uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId)
275{
276 uint8_t dimmIdx = maxDIMMInstantNum;
277 int sensorId_Off = sensorId - 4;
278 if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) &&
279 ((sensorId_Off / 2) < maxDIMMInstantNum))
280 {
281 dimmIdx = sensorId_Off / 2;
282 }
283 return dimmIdx;
284}
285
Chau Lya743e382024-10-26 11:12:22 +0000286void OemEventManager::handleBootOverallEvent(
287 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
288{
289 log_level logLevel{log_level::OK};
290 std::string description;
291 std::stringstream strStream;
292
293 uint8_t byte0 = (presentReading & 0x000000ff);
294 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
295 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
296 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
297 /*
298 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
299 * ATF BL32 and DDR initialization
300 */
301 if (bootStageToMsgMap.contains(byte3))
302 {
303 // Boot stage adding
304 description += bootStageToMsgMap[byte3];
305
306 switch (byte3)
307 {
308 case boot_stage::DDR_TRAINING:
309 if (byte0 >= ddrTrainingMsg.size())
310 {
311 logLevel = log_level::BIOSFWPANIC;
312 description += " unknown status";
313 }
314 else
315 {
316 description += ddrTrainingMsg[byte0];
317 }
318 if (0x01 == byte0)
319 {
320 // Add complete percentage
321 description += " at " + std::to_string(byte1) + "%";
322 }
323 break;
324 case boot_stage::S0_DDR_TRAINING_FAILURE:
325 case boot_stage::S1_DDR_TRAINING_FAILURE:
326 // ddr_training_status_msg()
327 logLevel = log_level::BIOSFWPANIC;
328 description += " at DIMMs:";
329 // dimmIdxs = presentReading & 0x00ffffff;
330 description += dimmIdxsToString(presentReading & 0x00ffffff);
331 description += " of socket ";
332 description +=
333 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
334 break;
335 default:
336 if (byte0 >= bootStatMsg.size())
337 {
338 logLevel = log_level::BIOSFWPANIC;
339 description += " unknown status";
340 }
341 else
342 {
343 description += bootStatMsg[byte0];
344 }
345 break;
346 }
347
348 // Sensor report action is fail
349 if (boot::status::BOOT_STATUS_FAILURE == byte2)
350 {
351 logLevel = log_level::BIOSFWPANIC;
352 }
353 }
354 else
355 {
356 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
357 {
358 description +=
359 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
360
361 strStream
362 << "Segment (0x" << std::setfill('0') << std::hex
363 << std::setw(8) << static_cast<uint32_t>(presentReading)
Chau Ly3de0d942024-10-03 08:57:11 +0000364 << "); Status Class (0x" << std::setw(2)
365 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
Chau Lya743e382024-10-26 11:12:22 +0000366 << std::setw(2) << static_cast<uint32_t>(byte2)
Chau Ly3de0d942024-10-03 08:57:11 +0000367 << "); Operation Code (0x" << std::setw(4)
Chau Lya743e382024-10-26 11:12:22 +0000368 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
369 << ")" << std::dec;
370
371 description += strStream.str();
372 }
373 }
374
375 // Log to Redfish event
376 sendJournalRedfish(description, logLevel);
377}
378
379int OemEventManager::processNumericSensorEvent(
380 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
381 size_t sensorDataLength)
382{
383 uint8_t eventState = 0;
384 uint8_t previousEventState = 0;
385 uint8_t sensorDataSize = 0;
386 uint32_t presentReading;
387 auto rc = decode_numeric_sensor_data(
388 sensorData, sensorDataLength, &eventState, &previousEventState,
389 &sensorDataSize, &presentReading);
390 if (rc)
391 {
392 lg2::error(
393 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
394 "TID", tid, "RC", rc);
395 return rc;
396 }
397
Chau Lycebf4762024-10-03 09:02:54 +0000398 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000399 if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum)
Chau Lycebf4762024-10-03 09:02:54 +0000400 {
401 handleDIMMStatusEvent(tid, sensorId, presentReading);
402 return PLDM_SUCCESS;
403 }
404
Chau Lya743e382024-10-26 11:12:22 +0000405 switch (sensorId)
406 {
407 case BOOT_OVERALL:
408 handleBootOverallEvent(tid, sensorId, presentReading);
409 break;
Chau Ly3de0d942024-10-03 08:57:11 +0000410 case PCIE_HOT_PLUG:
411 handlePCIeHotPlugEvent(tid, sensorId, presentReading);
412 break;
Chau Lycebf4762024-10-03 09:02:54 +0000413 case DDR_STATUS:
414 handleDDRStatusEvent(tid, sensorId, presentReading);
415 break;
Chau Ly4cca3dc2024-10-03 09:07:09 +0000416 case PCP_VR_STATE:
417 case SOC_VR_STATE:
418 case DPHY_VR1_STATE:
419 case DPHY_VR2_STATE:
420 case D2D_VR_STATE:
421 case IOC_VR1_STATE:
422 case IOC_VR2_STATE:
423 case PCI_D_VR_STATE:
424 case PCI_A_VR_STATE:
425 handleVRDStatusEvent(tid, sensorId, presentReading);
426 break;
Chau Lyb01357f2024-10-17 09:18:01 +0000427 case WATCH_DOG:
428 handleNumericWatchdogEvent(tid, sensorId, presentReading);
429 break;
Chau Lya743e382024-10-26 11:12:22 +0000430 default:
431 std::string description;
432 std::stringstream strStream;
433 log_level logLevel = log_level::OK;
434
435 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
436 description += prefixMsgStrCreation(tid, sensorId);
437 strStream << std::setfill('0') << std::hex << "eventState 0x"
438 << std::setw(2) << static_cast<uint32_t>(eventState)
439 << " previousEventState 0x" << std::setw(2)
440 << static_cast<uint32_t>(previousEventState)
441 << " sensorDataSize 0x" << std::setw(2)
442 << static_cast<uint32_t>(sensorDataSize)
443 << " presentReading 0x" << std::setw(8)
444 << static_cast<uint32_t>(presentReading) << std::dec;
445 description += strStream.str();
446
447 sendJournalRedfish(description, logLevel);
448 break;
449 }
450 return PLDM_SUCCESS;
451}
452
453int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
454 const uint8_t* sensorData,
455 size_t sensorDataLength)
456{
457 uint8_t sensorOffset = 0;
458 uint8_t eventState = 0;
459 uint8_t previousEventState = 0;
460
461 auto rc =
462 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
463 &eventState, &previousEventState);
464 if (rc)
465 {
466 lg2::error(
467 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
468 "TID", tid, "RC", rc);
469 return rc;
470 }
471
472 std::string description;
Chau Lya743e382024-10-26 11:12:22 +0000473 log_level logLevel = log_level::OK;
474
Chau Lyef214b52024-10-16 09:40:38 +0000475 if (stateSensorToMsgMap.contains(sensorId))
476 {
477 description += prefixMsgStrCreation(tid, sensorId);
478 auto componentMap = stateSensorToMsgMap[sensorId];
479 if (sensorOffset < componentMap.size())
480 {
481 description += std::get<0>(componentMap[sensorOffset]);
482 auto stateMap = std::get<1>(componentMap[sensorOffset]);
483 if (stateMap.contains(eventState))
484 {
485 logLevel = std::get<0>(stateMap[eventState]);
486 description += " state : " + std::get<1>(stateMap[eventState]);
487 if (stateMap.contains(previousEventState))
488 {
489 description += "; previous state: " +
490 std::get<1>(stateMap[previousEventState]);
491 }
492 }
493 else
494 {
495 description += " sends unsupported event state: " +
496 std::to_string(eventState);
497 if (stateMap.contains(previousEventState))
498 {
499 description += "; previous state: " +
500 std::get<1>(stateMap[previousEventState]);
501 }
502 }
503 }
504 else
505 {
506 description += "sends unsupported component sensor offset " +
507 std::to_string(sensorOffset);
508 }
509 }
510 else
511 {
512 std::stringstream strStream;
513 description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
514 description += prefixMsgStrCreation(tid, sensorId);
515 strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
516 << std::setw(2) << static_cast<uint32_t>(sensorOffset)
517 << "eventState 0x" << std::setw(2)
518 << static_cast<uint32_t>(eventState)
519 << " previousEventState 0x" << std::setw(2)
520 << static_cast<uint32_t>(previousEventState) << std::dec;
521 description += strStream.str();
522 }
Chau Lya743e382024-10-26 11:12:22 +0000523
524 sendJournalRedfish(description, logLevel);
525
526 return PLDM_SUCCESS;
527}
528
529int OemEventManager::processSensorOpStateEvent(
530 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
531 size_t sensorDataLength)
532{
533 uint8_t present_op_state = 0;
534 uint8_t previous_op_state = 0;
535
536 auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
537 &present_op_state, &previous_op_state);
538 if (rc)
539 {
540 lg2::error(
541 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
542 "TID", tid, "RC", rc);
543 return rc;
544 }
545
546 std::string description;
547 std::stringstream strStream;
548 log_level logLevel = log_level::OK;
549
550 description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
551 description += prefixMsgStrCreation(tid, sensorId);
552 strStream << std::setfill('0') << std::hex << "present_op_state 0x"
553 << std::setw(2) << static_cast<uint32_t>(present_op_state)
554 << "previous_op_state 0x" << std::setw(2)
555 << static_cast<uint32_t>(previous_op_state) << std::dec;
556 description += strStream.str();
557
558 sendJournalRedfish(description, logLevel);
559
560 return PLDM_SUCCESS;
561}
562
563int OemEventManager::handleSensorEvent(
564 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
565 pldm_tid_t tid, size_t eventDataOffset)
566{
567 /* This OEM event handler is only used for SoC terminus*/
568 if (!tidToSocketNameMap.contains(tid))
569 {
570 return PLDM_SUCCESS;
571 }
572 auto eventData =
573 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
574 auto eventDataSize = payloadLength - eventDataOffset;
575
576 uint16_t sensorId = 0;
577 uint8_t sensorEventClassType = 0;
578 size_t eventClassDataOffset = 0;
579 auto rc =
580 decode_sensor_event_data(eventData, eventDataSize, &sensorId,
581 &sensorEventClassType, &eventClassDataOffset);
582 if (rc)
583 {
584 lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
585 rc);
586 return rc;
587 }
588 const uint8_t* sensorData = eventData + eventClassDataOffset;
589 size_t sensorDataLength = eventDataSize - eventClassDataOffset;
590
591 switch (sensorEventClassType)
592 {
593 case PLDM_NUMERIC_SENSOR_STATE:
594 {
595 return processNumericSensorEvent(tid, sensorId, sensorData,
596 sensorDataLength);
597 }
598 case PLDM_STATE_SENSOR_STATE:
599 {
600 return processStateSensorEvent(tid, sensorId, sensorData,
601 sensorDataLength);
602 }
603 case PLDM_SENSOR_OP_STATE:
604 {
605 return processSensorOpStateEvent(tid, sensorId, sensorData,
606 sensorDataLength);
607 }
608 default:
609 std::string description;
610 std::stringstream strStream;
611 log_level logLevel = log_level::OK;
612
613 description += "SENSOR_EVENT : Unsupported Sensor Class " +
614 std::to_string(sensorEventClassType) + ": ";
615 description += prefixMsgStrCreation(tid, sensorId);
616 strStream << std::setfill('0') << std::hex
617 << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
618
619 auto dataPtr = sensorData;
620 for ([[maybe_unused]] const auto& i :
621 std::views::iota(0, (int)sensorDataLength))
622 {
623 strStream << "0x" << static_cast<uint32_t>(*dataPtr);
624 dataPtr += sizeof(sensorData);
625 }
626
627 description += strStream.str();
628
629 sendJournalRedfish(description, logLevel);
630 }
631 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE",
632 sensorEventClassType);
633 return PLDM_ERROR;
634}
635
Chau Ly3de0d942024-10-03 08:57:11 +0000636void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
637 uint32_t presentReading)
638{
639 std::string description;
640 std::stringstream strStream;
641 PCIeHotPlugEventRecord_t record{presentReading};
642
643 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
644 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
645 log_level logLevel =
646 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
647
648 description += prefixMsgStrCreation(tid, sensorId);
649
650 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
651 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
652 << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
653 << "); Device (0x" << std::setw(2)
654 << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
655 << std::setw(2) << static_cast<uint32_t>(record.bits.function)
656 << "); Action (" << sAction << "); Operation status ("
657 << sOpStatus << "); Media slot number (" << std::dec
658 << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
659
660 description += strStream.str();
661
662 // Log to Redfish event
663 sendJournalRedfish(description, logLevel);
664}
665
Chau Lycebf4762024-10-03 09:02:54 +0000666std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
667{
668 std::string description;
669 DIMMTrainingFailure_t failure{failureInfo};
670
671 if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
672 {
673 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
674
675 description += std::get<0>(failureInfoMap);
676
677 description += "; MCU rank index " +
678 std::to_string(failure.bits.mcuRankIdx);
679
680 description += "; Slice number " +
681 std::to_string(failure.bits.sliceNum);
682
683 description += "; Upper nibble error status: ";
684 description += (!failure.bits.upperNibbStatErr)
685 ? "No error"
686 : "Found no rising edge";
687
688 description += "; Lower nibble error status: ";
689 description += (!failure.bits.lowerNibbStatErr)
690 ? "No error"
691 : "Found no rising edge";
692
693 description += "; Failure syndrome 0: ";
694
695 auto& syndromeMap = std::get<1>(failureInfoMap);
696 if (syndromeMap.contains(failure.bits.syndrome))
697 {
698 description += syndromeMap[failure.bits.syndrome];
699 }
700 else
701 {
702 description += "(Unknown syndrome)";
703 }
704 }
705 else
706 {
707 description += "Unknown training failure type " +
708 std::to_string(failure.bits.type);
709 }
710
711 return description;
712}
713
714void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
715 uint32_t presentReading)
716{
717 log_level logLevel{log_level::WARNING};
718 std::string description;
719 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
720 uint32_t byte012 = presentReading & 0xffffff;
721
722 description += prefixMsgStrCreation(tid, sensorId);
723
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000724 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
725 auto dimmIdx = sensorIdToDIMMIdx(sensorId);
726 if (dimmIdx >= maxDIMMIdxBitNum)
727 {
728 return;
729 }
Chau Lycebf4762024-10-03 09:02:54 +0000730
731 description += "DIMM " + std::to_string(dimmIdx) + " ";
732
733 if (dimmStatusToMsgMap.contains(byte3))
734 {
735 if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
736 byte3 == dimm_status::INSTALLED_BUT_DISABLED)
737 {
738 logLevel = log_level::OK;
739 }
740
741 description += dimmStatusToMsgMap[byte3];
742
743 if (byte3 == dimm_status::TRAINING_FAILURE)
744 {
745 description += "; " + dimmTrainingFailureToMsg(byte012);
746 }
747 else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
748 {
749 uint8_t byte0 = (byte012 & 0xff);
750 if (byte0 < pmicTempAlertMsg.size())
751 {
752 description += ": " + pmicTempAlertMsg[byte0];
753 }
754 }
755 }
756 else
757 {
758 switch (byte3)
759 {
760 case dimm_status::PMIC_HIGH_TEMP:
761 if (byte012 == 0x01)
762 {
763 description += "has PMIC high temp condition";
764 }
765 break;
766 case dimm_status::TSx_HIGH_TEMP:
767 switch (byte012)
768 {
769 case 0x01:
770 description += "has TS0";
771 break;
772 case 0x02:
773 description += "has TS1";
774 break;
775 case 0x03:
776 description += "has TS0 and TS1";
777 break;
778 }
779 description += " exceeding their high temperature threshold";
780 break;
781 case dimm_status::SPD_HUB_HIGH_TEMP:
782 if (byte012 == 0x01)
783 {
784 description += "has SPD/HUB high temp condition";
785 }
786 break;
787 default:
788 description += "has unsupported status " +
789 std::to_string(byte3);
790 break;
791 }
792 }
793
794 // Log to Redfish event
795 sendJournalRedfish(description, logLevel);
796}
797
798void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
799 uint32_t presentReading)
800{
801 log_level logLevel{log_level::WARNING};
802 std::string description;
803 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
804 uint32_t byte012 = presentReading & 0xffffff;
805
806 description += prefixMsgStrCreation(tid, sensorId);
807
808 description += "DDR ";
809 if (ddrStatusToMsgMap.contains(byte3))
810 {
811 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
812 {
813 logLevel = log_level::OK;
814 }
815
816 description += ddrStatusToMsgMap[byte3];
817
818 if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
819 byte3 == ddr_status::TRAINING_FAILURE)
820 {
821 // List out failed DIMMs
822 description += dimmIdxsToString(byte012);
823 }
824 }
825 else
826 {
827 description += "has unsupported status " + std::to_string(byte3);
828 }
829
830 // Log to Redfish event
831 sendJournalRedfish(description, logLevel);
832}
833
Chau Ly4cca3dc2024-10-03 09:07:09 +0000834void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
835 uint32_t presentReading)
836{
837 log_level logLevel{log_level::WARNING};
838 std::string description;
839 std::stringstream strStream;
840
841 description += prefixMsgStrCreation(tid, sensorId);
842
843 VRDStatus_t status{presentReading};
844
845 if (status.bits.warning && status.bits.critical)
846 {
847 description += "A VR warning and a VR critical";
848 logLevel = log_level::CRITICAL;
849 }
850 else
851 {
852 if (status.bits.warning)
853 {
854 description += "A VR warning";
855 }
856 else if (status.bits.critical)
857 {
858 description += "A VR critical";
859 logLevel = log_level::CRITICAL;
860 }
861 else
862 {
863 description += "No VR warning or critical";
864 logLevel = log_level::OK;
865 }
866 }
867 description += " condition observed";
868
869 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex
870 << std::setw(2)
871 << static_cast<uint32_t>(status.bits.vr_status_byte_high)
872 << "; VR status byte low is 0x" << std::setw(2)
873 << static_cast<uint32_t>(status.bits.vr_status_byte_low)
874 << "; Reading is 0x" << std::setw(2)
875 << static_cast<uint32_t>(presentReading) << ";";
876
877 description += strStream.str();
878
879 // Log to Redfish event
880 sendJournalRedfish(description, logLevel);
881}
882
Chau Lyb01357f2024-10-17 09:18:01 +0000883void OemEventManager::handleNumericWatchdogEvent(
884 pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading)
885{
886 std::string description;
887 log_level logLevel = log_level::CRITICAL;
888
889 description += prefixMsgStrCreation(tid, sensorId);
890
891 if (presentReading & 0x01)
892 {
893 description += "Global watchdog expired;";
894 }
895 if (presentReading & 0x02)
896 {
897 description += "Secure watchdog expired;";
898 }
899 if (presentReading & 0x04)
900 {
901 description += "Non-secure watchdog expired;";
902 }
903
904 // Log to Redfish event
905 sendJournalRedfish(description, logLevel);
906}
907
Chau Lya743e382024-10-26 11:12:22 +0000908} // namespace oem_ampere
909} // namespace pldm