blob: 1aae241819b93d93d6c0868e54a9e19354704687 [file] [log] [blame]
Chau Lya743e382024-10-26 11:12:22 +00001#include "oem_event_manager.hpp"
2
3#include "requester/handler.hpp"
4#include "requester/request.hpp"
5
6#include <config.h>
7#include <libpldm/pldm.h>
8#include <libpldm/utils.h>
9#include <systemd/sd-journal.h>
10
11#include <phosphor-logging/lg2.hpp>
12#include <xyz/openbmc_project/Logging/Entry/server.hpp>
13
14#include <algorithm>
15#include <map>
16#include <sstream>
17#include <string>
18#include <unordered_map>
19
20namespace pldm
21{
22namespace oem_ampere
23{
// Short aliases for the nested OEM enumeration namespaces referenced by the
// lookup tables and event handlers in this file.
namespace boot_stage = boot::stage;
namespace ddr_status = ddr::status;
namespace dimm_status = dimm::status;
namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
namespace phy_syndrome = dimm::training_failure::phy_syndrome;
namespace training_failure = dimm::training_failure;
Chau Lya743e382024-10-26 11:12:22 +000030
// Redfish message registry IDs. sendJournalRedfish() attaches one of them to
// each journal entry, selected by log level through
// logLevelToRedfishMsgIdMap.
constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK";
constexpr const char* ampereWarningRegistry =
    "OpenBMC.0.1.AmpereWarning.Warning";
constexpr const char* ampereCriticalRegistry =
    "OpenBMC.0.1.AmpereCritical.Critical";
constexpr const char* BIOSFWPanicRegistry =
    "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning";
// Number of low-order bits of a reading that dimmIdxsToString() scans as
// per-DIMM flags.
constexpr auto maxDIMMIdxBitNum = 24;
// Number of DIMMx_Status sensors; valid DIMM indexes are
// 0 .. maxDIMMInstantNum-1.
constexpr auto maxDIMMInstantNum = 24;
Chau Lya743e382024-10-26 11:12:22 +000040
/*
    An array of possible boot statuses of a boot stage.
    The index maps to byte 0 of the boot code; handleBootOverallEvent()
    reports any value beyond the last index as " unknown status".
*/
std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"};
46
/*
    An array of possible boot statuses of the DDR training stage.
    The index maps to byte 0 of the boot code. For index 1 (in-progress)
    handleBootOverallEvent() additionally appends byte 1 as a percentage.
*/
std::array<std::string, 3> ddrTrainingMsg = {
    " progress started", " in-progress", " progress completed"};
53
/*
    An array of PMIC temperature alert logging strings.
    The index maps to byte 0 of a DIMM status reading whose status byte is
    dimm_status::PMIC_TEMP_ALERT (see handleDIMMStatusEvent()).
*/
std::array<std::string, 8> pmicTempAlertMsg = {
    "Below 85°C", "85°C",  "95°C",  "105°C",
    "115°C",      "125°C", "135°C", "Equal or greater than 140°C"};
60
/*
    In Ampere systems, the BMC only communicates directly with the MCTP/PLDM
    SoC endpoints through SMBus and PCIe. When the host boots up, the SMBus
    interface comes up first. On this interface the BMC is the bus owner.

    mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
    pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).

    A map between those TIDs and the socket names used in log messages.
    handleSensorEvent() only processes events from TIDs listed here.
*/
EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
70
/*
    A map between sensor IDs and their names as strings.
    Using pldm::oem::sensor_ids
    Sensor IDs missing from this map are reported by prefixMsgStrCreation()
    as "Sensor ID <number>".
*/
EventToMsgMap_t sensorIdToStrMap = {{DDR_STATUS, "DDR_STATUS"},
                                    {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"},
                                    {BOOT_OVERALL, "BOOT_OVERALL"}};
Chau Lya743e382024-10-26 11:12:22 +000078
/*
    A map between the boot stages and logging strings.
    Using pldm::oem::boot::stage::boot_stage
    The key is byte 3 of a BOOT_OVERALL reading. The entry for
    UEFI_STATUS_CLASS_CODE_MIN also serves as the common prefix for every
    UEFI status class code logged by handleBootOverallEvent().
*/
EventToMsgMap_t bootStageToMsgMap = {
    {boot_stage::SECPRO, "SECpro"},
    {boot_stage::MPRO, "Mpro"},
    {boot_stage::ATF_BL1, "ATF BL1"},
    {boot_stage::ATF_BL2, "ATF BL2"},
    {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
    {boot_stage::DDR_TRAINING, "DDR training"},
    {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
    {boot_stage::ATF_BL31, "ATF BL31"},
    {boot_stage::ATF_BL32, "ATF BL32"},
    {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
    {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
     "ATF BL33 (UEFI) booting status = "}};
96
/*
    A map between DDR status and logging strings.
    Using pldm::oem::ddr::status::ddr_status
    The key is byte 3 of a DDR_STATUS reading. The two messages ending in
    "at DIMMs:" are followed by the DIMM list built by dimmIdxsToString().
*/
EventToMsgMap_t ddrStatusToMsgMap = {
    {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
    {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
    {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
    {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
    {ddr_status::OTHER_FAILURE, "has other failure"},
    {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
     "has boot failure due to no configuration"},
    {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
     "failsafe activated but boot success with the next valid configuration"}};
111
/*
    A map between DIMM status and logging strings.
    Using pldm::oem::dimm::status::dimm_status
    The key is byte 3 of a DIMMx_Status reading. The TRAINING_FAILURE message
    ends with "; " because handleDIMMStatusEvent() appends the decoded
    failure details after it.
*/
EventToMsgMap_t dimmStatusToMsgMap = {
    {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
    {dimm_status::NOT_INSTALLED, "is not installed"},
    {dimm_status::OTHER_FAILURE, "has other failure"},
    {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
    {dimm_status::TRAINING_FAILURE, "has training failure; "},
    {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
123
/*
    A map between PHY training failure syndrome and logging strings.
    Using
    pldm::oem::dimm::training_failure::phy_syndrome::phy_training_failure_syndrome
*/
EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
    {phy_syndrome::NA, "(N/A)"},
    {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
    {phy_syndrome::CA_LEVELING, "(CA leveling)"},
    {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
     "(PHY write level failure - see syndrome 1)"},
    {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
     "(PHY read gate leveling failure)"},
    {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
    {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
    {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
140
/*
    A map between DIMM training failure syndrome and logging strings.
    Using
    pldm::oem::dimm::training_failure::dimm_syndrome::dimm_training_failure_syndrome

    NOTE(review): "LRDRIMM" in the last entry looks like a misspelling of
    "LRDIMM"; it matches the enumerator name, so confirm before changing the
    logged text.
*/
EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
    {dimm_syndrome::NA, "(N/A)"},
    {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
     "(DRAM VREFDQ training failure)"},
    {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
    {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
     "(LRDRIMM DB SW training failure)"}};
153
/*
    A map between DIMM training failure type and a pair of <logging string,
    syndrome map>. Using
    pldm::oem::dimm::training_failure::dimm_training_failure_type

    std::make_pair copies the syndrome map passed to it, so each entry holds
    its own copy of the corresponding table defined above.
*/
std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
    dimmTrainingFailureTypeMap = {
        {training_failure::PHY_TRAINING_FAILURE_TYPE,
         std::make_pair("PHY training failure",
                        phyTrainingFailureSyndromeToMsgMap)},
        {training_failure::DIMM_TRAINING_FAILURE_TYPE,
         std::make_pair("DIMM training failure",
                        dimmTrainingFailureSyndromeToMsgMap)}};
167
/*
    A map between log level and the registry used for the Redfish SEL log.
    Using pldm::oem::log_level
    sendJournalRedfish() rejects any log level that is not listed here.
*/
std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
    {log_level::OK, ampereEventRegistry},
    {log_level::WARNING, ampereWarningRegistry},
    {log_level::CRITICAL, ampereCriticalRegistry},
    {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
177
178std::string
179 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId)
180{
181 std::string description;
182 if (!tidToSocketNameMap.contains(tid))
183 {
184 description += "TID " + std::to_string(tid) + ": ";
185 }
186 else
187 {
188 description += tidToSocketNameMap[tid] + ": ";
189 }
190
191 if (!sensorIdToStrMap.contains(sensorId))
192 {
193 description += "Sensor ID " + std::to_string(sensorId) + ": ";
194 }
195 else
196 {
197 description += sensorIdToStrMap[sensorId] + ": ";
198 }
199
200 return description;
201}
202
203void OemEventManager::sendJournalRedfish(const std::string& description,
204 log_level& logLevel)
205{
206 if (description.empty())
207 {
208 return;
209 }
210
211 if (!logLevelToRedfishMsgIdMap.contains(logLevel))
212 {
213 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
214 "DES", description);
215 return;
216 }
217 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
218 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
219 redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
220}
221
222std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
223{
224 std::string description;
225 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
226 {
227 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
228 {
229 description += " #" + std::to_string(bitIdx);
230 }
231 }
232 return description;
233}
234
235void OemEventManager::handleBootOverallEvent(
236 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
237{
238 log_level logLevel{log_level::OK};
239 std::string description;
240 std::stringstream strStream;
241
242 uint8_t byte0 = (presentReading & 0x000000ff);
243 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
244 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
245 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
246 /*
247 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
248 * ATF BL32 and DDR initialization
249 */
250 if (bootStageToMsgMap.contains(byte3))
251 {
252 // Boot stage adding
253 description += bootStageToMsgMap[byte3];
254
255 switch (byte3)
256 {
257 case boot_stage::DDR_TRAINING:
258 if (byte0 >= ddrTrainingMsg.size())
259 {
260 logLevel = log_level::BIOSFWPANIC;
261 description += " unknown status";
262 }
263 else
264 {
265 description += ddrTrainingMsg[byte0];
266 }
267 if (0x01 == byte0)
268 {
269 // Add complete percentage
270 description += " at " + std::to_string(byte1) + "%";
271 }
272 break;
273 case boot_stage::S0_DDR_TRAINING_FAILURE:
274 case boot_stage::S1_DDR_TRAINING_FAILURE:
275 // ddr_training_status_msg()
276 logLevel = log_level::BIOSFWPANIC;
277 description += " at DIMMs:";
278 // dimmIdxs = presentReading & 0x00ffffff;
279 description += dimmIdxsToString(presentReading & 0x00ffffff);
280 description += " of socket ";
281 description +=
282 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
283 break;
284 default:
285 if (byte0 >= bootStatMsg.size())
286 {
287 logLevel = log_level::BIOSFWPANIC;
288 description += " unknown status";
289 }
290 else
291 {
292 description += bootStatMsg[byte0];
293 }
294 break;
295 }
296
297 // Sensor report action is fail
298 if (boot::status::BOOT_STATUS_FAILURE == byte2)
299 {
300 logLevel = log_level::BIOSFWPANIC;
301 }
302 }
303 else
304 {
305 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
306 {
307 description +=
308 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
309
310 strStream
311 << "Segment (0x" << std::setfill('0') << std::hex
312 << std::setw(8) << static_cast<uint32_t>(presentReading)
Chau Ly3de0d942024-10-03 08:57:11 +0000313 << "); Status Class (0x" << std::setw(2)
314 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
Chau Lya743e382024-10-26 11:12:22 +0000315 << std::setw(2) << static_cast<uint32_t>(byte2)
Chau Ly3de0d942024-10-03 08:57:11 +0000316 << "); Operation Code (0x" << std::setw(4)
Chau Lya743e382024-10-26 11:12:22 +0000317 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
318 << ")" << std::dec;
319
320 description += strStream.str();
321 }
322 }
323
324 // Log to Redfish event
325 sendJournalRedfish(description, logLevel);
326}
327
328int OemEventManager::processNumericSensorEvent(
329 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
330 size_t sensorDataLength)
331{
332 uint8_t eventState = 0;
333 uint8_t previousEventState = 0;
334 uint8_t sensorDataSize = 0;
335 uint32_t presentReading;
336 auto rc = decode_numeric_sensor_data(
337 sensorData, sensorDataLength, &eventState, &previousEventState,
338 &sensorDataSize, &presentReading);
339 if (rc)
340 {
341 lg2::error(
342 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
343 "TID", tid, "RC", rc);
344 return rc;
345 }
346
Chau Lycebf4762024-10-03 09:02:54 +0000347 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
348 if (auto dimmIdx = (sensorId - 4) / 2;
349 sensorId >= 4 && dimmIdx >= 0 && dimmIdx < maxDIMMInstantNum)
350 {
351 handleDIMMStatusEvent(tid, sensorId, presentReading);
352 return PLDM_SUCCESS;
353 }
354
Chau Lya743e382024-10-26 11:12:22 +0000355 switch (sensorId)
356 {
357 case BOOT_OVERALL:
358 handleBootOverallEvent(tid, sensorId, presentReading);
359 break;
Chau Ly3de0d942024-10-03 08:57:11 +0000360 case PCIE_HOT_PLUG:
361 handlePCIeHotPlugEvent(tid, sensorId, presentReading);
362 break;
Chau Lycebf4762024-10-03 09:02:54 +0000363 case DDR_STATUS:
364 handleDDRStatusEvent(tid, sensorId, presentReading);
365 break;
Chau Lya743e382024-10-26 11:12:22 +0000366 default:
367 std::string description;
368 std::stringstream strStream;
369 log_level logLevel = log_level::OK;
370
371 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
372 description += prefixMsgStrCreation(tid, sensorId);
373 strStream << std::setfill('0') << std::hex << "eventState 0x"
374 << std::setw(2) << static_cast<uint32_t>(eventState)
375 << " previousEventState 0x" << std::setw(2)
376 << static_cast<uint32_t>(previousEventState)
377 << " sensorDataSize 0x" << std::setw(2)
378 << static_cast<uint32_t>(sensorDataSize)
379 << " presentReading 0x" << std::setw(8)
380 << static_cast<uint32_t>(presentReading) << std::dec;
381 description += strStream.str();
382
383 sendJournalRedfish(description, logLevel);
384 break;
385 }
386 return PLDM_SUCCESS;
387}
388
389int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
390 const uint8_t* sensorData,
391 size_t sensorDataLength)
392{
393 uint8_t sensorOffset = 0;
394 uint8_t eventState = 0;
395 uint8_t previousEventState = 0;
396
397 auto rc =
398 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
399 &eventState, &previousEventState);
400 if (rc)
401 {
402 lg2::error(
403 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
404 "TID", tid, "RC", rc);
405 return rc;
406 }
407
408 std::string description;
409 std::stringstream strStream;
410 log_level logLevel = log_level::OK;
411
412 description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
413 description += prefixMsgStrCreation(tid, sensorId);
414 strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
415 << std::setw(2) << static_cast<uint32_t>(sensorOffset)
416 << "eventState 0x" << std::setw(2)
417 << static_cast<uint32_t>(eventState) << " previousEventState 0x"
418 << std::setw(2) << static_cast<uint32_t>(previousEventState)
419 << std::dec;
420 description += strStream.str();
421
422 sendJournalRedfish(description, logLevel);
423
424 return PLDM_SUCCESS;
425}
426
427int OemEventManager::processSensorOpStateEvent(
428 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
429 size_t sensorDataLength)
430{
431 uint8_t present_op_state = 0;
432 uint8_t previous_op_state = 0;
433
434 auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
435 &present_op_state, &previous_op_state);
436 if (rc)
437 {
438 lg2::error(
439 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
440 "TID", tid, "RC", rc);
441 return rc;
442 }
443
444 std::string description;
445 std::stringstream strStream;
446 log_level logLevel = log_level::OK;
447
448 description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
449 description += prefixMsgStrCreation(tid, sensorId);
450 strStream << std::setfill('0') << std::hex << "present_op_state 0x"
451 << std::setw(2) << static_cast<uint32_t>(present_op_state)
452 << "previous_op_state 0x" << std::setw(2)
453 << static_cast<uint32_t>(previous_op_state) << std::dec;
454 description += strStream.str();
455
456 sendJournalRedfish(description, logLevel);
457
458 return PLDM_SUCCESS;
459}
460
461int OemEventManager::handleSensorEvent(
462 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
463 pldm_tid_t tid, size_t eventDataOffset)
464{
465 /* This OEM event handler is only used for SoC terminus*/
466 if (!tidToSocketNameMap.contains(tid))
467 {
468 return PLDM_SUCCESS;
469 }
470 auto eventData =
471 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
472 auto eventDataSize = payloadLength - eventDataOffset;
473
474 uint16_t sensorId = 0;
475 uint8_t sensorEventClassType = 0;
476 size_t eventClassDataOffset = 0;
477 auto rc =
478 decode_sensor_event_data(eventData, eventDataSize, &sensorId,
479 &sensorEventClassType, &eventClassDataOffset);
480 if (rc)
481 {
482 lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
483 rc);
484 return rc;
485 }
486 const uint8_t* sensorData = eventData + eventClassDataOffset;
487 size_t sensorDataLength = eventDataSize - eventClassDataOffset;
488
489 switch (sensorEventClassType)
490 {
491 case PLDM_NUMERIC_SENSOR_STATE:
492 {
493 return processNumericSensorEvent(tid, sensorId, sensorData,
494 sensorDataLength);
495 }
496 case PLDM_STATE_SENSOR_STATE:
497 {
498 return processStateSensorEvent(tid, sensorId, sensorData,
499 sensorDataLength);
500 }
501 case PLDM_SENSOR_OP_STATE:
502 {
503 return processSensorOpStateEvent(tid, sensorId, sensorData,
504 sensorDataLength);
505 }
506 default:
507 std::string description;
508 std::stringstream strStream;
509 log_level logLevel = log_level::OK;
510
511 description += "SENSOR_EVENT : Unsupported Sensor Class " +
512 std::to_string(sensorEventClassType) + ": ";
513 description += prefixMsgStrCreation(tid, sensorId);
514 strStream << std::setfill('0') << std::hex
515 << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
516
517 auto dataPtr = sensorData;
518 for ([[maybe_unused]] const auto& i :
519 std::views::iota(0, (int)sensorDataLength))
520 {
521 strStream << "0x" << static_cast<uint32_t>(*dataPtr);
522 dataPtr += sizeof(sensorData);
523 }
524
525 description += strStream.str();
526
527 sendJournalRedfish(description, logLevel);
528 }
529 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE",
530 sensorEventClassType);
531 return PLDM_ERROR;
532}
533
Chau Ly3de0d942024-10-03 08:57:11 +0000534void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
535 uint32_t presentReading)
536{
537 std::string description;
538 std::stringstream strStream;
539 PCIeHotPlugEventRecord_t record{presentReading};
540
541 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
542 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
543 log_level logLevel =
544 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
545
546 description += prefixMsgStrCreation(tid, sensorId);
547
548 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
549 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
550 << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
551 << "); Device (0x" << std::setw(2)
552 << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
553 << std::setw(2) << static_cast<uint32_t>(record.bits.function)
554 << "); Action (" << sAction << "); Operation status ("
555 << sOpStatus << "); Media slot number (" << std::dec
556 << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
557
558 description += strStream.str();
559
560 // Log to Redfish event
561 sendJournalRedfish(description, logLevel);
562}
563
Chau Lycebf4762024-10-03 09:02:54 +0000564std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
565{
566 std::string description;
567 DIMMTrainingFailure_t failure{failureInfo};
568
569 if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
570 {
571 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
572
573 description += std::get<0>(failureInfoMap);
574
575 description += "; MCU rank index " +
576 std::to_string(failure.bits.mcuRankIdx);
577
578 description += "; Slice number " +
579 std::to_string(failure.bits.sliceNum);
580
581 description += "; Upper nibble error status: ";
582 description += (!failure.bits.upperNibbStatErr)
583 ? "No error"
584 : "Found no rising edge";
585
586 description += "; Lower nibble error status: ";
587 description += (!failure.bits.lowerNibbStatErr)
588 ? "No error"
589 : "Found no rising edge";
590
591 description += "; Failure syndrome 0: ";
592
593 auto& syndromeMap = std::get<1>(failureInfoMap);
594 if (syndromeMap.contains(failure.bits.syndrome))
595 {
596 description += syndromeMap[failure.bits.syndrome];
597 }
598 else
599 {
600 description += "(Unknown syndrome)";
601 }
602 }
603 else
604 {
605 description += "Unknown training failure type " +
606 std::to_string(failure.bits.type);
607 }
608
609 return description;
610}
611
612void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
613 uint32_t presentReading)
614{
615 log_level logLevel{log_level::WARNING};
616 std::string description;
617 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
618 uint32_t byte012 = presentReading & 0xffffff;
619
620 description += prefixMsgStrCreation(tid, sensorId);
621
622 uint8_t dimmIdx = (sensorId - 4) / 2;
623
624 description += "DIMM " + std::to_string(dimmIdx) + " ";
625
626 if (dimmStatusToMsgMap.contains(byte3))
627 {
628 if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
629 byte3 == dimm_status::INSTALLED_BUT_DISABLED)
630 {
631 logLevel = log_level::OK;
632 }
633
634 description += dimmStatusToMsgMap[byte3];
635
636 if (byte3 == dimm_status::TRAINING_FAILURE)
637 {
638 description += "; " + dimmTrainingFailureToMsg(byte012);
639 }
640 else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
641 {
642 uint8_t byte0 = (byte012 & 0xff);
643 if (byte0 < pmicTempAlertMsg.size())
644 {
645 description += ": " + pmicTempAlertMsg[byte0];
646 }
647 }
648 }
649 else
650 {
651 switch (byte3)
652 {
653 case dimm_status::PMIC_HIGH_TEMP:
654 if (byte012 == 0x01)
655 {
656 description += "has PMIC high temp condition";
657 }
658 break;
659 case dimm_status::TSx_HIGH_TEMP:
660 switch (byte012)
661 {
662 case 0x01:
663 description += "has TS0";
664 break;
665 case 0x02:
666 description += "has TS1";
667 break;
668 case 0x03:
669 description += "has TS0 and TS1";
670 break;
671 }
672 description += " exceeding their high temperature threshold";
673 break;
674 case dimm_status::SPD_HUB_HIGH_TEMP:
675 if (byte012 == 0x01)
676 {
677 description += "has SPD/HUB high temp condition";
678 }
679 break;
680 default:
681 description += "has unsupported status " +
682 std::to_string(byte3);
683 break;
684 }
685 }
686
687 // Log to Redfish event
688 sendJournalRedfish(description, logLevel);
689}
690
691void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
692 uint32_t presentReading)
693{
694 log_level logLevel{log_level::WARNING};
695 std::string description;
696 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
697 uint32_t byte012 = presentReading & 0xffffff;
698
699 description += prefixMsgStrCreation(tid, sensorId);
700
701 description += "DDR ";
702 if (ddrStatusToMsgMap.contains(byte3))
703 {
704 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
705 {
706 logLevel = log_level::OK;
707 }
708
709 description += ddrStatusToMsgMap[byte3];
710
711 if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
712 byte3 == ddr_status::TRAINING_FAILURE)
713 {
714 // List out failed DIMMs
715 description += dimmIdxsToString(byte012);
716 }
717 }
718 else
719 {
720 description += "has unsupported status " + std::to_string(byte3);
721 }
722
723 // Log to Redfish event
724 sendJournalRedfish(description, logLevel);
725}
726
Chau Lya743e382024-10-26 11:12:22 +0000727} // namespace oem_ampere
728} // namespace pldm