blob: c87659a68b32ceed6ba2fded275b6546e30e2d53 [file] [log] [blame]
Chau Lya743e382024-10-26 11:12:22 +00001#include "oem_event_manager.hpp"
2
3#include "requester/handler.hpp"
4#include "requester/request.hpp"
5
6#include <config.h>
7#include <libpldm/pldm.h>
8#include <libpldm/utils.h>
9#include <systemd/sd-journal.h>
10
11#include <phosphor-logging/lg2.hpp>
12#include <xyz/openbmc_project/Logging/Entry/server.hpp>
13
14#include <algorithm>
15#include <map>
16#include <sstream>
17#include <string>
18#include <unordered_map>
19
20namespace pldm
21{
22namespace oem_ampere
23{
24namespace boot_stage = boot::stage;
Chau Lycebf4762024-10-03 09:02:54 +000025namespace ddr_status = ddr::status;
26namespace dimm_status = dimm::status;
27namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
28namespace phy_syndrome = dimm::training_failure::phy_syndrome;
29namespace training_failure = dimm::training_failure;
Chau Lya743e382024-10-26 11:12:22 +000030
/*
   Redfish message registry identifiers, one per log severity, followed by
   the DIMM limits used when decoding DIMM bitmaps and DIMM sensor IDs.
*/
constexpr auto ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK";
constexpr auto ampereWarningRegistry = "OpenBMC.0.1.AmpereWarning.Warning";
constexpr auto ampereCriticalRegistry = "OpenBMC.0.1.AmpereCritical.Critical";
constexpr auto BIOSFWPanicRegistry =
    "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning";
/* Number of bits scanned in a DIMM index bitmap */
constexpr int maxDIMMIdxBitNum = 24;
/* Number of DIMM status sensor instances */
constexpr int maxDIMMInstantNum = 24;
Chau Lya743e382024-10-26 11:12:22 +000040
/*
   Status suffixes for an ordinary boot stage.
   Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> bootStatMsg{
    " booting",
    " completed",
    " failed",
};

/*
   Status suffixes for the DDR training stage.
   Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> ddrTrainingMsg{
    " progress started",
    " in-progress",
    " progress completed",
};

/*
   Logging strings for a PMIC temperature alert.
   Indexed by byte 0 of the DIMM status reading.
*/
std::array<std::string, 8> pmicTempAlertMsg{
    "Below 85°C",
    "85°C",
    "95°C",
    "105°C",
    "115°C",
    "125°C",
    "135°C",
    "Equal or greater than 140°C",
};
60
61/*
Chau Lya743e382024-10-26 11:12:22 +000062 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC
63 EPs through SMBus and PCIe. When host boots up, SMBUS interface
64 comes up first. In this interface, BMC is bus owner.
65
66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).
68*/
69EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
70
71/*
72 A map between sensor IDs and their names in string.
73 Using pldm::oem::sensor_ids
74*/
Chau Ly4cca3dc2024-10-03 09:07:09 +000075EventToMsgMap_t sensorIdToStrMap = {
76 {DDR_STATUS, "DDR_STATUS"}, {PCP_VR_STATE, "PCP_VR_STATE"},
77 {SOC_VR_STATE, "SOC_VR_STATE"}, {DPHY_VR1_STATE, "DPHY_VR1_STATE"},
78 {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, {D2D_VR_STATE, "D2D_VR_STATE"},
79 {IOC_VR1_STATE, "IOC_VR1_STATE"}, {IOC_VR2_STATE, "IOC_VR2_STATE"},
80 {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, {PCI_A_VR_STATE, "PCI_A_VR_STATE"},
81 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, {BOOT_OVERALL, "BOOT_OVERALL"}};
Chau Lya743e382024-10-26 11:12:22 +000082
83/*
84 A map between the boot stages and logging strings.
85 Using pldm::oem::boot::stage::boot_stage
86*/
87EventToMsgMap_t bootStageToMsgMap = {
88 {boot_stage::SECPRO, "SECpro"},
89 {boot_stage::MPRO, "Mpro"},
90 {boot_stage::ATF_BL1, "ATF BL1"},
91 {boot_stage::ATF_BL2, "ATF BL2"},
92 {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
93 {boot_stage::DDR_TRAINING, "DDR training"},
94 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
95 {boot_stage::ATF_BL31, "ATF BL31"},
96 {boot_stage::ATF_BL32, "ATF BL32"},
97 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
98 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
99 "ATF BL33 (UEFI) booting status = "}};
100
101/*
Chau Lycebf4762024-10-03 09:02:54 +0000102 A map between DDR status and logging strings.
103 Using pldm::oem::ddr::status::ddr_status
104*/
105EventToMsgMap_t ddrStatusToMsgMap = {
106 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
107 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
108 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
109 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
110 {ddr_status::OTHER_FAILURE, "has other failure"},
111 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
112 "has boot failure due to no configuration"},
113 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
114 "failsafe activated but boot success with the next valid configuration"}};
115
116/*
117 A map between DIMM status and logging strings.
118 Using pldm::oem::dimm::status::dimm_status
119*/
120EventToMsgMap_t dimmStatusToMsgMap = {
121 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
122 {dimm_status::NOT_INSTALLED, "is not installed"},
123 {dimm_status::OTHER_FAILURE, "has other failure"},
124 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
125 {dimm_status::TRAINING_FAILURE, "has training failure; "},
126 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
127
128/*
129 A map between PHY training failure syndrome and logging strings.
130 Using
131 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome
132*/
133EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
134 {phy_syndrome::NA, "(N/A)"},
135 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
136 {phy_syndrome::CA_LEVELING, "(CA leveling)"},
137 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
138 "(PHY write level failure - see syndrome 1)"},
139 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
140 "(PHY read gate leveling failure)"},
141 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
142 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
143 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
144
145/*
146 A map between DIMM training failure syndrome and logging strings.
147 Using
148 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome
149*/
150EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
151 {dimm_syndrome::NA, "(N/A)"},
152 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
153 "(DRAM VREFDQ training failure)"},
154 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
155 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
156 "(LRDRIMM DB SW training failure)"}};
157
158/*
159 A map between DIMM training failure type and a pair of <logging strings -
160 syndrome map>. Using
161 pldm::oem::dimm::training_faillure::dimm_training_failure_type
162*/
163std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
164 dimmTrainingFailureTypeMap = {
165 {training_failure::PHY_TRAINING_FAILURE_TYPE,
166 std::make_pair("PHY training failure",
167 phyTrainingFailureSyndromeToMsgMap)},
168 {training_failure::DIMM_TRAINING_FAILURE_TYPE,
169 std::make_pair("DIMM training failure",
170 dimmTrainingFailureSyndromeToMsgMap)}};
171
172/*
Chau Lya743e382024-10-26 11:12:22 +0000173 A map between log level and the registry used for Redfish SEL log
174 Using pldm::oem::log_level
175*/
176std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
Chau Ly3de0d942024-10-03 08:57:11 +0000177 {log_level::OK, ampereEventRegistry},
178 {log_level::WARNING, ampereWarningRegistry},
179 {log_level::CRITICAL, ampereCriticalRegistry},
Chau Lya743e382024-10-26 11:12:22 +0000180 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
181
182std::string
183 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId)
184{
185 std::string description;
186 if (!tidToSocketNameMap.contains(tid))
187 {
188 description += "TID " + std::to_string(tid) + ": ";
189 }
190 else
191 {
192 description += tidToSocketNameMap[tid] + ": ";
193 }
194
195 if (!sensorIdToStrMap.contains(sensorId))
196 {
197 description += "Sensor ID " + std::to_string(sensorId) + ": ";
198 }
199 else
200 {
201 description += sensorIdToStrMap[sensorId] + ": ";
202 }
203
204 return description;
205}
206
207void OemEventManager::sendJournalRedfish(const std::string& description,
208 log_level& logLevel)
209{
210 if (description.empty())
211 {
212 return;
213 }
214
215 if (!logLevelToRedfishMsgIdMap.contains(logLevel))
216 {
217 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
218 "DES", description);
219 return;
220 }
221 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
222 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
223 redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
224}
225
226std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
227{
228 std::string description;
229 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
230 {
231 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
232 {
233 description += " #" + std::to_string(bitIdx);
234 }
235 }
236 return description;
237}
238
239void OemEventManager::handleBootOverallEvent(
240 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
241{
242 log_level logLevel{log_level::OK};
243 std::string description;
244 std::stringstream strStream;
245
246 uint8_t byte0 = (presentReading & 0x000000ff);
247 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
248 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
249 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
250 /*
251 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
252 * ATF BL32 and DDR initialization
253 */
254 if (bootStageToMsgMap.contains(byte3))
255 {
256 // Boot stage adding
257 description += bootStageToMsgMap[byte3];
258
259 switch (byte3)
260 {
261 case boot_stage::DDR_TRAINING:
262 if (byte0 >= ddrTrainingMsg.size())
263 {
264 logLevel = log_level::BIOSFWPANIC;
265 description += " unknown status";
266 }
267 else
268 {
269 description += ddrTrainingMsg[byte0];
270 }
271 if (0x01 == byte0)
272 {
273 // Add complete percentage
274 description += " at " + std::to_string(byte1) + "%";
275 }
276 break;
277 case boot_stage::S0_DDR_TRAINING_FAILURE:
278 case boot_stage::S1_DDR_TRAINING_FAILURE:
279 // ddr_training_status_msg()
280 logLevel = log_level::BIOSFWPANIC;
281 description += " at DIMMs:";
282 // dimmIdxs = presentReading & 0x00ffffff;
283 description += dimmIdxsToString(presentReading & 0x00ffffff);
284 description += " of socket ";
285 description +=
286 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
287 break;
288 default:
289 if (byte0 >= bootStatMsg.size())
290 {
291 logLevel = log_level::BIOSFWPANIC;
292 description += " unknown status";
293 }
294 else
295 {
296 description += bootStatMsg[byte0];
297 }
298 break;
299 }
300
301 // Sensor report action is fail
302 if (boot::status::BOOT_STATUS_FAILURE == byte2)
303 {
304 logLevel = log_level::BIOSFWPANIC;
305 }
306 }
307 else
308 {
309 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
310 {
311 description +=
312 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
313
314 strStream
315 << "Segment (0x" << std::setfill('0') << std::hex
316 << std::setw(8) << static_cast<uint32_t>(presentReading)
Chau Ly3de0d942024-10-03 08:57:11 +0000317 << "); Status Class (0x" << std::setw(2)
318 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
Chau Lya743e382024-10-26 11:12:22 +0000319 << std::setw(2) << static_cast<uint32_t>(byte2)
Chau Ly3de0d942024-10-03 08:57:11 +0000320 << "); Operation Code (0x" << std::setw(4)
Chau Lya743e382024-10-26 11:12:22 +0000321 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
322 << ")" << std::dec;
323
324 description += strStream.str();
325 }
326 }
327
328 // Log to Redfish event
329 sendJournalRedfish(description, logLevel);
330}
331
332int OemEventManager::processNumericSensorEvent(
333 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
334 size_t sensorDataLength)
335{
336 uint8_t eventState = 0;
337 uint8_t previousEventState = 0;
338 uint8_t sensorDataSize = 0;
339 uint32_t presentReading;
340 auto rc = decode_numeric_sensor_data(
341 sensorData, sensorDataLength, &eventState, &previousEventState,
342 &sensorDataSize, &presentReading);
343 if (rc)
344 {
345 lg2::error(
346 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
347 "TID", tid, "RC", rc);
348 return rc;
349 }
350
Chau Lycebf4762024-10-03 09:02:54 +0000351 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
352 if (auto dimmIdx = (sensorId - 4) / 2;
353 sensorId >= 4 && dimmIdx >= 0 && dimmIdx < maxDIMMInstantNum)
354 {
355 handleDIMMStatusEvent(tid, sensorId, presentReading);
356 return PLDM_SUCCESS;
357 }
358
Chau Lya743e382024-10-26 11:12:22 +0000359 switch (sensorId)
360 {
361 case BOOT_OVERALL:
362 handleBootOverallEvent(tid, sensorId, presentReading);
363 break;
Chau Ly3de0d942024-10-03 08:57:11 +0000364 case PCIE_HOT_PLUG:
365 handlePCIeHotPlugEvent(tid, sensorId, presentReading);
366 break;
Chau Lycebf4762024-10-03 09:02:54 +0000367 case DDR_STATUS:
368 handleDDRStatusEvent(tid, sensorId, presentReading);
369 break;
Chau Ly4cca3dc2024-10-03 09:07:09 +0000370 case PCP_VR_STATE:
371 case SOC_VR_STATE:
372 case DPHY_VR1_STATE:
373 case DPHY_VR2_STATE:
374 case D2D_VR_STATE:
375 case IOC_VR1_STATE:
376 case IOC_VR2_STATE:
377 case PCI_D_VR_STATE:
378 case PCI_A_VR_STATE:
379 handleVRDStatusEvent(tid, sensorId, presentReading);
380 break;
Chau Lya743e382024-10-26 11:12:22 +0000381 default:
382 std::string description;
383 std::stringstream strStream;
384 log_level logLevel = log_level::OK;
385
386 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
387 description += prefixMsgStrCreation(tid, sensorId);
388 strStream << std::setfill('0') << std::hex << "eventState 0x"
389 << std::setw(2) << static_cast<uint32_t>(eventState)
390 << " previousEventState 0x" << std::setw(2)
391 << static_cast<uint32_t>(previousEventState)
392 << " sensorDataSize 0x" << std::setw(2)
393 << static_cast<uint32_t>(sensorDataSize)
394 << " presentReading 0x" << std::setw(8)
395 << static_cast<uint32_t>(presentReading) << std::dec;
396 description += strStream.str();
397
398 sendJournalRedfish(description, logLevel);
399 break;
400 }
401 return PLDM_SUCCESS;
402}
403
404int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
405 const uint8_t* sensorData,
406 size_t sensorDataLength)
407{
408 uint8_t sensorOffset = 0;
409 uint8_t eventState = 0;
410 uint8_t previousEventState = 0;
411
412 auto rc =
413 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
414 &eventState, &previousEventState);
415 if (rc)
416 {
417 lg2::error(
418 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
419 "TID", tid, "RC", rc);
420 return rc;
421 }
422
423 std::string description;
424 std::stringstream strStream;
425 log_level logLevel = log_level::OK;
426
427 description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
428 description += prefixMsgStrCreation(tid, sensorId);
429 strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
430 << std::setw(2) << static_cast<uint32_t>(sensorOffset)
431 << "eventState 0x" << std::setw(2)
432 << static_cast<uint32_t>(eventState) << " previousEventState 0x"
433 << std::setw(2) << static_cast<uint32_t>(previousEventState)
434 << std::dec;
435 description += strStream.str();
436
437 sendJournalRedfish(description, logLevel);
438
439 return PLDM_SUCCESS;
440}
441
442int OemEventManager::processSensorOpStateEvent(
443 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
444 size_t sensorDataLength)
445{
446 uint8_t present_op_state = 0;
447 uint8_t previous_op_state = 0;
448
449 auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
450 &present_op_state, &previous_op_state);
451 if (rc)
452 {
453 lg2::error(
454 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
455 "TID", tid, "RC", rc);
456 return rc;
457 }
458
459 std::string description;
460 std::stringstream strStream;
461 log_level logLevel = log_level::OK;
462
463 description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
464 description += prefixMsgStrCreation(tid, sensorId);
465 strStream << std::setfill('0') << std::hex << "present_op_state 0x"
466 << std::setw(2) << static_cast<uint32_t>(present_op_state)
467 << "previous_op_state 0x" << std::setw(2)
468 << static_cast<uint32_t>(previous_op_state) << std::dec;
469 description += strStream.str();
470
471 sendJournalRedfish(description, logLevel);
472
473 return PLDM_SUCCESS;
474}
475
476int OemEventManager::handleSensorEvent(
477 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
478 pldm_tid_t tid, size_t eventDataOffset)
479{
480 /* This OEM event handler is only used for SoC terminus*/
481 if (!tidToSocketNameMap.contains(tid))
482 {
483 return PLDM_SUCCESS;
484 }
485 auto eventData =
486 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
487 auto eventDataSize = payloadLength - eventDataOffset;
488
489 uint16_t sensorId = 0;
490 uint8_t sensorEventClassType = 0;
491 size_t eventClassDataOffset = 0;
492 auto rc =
493 decode_sensor_event_data(eventData, eventDataSize, &sensorId,
494 &sensorEventClassType, &eventClassDataOffset);
495 if (rc)
496 {
497 lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
498 rc);
499 return rc;
500 }
501 const uint8_t* sensorData = eventData + eventClassDataOffset;
502 size_t sensorDataLength = eventDataSize - eventClassDataOffset;
503
504 switch (sensorEventClassType)
505 {
506 case PLDM_NUMERIC_SENSOR_STATE:
507 {
508 return processNumericSensorEvent(tid, sensorId, sensorData,
509 sensorDataLength);
510 }
511 case PLDM_STATE_SENSOR_STATE:
512 {
513 return processStateSensorEvent(tid, sensorId, sensorData,
514 sensorDataLength);
515 }
516 case PLDM_SENSOR_OP_STATE:
517 {
518 return processSensorOpStateEvent(tid, sensorId, sensorData,
519 sensorDataLength);
520 }
521 default:
522 std::string description;
523 std::stringstream strStream;
524 log_level logLevel = log_level::OK;
525
526 description += "SENSOR_EVENT : Unsupported Sensor Class " +
527 std::to_string(sensorEventClassType) + ": ";
528 description += prefixMsgStrCreation(tid, sensorId);
529 strStream << std::setfill('0') << std::hex
530 << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
531
532 auto dataPtr = sensorData;
533 for ([[maybe_unused]] const auto& i :
534 std::views::iota(0, (int)sensorDataLength))
535 {
536 strStream << "0x" << static_cast<uint32_t>(*dataPtr);
537 dataPtr += sizeof(sensorData);
538 }
539
540 description += strStream.str();
541
542 sendJournalRedfish(description, logLevel);
543 }
544 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE",
545 sensorEventClassType);
546 return PLDM_ERROR;
547}
548
Chau Ly3de0d942024-10-03 08:57:11 +0000549void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
550 uint32_t presentReading)
551{
552 std::string description;
553 std::stringstream strStream;
554 PCIeHotPlugEventRecord_t record{presentReading};
555
556 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
557 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
558 log_level logLevel =
559 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
560
561 description += prefixMsgStrCreation(tid, sensorId);
562
563 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
564 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
565 << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
566 << "); Device (0x" << std::setw(2)
567 << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
568 << std::setw(2) << static_cast<uint32_t>(record.bits.function)
569 << "); Action (" << sAction << "); Operation status ("
570 << sOpStatus << "); Media slot number (" << std::dec
571 << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
572
573 description += strStream.str();
574
575 // Log to Redfish event
576 sendJournalRedfish(description, logLevel);
577}
578
Chau Lycebf4762024-10-03 09:02:54 +0000579std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
580{
581 std::string description;
582 DIMMTrainingFailure_t failure{failureInfo};
583
584 if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
585 {
586 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
587
588 description += std::get<0>(failureInfoMap);
589
590 description += "; MCU rank index " +
591 std::to_string(failure.bits.mcuRankIdx);
592
593 description += "; Slice number " +
594 std::to_string(failure.bits.sliceNum);
595
596 description += "; Upper nibble error status: ";
597 description += (!failure.bits.upperNibbStatErr)
598 ? "No error"
599 : "Found no rising edge";
600
601 description += "; Lower nibble error status: ";
602 description += (!failure.bits.lowerNibbStatErr)
603 ? "No error"
604 : "Found no rising edge";
605
606 description += "; Failure syndrome 0: ";
607
608 auto& syndromeMap = std::get<1>(failureInfoMap);
609 if (syndromeMap.contains(failure.bits.syndrome))
610 {
611 description += syndromeMap[failure.bits.syndrome];
612 }
613 else
614 {
615 description += "(Unknown syndrome)";
616 }
617 }
618 else
619 {
620 description += "Unknown training failure type " +
621 std::to_string(failure.bits.type);
622 }
623
624 return description;
625}
626
627void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
628 uint32_t presentReading)
629{
630 log_level logLevel{log_level::WARNING};
631 std::string description;
632 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
633 uint32_t byte012 = presentReading & 0xffffff;
634
635 description += prefixMsgStrCreation(tid, sensorId);
636
637 uint8_t dimmIdx = (sensorId - 4) / 2;
638
639 description += "DIMM " + std::to_string(dimmIdx) + " ";
640
641 if (dimmStatusToMsgMap.contains(byte3))
642 {
643 if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
644 byte3 == dimm_status::INSTALLED_BUT_DISABLED)
645 {
646 logLevel = log_level::OK;
647 }
648
649 description += dimmStatusToMsgMap[byte3];
650
651 if (byte3 == dimm_status::TRAINING_FAILURE)
652 {
653 description += "; " + dimmTrainingFailureToMsg(byte012);
654 }
655 else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
656 {
657 uint8_t byte0 = (byte012 & 0xff);
658 if (byte0 < pmicTempAlertMsg.size())
659 {
660 description += ": " + pmicTempAlertMsg[byte0];
661 }
662 }
663 }
664 else
665 {
666 switch (byte3)
667 {
668 case dimm_status::PMIC_HIGH_TEMP:
669 if (byte012 == 0x01)
670 {
671 description += "has PMIC high temp condition";
672 }
673 break;
674 case dimm_status::TSx_HIGH_TEMP:
675 switch (byte012)
676 {
677 case 0x01:
678 description += "has TS0";
679 break;
680 case 0x02:
681 description += "has TS1";
682 break;
683 case 0x03:
684 description += "has TS0 and TS1";
685 break;
686 }
687 description += " exceeding their high temperature threshold";
688 break;
689 case dimm_status::SPD_HUB_HIGH_TEMP:
690 if (byte012 == 0x01)
691 {
692 description += "has SPD/HUB high temp condition";
693 }
694 break;
695 default:
696 description += "has unsupported status " +
697 std::to_string(byte3);
698 break;
699 }
700 }
701
702 // Log to Redfish event
703 sendJournalRedfish(description, logLevel);
704}
705
706void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
707 uint32_t presentReading)
708{
709 log_level logLevel{log_level::WARNING};
710 std::string description;
711 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
712 uint32_t byte012 = presentReading & 0xffffff;
713
714 description += prefixMsgStrCreation(tid, sensorId);
715
716 description += "DDR ";
717 if (ddrStatusToMsgMap.contains(byte3))
718 {
719 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
720 {
721 logLevel = log_level::OK;
722 }
723
724 description += ddrStatusToMsgMap[byte3];
725
726 if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
727 byte3 == ddr_status::TRAINING_FAILURE)
728 {
729 // List out failed DIMMs
730 description += dimmIdxsToString(byte012);
731 }
732 }
733 else
734 {
735 description += "has unsupported status " + std::to_string(byte3);
736 }
737
738 // Log to Redfish event
739 sendJournalRedfish(description, logLevel);
740}
741
Chau Ly4cca3dc2024-10-03 09:07:09 +0000742void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
743 uint32_t presentReading)
744{
745 log_level logLevel{log_level::WARNING};
746 std::string description;
747 std::stringstream strStream;
748
749 description += prefixMsgStrCreation(tid, sensorId);
750
751 VRDStatus_t status{presentReading};
752
753 if (status.bits.warning && status.bits.critical)
754 {
755 description += "A VR warning and a VR critical";
756 logLevel = log_level::CRITICAL;
757 }
758 else
759 {
760 if (status.bits.warning)
761 {
762 description += "A VR warning";
763 }
764 else if (status.bits.critical)
765 {
766 description += "A VR critical";
767 logLevel = log_level::CRITICAL;
768 }
769 else
770 {
771 description += "No VR warning or critical";
772 logLevel = log_level::OK;
773 }
774 }
775 description += " condition observed";
776
777 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex
778 << std::setw(2)
779 << static_cast<uint32_t>(status.bits.vr_status_byte_high)
780 << "; VR status byte low is 0x" << std::setw(2)
781 << static_cast<uint32_t>(status.bits.vr_status_byte_low)
782 << "; Reading is 0x" << std::setw(2)
783 << static_cast<uint32_t>(presentReading) << ";";
784
785 description += strStream.str();
786
787 // Log to Redfish event
788 sendJournalRedfish(description, logLevel);
789}
790
Chau Lya743e382024-10-26 11:12:22 +0000791} // namespace oem_ampere
792} // namespace pldm