blob: fe0021b3f383ee87fb41fca4c97a3448819b322f [file] [log] [blame]
Chau Lya743e382024-10-26 11:12:22 +00001#include "oem_event_manager.hpp"
2
3#include "requester/handler.hpp"
4#include "requester/request.hpp"
5
6#include <config.h>
7#include <libpldm/pldm.h>
8#include <libpldm/utils.h>
9#include <systemd/sd-journal.h>
10
11#include <phosphor-logging/lg2.hpp>
12#include <xyz/openbmc_project/Logging/Entry/server.hpp>
13
14#include <algorithm>
15#include <map>
16#include <sstream>
17#include <string>
18#include <unordered_map>
19
20namespace pldm
21{
22namespace oem_ampere
23{
24namespace boot_stage = boot::stage;
Chau Lycebf4762024-10-03 09:02:54 +000025namespace ddr_status = ddr::status;
26namespace dimm_status = dimm::status;
27namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
28namespace phy_syndrome = dimm::training_failure::phy_syndrome;
29namespace training_failure = dimm::training_failure;
Chau Lya743e382024-10-26 11:12:22 +000030
/* Redfish message registry IDs selected by log level when an event is logged */
constexpr const char* ampereEventRegistry{"OpenBMC.0.1.AmpereEvent.OK"};
constexpr const char* ampereWarningRegistry{
    "OpenBMC.0.1.AmpereWarning.Warning"};
constexpr const char* ampereCriticalRegistry{
    "OpenBMC.0.1.AmpereCritical.Critical"};
constexpr const char* BIOSFWPanicRegistry{
    "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning"};

/* Number of bits scanned when a reading carries a bitmap of DIMM indexes */
constexpr int maxDIMMIdxBitNum{24};
/* Number of DIMMx_Status sensor instances recognised by sensor ID */
constexpr int maxDIMMInstantNum{24};
Chau Lya743e382024-10-26 11:12:22 +000040
/*
   Status suffixes appended after a boot stage name.
   The array index is byte 0 of the boot code.
*/
std::array<std::string, 3> bootStatMsg{{
    " booting",
    " completed",
    " failed",
}};
46
/*
   Status suffixes appended after the DDR training stage name.
   The array index is byte 0 of the boot code.
*/
std::array<std::string, 3> ddrTrainingMsg{{
    " progress started",
    " in-progress",
    " progress completed",
}};
53
/*
   Logging strings for a DIMM PMIC temperature alert.
   The array index is byte 0 of the DIMM status reading.
*/
std::array<std::string, 8> pmicTempAlertMsg{{
    "Below 85°C",
    "85°C",
    "95°C",
    "105°C",
    "115°C",
    "125°C",
    "135°C",
    "Equal or greater than 140°C",
}};
60
61/*
Chau Lya743e382024-10-26 11:12:22 +000062 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC
63 EPs through SMBus and PCIe. When host boots up, SMBUS interface
64 comes up first. In this interface, BMC is bus owner.
65
66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).
68*/
69EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
70
71/*
72 A map between sensor IDs and their names in string.
73 Using pldm::oem::sensor_ids
74*/
Chau Ly4cca3dc2024-10-03 09:07:09 +000075EventToMsgMap_t sensorIdToStrMap = {
Chau Lyef214b52024-10-16 09:40:38 +000076 {DDR_STATUS, "DDR_STATUS"},
77 {PCP_VR_STATE, "PCP_VR_STATE"},
78 {SOC_VR_STATE, "SOC_VR_STATE"},
79 {DPHY_VR1_STATE, "DPHY_VR1_STATE"},
80 {DPHY_VR2_STATE, "DPHY_VR2_STATE"},
81 {D2D_VR_STATE, "D2D_VR_STATE"},
82 {IOC_VR1_STATE, "IOC_VR1_STATE"},
83 {IOC_VR2_STATE, "IOC_VR2_STATE"},
84 {PCI_D_VR_STATE, "PCI_D_VR_STATE"},
85 {PCI_A_VR_STATE, "PCI_A_VR_STATE"},
86 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"},
87 {BOOT_OVERALL, "BOOT_OVERALL"},
88 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"}};
Chau Lya743e382024-10-26 11:12:22 +000089
90/*
91 A map between the boot stages and logging strings.
92 Using pldm::oem::boot::stage::boot_stage
93*/
94EventToMsgMap_t bootStageToMsgMap = {
95 {boot_stage::SECPRO, "SECpro"},
96 {boot_stage::MPRO, "Mpro"},
97 {boot_stage::ATF_BL1, "ATF BL1"},
98 {boot_stage::ATF_BL2, "ATF BL2"},
99 {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
100 {boot_stage::DDR_TRAINING, "DDR training"},
101 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
102 {boot_stage::ATF_BL31, "ATF BL31"},
103 {boot_stage::ATF_BL32, "ATF BL32"},
104 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
105 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
106 "ATF BL33 (UEFI) booting status = "}};
107
108/*
Chau Lycebf4762024-10-03 09:02:54 +0000109 A map between DDR status and logging strings.
110 Using pldm::oem::ddr::status::ddr_status
111*/
112EventToMsgMap_t ddrStatusToMsgMap = {
113 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
114 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
115 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
116 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
117 {ddr_status::OTHER_FAILURE, "has other failure"},
118 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
119 "has boot failure due to no configuration"},
120 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
121 "failsafe activated but boot success with the next valid configuration"}};
122
123/*
124 A map between DIMM status and logging strings.
125 Using pldm::oem::dimm::status::dimm_status
126*/
127EventToMsgMap_t dimmStatusToMsgMap = {
128 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
129 {dimm_status::NOT_INSTALLED, "is not installed"},
130 {dimm_status::OTHER_FAILURE, "has other failure"},
131 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
132 {dimm_status::TRAINING_FAILURE, "has training failure; "},
133 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
134
135/*
136 A map between PHY training failure syndrome and logging strings.
137 Using
138 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome
139*/
140EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
141 {phy_syndrome::NA, "(N/A)"},
142 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
143 {phy_syndrome::CA_LEVELING, "(CA leveling)"},
144 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
145 "(PHY write level failure - see syndrome 1)"},
146 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
147 "(PHY read gate leveling failure)"},
148 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
149 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
150 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
151
152/*
153 A map between DIMM training failure syndrome and logging strings.
154 Using
155 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome
156*/
157EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
158 {dimm_syndrome::NA, "(N/A)"},
159 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
160 "(DRAM VREFDQ training failure)"},
161 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
162 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
163 "(LRDRIMM DB SW training failure)"}};
164
165/*
166 A map between DIMM training failure type and a pair of <logging strings -
167 syndrome map>. Using
168 pldm::oem::dimm::training_faillure::dimm_training_failure_type
169*/
170std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
171 dimmTrainingFailureTypeMap = {
172 {training_failure::PHY_TRAINING_FAILURE_TYPE,
173 std::make_pair("PHY training failure",
174 phyTrainingFailureSyndromeToMsgMap)},
175 {training_failure::DIMM_TRAINING_FAILURE_TYPE,
176 std::make_pair("DIMM training failure",
177 dimmTrainingFailureSyndromeToMsgMap)}};
178
179/*
Chau Lya743e382024-10-26 11:12:22 +0000180 A map between log level and the registry used for Redfish SEL log
181 Using pldm::oem::log_level
182*/
183std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
Chau Ly3de0d942024-10-03 08:57:11 +0000184 {log_level::OK, ampereEventRegistry},
185 {log_level::WARNING, ampereWarningRegistry},
186 {log_level::CRITICAL, ampereCriticalRegistry},
Chau Lya743e382024-10-26 11:12:22 +0000187 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
188
Chau Lyef214b52024-10-16 09:40:38 +0000189std::unordered_map<
190 uint16_t,
191 std::vector<std::pair<
192 std::string,
193 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>>
194 stateSensorToMsgMap = {
195 {SOC_HEALTH_AVAILABILITY,
196 {{"SoC Health",
197 {{1, {log_level::OK, "Normal"}},
198 {2, {log_level::WARNING, "Non-Critical"}},
199 {3, {log_level::CRITICAL, "Critical"}},
200 {4, {log_level::CRITICAL, "Fatal"}}}},
201 {"SoC Availability",
202 {{1, {log_level::OK, "Enabled"}},
203 {2, {log_level::WARNING, "Disabled"}},
204 {3, {log_level::CRITICAL, "Shutdown"}}}}}}};
205
Chau Lya743e382024-10-26 11:12:22 +0000206std::string
207 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId)
208{
209 std::string description;
210 if (!tidToSocketNameMap.contains(tid))
211 {
212 description += "TID " + std::to_string(tid) + ": ";
213 }
214 else
215 {
216 description += tidToSocketNameMap[tid] + ": ";
217 }
218
219 if (!sensorIdToStrMap.contains(sensorId))
220 {
221 description += "Sensor ID " + std::to_string(sensorId) + ": ";
222 }
223 else
224 {
225 description += sensorIdToStrMap[sensorId] + ": ";
226 }
227
228 return description;
229}
230
231void OemEventManager::sendJournalRedfish(const std::string& description,
232 log_level& logLevel)
233{
234 if (description.empty())
235 {
236 return;
237 }
238
239 if (!logLevelToRedfishMsgIdMap.contains(logLevel))
240 {
241 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
242 "DES", description);
243 return;
244 }
245 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
246 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
247 redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
248}
249
250std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
251{
252 std::string description;
253 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
254 {
255 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
256 {
257 description += " #" + std::to_string(bitIdx);
258 }
259 }
260 return description;
261}
262
263void OemEventManager::handleBootOverallEvent(
264 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
265{
266 log_level logLevel{log_level::OK};
267 std::string description;
268 std::stringstream strStream;
269
270 uint8_t byte0 = (presentReading & 0x000000ff);
271 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
272 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
273 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
274 /*
275 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
276 * ATF BL32 and DDR initialization
277 */
278 if (bootStageToMsgMap.contains(byte3))
279 {
280 // Boot stage adding
281 description += bootStageToMsgMap[byte3];
282
283 switch (byte3)
284 {
285 case boot_stage::DDR_TRAINING:
286 if (byte0 >= ddrTrainingMsg.size())
287 {
288 logLevel = log_level::BIOSFWPANIC;
289 description += " unknown status";
290 }
291 else
292 {
293 description += ddrTrainingMsg[byte0];
294 }
295 if (0x01 == byte0)
296 {
297 // Add complete percentage
298 description += " at " + std::to_string(byte1) + "%";
299 }
300 break;
301 case boot_stage::S0_DDR_TRAINING_FAILURE:
302 case boot_stage::S1_DDR_TRAINING_FAILURE:
303 // ddr_training_status_msg()
304 logLevel = log_level::BIOSFWPANIC;
305 description += " at DIMMs:";
306 // dimmIdxs = presentReading & 0x00ffffff;
307 description += dimmIdxsToString(presentReading & 0x00ffffff);
308 description += " of socket ";
309 description +=
310 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
311 break;
312 default:
313 if (byte0 >= bootStatMsg.size())
314 {
315 logLevel = log_level::BIOSFWPANIC;
316 description += " unknown status";
317 }
318 else
319 {
320 description += bootStatMsg[byte0];
321 }
322 break;
323 }
324
325 // Sensor report action is fail
326 if (boot::status::BOOT_STATUS_FAILURE == byte2)
327 {
328 logLevel = log_level::BIOSFWPANIC;
329 }
330 }
331 else
332 {
333 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
334 {
335 description +=
336 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
337
338 strStream
339 << "Segment (0x" << std::setfill('0') << std::hex
340 << std::setw(8) << static_cast<uint32_t>(presentReading)
Chau Ly3de0d942024-10-03 08:57:11 +0000341 << "); Status Class (0x" << std::setw(2)
342 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
Chau Lya743e382024-10-26 11:12:22 +0000343 << std::setw(2) << static_cast<uint32_t>(byte2)
Chau Ly3de0d942024-10-03 08:57:11 +0000344 << "); Operation Code (0x" << std::setw(4)
Chau Lya743e382024-10-26 11:12:22 +0000345 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
346 << ")" << std::dec;
347
348 description += strStream.str();
349 }
350 }
351
352 // Log to Redfish event
353 sendJournalRedfish(description, logLevel);
354}
355
356int OemEventManager::processNumericSensorEvent(
357 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
358 size_t sensorDataLength)
359{
360 uint8_t eventState = 0;
361 uint8_t previousEventState = 0;
362 uint8_t sensorDataSize = 0;
363 uint32_t presentReading;
364 auto rc = decode_numeric_sensor_data(
365 sensorData, sensorDataLength, &eventState, &previousEventState,
366 &sensorDataSize, &presentReading);
367 if (rc)
368 {
369 lg2::error(
370 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
371 "TID", tid, "RC", rc);
372 return rc;
373 }
374
Chau Lycebf4762024-10-03 09:02:54 +0000375 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
376 if (auto dimmIdx = (sensorId - 4) / 2;
377 sensorId >= 4 && dimmIdx >= 0 && dimmIdx < maxDIMMInstantNum)
378 {
379 handleDIMMStatusEvent(tid, sensorId, presentReading);
380 return PLDM_SUCCESS;
381 }
382
Chau Lya743e382024-10-26 11:12:22 +0000383 switch (sensorId)
384 {
385 case BOOT_OVERALL:
386 handleBootOverallEvent(tid, sensorId, presentReading);
387 break;
Chau Ly3de0d942024-10-03 08:57:11 +0000388 case PCIE_HOT_PLUG:
389 handlePCIeHotPlugEvent(tid, sensorId, presentReading);
390 break;
Chau Lycebf4762024-10-03 09:02:54 +0000391 case DDR_STATUS:
392 handleDDRStatusEvent(tid, sensorId, presentReading);
393 break;
Chau Ly4cca3dc2024-10-03 09:07:09 +0000394 case PCP_VR_STATE:
395 case SOC_VR_STATE:
396 case DPHY_VR1_STATE:
397 case DPHY_VR2_STATE:
398 case D2D_VR_STATE:
399 case IOC_VR1_STATE:
400 case IOC_VR2_STATE:
401 case PCI_D_VR_STATE:
402 case PCI_A_VR_STATE:
403 handleVRDStatusEvent(tid, sensorId, presentReading);
404 break;
Chau Lya743e382024-10-26 11:12:22 +0000405 default:
406 std::string description;
407 std::stringstream strStream;
408 log_level logLevel = log_level::OK;
409
410 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
411 description += prefixMsgStrCreation(tid, sensorId);
412 strStream << std::setfill('0') << std::hex << "eventState 0x"
413 << std::setw(2) << static_cast<uint32_t>(eventState)
414 << " previousEventState 0x" << std::setw(2)
415 << static_cast<uint32_t>(previousEventState)
416 << " sensorDataSize 0x" << std::setw(2)
417 << static_cast<uint32_t>(sensorDataSize)
418 << " presentReading 0x" << std::setw(8)
419 << static_cast<uint32_t>(presentReading) << std::dec;
420 description += strStream.str();
421
422 sendJournalRedfish(description, logLevel);
423 break;
424 }
425 return PLDM_SUCCESS;
426}
427
428int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
429 const uint8_t* sensorData,
430 size_t sensorDataLength)
431{
432 uint8_t sensorOffset = 0;
433 uint8_t eventState = 0;
434 uint8_t previousEventState = 0;
435
436 auto rc =
437 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
438 &eventState, &previousEventState);
439 if (rc)
440 {
441 lg2::error(
442 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
443 "TID", tid, "RC", rc);
444 return rc;
445 }
446
447 std::string description;
Chau Lya743e382024-10-26 11:12:22 +0000448 log_level logLevel = log_level::OK;
449
Chau Lyef214b52024-10-16 09:40:38 +0000450 if (stateSensorToMsgMap.contains(sensorId))
451 {
452 description += prefixMsgStrCreation(tid, sensorId);
453 auto componentMap = stateSensorToMsgMap[sensorId];
454 if (sensorOffset < componentMap.size())
455 {
456 description += std::get<0>(componentMap[sensorOffset]);
457 auto stateMap = std::get<1>(componentMap[sensorOffset]);
458 if (stateMap.contains(eventState))
459 {
460 logLevel = std::get<0>(stateMap[eventState]);
461 description += " state : " + std::get<1>(stateMap[eventState]);
462 if (stateMap.contains(previousEventState))
463 {
464 description += "; previous state: " +
465 std::get<1>(stateMap[previousEventState]);
466 }
467 }
468 else
469 {
470 description += " sends unsupported event state: " +
471 std::to_string(eventState);
472 if (stateMap.contains(previousEventState))
473 {
474 description += "; previous state: " +
475 std::get<1>(stateMap[previousEventState]);
476 }
477 }
478 }
479 else
480 {
481 description += "sends unsupported component sensor offset " +
482 std::to_string(sensorOffset);
483 }
484 }
485 else
486 {
487 std::stringstream strStream;
488 description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
489 description += prefixMsgStrCreation(tid, sensorId);
490 strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
491 << std::setw(2) << static_cast<uint32_t>(sensorOffset)
492 << "eventState 0x" << std::setw(2)
493 << static_cast<uint32_t>(eventState)
494 << " previousEventState 0x" << std::setw(2)
495 << static_cast<uint32_t>(previousEventState) << std::dec;
496 description += strStream.str();
497 }
Chau Lya743e382024-10-26 11:12:22 +0000498
499 sendJournalRedfish(description, logLevel);
500
501 return PLDM_SUCCESS;
502}
503
504int OemEventManager::processSensorOpStateEvent(
505 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
506 size_t sensorDataLength)
507{
508 uint8_t present_op_state = 0;
509 uint8_t previous_op_state = 0;
510
511 auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
512 &present_op_state, &previous_op_state);
513 if (rc)
514 {
515 lg2::error(
516 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
517 "TID", tid, "RC", rc);
518 return rc;
519 }
520
521 std::string description;
522 std::stringstream strStream;
523 log_level logLevel = log_level::OK;
524
525 description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
526 description += prefixMsgStrCreation(tid, sensorId);
527 strStream << std::setfill('0') << std::hex << "present_op_state 0x"
528 << std::setw(2) << static_cast<uint32_t>(present_op_state)
529 << "previous_op_state 0x" << std::setw(2)
530 << static_cast<uint32_t>(previous_op_state) << std::dec;
531 description += strStream.str();
532
533 sendJournalRedfish(description, logLevel);
534
535 return PLDM_SUCCESS;
536}
537
538int OemEventManager::handleSensorEvent(
539 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
540 pldm_tid_t tid, size_t eventDataOffset)
541{
542 /* This OEM event handler is only used for SoC terminus*/
543 if (!tidToSocketNameMap.contains(tid))
544 {
545 return PLDM_SUCCESS;
546 }
547 auto eventData =
548 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
549 auto eventDataSize = payloadLength - eventDataOffset;
550
551 uint16_t sensorId = 0;
552 uint8_t sensorEventClassType = 0;
553 size_t eventClassDataOffset = 0;
554 auto rc =
555 decode_sensor_event_data(eventData, eventDataSize, &sensorId,
556 &sensorEventClassType, &eventClassDataOffset);
557 if (rc)
558 {
559 lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
560 rc);
561 return rc;
562 }
563 const uint8_t* sensorData = eventData + eventClassDataOffset;
564 size_t sensorDataLength = eventDataSize - eventClassDataOffset;
565
566 switch (sensorEventClassType)
567 {
568 case PLDM_NUMERIC_SENSOR_STATE:
569 {
570 return processNumericSensorEvent(tid, sensorId, sensorData,
571 sensorDataLength);
572 }
573 case PLDM_STATE_SENSOR_STATE:
574 {
575 return processStateSensorEvent(tid, sensorId, sensorData,
576 sensorDataLength);
577 }
578 case PLDM_SENSOR_OP_STATE:
579 {
580 return processSensorOpStateEvent(tid, sensorId, sensorData,
581 sensorDataLength);
582 }
583 default:
584 std::string description;
585 std::stringstream strStream;
586 log_level logLevel = log_level::OK;
587
588 description += "SENSOR_EVENT : Unsupported Sensor Class " +
589 std::to_string(sensorEventClassType) + ": ";
590 description += prefixMsgStrCreation(tid, sensorId);
591 strStream << std::setfill('0') << std::hex
592 << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
593
594 auto dataPtr = sensorData;
595 for ([[maybe_unused]] const auto& i :
596 std::views::iota(0, (int)sensorDataLength))
597 {
598 strStream << "0x" << static_cast<uint32_t>(*dataPtr);
599 dataPtr += sizeof(sensorData);
600 }
601
602 description += strStream.str();
603
604 sendJournalRedfish(description, logLevel);
605 }
606 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE",
607 sensorEventClassType);
608 return PLDM_ERROR;
609}
610
Chau Ly3de0d942024-10-03 08:57:11 +0000611void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
612 uint32_t presentReading)
613{
614 std::string description;
615 std::stringstream strStream;
616 PCIeHotPlugEventRecord_t record{presentReading};
617
618 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
619 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
620 log_level logLevel =
621 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
622
623 description += prefixMsgStrCreation(tid, sensorId);
624
625 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
626 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
627 << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
628 << "); Device (0x" << std::setw(2)
629 << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
630 << std::setw(2) << static_cast<uint32_t>(record.bits.function)
631 << "); Action (" << sAction << "); Operation status ("
632 << sOpStatus << "); Media slot number (" << std::dec
633 << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
634
635 description += strStream.str();
636
637 // Log to Redfish event
638 sendJournalRedfish(description, logLevel);
639}
640
Chau Lycebf4762024-10-03 09:02:54 +0000641std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
642{
643 std::string description;
644 DIMMTrainingFailure_t failure{failureInfo};
645
646 if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
647 {
648 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
649
650 description += std::get<0>(failureInfoMap);
651
652 description += "; MCU rank index " +
653 std::to_string(failure.bits.mcuRankIdx);
654
655 description += "; Slice number " +
656 std::to_string(failure.bits.sliceNum);
657
658 description += "; Upper nibble error status: ";
659 description += (!failure.bits.upperNibbStatErr)
660 ? "No error"
661 : "Found no rising edge";
662
663 description += "; Lower nibble error status: ";
664 description += (!failure.bits.lowerNibbStatErr)
665 ? "No error"
666 : "Found no rising edge";
667
668 description += "; Failure syndrome 0: ";
669
670 auto& syndromeMap = std::get<1>(failureInfoMap);
671 if (syndromeMap.contains(failure.bits.syndrome))
672 {
673 description += syndromeMap[failure.bits.syndrome];
674 }
675 else
676 {
677 description += "(Unknown syndrome)";
678 }
679 }
680 else
681 {
682 description += "Unknown training failure type " +
683 std::to_string(failure.bits.type);
684 }
685
686 return description;
687}
688
689void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
690 uint32_t presentReading)
691{
692 log_level logLevel{log_level::WARNING};
693 std::string description;
694 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
695 uint32_t byte012 = presentReading & 0xffffff;
696
697 description += prefixMsgStrCreation(tid, sensorId);
698
699 uint8_t dimmIdx = (sensorId - 4) / 2;
700
701 description += "DIMM " + std::to_string(dimmIdx) + " ";
702
703 if (dimmStatusToMsgMap.contains(byte3))
704 {
705 if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
706 byte3 == dimm_status::INSTALLED_BUT_DISABLED)
707 {
708 logLevel = log_level::OK;
709 }
710
711 description += dimmStatusToMsgMap[byte3];
712
713 if (byte3 == dimm_status::TRAINING_FAILURE)
714 {
715 description += "; " + dimmTrainingFailureToMsg(byte012);
716 }
717 else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
718 {
719 uint8_t byte0 = (byte012 & 0xff);
720 if (byte0 < pmicTempAlertMsg.size())
721 {
722 description += ": " + pmicTempAlertMsg[byte0];
723 }
724 }
725 }
726 else
727 {
728 switch (byte3)
729 {
730 case dimm_status::PMIC_HIGH_TEMP:
731 if (byte012 == 0x01)
732 {
733 description += "has PMIC high temp condition";
734 }
735 break;
736 case dimm_status::TSx_HIGH_TEMP:
737 switch (byte012)
738 {
739 case 0x01:
740 description += "has TS0";
741 break;
742 case 0x02:
743 description += "has TS1";
744 break;
745 case 0x03:
746 description += "has TS0 and TS1";
747 break;
748 }
749 description += " exceeding their high temperature threshold";
750 break;
751 case dimm_status::SPD_HUB_HIGH_TEMP:
752 if (byte012 == 0x01)
753 {
754 description += "has SPD/HUB high temp condition";
755 }
756 break;
757 default:
758 description += "has unsupported status " +
759 std::to_string(byte3);
760 break;
761 }
762 }
763
764 // Log to Redfish event
765 sendJournalRedfish(description, logLevel);
766}
767
768void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
769 uint32_t presentReading)
770{
771 log_level logLevel{log_level::WARNING};
772 std::string description;
773 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
774 uint32_t byte012 = presentReading & 0xffffff;
775
776 description += prefixMsgStrCreation(tid, sensorId);
777
778 description += "DDR ";
779 if (ddrStatusToMsgMap.contains(byte3))
780 {
781 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
782 {
783 logLevel = log_level::OK;
784 }
785
786 description += ddrStatusToMsgMap[byte3];
787
788 if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
789 byte3 == ddr_status::TRAINING_FAILURE)
790 {
791 // List out failed DIMMs
792 description += dimmIdxsToString(byte012);
793 }
794 }
795 else
796 {
797 description += "has unsupported status " + std::to_string(byte3);
798 }
799
800 // Log to Redfish event
801 sendJournalRedfish(description, logLevel);
802}
803
Chau Ly4cca3dc2024-10-03 09:07:09 +0000804void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
805 uint32_t presentReading)
806{
807 log_level logLevel{log_level::WARNING};
808 std::string description;
809 std::stringstream strStream;
810
811 description += prefixMsgStrCreation(tid, sensorId);
812
813 VRDStatus_t status{presentReading};
814
815 if (status.bits.warning && status.bits.critical)
816 {
817 description += "A VR warning and a VR critical";
818 logLevel = log_level::CRITICAL;
819 }
820 else
821 {
822 if (status.bits.warning)
823 {
824 description += "A VR warning";
825 }
826 else if (status.bits.critical)
827 {
828 description += "A VR critical";
829 logLevel = log_level::CRITICAL;
830 }
831 else
832 {
833 description += "No VR warning or critical";
834 logLevel = log_level::OK;
835 }
836 }
837 description += " condition observed";
838
839 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex
840 << std::setw(2)
841 << static_cast<uint32_t>(status.bits.vr_status_byte_high)
842 << "; VR status byte low is 0x" << std::setw(2)
843 << static_cast<uint32_t>(status.bits.vr_status_byte_low)
844 << "; Reading is 0x" << std::setw(2)
845 << static_cast<uint32_t>(presentReading) << ";";
846
847 description += strStream.str();
848
849 // Log to Redfish event
850 sendJournalRedfish(description, logLevel);
851}
852
Chau Lya743e382024-10-26 11:12:22 +0000853} // namespace oem_ampere
854} // namespace pldm