blob: b9fd1147e04210cd50943c498aaa703b76626a8e [file] [log] [blame]
#include "oem_event_manager.hpp"

#include "libcper/Cper.h"

#include "cper.hpp"
#include "requester/handler.hpp"
#include "requester/request.hpp"

#include <config.h>
#include <libpldm/pldm.h>
#include <libpldm/utils.h>
#include <systemd/sd-journal.h>

#include <phosphor-logging/lg2.hpp>
#include <xyz/openbmc_project/Logging/Entry/server.hpp>

#include <algorithm>
#include <array>
#include <cstdint>
#include <iomanip>
#include <map>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>
22
23namespace pldm
24{
25namespace oem_ampere
26{
27namespace boot_stage = boot::stage;
Chau Lycebf4762024-10-03 09:02:54 +000028namespace ddr_status = ddr::status;
29namespace dimm_status = dimm::status;
30namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
31namespace phy_syndrome = dimm::training_failure::phy_syndrome;
32namespace training_failure = dimm::training_failure;
Chau Lya743e382024-10-26 11:12:22 +000033
// Redfish message IDs attached to journal entries, one per log level
// (see logLevelToRedfishMsgIdMap).
constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK";
constexpr const char* ampereWarningRegistry =
    "OpenBMC.0.1.AmpereWarning.Warning";
constexpr const char* ampereCriticalRegistry =
    "OpenBMC.0.1.AmpereCritical.Critical";
constexpr const char* BIOSFWPanicRegistry =
    "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning";

// Number of bits scanned when a reading is decoded as a DIMM bitmap.
constexpr auto maxDIMMIdxBitNum = 24;
// Number of DIMM status sensor instances; sensorIdToDIMMIdx() returns this
// value for sensor IDs that are not DIMM status sensors.
constexpr auto maxDIMMInstantNum = 24;
Chau Lya743e382024-10-26 11:12:22 +000043
/*
   Status suffixes for a generic boot stage.
   Indexed by byte 0 of the boot code; handleBootOverallEvent() reports any
   index past the end as " unknown status".
*/
std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"};
49
/*
   Status suffixes for the DDR training boot stage.
   Indexed by byte 0 of the boot code; index 1 (" in-progress") is the one
   handleBootOverallEvent() extends with a completion percentage.
*/
std::array<std::string, 3> ddrTrainingMsg = {
    " progress started", " in-progress", " progress completed"};
56
/*
   Temperature descriptions for a DIMM PMIC temperature alert.
   Indexed by byte 0 of the DIMM status reading (see handleDIMMStatusEvent());
   values past the end of the array are not described.
*/
std::array<std::string, 8> pmicTempAlertMsg = {
    "Below 85°C", "85°C",  "95°C",  "105°C",
    "115°C",      "125°C", "135°C", "Equal or greater than 140°C"};
63
64/*
Chau Lya743e382024-10-26 11:12:22 +000065 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC
66 EPs through SMBus and PCIe. When host boots up, SMBUS interface
67 comes up first. In this interface, BMC is bus owner.
68
69 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
70 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).
71*/
72EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
73
74/*
75 A map between sensor IDs and their names in string.
76 Using pldm::oem::sensor_ids
77*/
Chau Ly4cca3dc2024-10-03 09:07:09 +000078EventToMsgMap_t sensorIdToStrMap = {
Chau Lyef214b52024-10-16 09:40:38 +000079 {DDR_STATUS, "DDR_STATUS"},
80 {PCP_VR_STATE, "PCP_VR_STATE"},
81 {SOC_VR_STATE, "SOC_VR_STATE"},
82 {DPHY_VR1_STATE, "DPHY_VR1_STATE"},
83 {DPHY_VR2_STATE, "DPHY_VR2_STATE"},
84 {D2D_VR_STATE, "D2D_VR_STATE"},
85 {IOC_VR1_STATE, "IOC_VR1_STATE"},
86 {IOC_VR2_STATE, "IOC_VR2_STATE"},
87 {PCI_D_VR_STATE, "PCI_D_VR_STATE"},
88 {PCI_A_VR_STATE, "PCI_A_VR_STATE"},
89 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"},
90 {BOOT_OVERALL, "BOOT_OVERALL"},
Chau Lyb01357f2024-10-17 09:18:01 +000091 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"},
92 {WATCH_DOG, "WATCH_DOG"}};
Chau Lya743e382024-10-26 11:12:22 +000093
94/*
95 A map between the boot stages and logging strings.
96 Using pldm::oem::boot::stage::boot_stage
97*/
98EventToMsgMap_t bootStageToMsgMap = {
99 {boot_stage::SECPRO, "SECpro"},
100 {boot_stage::MPRO, "Mpro"},
101 {boot_stage::ATF_BL1, "ATF BL1"},
102 {boot_stage::ATF_BL2, "ATF BL2"},
103 {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
104 {boot_stage::DDR_TRAINING, "DDR training"},
105 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
106 {boot_stage::ATF_BL31, "ATF BL31"},
107 {boot_stage::ATF_BL32, "ATF BL32"},
108 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
109 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
110 "ATF BL33 (UEFI) booting status = "}};
111
112/*
Chau Lycebf4762024-10-03 09:02:54 +0000113 A map between DDR status and logging strings.
114 Using pldm::oem::ddr::status::ddr_status
115*/
116EventToMsgMap_t ddrStatusToMsgMap = {
117 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
118 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
119 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
120 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
121 {ddr_status::OTHER_FAILURE, "has other failure"},
122 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
123 "has boot failure due to no configuration"},
124 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
125 "failsafe activated but boot success with the next valid configuration"}};
126
127/*
128 A map between DIMM status and logging strings.
129 Using pldm::oem::dimm::status::dimm_status
130*/
131EventToMsgMap_t dimmStatusToMsgMap = {
132 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
133 {dimm_status::NOT_INSTALLED, "is not installed"},
134 {dimm_status::OTHER_FAILURE, "has other failure"},
135 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
136 {dimm_status::TRAINING_FAILURE, "has training failure; "},
137 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
138
139/*
140 A map between PHY training failure syndrome and logging strings.
141 Using
142 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome
143*/
144EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
145 {phy_syndrome::NA, "(N/A)"},
146 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
147 {phy_syndrome::CA_LEVELING, "(CA leveling)"},
148 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
149 "(PHY write level failure - see syndrome 1)"},
150 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
151 "(PHY read gate leveling failure)"},
152 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
153 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
154 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
155
156/*
157 A map between DIMM training failure syndrome and logging strings.
158 Using
159 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome
160*/
161EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
162 {dimm_syndrome::NA, "(N/A)"},
163 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
164 "(DRAM VREFDQ training failure)"},
165 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
166 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
167 "(LRDRIMM DB SW training failure)"}};
168
169/*
170 A map between DIMM training failure type and a pair of <logging strings -
171 syndrome map>. Using
172 pldm::oem::dimm::training_faillure::dimm_training_failure_type
173*/
174std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
175 dimmTrainingFailureTypeMap = {
176 {training_failure::PHY_TRAINING_FAILURE_TYPE,
177 std::make_pair("PHY training failure",
178 phyTrainingFailureSyndromeToMsgMap)},
179 {training_failure::DIMM_TRAINING_FAILURE_TYPE,
180 std::make_pair("DIMM training failure",
181 dimmTrainingFailureSyndromeToMsgMap)}};
182
183/*
Chau Lya743e382024-10-26 11:12:22 +0000184 A map between log level and the registry used for Redfish SEL log
185 Using pldm::oem::log_level
186*/
187std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
Chau Ly3de0d942024-10-03 08:57:11 +0000188 {log_level::OK, ampereEventRegistry},
189 {log_level::WARNING, ampereWarningRegistry},
190 {log_level::CRITICAL, ampereCriticalRegistry},
Chau Lya743e382024-10-26 11:12:22 +0000191 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
192
Chau Lyef214b52024-10-16 09:40:38 +0000193std::unordered_map<
194 uint16_t,
195 std::vector<std::pair<
196 std::string,
197 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>>
198 stateSensorToMsgMap = {
199 {SOC_HEALTH_AVAILABILITY,
200 {{"SoC Health",
201 {{1, {log_level::OK, "Normal"}},
202 {2, {log_level::WARNING, "Non-Critical"}},
203 {3, {log_level::CRITICAL, "Critical"}},
204 {4, {log_level::CRITICAL, "Fatal"}}}},
205 {"SoC Availability",
206 {{1, {log_level::OK, "Enabled"}},
207 {2, {log_level::WARNING, "Disabled"}},
Chau Lyb01357f2024-10-17 09:18:01 +0000208 {3, {log_level::CRITICAL, "Shutdown"}}}}}},
209 {WATCH_DOG,
210 {{"Global Watch Dog",
211 {{1, {log_level::OK, "Normal"}},
212 {2, {log_level::CRITICAL, "Timer Expired"}}}},
213 {"Secure Watch Dog",
214 {{1, {log_level::OK, "Normal"}},
215 {2, {log_level::CRITICAL, "Timer Expired"}}}},
216 {"Non-secure Watch Dog",
217 {{1, {log_level::OK, "Normal"}},
218 {2, {log_level::CRITICAL, "Timer Expired"}}}}}}};
Chau Lyef214b52024-10-16 09:40:38 +0000219
Chau Lya743e382024-10-26 11:12:22 +0000220std::string
221 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId)
222{
223 std::string description;
224 if (!tidToSocketNameMap.contains(tid))
225 {
226 description += "TID " + std::to_string(tid) + ": ";
227 }
228 else
229 {
230 description += tidToSocketNameMap[tid] + ": ";
231 }
232
233 if (!sensorIdToStrMap.contains(sensorId))
234 {
235 description += "Sensor ID " + std::to_string(sensorId) + ": ";
236 }
237 else
238 {
239 description += sensorIdToStrMap[sensorId] + ": ";
240 }
241
242 return description;
243}
244
245void OemEventManager::sendJournalRedfish(const std::string& description,
246 log_level& logLevel)
247{
248 if (description.empty())
249 {
250 return;
251 }
252
253 if (!logLevelToRedfishMsgIdMap.contains(logLevel))
254 {
255 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
256 "DES", description);
257 return;
258 }
259 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
260 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
261 redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
262}
263
264std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
265{
266 std::string description;
267 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
268 {
269 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
270 {
271 description += " #" + std::to_string(bitIdx);
272 }
273 }
274 return description;
275}
276
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000277uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId)
278{
279 uint8_t dimmIdx = maxDIMMInstantNum;
280 int sensorId_Off = sensorId - 4;
281 if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) &&
282 ((sensorId_Off / 2) < maxDIMMInstantNum))
283 {
284 dimmIdx = sensorId_Off / 2;
285 }
286 return dimmIdx;
287}
288
Chau Lya743e382024-10-26 11:12:22 +0000289void OemEventManager::handleBootOverallEvent(
290 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
291{
292 log_level logLevel{log_level::OK};
293 std::string description;
294 std::stringstream strStream;
295
296 uint8_t byte0 = (presentReading & 0x000000ff);
297 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
298 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
299 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
300 /*
301 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
302 * ATF BL32 and DDR initialization
303 */
304 if (bootStageToMsgMap.contains(byte3))
305 {
306 // Boot stage adding
307 description += bootStageToMsgMap[byte3];
308
309 switch (byte3)
310 {
311 case boot_stage::DDR_TRAINING:
312 if (byte0 >= ddrTrainingMsg.size())
313 {
314 logLevel = log_level::BIOSFWPANIC;
315 description += " unknown status";
316 }
317 else
318 {
319 description += ddrTrainingMsg[byte0];
320 }
321 if (0x01 == byte0)
322 {
323 // Add complete percentage
324 description += " at " + std::to_string(byte1) + "%";
325 }
326 break;
327 case boot_stage::S0_DDR_TRAINING_FAILURE:
328 case boot_stage::S1_DDR_TRAINING_FAILURE:
329 // ddr_training_status_msg()
330 logLevel = log_level::BIOSFWPANIC;
331 description += " at DIMMs:";
332 // dimmIdxs = presentReading & 0x00ffffff;
333 description += dimmIdxsToString(presentReading & 0x00ffffff);
334 description += " of socket ";
335 description +=
336 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
337 break;
338 default:
339 if (byte0 >= bootStatMsg.size())
340 {
341 logLevel = log_level::BIOSFWPANIC;
342 description += " unknown status";
343 }
344 else
345 {
346 description += bootStatMsg[byte0];
347 }
348 break;
349 }
350
351 // Sensor report action is fail
352 if (boot::status::BOOT_STATUS_FAILURE == byte2)
353 {
354 logLevel = log_level::BIOSFWPANIC;
355 }
356 }
357 else
358 {
359 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
360 {
361 description +=
362 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
363
364 strStream
365 << "Segment (0x" << std::setfill('0') << std::hex
366 << std::setw(8) << static_cast<uint32_t>(presentReading)
Chau Ly3de0d942024-10-03 08:57:11 +0000367 << "); Status Class (0x" << std::setw(2)
368 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
Chau Lya743e382024-10-26 11:12:22 +0000369 << std::setw(2) << static_cast<uint32_t>(byte2)
Chau Ly3de0d942024-10-03 08:57:11 +0000370 << "); Operation Code (0x" << std::setw(4)
Chau Lya743e382024-10-26 11:12:22 +0000371 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
372 << ")" << std::dec;
373
374 description += strStream.str();
375 }
376 }
377
378 // Log to Redfish event
379 sendJournalRedfish(description, logLevel);
380}
381
382int OemEventManager::processNumericSensorEvent(
383 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
384 size_t sensorDataLength)
385{
386 uint8_t eventState = 0;
387 uint8_t previousEventState = 0;
388 uint8_t sensorDataSize = 0;
389 uint32_t presentReading;
390 auto rc = decode_numeric_sensor_data(
391 sensorData, sensorDataLength, &eventState, &previousEventState,
392 &sensorDataSize, &presentReading);
393 if (rc)
394 {
395 lg2::error(
396 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
397 "TID", tid, "RC", rc);
398 return rc;
399 }
400
Chau Lycebf4762024-10-03 09:02:54 +0000401 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000402 if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum)
Chau Lycebf4762024-10-03 09:02:54 +0000403 {
404 handleDIMMStatusEvent(tid, sensorId, presentReading);
405 return PLDM_SUCCESS;
406 }
407
Chau Lya743e382024-10-26 11:12:22 +0000408 switch (sensorId)
409 {
410 case BOOT_OVERALL:
411 handleBootOverallEvent(tid, sensorId, presentReading);
412 break;
Chau Ly3de0d942024-10-03 08:57:11 +0000413 case PCIE_HOT_PLUG:
414 handlePCIeHotPlugEvent(tid, sensorId, presentReading);
415 break;
Chau Lycebf4762024-10-03 09:02:54 +0000416 case DDR_STATUS:
417 handleDDRStatusEvent(tid, sensorId, presentReading);
418 break;
Chau Ly4cca3dc2024-10-03 09:07:09 +0000419 case PCP_VR_STATE:
420 case SOC_VR_STATE:
421 case DPHY_VR1_STATE:
422 case DPHY_VR2_STATE:
423 case D2D_VR_STATE:
424 case IOC_VR1_STATE:
425 case IOC_VR2_STATE:
426 case PCI_D_VR_STATE:
427 case PCI_A_VR_STATE:
428 handleVRDStatusEvent(tid, sensorId, presentReading);
429 break;
Chau Lyb01357f2024-10-17 09:18:01 +0000430 case WATCH_DOG:
431 handleNumericWatchdogEvent(tid, sensorId, presentReading);
432 break;
Chau Lya743e382024-10-26 11:12:22 +0000433 default:
434 std::string description;
435 std::stringstream strStream;
436 log_level logLevel = log_level::OK;
437
438 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
439 description += prefixMsgStrCreation(tid, sensorId);
440 strStream << std::setfill('0') << std::hex << "eventState 0x"
441 << std::setw(2) << static_cast<uint32_t>(eventState)
442 << " previousEventState 0x" << std::setw(2)
443 << static_cast<uint32_t>(previousEventState)
444 << " sensorDataSize 0x" << std::setw(2)
445 << static_cast<uint32_t>(sensorDataSize)
446 << " presentReading 0x" << std::setw(8)
447 << static_cast<uint32_t>(presentReading) << std::dec;
448 description += strStream.str();
449
450 sendJournalRedfish(description, logLevel);
451 break;
452 }
453 return PLDM_SUCCESS;
454}
455
456int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
457 const uint8_t* sensorData,
458 size_t sensorDataLength)
459{
460 uint8_t sensorOffset = 0;
461 uint8_t eventState = 0;
462 uint8_t previousEventState = 0;
463
464 auto rc =
465 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
466 &eventState, &previousEventState);
467 if (rc)
468 {
469 lg2::error(
470 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
471 "TID", tid, "RC", rc);
472 return rc;
473 }
474
475 std::string description;
Chau Lya743e382024-10-26 11:12:22 +0000476 log_level logLevel = log_level::OK;
477
Chau Lyef214b52024-10-16 09:40:38 +0000478 if (stateSensorToMsgMap.contains(sensorId))
479 {
480 description += prefixMsgStrCreation(tid, sensorId);
481 auto componentMap = stateSensorToMsgMap[sensorId];
482 if (sensorOffset < componentMap.size())
483 {
484 description += std::get<0>(componentMap[sensorOffset]);
485 auto stateMap = std::get<1>(componentMap[sensorOffset]);
486 if (stateMap.contains(eventState))
487 {
488 logLevel = std::get<0>(stateMap[eventState]);
489 description += " state : " + std::get<1>(stateMap[eventState]);
490 if (stateMap.contains(previousEventState))
491 {
492 description += "; previous state: " +
493 std::get<1>(stateMap[previousEventState]);
494 }
495 }
496 else
497 {
498 description += " sends unsupported event state: " +
499 std::to_string(eventState);
500 if (stateMap.contains(previousEventState))
501 {
502 description += "; previous state: " +
503 std::get<1>(stateMap[previousEventState]);
504 }
505 }
506 }
507 else
508 {
509 description += "sends unsupported component sensor offset " +
510 std::to_string(sensorOffset);
511 }
512 }
513 else
514 {
515 std::stringstream strStream;
516 description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
517 description += prefixMsgStrCreation(tid, sensorId);
518 strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
519 << std::setw(2) << static_cast<uint32_t>(sensorOffset)
520 << "eventState 0x" << std::setw(2)
521 << static_cast<uint32_t>(eventState)
522 << " previousEventState 0x" << std::setw(2)
523 << static_cast<uint32_t>(previousEventState) << std::dec;
524 description += strStream.str();
525 }
Chau Lya743e382024-10-26 11:12:22 +0000526
527 sendJournalRedfish(description, logLevel);
528
529 return PLDM_SUCCESS;
530}
531
532int OemEventManager::processSensorOpStateEvent(
533 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
534 size_t sensorDataLength)
535{
536 uint8_t present_op_state = 0;
537 uint8_t previous_op_state = 0;
538
539 auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
540 &present_op_state, &previous_op_state);
541 if (rc)
542 {
543 lg2::error(
544 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
545 "TID", tid, "RC", rc);
546 return rc;
547 }
548
549 std::string description;
550 std::stringstream strStream;
551 log_level logLevel = log_level::OK;
552
553 description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
554 description += prefixMsgStrCreation(tid, sensorId);
555 strStream << std::setfill('0') << std::hex << "present_op_state 0x"
556 << std::setw(2) << static_cast<uint32_t>(present_op_state)
557 << "previous_op_state 0x" << std::setw(2)
558 << static_cast<uint32_t>(previous_op_state) << std::dec;
559 description += strStream.str();
560
561 sendJournalRedfish(description, logLevel);
562
563 return PLDM_SUCCESS;
564}
565
566int OemEventManager::handleSensorEvent(
567 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
568 pldm_tid_t tid, size_t eventDataOffset)
569{
570 /* This OEM event handler is only used for SoC terminus*/
571 if (!tidToSocketNameMap.contains(tid))
572 {
573 return PLDM_SUCCESS;
574 }
575 auto eventData =
576 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
577 auto eventDataSize = payloadLength - eventDataOffset;
578
579 uint16_t sensorId = 0;
580 uint8_t sensorEventClassType = 0;
581 size_t eventClassDataOffset = 0;
582 auto rc =
583 decode_sensor_event_data(eventData, eventDataSize, &sensorId,
584 &sensorEventClassType, &eventClassDataOffset);
585 if (rc)
586 {
587 lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
588 rc);
589 return rc;
590 }
591 const uint8_t* sensorData = eventData + eventClassDataOffset;
592 size_t sensorDataLength = eventDataSize - eventClassDataOffset;
593
594 switch (sensorEventClassType)
595 {
596 case PLDM_NUMERIC_SENSOR_STATE:
597 {
598 return processNumericSensorEvent(tid, sensorId, sensorData,
599 sensorDataLength);
600 }
601 case PLDM_STATE_SENSOR_STATE:
602 {
603 return processStateSensorEvent(tid, sensorId, sensorData,
604 sensorDataLength);
605 }
606 case PLDM_SENSOR_OP_STATE:
607 {
608 return processSensorOpStateEvent(tid, sensorId, sensorData,
609 sensorDataLength);
610 }
611 default:
612 std::string description;
613 std::stringstream strStream;
614 log_level logLevel = log_level::OK;
615
616 description += "SENSOR_EVENT : Unsupported Sensor Class " +
617 std::to_string(sensorEventClassType) + ": ";
618 description += prefixMsgStrCreation(tid, sensorId);
619 strStream << std::setfill('0') << std::hex
620 << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
621
622 auto dataPtr = sensorData;
623 for ([[maybe_unused]] const auto& i :
624 std::views::iota(0, (int)sensorDataLength))
625 {
626 strStream << "0x" << static_cast<uint32_t>(*dataPtr);
627 dataPtr += sizeof(sensorData);
628 }
629
630 description += strStream.str();
631
632 sendJournalRedfish(description, logLevel);
633 }
634 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE",
635 sensorEventClassType);
636 return PLDM_ERROR;
637}
638
Chau Ly3de0d942024-10-03 08:57:11 +0000639void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
640 uint32_t presentReading)
641{
642 std::string description;
643 std::stringstream strStream;
644 PCIeHotPlugEventRecord_t record{presentReading};
645
646 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
647 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
648 log_level logLevel =
649 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
650
651 description += prefixMsgStrCreation(tid, sensorId);
652
653 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
654 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
655 << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
656 << "); Device (0x" << std::setw(2)
657 << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
658 << std::setw(2) << static_cast<uint32_t>(record.bits.function)
659 << "); Action (" << sAction << "); Operation status ("
660 << sOpStatus << "); Media slot number (" << std::dec
661 << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
662
663 description += strStream.str();
664
665 // Log to Redfish event
666 sendJournalRedfish(description, logLevel);
667}
668
Chau Lycebf4762024-10-03 09:02:54 +0000669std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
670{
671 std::string description;
672 DIMMTrainingFailure_t failure{failureInfo};
673
674 if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
675 {
676 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
677
678 description += std::get<0>(failureInfoMap);
679
680 description += "; MCU rank index " +
681 std::to_string(failure.bits.mcuRankIdx);
682
683 description += "; Slice number " +
684 std::to_string(failure.bits.sliceNum);
685
686 description += "; Upper nibble error status: ";
687 description += (!failure.bits.upperNibbStatErr)
688 ? "No error"
689 : "Found no rising edge";
690
691 description += "; Lower nibble error status: ";
692 description += (!failure.bits.lowerNibbStatErr)
693 ? "No error"
694 : "Found no rising edge";
695
696 description += "; Failure syndrome 0: ";
697
698 auto& syndromeMap = std::get<1>(failureInfoMap);
699 if (syndromeMap.contains(failure.bits.syndrome))
700 {
701 description += syndromeMap[failure.bits.syndrome];
702 }
703 else
704 {
705 description += "(Unknown syndrome)";
706 }
707 }
708 else
709 {
710 description += "Unknown training failure type " +
711 std::to_string(failure.bits.type);
712 }
713
714 return description;
715}
716
717void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
718 uint32_t presentReading)
719{
720 log_level logLevel{log_level::WARNING};
721 std::string description;
722 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
723 uint32_t byte012 = presentReading & 0xffffff;
724
725 description += prefixMsgStrCreation(tid, sensorId);
726
Thu Nguyen93d0ca32024-11-14 23:46:40 +0000727 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
728 auto dimmIdx = sensorIdToDIMMIdx(sensorId);
729 if (dimmIdx >= maxDIMMIdxBitNum)
730 {
731 return;
732 }
Chau Lycebf4762024-10-03 09:02:54 +0000733
734 description += "DIMM " + std::to_string(dimmIdx) + " ";
735
736 if (dimmStatusToMsgMap.contains(byte3))
737 {
738 if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
739 byte3 == dimm_status::INSTALLED_BUT_DISABLED)
740 {
741 logLevel = log_level::OK;
742 }
743
744 description += dimmStatusToMsgMap[byte3];
745
746 if (byte3 == dimm_status::TRAINING_FAILURE)
747 {
748 description += "; " + dimmTrainingFailureToMsg(byte012);
749 }
750 else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
751 {
752 uint8_t byte0 = (byte012 & 0xff);
753 if (byte0 < pmicTempAlertMsg.size())
754 {
755 description += ": " + pmicTempAlertMsg[byte0];
756 }
757 }
758 }
759 else
760 {
761 switch (byte3)
762 {
763 case dimm_status::PMIC_HIGH_TEMP:
764 if (byte012 == 0x01)
765 {
766 description += "has PMIC high temp condition";
767 }
768 break;
769 case dimm_status::TSx_HIGH_TEMP:
770 switch (byte012)
771 {
772 case 0x01:
773 description += "has TS0";
774 break;
775 case 0x02:
776 description += "has TS1";
777 break;
778 case 0x03:
779 description += "has TS0 and TS1";
780 break;
781 }
782 description += " exceeding their high temperature threshold";
783 break;
784 case dimm_status::SPD_HUB_HIGH_TEMP:
785 if (byte012 == 0x01)
786 {
787 description += "has SPD/HUB high temp condition";
788 }
789 break;
790 default:
791 description += "has unsupported status " +
792 std::to_string(byte3);
793 break;
794 }
795 }
796
797 // Log to Redfish event
798 sendJournalRedfish(description, logLevel);
799}
800
801void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
802 uint32_t presentReading)
803{
804 log_level logLevel{log_level::WARNING};
805 std::string description;
806 uint8_t byte3 = (presentReading & 0xff000000) >> 24;
807 uint32_t byte012 = presentReading & 0xffffff;
808
809 description += prefixMsgStrCreation(tid, sensorId);
810
811 description += "DDR ";
812 if (ddrStatusToMsgMap.contains(byte3))
813 {
814 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
815 {
816 logLevel = log_level::OK;
817 }
818
819 description += ddrStatusToMsgMap[byte3];
820
821 if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
822 byte3 == ddr_status::TRAINING_FAILURE)
823 {
824 // List out failed DIMMs
825 description += dimmIdxsToString(byte012);
826 }
827 }
828 else
829 {
830 description += "has unsupported status " + std::to_string(byte3);
831 }
832
833 // Log to Redfish event
834 sendJournalRedfish(description, logLevel);
835}
836
Chau Ly4cca3dc2024-10-03 09:07:09 +0000837void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
838 uint32_t presentReading)
839{
840 log_level logLevel{log_level::WARNING};
841 std::string description;
842 std::stringstream strStream;
843
844 description += prefixMsgStrCreation(tid, sensorId);
845
846 VRDStatus_t status{presentReading};
847
848 if (status.bits.warning && status.bits.critical)
849 {
850 description += "A VR warning and a VR critical";
851 logLevel = log_level::CRITICAL;
852 }
853 else
854 {
855 if (status.bits.warning)
856 {
857 description += "A VR warning";
858 }
859 else if (status.bits.critical)
860 {
861 description += "A VR critical";
862 logLevel = log_level::CRITICAL;
863 }
864 else
865 {
866 description += "No VR warning or critical";
867 logLevel = log_level::OK;
868 }
869 }
870 description += " condition observed";
871
872 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex
873 << std::setw(2)
874 << static_cast<uint32_t>(status.bits.vr_status_byte_high)
875 << "; VR status byte low is 0x" << std::setw(2)
876 << static_cast<uint32_t>(status.bits.vr_status_byte_low)
877 << "; Reading is 0x" << std::setw(2)
878 << static_cast<uint32_t>(presentReading) << ";";
879
880 description += strStream.str();
881
882 // Log to Redfish event
883 sendJournalRedfish(description, logLevel);
884}
885
Chau Lyb01357f2024-10-17 09:18:01 +0000886void OemEventManager::handleNumericWatchdogEvent(
887 pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading)
888{
889 std::string description;
890 log_level logLevel = log_level::CRITICAL;
891
892 description += prefixMsgStrCreation(tid, sensorId);
893
894 if (presentReading & 0x01)
895 {
896 description += "Global watchdog expired;";
897 }
898 if (presentReading & 0x02)
899 {
900 description += "Secure watchdog expired;";
901 }
902 if (presentReading & 0x04)
903 {
904 description += "Non-secure watchdog expired;";
905 }
906
907 // Log to Redfish event
908 sendJournalRedfish(description, logLevel);
909}
910
Dung Cao72c8aa02023-11-22 02:31:41 +0000911int OemEventManager::processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
912 const uint8_t* eventData,
913 size_t eventDataSize)
914{
915 EFI_AMPERE_ERROR_DATA ampHdr;
916
917 decodeCperRecord(eventData, eventDataSize, &ampHdr);
918
919 addCperSELLog(tid, eventId, &ampHdr);
920
Thu Nguyen4b537552024-11-19 08:43:23 +0000921 /* isBert at bit 12 of TypeId */
922 if (ampHdr.TypeId & 0x0800)
923 {
924 lg2::info("Ampere SoC BERT is triggered.");
925 std::variant<std::string> value(
926 "com.ampere.CrashCapture.Trigger.TriggerAction.Bert");
927 try
928 {
929 auto& bus = pldm::utils::DBusHandler::getBus();
930 auto method =
931 bus.new_method_call("com.ampere.CrashCapture.Trigger",
932 "/com/ampere/crashcapture/trigger",
933 pldm::utils::dbusProperties, "Set");
934 method.append("com.ampere.CrashCapture.Trigger", "TriggerActions",
935 value);
936 bus.call_noreply(method);
937 }
938 catch (const std::exception& e)
939 {
940 lg2::error("call BERT trigger error - {ERROR}", "ERROR", e);
941 }
942 }
943
Dung Cao72c8aa02023-11-22 02:31:41 +0000944 return PLDM_SUCCESS;
945}
946
Chau Lya743e382024-10-26 11:12:22 +0000947} // namespace oem_ampere
948} // namespace pldm