| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 1 | #include "oem_event_manager.hpp" | 
|  | 2 |  | 
| Dung Cao | 72c8aa0 | 2023-11-22 02:31:41 +0000 | [diff] [blame] | 3 | #include "libcper/Cper.h" | 
|  | 4 |  | 
|  | 5 | #include "cper.hpp" | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 6 | #include "requester/handler.hpp" | 
|  | 7 | #include "requester/request.hpp" | 
|  | 8 |  | 
|  | 9 | #include <config.h> | 
|  | 10 | #include <libpldm/pldm.h> | 
|  | 11 | #include <libpldm/utils.h> | 
|  | 12 | #include <systemd/sd-journal.h> | 
|  | 13 |  | 
|  | 14 | #include <phosphor-logging/lg2.hpp> | 
|  | 15 | #include <xyz/openbmc_project/Logging/Entry/server.hpp> | 
|  | 16 |  | 
|  | 17 | #include <algorithm> | 
|  | 18 | #include <map> | 
| Thu Nguyen | 79f9ff6 | 2024-11-22 03:36:27 +0000 | [diff] [blame] | 19 | #include <set> | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 20 | #include <sstream> | 
|  | 21 | #include <string> | 
|  | 22 | #include <unordered_map> | 
|  | 23 |  | 
|  | 24 | namespace pldm | 
|  | 25 | { | 
|  | 26 | namespace oem_ampere | 
|  | 27 | { | 
| Dung Cao | 4a50383 | 2025-01-08 03:45:17 +0000 | [diff] [blame] | 28 | namespace fs = std::filesystem; | 
|  | 29 | using namespace std::chrono; | 
|  | 30 |  | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 31 | namespace boot_stage = boot::stage; | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 32 | namespace ddr_status = ddr::status; | 
|  | 33 | namespace dimm_status = dimm::status; | 
|  | 34 | namespace dimm_syndrome = dimm::training_failure::dimm_syndrome; | 
|  | 35 | namespace phy_syndrome = dimm::training_failure::phy_syndrome; | 
|  | 36 | namespace training_failure = dimm::training_failure; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 37 |  | 
| Chaul Ly | 198084b | 2024-12-13 09:02:52 +0000 | [diff] [blame] | 38 | constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent"; | 
|  | 39 | constexpr const char* ampereWarningRegistry = "OpenBMC.0.1.AmpereWarning"; | 
|  | 40 | constexpr const char* ampereCriticalRegistry = "OpenBMC.0.1.AmpereCritical"; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 41 | constexpr const char* BIOSFWPanicRegistry = | 
| Chaul Ly | 198084b | 2024-12-13 09:02:52 +0000 | [diff] [blame] | 42 | "OpenBMC.0.1.BIOSFirmwarePanicReason"; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 43 | constexpr auto maxDIMMIdxBitNum = 24; | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 44 | constexpr auto maxDIMMInstantNum = 24; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 45 |  | 
| Thu Nguyen | 79f9ff6 | 2024-11-22 03:36:27 +0000 | [diff] [blame] | 46 | const std::set<uint16_t> rasUESensorIDs = {CORE_UE, MCU_UE, PCIE_UE, SOC_UE}; | 
|  | 47 |  | 
/*
    Suffixes describing the status of a boot stage.
    The array index is byte 0 of the boot code.
 */
std::array<std::string, 3> bootStatMsg{
    " booting",
    " completed",
    " failed",
};
|  | 53 |  | 
/*
    Suffixes describing the status of the DDR training stage.
    The array index is byte 0 of the boot code.
 */
std::array<std::string, 3> ddrTrainingMsg{
    " progress started",
    " in-progress",
    " progress completed",
};
|  | 60 |  | 
/*
    Logging strings for the PMIC temperature alert levels.
    NOTE(review): presumably indexed by the PMIC status code carried in a
    DIMM status reading - confirm against the code that reads this array.
 */
std::array<std::string, 8> pmicTempAlertMsg{
    "Below 85°C",
    "85°C",
    "95°C",
    "105°C",
    "115°C",
    "125°C",
    "135°C",
    "Equal or greater than 140°C",
};
|  | 67 |  | 
|  | 68 | /* | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 69 | In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC | 
|  | 70 | EPs through SMBus and PCIe. When host boots up, SMBUS interface | 
|  | 71 | comes up first. In this interface, BMC is bus owner. | 
|  | 72 |  | 
|  | 73 | mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available). | 
|  | 74 | pldmd will always use TID 1 for S0 and TID 2 for S1 (if available). | 
|  | 75 | */ | 
|  | 76 | EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}}; | 
|  | 77 |  | 
|  | 78 | /* | 
|  | 79 | A map between sensor IDs and their names in string. | 
|  | 80 | Using pldm::oem::sensor_ids | 
|  | 81 | */ | 
| Chau Ly | 4cca3dc | 2024-10-03 09:07:09 +0000 | [diff] [blame] | 82 | EventToMsgMap_t sensorIdToStrMap = { | 
| Chau Ly | ef214b5 | 2024-10-16 09:40:38 +0000 | [diff] [blame] | 83 | {DDR_STATUS, "DDR_STATUS"}, | 
|  | 84 | {PCP_VR_STATE, "PCP_VR_STATE"}, | 
|  | 85 | {SOC_VR_STATE, "SOC_VR_STATE"}, | 
|  | 86 | {DPHY_VR1_STATE, "DPHY_VR1_STATE"}, | 
|  | 87 | {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, | 
|  | 88 | {D2D_VR_STATE, "D2D_VR_STATE"}, | 
|  | 89 | {IOC_VR1_STATE, "IOC_VR1_STATE"}, | 
|  | 90 | {IOC_VR2_STATE, "IOC_VR2_STATE"}, | 
|  | 91 | {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, | 
|  | 92 | {PCI_A_VR_STATE, "PCI_A_VR_STATE"}, | 
|  | 93 | {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, | 
|  | 94 | {BOOT_OVERALL, "BOOT_OVERALL"}, | 
| Chau Ly | b01357f | 2024-10-17 09:18:01 +0000 | [diff] [blame] | 95 | {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"}, | 
|  | 96 | {WATCH_DOG, "WATCH_DOG"}}; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 97 |  | 
|  | 98 | /* | 
|  | 99 | A map between the boot stages and logging strings. | 
|  | 100 | Using pldm::oem::boot::stage::boot_stage | 
|  | 101 | */ | 
|  | 102 | EventToMsgMap_t bootStageToMsgMap = { | 
|  | 103 | {boot_stage::SECPRO, "SECpro"}, | 
|  | 104 | {boot_stage::MPRO, "Mpro"}, | 
|  | 105 | {boot_stage::ATF_BL1, "ATF BL1"}, | 
|  | 106 | {boot_stage::ATF_BL2, "ATF BL2"}, | 
|  | 107 | {boot_stage::DDR_INITIALIZATION, "DDR initialization"}, | 
|  | 108 | {boot_stage::DDR_TRAINING, "DDR training"}, | 
|  | 109 | {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"}, | 
|  | 110 | {boot_stage::ATF_BL31, "ATF BL31"}, | 
|  | 111 | {boot_stage::ATF_BL32, "ATF BL32"}, | 
|  | 112 | {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"}, | 
|  | 113 | {boot_stage::UEFI_STATUS_CLASS_CODE_MIN, | 
|  | 114 | "ATF BL33 (UEFI) booting status = "}}; | 
|  | 115 |  | 
|  | 116 | /* | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 117 | A map between DDR status and logging strings. | 
|  | 118 | Using pldm::oem::ddr::status::ddr_status | 
|  | 119 | */ | 
|  | 120 | EventToMsgMap_t ddrStatusToMsgMap = { | 
|  | 121 | {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"}, | 
|  | 122 | {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"}, | 
|  | 123 | {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"}, | 
|  | 124 | {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"}, | 
|  | 125 | {ddr_status::OTHER_FAILURE, "has other failure"}, | 
|  | 126 | {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG, | 
|  | 127 | "has boot failure due to no configuration"}, | 
|  | 128 | {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS, | 
|  | 129 | "failsafe activated but boot success with the next valid configuration"}}; | 
|  | 130 |  | 
|  | 131 | /* | 
|  | 132 | A map between DIMM status and logging strings. | 
|  | 133 | Using pldm::oem::dimm::status::dimm_status | 
|  | 134 | */ | 
|  | 135 | EventToMsgMap_t dimmStatusToMsgMap = { | 
|  | 136 | {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"}, | 
|  | 137 | {dimm_status::NOT_INSTALLED, "is not installed"}, | 
|  | 138 | {dimm_status::OTHER_FAILURE, "has other failure"}, | 
|  | 139 | {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"}, | 
|  | 140 | {dimm_status::TRAINING_FAILURE, "has training failure; "}, | 
|  | 141 | {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}}; | 
|  | 142 |  | 
|  | 143 | /* | 
|  | 144 | A map between PHY training failure syndrome and logging strings. | 
|  | 145 | Using | 
|  | 146 | pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome | 
|  | 147 | */ | 
|  | 148 | EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = { | 
|  | 149 | {phy_syndrome::NA, "(N/A)"}, | 
|  | 150 | {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"}, | 
|  | 151 | {phy_syndrome::CA_LEVELING, "(CA leveling)"}, | 
|  | 152 | {phy_syndrome::PHY_WRITE_LEVEL_FAILURE, | 
|  | 153 | "(PHY write level failure - see syndrome 1)"}, | 
|  | 154 | {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE, | 
|  | 155 | "(PHY read gate leveling failure)"}, | 
|  | 156 | {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"}, | 
|  | 157 | {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"}, | 
|  | 158 | {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}}; | 
|  | 159 |  | 
|  | 160 | /* | 
|  | 161 | A map between DIMM training failure syndrome and logging strings. | 
|  | 162 | Using | 
|  | 163 | pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome | 
|  | 164 | */ | 
|  | 165 | EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = { | 
|  | 166 | {dimm_syndrome::NA, "(N/A)"}, | 
|  | 167 | {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE, | 
|  | 168 | "(DRAM VREFDQ training failure)"}, | 
|  | 169 | {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"}, | 
|  | 170 | {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE, | 
|  | 171 | "(LRDRIMM DB SW training failure)"}}; | 
|  | 172 |  | 
|  | 173 | /* | 
|  | 174 | A map between DIMM training failure type and a pair of <logging strings - | 
|  | 175 | syndrome map>. Using | 
|  | 176 | pldm::oem::dimm::training_faillure::dimm_training_failure_type | 
|  | 177 | */ | 
|  | 178 | std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>> | 
|  | 179 | dimmTrainingFailureTypeMap = { | 
|  | 180 | {training_failure::PHY_TRAINING_FAILURE_TYPE, | 
|  | 181 | std::make_pair("PHY training failure", | 
|  | 182 | phyTrainingFailureSyndromeToMsgMap)}, | 
|  | 183 | {training_failure::DIMM_TRAINING_FAILURE_TYPE, | 
|  | 184 | std::make_pair("DIMM training failure", | 
|  | 185 | dimmTrainingFailureSyndromeToMsgMap)}}; | 
|  | 186 |  | 
|  | 187 | /* | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 188 | A map between log level and the registry used for Redfish SEL log | 
|  | 189 | Using pldm::oem::log_level | 
|  | 190 | */ | 
|  | 191 | std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = { | 
| Chau Ly | 3de0d94 | 2024-10-03 08:57:11 +0000 | [diff] [blame] | 192 | {log_level::OK, ampereEventRegistry}, | 
|  | 193 | {log_level::WARNING, ampereWarningRegistry}, | 
|  | 194 | {log_level::CRITICAL, ampereCriticalRegistry}, | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 195 | {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}}; | 
|  | 196 |  | 
| Chau Ly | ef214b5 | 2024-10-16 09:40:38 +0000 | [diff] [blame] | 197 | std::unordered_map< | 
|  | 198 | uint16_t, | 
|  | 199 | std::vector<std::pair< | 
|  | 200 | std::string, | 
|  | 201 | std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>> | 
|  | 202 | stateSensorToMsgMap = { | 
|  | 203 | {SOC_HEALTH_AVAILABILITY, | 
|  | 204 | {{"SoC Health", | 
|  | 205 | {{1, {log_level::OK, "Normal"}}, | 
|  | 206 | {2, {log_level::WARNING, "Non-Critical"}}, | 
|  | 207 | {3, {log_level::CRITICAL, "Critical"}}, | 
|  | 208 | {4, {log_level::CRITICAL, "Fatal"}}}}, | 
|  | 209 | {"SoC Availability", | 
|  | 210 | {{1, {log_level::OK, "Enabled"}}, | 
|  | 211 | {2, {log_level::WARNING, "Disabled"}}, | 
| Chau Ly | b01357f | 2024-10-17 09:18:01 +0000 | [diff] [blame] | 212 | {3, {log_level::CRITICAL, "Shutdown"}}}}}}, | 
|  | 213 | {WATCH_DOG, | 
|  | 214 | {{"Global Watch Dog", | 
|  | 215 | {{1, {log_level::OK, "Normal"}}, | 
|  | 216 | {2, {log_level::CRITICAL, "Timer Expired"}}}}, | 
|  | 217 | {"Secure Watch Dog", | 
|  | 218 | {{1, {log_level::OK, "Normal"}}, | 
|  | 219 | {2, {log_level::CRITICAL, "Timer Expired"}}}}, | 
|  | 220 | {"Non-secure Watch Dog", | 
|  | 221 | {{1, {log_level::OK, "Normal"}}, | 
|  | 222 | {2, {log_level::CRITICAL, "Timer Expired"}}}}}}}; | 
| Chau Ly | ef214b5 | 2024-10-16 09:40:38 +0000 | [diff] [blame] | 223 |  | 
| Patrick Williams | 366507c | 2025-02-03 14:28:01 -0500 | [diff] [blame] | 224 | std::string OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, | 
|  | 225 | uint16_t sensorId) | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 226 | { | 
|  | 227 | std::string description; | 
|  | 228 | if (!tidToSocketNameMap.contains(tid)) | 
|  | 229 | { | 
|  | 230 | description += "TID " + std::to_string(tid) + ": "; | 
|  | 231 | } | 
|  | 232 | else | 
|  | 233 | { | 
|  | 234 | description += tidToSocketNameMap[tid] + ": "; | 
|  | 235 | } | 
|  | 236 |  | 
|  | 237 | if (!sensorIdToStrMap.contains(sensorId)) | 
|  | 238 | { | 
|  | 239 | description += "Sensor ID " + std::to_string(sensorId) + ": "; | 
|  | 240 | } | 
|  | 241 | else | 
|  | 242 | { | 
|  | 243 | description += sensorIdToStrMap[sensorId] + ": "; | 
|  | 244 | } | 
|  | 245 |  | 
|  | 246 | return description; | 
|  | 247 | } | 
|  | 248 |  | 
|  | 249 | void OemEventManager::sendJournalRedfish(const std::string& description, | 
|  | 250 | log_level& logLevel) | 
|  | 251 | { | 
|  | 252 | if (description.empty()) | 
|  | 253 | { | 
|  | 254 | return; | 
|  | 255 | } | 
|  | 256 |  | 
|  | 257 | if (!logLevelToRedfishMsgIdMap.contains(logLevel)) | 
|  | 258 | { | 
|  | 259 | lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel, | 
|  | 260 | "DES", description); | 
|  | 261 | return; | 
|  | 262 | } | 
|  | 263 | auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel]; | 
|  | 264 | lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID", | 
|  | 265 | redfishMsgId, "REDFISH_MESSAGE_ARGS", description); | 
|  | 266 | } | 
|  | 267 |  | 
|  | 268 | std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs) | 
|  | 269 | { | 
|  | 270 | std::string description; | 
|  | 271 | for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum)) | 
|  | 272 | { | 
|  | 273 | if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx)) | 
|  | 274 | { | 
|  | 275 | description += " #" + std::to_string(bitIdx); | 
|  | 276 | } | 
|  | 277 | } | 
|  | 278 | return description; | 
|  | 279 | } | 
|  | 280 |  | 
| Thu Nguyen | 93d0ca3 | 2024-11-14 23:46:40 +0000 | [diff] [blame] | 281 | uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId) | 
|  | 282 | { | 
|  | 283 | uint8_t dimmIdx = maxDIMMInstantNum; | 
|  | 284 | int sensorId_Off = sensorId - 4; | 
|  | 285 | if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) && | 
|  | 286 | ((sensorId_Off / 2) < maxDIMMInstantNum)) | 
|  | 287 | { | 
|  | 288 | dimmIdx = sensorId_Off / 2; | 
|  | 289 | } | 
|  | 290 | return dimmIdx; | 
|  | 291 | } | 
|  | 292 |  | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 293 | void OemEventManager::handleBootOverallEvent( | 
|  | 294 | pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading) | 
|  | 295 | { | 
|  | 296 | log_level logLevel{log_level::OK}; | 
|  | 297 | std::string description; | 
|  | 298 | std::stringstream strStream; | 
|  | 299 |  | 
|  | 300 | uint8_t byte0 = (presentReading & 0x000000ff); | 
|  | 301 | uint8_t byte1 = (presentReading & 0x0000ff00) >> 8; | 
|  | 302 | uint8_t byte2 = (presentReading & 0x00ff0000) >> 16; | 
|  | 303 | uint8_t byte3 = (presentReading & 0xff000000) >> 24; | 
|  | 304 | /* | 
|  | 305 | * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31, | 
|  | 306 | * ATF BL32 and DDR initialization | 
|  | 307 | */ | 
|  | 308 | if (bootStageToMsgMap.contains(byte3)) | 
|  | 309 | { | 
|  | 310 | // Boot stage adding | 
|  | 311 | description += bootStageToMsgMap[byte3]; | 
|  | 312 |  | 
|  | 313 | switch (byte3) | 
|  | 314 | { | 
|  | 315 | case boot_stage::DDR_TRAINING: | 
|  | 316 | if (byte0 >= ddrTrainingMsg.size()) | 
|  | 317 | { | 
|  | 318 | logLevel = log_level::BIOSFWPANIC; | 
|  | 319 | description += " unknown status"; | 
|  | 320 | } | 
|  | 321 | else | 
|  | 322 | { | 
|  | 323 | description += ddrTrainingMsg[byte0]; | 
|  | 324 | } | 
|  | 325 | if (0x01 == byte0) | 
|  | 326 | { | 
|  | 327 | // Add complete percentage | 
|  | 328 | description += " at " + std::to_string(byte1) + "%"; | 
|  | 329 | } | 
|  | 330 | break; | 
|  | 331 | case boot_stage::S0_DDR_TRAINING_FAILURE: | 
|  | 332 | case boot_stage::S1_DDR_TRAINING_FAILURE: | 
|  | 333 | // ddr_training_status_msg() | 
|  | 334 | logLevel = log_level::BIOSFWPANIC; | 
|  | 335 | description += " at DIMMs:"; | 
|  | 336 | // dimmIdxs = presentReading & 0x00ffffff; | 
|  | 337 | description += dimmIdxsToString(presentReading & 0x00ffffff); | 
|  | 338 | description += " of socket "; | 
|  | 339 | description += | 
|  | 340 | (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1"; | 
|  | 341 | break; | 
|  | 342 | default: | 
|  | 343 | if (byte0 >= bootStatMsg.size()) | 
|  | 344 | { | 
|  | 345 | logLevel = log_level::BIOSFWPANIC; | 
|  | 346 | description += " unknown status"; | 
|  | 347 | } | 
|  | 348 | else | 
|  | 349 | { | 
|  | 350 | description += bootStatMsg[byte0]; | 
|  | 351 | } | 
|  | 352 | break; | 
|  | 353 | } | 
|  | 354 |  | 
|  | 355 | // Sensor report action is fail | 
|  | 356 | if (boot::status::BOOT_STATUS_FAILURE == byte2) | 
|  | 357 | { | 
|  | 358 | logLevel = log_level::BIOSFWPANIC; | 
|  | 359 | } | 
|  | 360 | } | 
|  | 361 | else | 
|  | 362 | { | 
|  | 363 | if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX) | 
|  | 364 | { | 
|  | 365 | description += | 
|  | 366 | bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN]; | 
|  | 367 |  | 
|  | 368 | strStream | 
|  | 369 | << "Segment (0x" << std::setfill('0') << std::hex | 
|  | 370 | << std::setw(8) << static_cast<uint32_t>(presentReading) | 
| Chau Ly | 3de0d94 | 2024-10-03 08:57:11 +0000 | [diff] [blame] | 371 | << "); Status Class (0x" << std::setw(2) | 
|  | 372 | << static_cast<uint32_t>(byte3) << "); Status SubClass (0x" | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 373 | << std::setw(2) << static_cast<uint32_t>(byte2) | 
| Chau Ly | 3de0d94 | 2024-10-03 08:57:11 +0000 | [diff] [blame] | 374 | << "); Operation Code (0x" << std::setw(4) | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 375 | << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16) | 
|  | 376 | << ")" << std::dec; | 
|  | 377 |  | 
|  | 378 | description += strStream.str(); | 
|  | 379 | } | 
|  | 380 | } | 
|  | 381 |  | 
|  | 382 | // Log to Redfish event | 
|  | 383 | sendJournalRedfish(description, logLevel); | 
|  | 384 | } | 
|  | 385 |  | 
|  | 386 | int OemEventManager::processNumericSensorEvent( | 
|  | 387 | pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, | 
|  | 388 | size_t sensorDataLength) | 
|  | 389 | { | 
|  | 390 | uint8_t eventState = 0; | 
|  | 391 | uint8_t previousEventState = 0; | 
|  | 392 | uint8_t sensorDataSize = 0; | 
|  | 393 | uint32_t presentReading; | 
|  | 394 | auto rc = decode_numeric_sensor_data( | 
|  | 395 | sensorData, sensorDataLength, &eventState, &previousEventState, | 
|  | 396 | &sensorDataSize, &presentReading); | 
|  | 397 | if (rc) | 
|  | 398 | { | 
|  | 399 | lg2::error( | 
|  | 400 | "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ", | 
|  | 401 | "TID", tid, "RC", rc); | 
|  | 402 | return rc; | 
|  | 403 | } | 
|  | 404 |  | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 405 | // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) | 
| Thu Nguyen | 93d0ca3 | 2024-11-14 23:46:40 +0000 | [diff] [blame] | 406 | if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum) | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 407 | { | 
|  | 408 | handleDIMMStatusEvent(tid, sensorId, presentReading); | 
|  | 409 | return PLDM_SUCCESS; | 
|  | 410 | } | 
|  | 411 |  | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 412 | switch (sensorId) | 
|  | 413 | { | 
|  | 414 | case BOOT_OVERALL: | 
|  | 415 | handleBootOverallEvent(tid, sensorId, presentReading); | 
|  | 416 | break; | 
| Chau Ly | 3de0d94 | 2024-10-03 08:57:11 +0000 | [diff] [blame] | 417 | case PCIE_HOT_PLUG: | 
|  | 418 | handlePCIeHotPlugEvent(tid, sensorId, presentReading); | 
|  | 419 | break; | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 420 | case DDR_STATUS: | 
|  | 421 | handleDDRStatusEvent(tid, sensorId, presentReading); | 
|  | 422 | break; | 
| Chau Ly | 4cca3dc | 2024-10-03 09:07:09 +0000 | [diff] [blame] | 423 | case PCP_VR_STATE: | 
|  | 424 | case SOC_VR_STATE: | 
|  | 425 | case DPHY_VR1_STATE: | 
|  | 426 | case DPHY_VR2_STATE: | 
|  | 427 | case D2D_VR_STATE: | 
|  | 428 | case IOC_VR1_STATE: | 
|  | 429 | case IOC_VR2_STATE: | 
|  | 430 | case PCI_D_VR_STATE: | 
|  | 431 | case PCI_A_VR_STATE: | 
|  | 432 | handleVRDStatusEvent(tid, sensorId, presentReading); | 
|  | 433 | break; | 
| Chau Ly | b01357f | 2024-10-17 09:18:01 +0000 | [diff] [blame] | 434 | case WATCH_DOG: | 
|  | 435 | handleNumericWatchdogEvent(tid, sensorId, presentReading); | 
|  | 436 | break; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 437 | default: | 
|  | 438 | std::string description; | 
|  | 439 | std::stringstream strStream; | 
|  | 440 | log_level logLevel = log_level::OK; | 
|  | 441 |  | 
|  | 442 | description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: "; | 
|  | 443 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 444 | strStream << std::setfill('0') << std::hex << "eventState 0x" | 
|  | 445 | << std::setw(2) << static_cast<uint32_t>(eventState) | 
|  | 446 | << " previousEventState 0x" << std::setw(2) | 
|  | 447 | << static_cast<uint32_t>(previousEventState) | 
|  | 448 | << " sensorDataSize 0x" << std::setw(2) | 
|  | 449 | << static_cast<uint32_t>(sensorDataSize) | 
|  | 450 | << " presentReading 0x" << std::setw(8) | 
|  | 451 | << static_cast<uint32_t>(presentReading) << std::dec; | 
|  | 452 | description += strStream.str(); | 
|  | 453 |  | 
|  | 454 | sendJournalRedfish(description, logLevel); | 
|  | 455 | break; | 
|  | 456 | } | 
|  | 457 | return PLDM_SUCCESS; | 
|  | 458 | } | 
|  | 459 |  | 
|  | 460 | int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId, | 
|  | 461 | const uint8_t* sensorData, | 
|  | 462 | size_t sensorDataLength) | 
|  | 463 | { | 
|  | 464 | uint8_t sensorOffset = 0; | 
|  | 465 | uint8_t eventState = 0; | 
|  | 466 | uint8_t previousEventState = 0; | 
|  | 467 |  | 
|  | 468 | auto rc = | 
|  | 469 | decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset, | 
|  | 470 | &eventState, &previousEventState); | 
|  | 471 | if (rc) | 
|  | 472 | { | 
|  | 473 | lg2::error( | 
|  | 474 | "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}", | 
|  | 475 | "TID", tid, "RC", rc); | 
|  | 476 | return rc; | 
|  | 477 | } | 
|  | 478 |  | 
|  | 479 | std::string description; | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 480 | log_level logLevel = log_level::OK; | 
|  | 481 |  | 
| Chau Ly | ef214b5 | 2024-10-16 09:40:38 +0000 | [diff] [blame] | 482 | if (stateSensorToMsgMap.contains(sensorId)) | 
|  | 483 | { | 
|  | 484 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 485 | auto componentMap = stateSensorToMsgMap[sensorId]; | 
|  | 486 | if (sensorOffset < componentMap.size()) | 
|  | 487 | { | 
|  | 488 | description += std::get<0>(componentMap[sensorOffset]); | 
|  | 489 | auto stateMap = std::get<1>(componentMap[sensorOffset]); | 
|  | 490 | if (stateMap.contains(eventState)) | 
|  | 491 | { | 
|  | 492 | logLevel = std::get<0>(stateMap[eventState]); | 
|  | 493 | description += " state : " + std::get<1>(stateMap[eventState]); | 
|  | 494 | if (stateMap.contains(previousEventState)) | 
|  | 495 | { | 
|  | 496 | description += "; previous state: " + | 
|  | 497 | std::get<1>(stateMap[previousEventState]); | 
|  | 498 | } | 
|  | 499 | } | 
|  | 500 | else | 
|  | 501 | { | 
|  | 502 | description += " sends unsupported event state: " + | 
|  | 503 | std::to_string(eventState); | 
|  | 504 | if (stateMap.contains(previousEventState)) | 
|  | 505 | { | 
|  | 506 | description += "; previous state: " + | 
|  | 507 | std::get<1>(stateMap[previousEventState]); | 
|  | 508 | } | 
|  | 509 | } | 
|  | 510 | } | 
|  | 511 | else | 
|  | 512 | { | 
|  | 513 | description += "sends unsupported component sensor offset " + | 
|  | 514 | std::to_string(sensorOffset); | 
|  | 515 | } | 
|  | 516 | } | 
|  | 517 | else | 
|  | 518 | { | 
|  | 519 | std::stringstream strStream; | 
|  | 520 | description += "SENSOR_EVENT : STATE_SENSOR_STATE: "; | 
|  | 521 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 522 | strStream << std::setfill('0') << std::hex << "sensorOffset 0x" | 
|  | 523 | << std::setw(2) << static_cast<uint32_t>(sensorOffset) | 
|  | 524 | << "eventState 0x" << std::setw(2) | 
|  | 525 | << static_cast<uint32_t>(eventState) | 
|  | 526 | << " previousEventState 0x" << std::setw(2) | 
|  | 527 | << static_cast<uint32_t>(previousEventState) << std::dec; | 
|  | 528 | description += strStream.str(); | 
|  | 529 | } | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 530 |  | 
|  | 531 | sendJournalRedfish(description, logLevel); | 
|  | 532 |  | 
|  | 533 | return PLDM_SUCCESS; | 
|  | 534 | } | 
|  | 535 |  | 
|  | 536 | int OemEventManager::processSensorOpStateEvent( | 
|  | 537 | pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, | 
|  | 538 | size_t sensorDataLength) | 
|  | 539 | { | 
|  | 540 | uint8_t present_op_state = 0; | 
|  | 541 | uint8_t previous_op_state = 0; | 
|  | 542 |  | 
|  | 543 | auto rc = decode_sensor_op_data(sensorData, sensorDataLength, | 
|  | 544 | &present_op_state, &previous_op_state); | 
|  | 545 | if (rc) | 
|  | 546 | { | 
|  | 547 | lg2::error( | 
|  | 548 | "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}", | 
|  | 549 | "TID", tid, "RC", rc); | 
|  | 550 | return rc; | 
|  | 551 | } | 
|  | 552 |  | 
|  | 553 | std::string description; | 
|  | 554 | std::stringstream strStream; | 
|  | 555 | log_level logLevel = log_level::OK; | 
|  | 556 |  | 
|  | 557 | description += "SENSOR_EVENT : SENSOR_OP_STATE: "; | 
|  | 558 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 559 | strStream << std::setfill('0') << std::hex << "present_op_state 0x" | 
|  | 560 | << std::setw(2) << static_cast<uint32_t>(present_op_state) | 
|  | 561 | << "previous_op_state 0x" << std::setw(2) | 
|  | 562 | << static_cast<uint32_t>(previous_op_state) << std::dec; | 
|  | 563 | description += strStream.str(); | 
|  | 564 |  | 
|  | 565 | sendJournalRedfish(description, logLevel); | 
|  | 566 |  | 
|  | 567 | return PLDM_SUCCESS; | 
|  | 568 | } | 
|  | 569 |  | 
|  | 570 | int OemEventManager::handleSensorEvent( | 
|  | 571 | const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, | 
|  | 572 | pldm_tid_t tid, size_t eventDataOffset) | 
|  | 573 | { | 
|  | 574 | /* This OEM event handler is only used for SoC terminus*/ | 
|  | 575 | if (!tidToSocketNameMap.contains(tid)) | 
|  | 576 | { | 
|  | 577 | return PLDM_SUCCESS; | 
|  | 578 | } | 
|  | 579 | auto eventData = | 
|  | 580 | reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; | 
|  | 581 | auto eventDataSize = payloadLength - eventDataOffset; | 
|  | 582 |  | 
|  | 583 | uint16_t sensorId = 0; | 
|  | 584 | uint8_t sensorEventClassType = 0; | 
|  | 585 | size_t eventClassDataOffset = 0; | 
|  | 586 | auto rc = | 
|  | 587 | decode_sensor_event_data(eventData, eventDataSize, &sensorId, | 
|  | 588 | &sensorEventClassType, &eventClassDataOffset); | 
|  | 589 | if (rc) | 
|  | 590 | { | 
|  | 591 | lg2::error("Failed to decode sensor event data return code {RC}.", "RC", | 
|  | 592 | rc); | 
|  | 593 | return rc; | 
|  | 594 | } | 
|  | 595 | const uint8_t* sensorData = eventData + eventClassDataOffset; | 
|  | 596 | size_t sensorDataLength = eventDataSize - eventClassDataOffset; | 
|  | 597 |  | 
|  | 598 | switch (sensorEventClassType) | 
|  | 599 | { | 
|  | 600 | case PLDM_NUMERIC_SENSOR_STATE: | 
|  | 601 | { | 
|  | 602 | return processNumericSensorEvent(tid, sensorId, sensorData, | 
|  | 603 | sensorDataLength); | 
|  | 604 | } | 
|  | 605 | case PLDM_STATE_SENSOR_STATE: | 
|  | 606 | { | 
|  | 607 | return processStateSensorEvent(tid, sensorId, sensorData, | 
|  | 608 | sensorDataLength); | 
|  | 609 | } | 
|  | 610 | case PLDM_SENSOR_OP_STATE: | 
|  | 611 | { | 
|  | 612 | return processSensorOpStateEvent(tid, sensorId, sensorData, | 
|  | 613 | sensorDataLength); | 
|  | 614 | } | 
|  | 615 | default: | 
|  | 616 | std::string description; | 
|  | 617 | std::stringstream strStream; | 
|  | 618 | log_level logLevel = log_level::OK; | 
|  | 619 |  | 
|  | 620 | description += "SENSOR_EVENT : Unsupported Sensor Class " + | 
|  | 621 | std::to_string(sensorEventClassType) + ": "; | 
|  | 622 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 623 | strStream << std::setfill('0') << std::hex | 
|  | 624 | << std::setw(sizeof(sensorData) * 2) << "Sensor data: "; | 
|  | 625 |  | 
|  | 626 | auto dataPtr = sensorData; | 
|  | 627 | for ([[maybe_unused]] const auto& i : | 
|  | 628 | std::views::iota(0, (int)sensorDataLength)) | 
|  | 629 | { | 
|  | 630 | strStream << "0x" << static_cast<uint32_t>(*dataPtr); | 
|  | 631 | dataPtr += sizeof(sensorData); | 
|  | 632 | } | 
|  | 633 |  | 
|  | 634 | description += strStream.str(); | 
|  | 635 |  | 
|  | 636 | sendJournalRedfish(description, logLevel); | 
|  | 637 | } | 
|  | 638 | lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE", | 
|  | 639 | sensorEventClassType); | 
|  | 640 | return PLDM_ERROR; | 
|  | 641 | } | 
|  | 642 |  | 
| Chau Ly | 3de0d94 | 2024-10-03 08:57:11 +0000 | [diff] [blame] | 643 | void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId, | 
|  | 644 | uint32_t presentReading) | 
|  | 645 | { | 
|  | 646 | std::string description; | 
|  | 647 | std::stringstream strStream; | 
|  | 648 | PCIeHotPlugEventRecord_t record{presentReading}; | 
|  | 649 |  | 
|  | 650 | std::string sAction = (!record.bits.action) ? "Insertion" : "Removal"; | 
|  | 651 | std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed"; | 
|  | 652 | log_level logLevel = | 
|  | 653 | (!record.bits.opStatus) ? log_level::OK : log_level::WARNING; | 
|  | 654 |  | 
|  | 655 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 656 |  | 
|  | 657 | strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2) | 
|  | 658 | << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x" | 
|  | 659 | << std::setw(2) << static_cast<uint32_t>(record.bits.bus) | 
|  | 660 | << "); Device (0x" << std::setw(2) | 
|  | 661 | << static_cast<uint32_t>(record.bits.device) << "); Function (0x" | 
|  | 662 | << std::setw(2) << static_cast<uint32_t>(record.bits.function) | 
|  | 663 | << "); Action (" << sAction << "); Operation status (" | 
|  | 664 | << sOpStatus << "); Media slot number (" << std::dec | 
|  | 665 | << static_cast<uint32_t>(record.bits.mediaSlot) << ")"; | 
|  | 666 |  | 
|  | 667 | description += strStream.str(); | 
|  | 668 |  | 
|  | 669 | // Log to Redfish event | 
|  | 670 | sendJournalRedfish(description, logLevel); | 
|  | 671 | } | 
|  | 672 |  | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 673 | std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo) | 
|  | 674 | { | 
|  | 675 | std::string description; | 
|  | 676 | DIMMTrainingFailure_t failure{failureInfo}; | 
|  | 677 |  | 
|  | 678 | if (dimmTrainingFailureTypeMap.contains(failure.bits.type)) | 
|  | 679 | { | 
|  | 680 | auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type]; | 
|  | 681 |  | 
|  | 682 | description += std::get<0>(failureInfoMap); | 
|  | 683 |  | 
|  | 684 | description += "; MCU rank index " + | 
|  | 685 | std::to_string(failure.bits.mcuRankIdx); | 
|  | 686 |  | 
|  | 687 | description += "; Slice number " + | 
|  | 688 | std::to_string(failure.bits.sliceNum); | 
|  | 689 |  | 
|  | 690 | description += "; Upper nibble error status: "; | 
|  | 691 | description += (!failure.bits.upperNibbStatErr) | 
|  | 692 | ? "No error" | 
|  | 693 | : "Found no rising edge"; | 
|  | 694 |  | 
|  | 695 | description += "; Lower nibble error status: "; | 
|  | 696 | description += (!failure.bits.lowerNibbStatErr) | 
|  | 697 | ? "No error" | 
|  | 698 | : "Found no rising edge"; | 
|  | 699 |  | 
|  | 700 | description += "; Failure syndrome 0: "; | 
|  | 701 |  | 
|  | 702 | auto& syndromeMap = std::get<1>(failureInfoMap); | 
|  | 703 | if (syndromeMap.contains(failure.bits.syndrome)) | 
|  | 704 | { | 
|  | 705 | description += syndromeMap[failure.bits.syndrome]; | 
|  | 706 | } | 
|  | 707 | else | 
|  | 708 | { | 
|  | 709 | description += "(Unknown syndrome)"; | 
|  | 710 | } | 
|  | 711 | } | 
|  | 712 | else | 
|  | 713 | { | 
|  | 714 | description += "Unknown training failure type " + | 
|  | 715 | std::to_string(failure.bits.type); | 
|  | 716 | } | 
|  | 717 |  | 
|  | 718 | return description; | 
|  | 719 | } | 
|  | 720 |  | 
|  | 721 | void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId, | 
|  | 722 | uint32_t presentReading) | 
|  | 723 | { | 
|  | 724 | log_level logLevel{log_level::WARNING}; | 
|  | 725 | std::string description; | 
|  | 726 | uint8_t byte3 = (presentReading & 0xff000000) >> 24; | 
|  | 727 | uint32_t byte012 = presentReading & 0xffffff; | 
|  | 728 |  | 
|  | 729 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 730 |  | 
| Thu Nguyen | 93d0ca3 | 2024-11-14 23:46:40 +0000 | [diff] [blame] | 731 | // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) | 
|  | 732 | auto dimmIdx = sensorIdToDIMMIdx(sensorId); | 
|  | 733 | if (dimmIdx >= maxDIMMIdxBitNum) | 
|  | 734 | { | 
|  | 735 | return; | 
|  | 736 | } | 
| Chau Ly | cebf476 | 2024-10-03 09:02:54 +0000 | [diff] [blame] | 737 |  | 
|  | 738 | description += "DIMM " + std::to_string(dimmIdx) + " "; | 
|  | 739 |  | 
|  | 740 | if (dimmStatusToMsgMap.contains(byte3)) | 
|  | 741 | { | 
|  | 742 | if (byte3 == dimm_status::INSTALLED_NO_ERROR || | 
|  | 743 | byte3 == dimm_status::INSTALLED_BUT_DISABLED) | 
|  | 744 | { | 
|  | 745 | logLevel = log_level::OK; | 
|  | 746 | } | 
|  | 747 |  | 
|  | 748 | description += dimmStatusToMsgMap[byte3]; | 
|  | 749 |  | 
|  | 750 | if (byte3 == dimm_status::TRAINING_FAILURE) | 
|  | 751 | { | 
|  | 752 | description += "; " + dimmTrainingFailureToMsg(byte012); | 
|  | 753 | } | 
|  | 754 | else if (byte3 == dimm_status::PMIC_TEMP_ALERT) | 
|  | 755 | { | 
|  | 756 | uint8_t byte0 = (byte012 & 0xff); | 
|  | 757 | if (byte0 < pmicTempAlertMsg.size()) | 
|  | 758 | { | 
|  | 759 | description += ": " + pmicTempAlertMsg[byte0]; | 
|  | 760 | } | 
|  | 761 | } | 
|  | 762 | } | 
|  | 763 | else | 
|  | 764 | { | 
|  | 765 | switch (byte3) | 
|  | 766 | { | 
|  | 767 | case dimm_status::PMIC_HIGH_TEMP: | 
|  | 768 | if (byte012 == 0x01) | 
|  | 769 | { | 
|  | 770 | description += "has PMIC high temp condition"; | 
|  | 771 | } | 
|  | 772 | break; | 
|  | 773 | case dimm_status::TSx_HIGH_TEMP: | 
|  | 774 | switch (byte012) | 
|  | 775 | { | 
|  | 776 | case 0x01: | 
|  | 777 | description += "has TS0"; | 
|  | 778 | break; | 
|  | 779 | case 0x02: | 
|  | 780 | description += "has TS1"; | 
|  | 781 | break; | 
|  | 782 | case 0x03: | 
|  | 783 | description += "has TS0 and TS1"; | 
|  | 784 | break; | 
|  | 785 | } | 
|  | 786 | description += " exceeding their high temperature threshold"; | 
|  | 787 | break; | 
|  | 788 | case dimm_status::SPD_HUB_HIGH_TEMP: | 
|  | 789 | if (byte012 == 0x01) | 
|  | 790 | { | 
|  | 791 | description += "has SPD/HUB high temp condition"; | 
|  | 792 | } | 
|  | 793 | break; | 
|  | 794 | default: | 
|  | 795 | description += "has unsupported status " + | 
|  | 796 | std::to_string(byte3); | 
|  | 797 | break; | 
|  | 798 | } | 
|  | 799 | } | 
|  | 800 |  | 
|  | 801 | // Log to Redfish event | 
|  | 802 | sendJournalRedfish(description, logLevel); | 
|  | 803 | } | 
|  | 804 |  | 
|  | 805 | void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId, | 
|  | 806 | uint32_t presentReading) | 
|  | 807 | { | 
|  | 808 | log_level logLevel{log_level::WARNING}; | 
|  | 809 | std::string description; | 
|  | 810 | uint8_t byte3 = (presentReading & 0xff000000) >> 24; | 
|  | 811 | uint32_t byte012 = presentReading & 0xffffff; | 
|  | 812 |  | 
|  | 813 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 814 |  | 
|  | 815 | description += "DDR "; | 
|  | 816 | if (ddrStatusToMsgMap.contains(byte3)) | 
|  | 817 | { | 
|  | 818 | if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR) | 
|  | 819 | { | 
|  | 820 | logLevel = log_level::OK; | 
|  | 821 | } | 
|  | 822 |  | 
|  | 823 | description += ddrStatusToMsgMap[byte3]; | 
|  | 824 |  | 
|  | 825 | if (byte3 == ddr_status::CONFIGURATION_FAILURE || | 
|  | 826 | byte3 == ddr_status::TRAINING_FAILURE) | 
|  | 827 | { | 
|  | 828 | // List out failed DIMMs | 
|  | 829 | description += dimmIdxsToString(byte012); | 
|  | 830 | } | 
|  | 831 | } | 
|  | 832 | else | 
|  | 833 | { | 
|  | 834 | description += "has unsupported status " + std::to_string(byte3); | 
|  | 835 | } | 
|  | 836 |  | 
|  | 837 | // Log to Redfish event | 
|  | 838 | sendJournalRedfish(description, logLevel); | 
|  | 839 | } | 
|  | 840 |  | 
| Chau Ly | 4cca3dc | 2024-10-03 09:07:09 +0000 | [diff] [blame] | 841 | void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId, | 
|  | 842 | uint32_t presentReading) | 
|  | 843 | { | 
|  | 844 | log_level logLevel{log_level::WARNING}; | 
|  | 845 | std::string description; | 
|  | 846 | std::stringstream strStream; | 
|  | 847 |  | 
|  | 848 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 849 |  | 
|  | 850 | VRDStatus_t status{presentReading}; | 
|  | 851 |  | 
|  | 852 | if (status.bits.warning && status.bits.critical) | 
|  | 853 | { | 
|  | 854 | description += "A VR warning and a VR critical"; | 
|  | 855 | logLevel = log_level::CRITICAL; | 
|  | 856 | } | 
|  | 857 | else | 
|  | 858 | { | 
|  | 859 | if (status.bits.warning) | 
|  | 860 | { | 
|  | 861 | description += "A VR warning"; | 
|  | 862 | } | 
|  | 863 | else if (status.bits.critical) | 
|  | 864 | { | 
|  | 865 | description += "A VR critical"; | 
|  | 866 | logLevel = log_level::CRITICAL; | 
|  | 867 | } | 
|  | 868 | else | 
|  | 869 | { | 
|  | 870 | description += "No VR warning or critical"; | 
|  | 871 | logLevel = log_level::OK; | 
|  | 872 | } | 
|  | 873 | } | 
|  | 874 | description += " condition observed"; | 
|  | 875 |  | 
|  | 876 | strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex | 
|  | 877 | << std::setw(2) | 
|  | 878 | << static_cast<uint32_t>(status.bits.vr_status_byte_high) | 
|  | 879 | << "; VR status byte low is 0x" << std::setw(2) | 
|  | 880 | << static_cast<uint32_t>(status.bits.vr_status_byte_low) | 
|  | 881 | << "; Reading is 0x" << std::setw(2) | 
|  | 882 | << static_cast<uint32_t>(presentReading) << ";"; | 
|  | 883 |  | 
|  | 884 | description += strStream.str(); | 
|  | 885 |  | 
|  | 886 | // Log to Redfish event | 
|  | 887 | sendJournalRedfish(description, logLevel); | 
|  | 888 | } | 
|  | 889 |  | 
| Chau Ly | b01357f | 2024-10-17 09:18:01 +0000 | [diff] [blame] | 890 | void OemEventManager::handleNumericWatchdogEvent( | 
|  | 891 | pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading) | 
|  | 892 | { | 
|  | 893 | std::string description; | 
|  | 894 | log_level logLevel = log_level::CRITICAL; | 
|  | 895 |  | 
|  | 896 | description += prefixMsgStrCreation(tid, sensorId); | 
|  | 897 |  | 
|  | 898 | if (presentReading & 0x01) | 
|  | 899 | { | 
|  | 900 | description += "Global watchdog expired;"; | 
|  | 901 | } | 
|  | 902 | if (presentReading & 0x02) | 
|  | 903 | { | 
|  | 904 | description += "Secure watchdog expired;"; | 
|  | 905 | } | 
|  | 906 | if (presentReading & 0x04) | 
|  | 907 | { | 
|  | 908 | description += "Non-secure watchdog expired;"; | 
|  | 909 | } | 
|  | 910 |  | 
|  | 911 | // Log to Redfish event | 
|  | 912 | sendJournalRedfish(description, logLevel); | 
|  | 913 | } | 
|  | 914 |  | 
| Dung Cao | 72c8aa0 | 2023-11-22 02:31:41 +0000 | [diff] [blame] | 915 | int OemEventManager::processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId, | 
|  | 916 | const uint8_t* eventData, | 
|  | 917 | size_t eventDataSize) | 
|  | 918 | { | 
|  | 919 | EFI_AMPERE_ERROR_DATA ampHdr; | 
|  | 920 |  | 
|  | 921 | decodeCperRecord(eventData, eventDataSize, &Hdr); | 
|  | 922 |  | 
|  | 923 | addCperSELLog(tid, eventId, &Hdr); | 
|  | 924 |  | 
| Thu Nguyen | 4b53755 | 2024-11-19 08:43:23 +0000 | [diff] [blame] | 925 | /* isBert at bit 12 of TypeId */ | 
|  | 926 | if (ampHdr.TypeId & 0x0800) | 
|  | 927 | { | 
|  | 928 | lg2::info("Ampere SoC BERT is triggered."); | 
|  | 929 | std::variant<std::string> value( | 
|  | 930 | "com.ampere.CrashCapture.Trigger.TriggerAction.Bert"); | 
|  | 931 | try | 
|  | 932 | { | 
|  | 933 | auto& bus = pldm::utils::DBusHandler::getBus(); | 
|  | 934 | auto method = | 
|  | 935 | bus.new_method_call("com.ampere.CrashCapture.Trigger", | 
|  | 936 | "/com/ampere/crashcapture/trigger", | 
|  | 937 | pldm::utils::dbusProperties, "Set"); | 
|  | 938 | method.append("com.ampere.CrashCapture.Trigger", "TriggerActions", | 
|  | 939 | value); | 
|  | 940 | bus.call_noreply(method); | 
|  | 941 | } | 
|  | 942 | catch (const std::exception& e) | 
|  | 943 | { | 
|  | 944 | lg2::error("call BERT trigger error - {ERROR}", "ERROR", e); | 
|  | 945 | } | 
|  | 946 | } | 
|  | 947 |  | 
| Dung Cao | 72c8aa0 | 2023-11-22 02:31:41 +0000 | [diff] [blame] | 948 | return PLDM_SUCCESS; | 
|  | 949 | } | 
|  | 950 |  | 
| Thu Nguyen | 79f9ff6 | 2024-11-22 03:36:27 +0000 | [diff] [blame] | 951 | int OemEventManager::handlepldmMessagePollEvent( | 
|  | 952 | const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, | 
|  | 953 | pldm_tid_t tid, size_t eventDataOffset) | 
|  | 954 | { | 
|  | 955 | /* This OEM event handler is only used for SoC terminus*/ | 
|  | 956 | if (!tidToSocketNameMap.contains(tid)) | 
|  | 957 | { | 
|  | 958 | return PLDM_SUCCESS; | 
|  | 959 | } | 
|  | 960 |  | 
|  | 961 | auto eventData = | 
|  | 962 | reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; | 
|  | 963 | auto eventDataSize = payloadLength - eventDataOffset; | 
|  | 964 |  | 
|  | 965 | pldm_message_poll_event poll_event{}; | 
|  | 966 | auto rc = decode_pldm_message_poll_event_data(eventData, eventDataSize, | 
|  | 967 | &poll_event); | 
|  | 968 | if (rc) | 
|  | 969 | { | 
|  | 970 | lg2::error("Failed to decode PldmMessagePollEvent event, error {RC} ", | 
|  | 971 | "RC", rc); | 
|  | 972 | return rc; | 
|  | 973 | } | 
|  | 974 |  | 
|  | 975 | auto sensorID = poll_event.event_id; | 
|  | 976 | /* The UE errors */ | 
|  | 977 | if (rasUESensorIDs.contains(sensorID)) | 
|  | 978 | { | 
|  | 979 | pldm::utils::DBusMapping dbusMapping{ | 
|  | 980 | "/xyz/openbmc_project/led/groups/ras_ue_fault", | 
|  | 981 | "xyz.openbmc_project.Led.Group", "Asserted", "bool"}; | 
|  | 982 | try | 
|  | 983 | { | 
|  | 984 | pldm::utils::DBusHandler().setDbusProperty( | 
|  | 985 | dbusMapping, pldm::utils::PropertyValue{bool(true)}); | 
|  | 986 | } | 
|  | 987 | catch (const std::exception& e) | 
|  | 988 | { | 
|  | 989 | lg2::error( | 
|  | 990 | "Failed to set the RAS UE LED terminus ID {TID} sensor ID {SENSORID} - errors {ERROR}", | 
|  | 991 | "TID", tid, "SENSORID", sensorID, "ERROR", e); | 
|  | 992 | } | 
|  | 993 | } | 
|  | 994 |  | 
|  | 995 | return PLDM_SUCCESS; | 
|  | 996 | } | 
|  | 997 |  | 
| Dung Cao | 4a50383 | 2025-01-08 03:45:17 +0000 | [diff] [blame] | 998 | exec::task<int> OemEventManager::oemPollForPlatformEvent(pldm_tid_t tid) | 
|  | 999 | { | 
|  | 1000 | uint64_t t0 = 0; | 
|  | 1001 |  | 
|  | 1002 | /* This OEM event handler is only used for SoC terminus */ | 
|  | 1003 | if (!tidToSocketNameMap.contains(tid)) | 
|  | 1004 | { | 
|  | 1005 | co_return PLDM_SUCCESS; | 
|  | 1006 | } | 
|  | 1007 |  | 
|  | 1008 | if (!timeStampMap.contains(tid)) | 
|  | 1009 | { | 
|  | 1010 | sd_event_now(event.get(), CLOCK_MONOTONIC, &t0); | 
|  | 1011 | timeStampMap.emplace(std::make_pair(tid, t0)); | 
|  | 1012 | } | 
|  | 1013 | else | 
|  | 1014 | { | 
|  | 1015 | sd_event_now(event.get(), CLOCK_MONOTONIC, &t0); | 
|  | 1016 | uint64_t elapsed = t0 - timeStampMap[tid]; | 
|  | 1017 | if (elapsed >= NORMAL_EVENT_POLLING_TIME) | 
|  | 1018 | { | 
|  | 1019 | co_await manager->pollForPlatformEvent(tid, 0, 0); | 
|  | 1020 | timeStampMap[tid] = t0; | 
|  | 1021 | } | 
|  | 1022 | } | 
|  | 1023 |  | 
|  | 1024 | co_return PLDM_SUCCESS; | 
|  | 1025 | } | 
| Chau Ly | a743e38 | 2024-10-26 11:12:22 +0000 | [diff] [blame] | 1026 | } // namespace oem_ampere | 
|  | 1027 | } // namespace pldm |