Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 1 | #include "occ_ffdc.hpp" |
| 2 | |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 3 | #include "utils.hpp" |
| 4 | |
| 5 | #include <errno.h> |
| 6 | #include <fcntl.h> |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 7 | #include <fmt/core.h> |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 8 | #include <stdio.h> |
| 9 | #include <sys/ioctl.h> |
| 10 | #include <unistd.h> |
| 11 | |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 12 | #include <nlohmann/json.hpp> |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 13 | #include <org/open_power/OCC/Device/error.hpp> |
Patrick Williams | d8aab2a | 2023-04-21 11:15:54 -0500 | [diff] [blame] | 14 | #include <phosphor-logging/elog-errors.hpp> |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 15 | #include <phosphor-logging/elog.hpp> |
| 16 | #include <phosphor-logging/log.hpp> |
| 17 | #include <xyz/openbmc_project/Common/error.hpp> |
| 18 | #include <xyz/openbmc_project/Logging/Create/server.hpp> |
| 19 | |
| 20 | namespace open_power |
| 21 | { |
| 22 | namespace occ |
| 23 | { |
| 24 | |
| 25 | static constexpr size_t max_ffdc_size = 8192; |
| 26 | static constexpr size_t sbe_status_header_size = 8; |
| 27 | |
| 28 | static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging"; |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 29 | static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL"; |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 30 | |
| 31 | using namespace phosphor::logging; |
| 32 | using namespace sdbusplus::org::open_power::OCC::Device::Error; |
| 33 | using InternalFailure = |
| 34 | sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; |
| 35 | |
| 36 | uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg, |
| 37 | int fd) |
| 38 | { |
| 39 | uint32_t plid = 0; |
| 40 | std::vector<std::tuple< |
| 41 | sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat, |
| 42 | uint8_t, uint8_t, sdbusplus::message::unix_fd>> |
| 43 | pelFFDCInfo; |
| 44 | |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 45 | log<level::INFO>( |
| 46 | fmt::format("Creating PEL for OCC{} with SBE FFDC: {} - SRC6: 0x{:08X}", |
| 47 | src6 >> 16, path, src6) |
| 48 | .c_str()); |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 49 | |
| 50 | if (fd > 0) |
| 51 | { |
| 52 | pelFFDCInfo.push_back(std::make_tuple( |
| 53 | sdbusplus::xyz::openbmc_project::Logging::server::Create:: |
| 54 | FFDCFormat::Custom, |
| 55 | static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd)); |
| 56 | } |
| 57 | |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 58 | // Add journal traces to PEL FFDC |
Patrick Williams | a49c987 | 2023-05-10 07:50:35 -0500 | [diff] [blame] | 59 | auto occJournalFile = addJournalEntries(pelFFDCInfo, |
| 60 | "openpower-occ-control", 25); |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 61 | |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 62 | std::map<std::string, std::string> additionalData; |
| 63 | additionalData.emplace("SRC6", std::to_string(src6)); |
| 64 | additionalData.emplace("_PID", std::to_string(getpid())); |
| 65 | additionalData.emplace("SBE_ERR_MSG", msg); |
| 66 | |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 67 | auto& bus = utils::getBus(); |
| 68 | |
| 69 | try |
| 70 | { |
Patrick Williams | a49c987 | 2023-05-10 07:50:35 -0500 | [diff] [blame] | 71 | std::string service = utils::getService(loggingObjectPath, |
| 72 | opLoggingInterface); |
| 73 | auto method = bus.new_method_call(service.c_str(), loggingObjectPath, |
| 74 | opLoggingInterface, |
| 75 | "CreatePELWithFFDCFiles"); |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 76 | |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 77 | // Set level to Notice (Informational). Error should trigger an OCC |
| 78 | // reset and if it does not recover, HTMGT/HBRT will create an |
| 79 | // unrecoverable error. |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 80 | auto level = |
| 81 | sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( |
| 82 | sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 83 | Notice); |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 84 | |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 85 | method.append(path, level, additionalData, pelFFDCInfo); |
| 86 | auto response = bus.call(method); |
| 87 | std::tuple<uint32_t, uint32_t> reply = {0, 0}; |
| 88 | |
| 89 | response.read(reply); |
| 90 | plid = std::get<1>(reply); |
| 91 | } |
Patrick Williams | af40808 | 2022-07-22 19:26:54 -0500 | [diff] [blame] | 92 | catch (const sdbusplus::exception_t& e) |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 93 | { |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 94 | log<level::ERR>( |
| 95 | fmt::format("Failed to create PEL: {}", e.what()).c_str()); |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 96 | } |
| 97 | |
| 98 | return plid; |
| 99 | } |
| 100 | |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 101 | void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err, |
| 102 | const char* callout) |
| 103 | { |
| 104 | std::map<std::string, std::string> additionalData; |
| 105 | |
| 106 | additionalData.emplace("_PID", std::to_string(getpid())); |
| 107 | |
| 108 | if (err) |
| 109 | { |
| 110 | additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err)); |
| 111 | } |
| 112 | |
| 113 | if (callout) |
| 114 | { |
| 115 | additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout)); |
| 116 | } |
| 117 | |
| 118 | additionalData.emplace("OCC", std::to_string(instance)); |
| 119 | |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 120 | log<level::INFO>( |
| 121 | fmt::format("Creating OCC Reset PEL for OCC{}: {}", instance, path) |
| 122 | .c_str()); |
| 123 | |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 124 | auto& bus = utils::getBus(); |
| 125 | |
| 126 | try |
| 127 | { |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 128 | FFDCFiles ffdc; |
| 129 | // Add journal traces to PEL FFDC |
Patrick Williams | a49c987 | 2023-05-10 07:50:35 -0500 | [diff] [blame] | 130 | auto occJournalFile = addJournalEntries(ffdc, "openpower-occ-control", |
| 131 | 25); |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 132 | |
Patrick Williams | a49c987 | 2023-05-10 07:50:35 -0500 | [diff] [blame] | 133 | std::string service = utils::getService(loggingObjectPath, |
| 134 | opLoggingInterface); |
| 135 | auto method = bus.new_method_call(service.c_str(), loggingObjectPath, |
| 136 | opLoggingInterface, |
| 137 | "CreatePELWithFFDCFiles"); |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 138 | |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 139 | // Set level to Notice (Informational). Error should trigger an OCC |
| 140 | // reset and if it does not recover, HTMGT/HBRT will create an |
| 141 | // unrecoverable error. |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 142 | auto level = |
| 143 | sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( |
| 144 | sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 145 | Notice); |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 146 | |
| 147 | method.append(path, level, additionalData, ffdc); |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 148 | bus.call(method); |
| 149 | } |
Patrick Williams | af40808 | 2022-07-22 19:26:54 -0500 | [diff] [blame] | 150 | catch (const sdbusplus::exception_t& e) |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 151 | { |
| 152 | log<level::ERR>( |
Chris Cain | f9fd1e5 | 2022-10-04 13:39:24 -0500 | [diff] [blame] | 153 | fmt::format("Failed to create OCC Reset PEL: {}", e.what()) |
| 154 | .c_str()); |
Eddie James | 9789e71 | 2022-05-25 15:43:40 -0500 | [diff] [blame] | 155 | } |
| 156 | } |
| 157 | |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 158 | // Reads the SBE FFDC file and create an error log |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 159 | void FFDC::analyzeEvent() |
| 160 | { |
| 161 | int tfd = -1; |
| 162 | size_t total = 0; |
| 163 | auto data = std::make_unique<unsigned char[]>(max_ffdc_size); |
| 164 | while (total < max_ffdc_size) |
| 165 | { |
| 166 | auto r = read(fd, data.get() + total, max_ffdc_size - total); |
| 167 | if (r < 0) |
| 168 | { |
| 169 | elog<ReadFailure>( |
| 170 | phosphor::logging::org::open_power::OCC::Device::ReadFailure:: |
| 171 | CALLOUT_ERRNO(errno), |
| 172 | phosphor::logging::org::open_power::OCC::Device::ReadFailure:: |
| 173 | CALLOUT_DEVICE_PATH(file.c_str())); |
| 174 | return; |
| 175 | } |
| 176 | if (!r) |
| 177 | { |
| 178 | break; |
| 179 | } |
| 180 | total += r; |
| 181 | } |
| 182 | |
| 183 | lseek(fd, 0, SEEK_SET); |
| 184 | |
Eddie James | 338748b | 2021-10-29 10:06:50 -0500 | [diff] [blame] | 185 | if (!total) |
| 186 | { |
| 187 | // no error |
| 188 | return; |
| 189 | } |
| 190 | |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 191 | uint32_t src6 = instance << 16; |
| 192 | src6 |= *(data.get() + 2) << 8; |
| 193 | src6 |= *(data.get() + 3); |
| 194 | |
| 195 | if (total > sbe_status_header_size) |
| 196 | { |
Patrick Williams | a49c987 | 2023-05-10 07:50:35 -0500 | [diff] [blame] | 197 | std::string templateString = fs::temp_directory_path() / |
| 198 | "OCC_FFDC_XXXXXX"; |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 199 | tfd = mkostemp(templateString.data(), O_RDWR); |
| 200 | if (tfd < 0) |
| 201 | { |
| 202 | log<level::ERR>("Couldn't create temporary FFDC file"); |
| 203 | } |
| 204 | else |
| 205 | { |
| 206 | temporaryFiles.emplace_back(templateString, tfd); |
| 207 | size_t written = sbe_status_header_size; |
| 208 | while (written < total) |
| 209 | { |
| 210 | auto r = write(tfd, data.get() + written, total - written); |
| 211 | if (r < 0) |
| 212 | { |
| 213 | close(temporaryFiles.back().second); |
| 214 | fs::remove(temporaryFiles.back().first); |
| 215 | temporaryFiles.pop_back(); |
| 216 | tfd = -1; |
| 217 | log<level::ERR>("Couldn't write temporary FFDC file"); |
| 218 | break; |
| 219 | } |
| 220 | if (!r) |
| 221 | { |
| 222 | break; |
| 223 | } |
| 224 | written += r; |
| 225 | } |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6, |
| 230 | "SBE command reported error", tfd); |
| 231 | } |
| 232 | |
Chris Cain | 2ccc3f6 | 2022-10-05 14:40:07 -0500 | [diff] [blame] | 233 | // Create file with the latest journal entries for specified executable |
| 234 | std::unique_ptr<FFDCFile> FFDC::addJournalEntries(FFDCFiles& fileList, |
| 235 | const std::string& executable, |
| 236 | unsigned int lines) |
| 237 | { |
| 238 | auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable)); |
| 239 | if (journalFile && journalFile->fd() != -1) |
| 240 | { |
| 241 | log<level::DEBUG>( |
| 242 | fmt::format( |
| 243 | "addJournalEntries: Added up to {} journal entries for {}", |
| 244 | lines, executable) |
| 245 | .c_str()); |
| 246 | fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd()); |
| 247 | } |
| 248 | else |
| 249 | { |
| 250 | log<level::ERR>( |
| 251 | fmt::format( |
| 252 | "addJournalEntries: Failed to add journal entries for {}", |
| 253 | executable) |
| 254 | .c_str()); |
| 255 | } |
| 256 | return journalFile; |
| 257 | } |
| 258 | |
| 259 | // Write JSON data into FFDC file and return the file |
| 260 | std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData) |
| 261 | { |
| 262 | std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX"; |
| 263 | auto fd = mkostemp(tmpFile.data(), O_RDWR); |
| 264 | if (fd != -1) |
| 265 | { |
| 266 | auto jsonString = ffdcData.dump(); |
| 267 | auto rc = write(fd, jsonString.data(), jsonString.size()); |
| 268 | close(fd); |
| 269 | if (rc != -1) |
| 270 | { |
| 271 | fs::path jsonFile{tmpFile}; |
| 272 | return std::make_unique<FFDCFile>(jsonFile); |
| 273 | } |
| 274 | else |
| 275 | { |
| 276 | auto e = errno; |
| 277 | log<level::ERR>( |
| 278 | fmt::format( |
| 279 | "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={}", |
| 280 | e) |
| 281 | .c_str()); |
| 282 | } |
| 283 | } |
| 284 | else |
| 285 | { |
| 286 | auto e = errno; |
| 287 | log<level::ERR>( |
| 288 | fmt::format("makeJsonFFDCFile: Failed called to mkostemp, errno={}", |
| 289 | e) |
| 290 | .c_str()); |
| 291 | } |
| 292 | return nullptr; |
| 293 | } |
| 294 | |
| 295 | // Collect the latest journal entries for a specified executable |
| 296 | nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable) |
| 297 | { |
| 298 | // Sleep 100ms; otherwise recent journal entries sometimes not available |
| 299 | using namespace std::chrono_literals; |
| 300 | std::this_thread::sleep_for(100ms); |
| 301 | |
| 302 | std::vector<std::string> entries; |
| 303 | |
| 304 | // Open the journal |
| 305 | sd_journal* journal; |
| 306 | int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY); |
| 307 | if (rc < 0) |
| 308 | { |
| 309 | // Build one line string containing field values |
| 310 | entries.push_back("[Internal error: sd_journal_open(), rc=" + |
| 311 | std::string(strerror(rc)) + "]"); |
| 312 | return nlohmann::json(entries); |
| 313 | } |
| 314 | |
| 315 | // Create object to automatically close journal |
| 316 | JournalCloser closer{journal}; |
| 317 | |
| 318 | // Add match so we only loop over entries with specified field value |
| 319 | std::string field{"SYSLOG_IDENTIFIER"}; |
| 320 | std::string match{field + '=' + executable}; |
| 321 | rc = sd_journal_add_match(journal, match.c_str(), 0); |
| 322 | if (rc < 0) |
| 323 | { |
| 324 | // Build one line string containing field values |
| 325 | entries.push_back("[Internal error: sd_journal_add_match(), rc=" + |
| 326 | std::string(strerror(rc)) + "]"); |
| 327 | } |
| 328 | else |
| 329 | { |
| 330 | int count{1}; |
| 331 | entries.reserve(numLines); |
| 332 | std::string syslogID, pid, message, timeStamp; |
| 333 | |
| 334 | // Loop through journal entries from newest to oldest |
| 335 | SD_JOURNAL_FOREACH_BACKWARDS(journal) |
| 336 | { |
| 337 | // Get relevant journal entry fields |
| 338 | timeStamp = getTimeStamp(journal); |
| 339 | syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER"); |
| 340 | pid = getFieldValue(journal, "_PID"); |
| 341 | message = getFieldValue(journal, "MESSAGE"); |
| 342 | |
| 343 | // Build one line string containing field values |
| 344 | entries.push_back(timeStamp + " " + syslogID + "[" + pid + |
| 345 | "]: " + message); |
| 346 | |
| 347 | // Stop after number of lines was read |
| 348 | if (count++ >= numLines) |
| 349 | { |
| 350 | break; |
| 351 | } |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | // put the journal entries in chronological order |
| 356 | std::reverse(entries.begin(), entries.end()); |
| 357 | |
| 358 | return nlohmann::json(entries); |
| 359 | } |
| 360 | |
| 361 | std::string FFDC::getTimeStamp(sd_journal* journal) |
| 362 | { |
| 363 | // Get realtime (wallclock) timestamp of current journal entry. The |
| 364 | // timestamp is in microseconds since the epoch. |
| 365 | uint64_t usec{0}; |
| 366 | int rc = sd_journal_get_realtime_usec(journal, &usec); |
| 367 | if (rc < 0) |
| 368 | { |
| 369 | return "[Internal error: sd_journal_get_realtime_usec(), rc=" + |
| 370 | std::string(strerror(rc)) + "]"; |
| 371 | } |
| 372 | |
| 373 | // Convert to number of seconds since the epoch |
| 374 | time_t secs = usec / 1000000; |
| 375 | |
| 376 | // Convert seconds to tm struct required by strftime() |
| 377 | struct tm* timeStruct = localtime(&secs); |
| 378 | if (timeStruct == nullptr) |
| 379 | { |
| 380 | return "[Internal error: localtime() returned nullptr]"; |
| 381 | } |
| 382 | |
| 383 | // Convert tm struct into a date/time string |
| 384 | char timeStamp[80]; |
| 385 | strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct); |
| 386 | |
| 387 | return timeStamp; |
| 388 | } |
| 389 | |
| 390 | std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field) |
| 391 | { |
| 392 | std::string value{}; |
| 393 | |
| 394 | // Get field data from current journal entry |
| 395 | const void* data{nullptr}; |
| 396 | size_t length{0}; |
| 397 | int rc = sd_journal_get_data(journal, field.c_str(), &data, &length); |
| 398 | if (rc < 0) |
| 399 | { |
| 400 | if (-rc == ENOENT) |
| 401 | { |
| 402 | // Current entry does not include this field; return empty value |
| 403 | return value; |
| 404 | } |
| 405 | else |
| 406 | { |
| 407 | return "[Internal error: sd_journal_get_data() rc=" + |
| 408 | std::string(strerror(rc)) + "]"; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | // Get value from field data. Field data in format "FIELD=value". |
| 413 | std::string dataString{static_cast<const char*>(data), length}; |
| 414 | std::string::size_type pos = dataString.find('='); |
| 415 | if ((pos != std::string::npos) && ((pos + 1) < dataString.size())) |
| 416 | { |
| 417 | // Value is substring after the '=' |
| 418 | value = dataString.substr(pos + 1); |
| 419 | } |
| 420 | |
| 421 | return value; |
| 422 | } |
| 423 | |
| 424 | // Create temporary file that will automatically get removed when destructed |
| 425 | FFDCFile::FFDCFile(const fs::path& name) : |
| 426 | _fd(open(name.c_str(), O_RDONLY)), _name(name) |
| 427 | { |
| 428 | if (_fd() == -1) |
| 429 | { |
| 430 | auto e = errno; |
| 431 | log<level::ERR>( |
| 432 | fmt::format("FFDCFile: Could not open FFDC file {}. errno {}", |
| 433 | _name.string(), e) |
| 434 | .c_str()); |
| 435 | } |
| 436 | } |
| 437 | |
Eddie James | 2f9f9bb | 2021-09-20 14:26:31 -0500 | [diff] [blame] | 438 | } // namespace occ |
| 439 | } // namespace open_power |