blob: d2ad833cdb82d65c8f88c78182ec4a1dc0ea3bd2 [file] [log] [blame]
Eddie James2f9f9bb2021-09-20 14:26:31 -05001#include "occ_ffdc.hpp"
2
Eddie James2f9f9bb2021-09-20 14:26:31 -05003#include "utils.hpp"
4
5#include <errno.h>
6#include <fcntl.h>
7#include <stdio.h>
8#include <sys/ioctl.h>
9#include <unistd.h>
10
Chris Cain2ccc3f62022-10-05 14:40:07 -050011#include <nlohmann/json.hpp>
Eddie James2f9f9bb2021-09-20 14:26:31 -050012#include <org/open_power/OCC/Device/error.hpp>
Patrick Williamsd8aab2a2023-04-21 11:15:54 -050013#include <phosphor-logging/elog-errors.hpp>
Eddie James2f9f9bb2021-09-20 14:26:31 -050014#include <phosphor-logging/elog.hpp>
Chris Cain37abe9b2024-10-31 17:20:31 -050015#include <phosphor-logging/lg2.hpp>
Eddie James2f9f9bb2021-09-20 14:26:31 -050016#include <phosphor-logging/log.hpp>
17#include <xyz/openbmc_project/Common/error.hpp>
18#include <xyz/openbmc_project/Logging/Create/server.hpp>
19
20namespace open_power
21{
22namespace occ
23{
24
25static constexpr size_t max_ffdc_size = 8192;
26static constexpr size_t sbe_status_header_size = 8;
27
28static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
Eddie James9789e712022-05-25 15:43:40 -050029static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Eddie James2f9f9bb2021-09-20 14:26:31 -050030
31using namespace phosphor::logging;
32using namespace sdbusplus::org::open_power::OCC::Device::Error;
33using InternalFailure =
34 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
35
36uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
37 int fd)
38{
39 uint32_t plid = 0;
40 std::vector<std::tuple<
41 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
42 uint8_t, uint8_t, sdbusplus::message::unix_fd>>
43 pelFFDCInfo;
44
Chris Cain37abe9b2024-10-31 17:20:31 -050045 lg2::info("Creating PEL for OCC{INST} with SBE FFDC: {PATH} - SRC6: {SRC}",
46 "INST", src6 >> 16, "PATH", path, "SRC", lg2::hex, src6);
Eddie James2f9f9bb2021-09-20 14:26:31 -050047
48 if (fd > 0)
49 {
50 pelFFDCInfo.push_back(std::make_tuple(
51 sdbusplus::xyz::openbmc_project::Logging::server::Create::
52 FFDCFormat::Custom,
53 static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd));
54 }
55
Chris Cain2ccc3f62022-10-05 14:40:07 -050056 // Add journal traces to PEL FFDC
Patrick Williamsd7542c82024-08-16 15:20:28 -040057 auto occJournalFile =
58 addJournalEntries(pelFFDCInfo, "openpower-occ-control", 25);
Chris Cain2ccc3f62022-10-05 14:40:07 -050059
Eddie James2f9f9bb2021-09-20 14:26:31 -050060 std::map<std::string, std::string> additionalData;
61 additionalData.emplace("SRC6", std::to_string(src6));
62 additionalData.emplace("_PID", std::to_string(getpid()));
63 additionalData.emplace("SBE_ERR_MSG", msg);
64
Eddie James2f9f9bb2021-09-20 14:26:31 -050065 auto& bus = utils::getBus();
66
67 try
68 {
Patrick Williamsd7542c82024-08-16 15:20:28 -040069 std::string service =
70 utils::getService(loggingObjectPath, opLoggingInterface);
71 auto method =
72 bus.new_method_call(service.c_str(), loggingObjectPath,
73 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain2ccc3f62022-10-05 14:40:07 -050074
Chris Cainf9fd1e52022-10-04 13:39:24 -050075 // Set level to Notice (Informational). Error should trigger an OCC
76 // reset and if it does not recover, HTMGT/HBRT will create an
77 // unrecoverable error.
Eddie James2f9f9bb2021-09-20 14:26:31 -050078 auto level =
79 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
80 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cainf9fd1e52022-10-04 13:39:24 -050081 Notice);
Chris Cain2ccc3f62022-10-05 14:40:07 -050082
Eddie James2f9f9bb2021-09-20 14:26:31 -050083 method.append(path, level, additionalData, pelFFDCInfo);
84 auto response = bus.call(method);
85 std::tuple<uint32_t, uint32_t> reply = {0, 0};
86
87 response.read(reply);
88 plid = std::get<1>(reply);
89 }
Patrick Williamsaf408082022-07-22 19:26:54 -050090 catch (const sdbusplus::exception_t& e)
Eddie James2f9f9bb2021-09-20 14:26:31 -050091 {
Chris Cain37abe9b2024-10-31 17:20:31 -050092 lg2::error("Failed to create PEL: {ERR}", "ERR", e.what());
Eddie James2f9f9bb2021-09-20 14:26:31 -050093 }
94
95 return plid;
96}
97
Eddie James9789e712022-05-25 15:43:40 -050098void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
Chris Cain3ece5b92025-01-10 16:06:31 -060099 const char* callout, const bool isInventoryCallout)
Eddie James9789e712022-05-25 15:43:40 -0500100{
101 std::map<std::string, std::string> additionalData;
102
103 additionalData.emplace("_PID", std::to_string(getpid()));
104
105 if (err)
106 {
107 additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
108 }
109
Chris Cain3ece5b92025-01-10 16:06:31 -0600110 lg2::info("Creating OCC Reset PEL for OCC{INST}: {PATH}", "INST", instance,
111 "PATH", path);
112
Eddie James9789e712022-05-25 15:43:40 -0500113 if (callout)
114 {
Chris Cain3ece5b92025-01-10 16:06:31 -0600115 if (isInventoryCallout)
116 {
117 lg2::info("adding inventory callout path {COPATH}", "COPATH",
118 std::string(callout));
119 additionalData.emplace("CALLOUT_INVENTORY_PATH",
120 std::string(callout));
121 }
122 else
123 {
124 lg2::info("adding device callout path {COPATH}, errno:{ERRNO}",
125 "COPATH", std::string(callout), "ERRNO", err);
126 additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
127 }
Eddie James9789e712022-05-25 15:43:40 -0500128 }
129
130 additionalData.emplace("OCC", std::to_string(instance));
131
132 auto& bus = utils::getBus();
133
134 try
135 {
Chris Cain2ccc3f62022-10-05 14:40:07 -0500136 FFDCFiles ffdc;
137 // Add journal traces to PEL FFDC
Patrick Williamsd7542c82024-08-16 15:20:28 -0400138 auto occJournalFile =
139 addJournalEntries(ffdc, "openpower-occ-control", 25);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500140
Patrick Williamsd7542c82024-08-16 15:20:28 -0400141 std::string service =
142 utils::getService(loggingObjectPath, opLoggingInterface);
143 auto method =
144 bus.new_method_call(service.c_str(), loggingObjectPath,
145 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain2ccc3f62022-10-05 14:40:07 -0500146
Chris Cainf9fd1e52022-10-04 13:39:24 -0500147 // Set level to Notice (Informational). Error should trigger an OCC
148 // reset and if it does not recover, HTMGT/HBRT will create an
149 // unrecoverable error.
Eddie James9789e712022-05-25 15:43:40 -0500150 auto level =
151 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
152 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cainf9fd1e52022-10-04 13:39:24 -0500153 Notice);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500154
155 method.append(path, level, additionalData, ffdc);
Eddie James9789e712022-05-25 15:43:40 -0500156 bus.call(method);
157 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500158 catch (const sdbusplus::exception_t& e)
Eddie James9789e712022-05-25 15:43:40 -0500159 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500160 lg2::error("Failed to create OCC Reset PEL: {ERR}", "ERR", e.what());
Eddie James9789e712022-05-25 15:43:40 -0500161 }
162}
163
Chris Cain2ccc3f62022-10-05 14:40:07 -0500164// Reads the SBE FFDC file and create an error log
Eddie James2f9f9bb2021-09-20 14:26:31 -0500165void FFDC::analyzeEvent()
166{
167 int tfd = -1;
168 size_t total = 0;
169 auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
170 while (total < max_ffdc_size)
171 {
172 auto r = read(fd, data.get() + total, max_ffdc_size - total);
173 if (r < 0)
174 {
175 elog<ReadFailure>(
176 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
177 CALLOUT_ERRNO(errno),
178 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
179 CALLOUT_DEVICE_PATH(file.c_str()));
180 return;
181 }
182 if (!r)
183 {
184 break;
185 }
186 total += r;
187 }
188
189 lseek(fd, 0, SEEK_SET);
190
Eddie James338748b2021-10-29 10:06:50 -0500191 if (!total)
192 {
193 // no error
194 return;
195 }
196
Eddie James2f9f9bb2021-09-20 14:26:31 -0500197 uint32_t src6 = instance << 16;
198 src6 |= *(data.get() + 2) << 8;
199 src6 |= *(data.get() + 3);
200
201 if (total > sbe_status_header_size)
202 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400203 std::string templateString =
204 fs::temp_directory_path() / "OCC_FFDC_XXXXXX";
Eddie James2f9f9bb2021-09-20 14:26:31 -0500205 tfd = mkostemp(templateString.data(), O_RDWR);
206 if (tfd < 0)
207 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500208 lg2::error("Couldn't create temporary FFDC file");
Eddie James2f9f9bb2021-09-20 14:26:31 -0500209 }
210 else
211 {
212 temporaryFiles.emplace_back(templateString, tfd);
213 size_t written = sbe_status_header_size;
214 while (written < total)
215 {
216 auto r = write(tfd, data.get() + written, total - written);
217 if (r < 0)
218 {
219 close(temporaryFiles.back().second);
220 fs::remove(temporaryFiles.back().first);
221 temporaryFiles.pop_back();
222 tfd = -1;
Chris Cain37abe9b2024-10-31 17:20:31 -0500223 lg2::error("Couldn't write temporary FFDC file");
Eddie James2f9f9bb2021-09-20 14:26:31 -0500224 break;
225 }
226 if (!r)
227 {
228 break;
229 }
230 written += r;
231 }
232 }
233 }
234
235 createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
236 "SBE command reported error", tfd);
237}
238
Chris Cain2ccc3f62022-10-05 14:40:07 -0500239// Create file with the latest journal entries for specified executable
Patrick Williamsd7542c82024-08-16 15:20:28 -0400240std::unique_ptr<FFDCFile> FFDC::addJournalEntries(
241 FFDCFiles& fileList, const std::string& executable, unsigned int lines)
Chris Cain2ccc3f62022-10-05 14:40:07 -0500242{
243 auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));
244 if (journalFile && journalFile->fd() != -1)
245 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500246 lg2::debug(
247 "addJournalEntries: Added up to {NUM} journal entries for {APP}",
248 "NUM", lines, "APP", executable);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500249 fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
250 }
251 else
252 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500253 lg2::error("addJournalEntries: Failed to add journal entries for {APP}",
254 "APP", executable);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500255 }
256 return journalFile;
257}
258
259// Write JSON data into FFDC file and return the file
260std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
261{
262 std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
263 auto fd = mkostemp(tmpFile.data(), O_RDWR);
264 if (fd != -1)
265 {
266 auto jsonString = ffdcData.dump();
267 auto rc = write(fd, jsonString.data(), jsonString.size());
268 close(fd);
269 if (rc != -1)
270 {
271 fs::path jsonFile{tmpFile};
272 return std::make_unique<FFDCFile>(jsonFile);
273 }
274 else
275 {
276 auto e = errno;
Chris Cain37abe9b2024-10-31 17:20:31 -0500277 lg2::error(
278 "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={ERR}",
279 "ERR", e);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500280 }
281 }
282 else
283 {
284 auto e = errno;
Chris Cain37abe9b2024-10-31 17:20:31 -0500285 lg2::error("makeJsonFFDCFile: Failed called to mkostemp, errno={ERR}",
286 "ERR", e);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500287 }
288 return nullptr;
289}
290
291// Collect the latest journal entries for a specified executable
292nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
293{
294 // Sleep 100ms; otherwise recent journal entries sometimes not available
295 using namespace std::chrono_literals;
296 std::this_thread::sleep_for(100ms);
297
298 std::vector<std::string> entries;
299
300 // Open the journal
301 sd_journal* journal;
302 int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
303 if (rc < 0)
304 {
305 // Build one line string containing field values
306 entries.push_back("[Internal error: sd_journal_open(), rc=" +
307 std::string(strerror(rc)) + "]");
308 return nlohmann::json(entries);
309 }
310
311 // Create object to automatically close journal
312 JournalCloser closer{journal};
313
314 // Add match so we only loop over entries with specified field value
315 std::string field{"SYSLOG_IDENTIFIER"};
316 std::string match{field + '=' + executable};
317 rc = sd_journal_add_match(journal, match.c_str(), 0);
318 if (rc < 0)
319 {
320 // Build one line string containing field values
321 entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
322 std::string(strerror(rc)) + "]");
323 }
324 else
325 {
326 int count{1};
327 entries.reserve(numLines);
328 std::string syslogID, pid, message, timeStamp;
329
330 // Loop through journal entries from newest to oldest
331 SD_JOURNAL_FOREACH_BACKWARDS(journal)
332 {
333 // Get relevant journal entry fields
334 timeStamp = getTimeStamp(journal);
335 syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
336 pid = getFieldValue(journal, "_PID");
337 message = getFieldValue(journal, "MESSAGE");
338
339 // Build one line string containing field values
Patrick Williamsd7542c82024-08-16 15:20:28 -0400340 entries.push_back(
341 timeStamp + " " + syslogID + "[" + pid + "]: " + message);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500342
343 // Stop after number of lines was read
344 if (count++ >= numLines)
345 {
346 break;
347 }
348 }
349 }
350
351 // put the journal entries in chronological order
352 std::reverse(entries.begin(), entries.end());
353
354 return nlohmann::json(entries);
355}
356
357std::string FFDC::getTimeStamp(sd_journal* journal)
358{
359 // Get realtime (wallclock) timestamp of current journal entry. The
360 // timestamp is in microseconds since the epoch.
361 uint64_t usec{0};
362 int rc = sd_journal_get_realtime_usec(journal, &usec);
363 if (rc < 0)
364 {
365 return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
366 std::string(strerror(rc)) + "]";
367 }
368
369 // Convert to number of seconds since the epoch
370 time_t secs = usec / 1000000;
371
372 // Convert seconds to tm struct required by strftime()
373 struct tm* timeStruct = localtime(&secs);
374 if (timeStruct == nullptr)
375 {
376 return "[Internal error: localtime() returned nullptr]";
377 }
378
379 // Convert tm struct into a date/time string
380 char timeStamp[80];
381 strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);
382
383 return timeStamp;
384}
385
386std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
387{
388 std::string value{};
389
390 // Get field data from current journal entry
391 const void* data{nullptr};
392 size_t length{0};
393 int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
394 if (rc < 0)
395 {
396 if (-rc == ENOENT)
397 {
398 // Current entry does not include this field; return empty value
399 return value;
400 }
401 else
402 {
403 return "[Internal error: sd_journal_get_data() rc=" +
404 std::string(strerror(rc)) + "]";
405 }
406 }
407
408 // Get value from field data. Field data in format "FIELD=value".
409 std::string dataString{static_cast<const char*>(data), length};
410 std::string::size_type pos = dataString.find('=');
411 if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
412 {
413 // Value is substring after the '='
414 value = dataString.substr(pos + 1);
415 }
416
417 return value;
418}
419
420// Create temporary file that will automatically get removed when destructed
421FFDCFile::FFDCFile(const fs::path& name) :
422 _fd(open(name.c_str(), O_RDONLY)), _name(name)
423{
424 if (_fd() == -1)
425 {
426 auto e = errno;
Chris Cain37abe9b2024-10-31 17:20:31 -0500427 lg2::error("FFDCFile: Could not open FFDC file {FILE}. errno {ERR}",
428 "FILE", _name.string(), "ERR", e);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500429 }
430}
431
Eddie James2f9f9bb2021-09-20 14:26:31 -0500432} // namespace occ
433} // namespace open_power