blob: 131913bce631be0d037448992b167b9f287f6da9 [file] [log] [blame]
Eddie James2f9f9bb2021-09-20 14:26:31 -05001#include "occ_ffdc.hpp"
2
Eddie James2f9f9bb2021-09-20 14:26:31 -05003#include "utils.hpp"
4
5#include <errno.h>
6#include <fcntl.h>
7#include <stdio.h>
8#include <sys/ioctl.h>
9#include <unistd.h>
10
Chris Cain2ccc3f62022-10-05 14:40:07 -050011#include <nlohmann/json.hpp>
Eddie James2f9f9bb2021-09-20 14:26:31 -050012#include <org/open_power/OCC/Device/error.hpp>
Patrick Williamsd8aab2a2023-04-21 11:15:54 -050013#include <phosphor-logging/elog-errors.hpp>
Eddie James2f9f9bb2021-09-20 14:26:31 -050014#include <phosphor-logging/elog.hpp>
15#include <phosphor-logging/log.hpp>
16#include <xyz/openbmc_project/Common/error.hpp>
17#include <xyz/openbmc_project/Logging/Create/server.hpp>
18
Patrick Williams48002492024-02-13 21:43:32 -060019#include <format>
20
Eddie James2f9f9bb2021-09-20 14:26:31 -050021namespace open_power
22{
23namespace occ
24{
25
26static constexpr size_t max_ffdc_size = 8192;
27static constexpr size_t sbe_status_header_size = 8;
28
29static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
Eddie James9789e712022-05-25 15:43:40 -050030static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Eddie James2f9f9bb2021-09-20 14:26:31 -050031
32using namespace phosphor::logging;
33using namespace sdbusplus::org::open_power::OCC::Device::Error;
34using InternalFailure =
35 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
36
// Create a PEL for an SBE error reported through the OCC
//
// Attaches the supplied FFDC file descriptor (when one is given) together
// with recent journal entries of openpower-occ-control and then calls
// CreatePELWithFFDCFiles on the PEL logging interface.
//
// @param[in] path - error name passed as first argument of the D-Bus call
// @param[in] src6 - value stored as SRC6; src6 >> 16 is logged as the OCC
//                   instance
// @param[in] msg - text stored as SBE_ERR_MSG
// @param[in] fd - descriptor of a file with additional FFDC; it is only
//                 attached when greater than zero
// @return second value of the D-Bus reply, or 0 when the call failed
uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
                         int fd)
{
    namespace LoggingServer = sdbusplus::xyz::openbmc_project::Logging::server;

    log<level::INFO>(
        std::format("Creating PEL for OCC{} with SBE FFDC: {} - SRC6: 0x{:08X}",
                    src6 >> 16, path, src6)
            .c_str());

    std::vector<std::tuple<LoggingServer::Create::FFDCFormat, uint8_t, uint8_t,
                           sdbusplus::message::unix_fd>>
        ffdcFiles;
    if (fd > 0)
    {
        ffdcFiles.emplace_back(LoggingServer::Create::FFDCFormat::Custom,
                               static_cast<uint8_t>(0xCB),
                               static_cast<uint8_t>(0x01), fd);
    }

    // Add journal traces to PEL FFDC. The returned file object is held until
    // this function returns, i.e. until after the D-Bus call below.
    auto journalFile = addJournalEntries(ffdcFiles, "openpower-occ-control",
                                         25);

    std::map<std::string, std::string> additionalData{
        {"SRC6", std::to_string(src6)},
        {"_PID", std::to_string(getpid())},
        {"SBE_ERR_MSG", msg}};

    uint32_t plid = 0;
    auto& bus = utils::getBus();

    try
    {
        std::string service = utils::getService(loggingObjectPath,
                                                opLoggingInterface);
        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
                                          opLoggingInterface,
                                          "CreatePELWithFFDCFiles");

        // Set level to Notice (Informational). Error should trigger an OCC
        // reset and if it does not recover, HTMGT/HBRT will create an
        // unrecoverable error.
        auto severity = LoggingServer::convertForMessage(
            LoggingServer::Entry::Level::Notice);

        method.append(path, severity, additionalData, ffdcFiles);
        auto response = bus.call(method);

        std::tuple<uint32_t, uint32_t> reply{0, 0};
        response.read(reply);
        plid = std::get<1>(reply);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create PEL: {}", e.what()).c_str());
    }

    return plid;
}
101
// Create a PEL that records a reset of the given OCC
//
// @param[in] instance - OCC instance number (stored as "OCC")
// @param[in] path - error name passed as first argument of the D-Bus call
// @param[in] err - error number; when non-zero its negation is stored as
//                  CALLOUT_ERRNO
// @param[in] callout - device path stored as CALLOUT_DEVICE_PATH; may be
//                      nullptr, in which case no device path is added
void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
                             const char* callout)
{
    namespace LoggingServer = sdbusplus::xyz::openbmc_project::Logging::server;

    std::map<std::string, std::string> additionalData{
        {"_PID", std::to_string(getpid())},
        {"OCC", std::to_string(instance)}};
    if (err != 0)
    {
        additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
    }
    if (callout != nullptr)
    {
        additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
    }

    log<level::INFO>(
        std::format("Creating OCC Reset PEL for OCC{}: {}", instance, path)
            .c_str());

    auto& bus = utils::getBus();

    try
    {
        // Add journal traces to PEL FFDC. The returned file object is held
        // until the end of this try block, i.e. until after the D-Bus call.
        FFDCFiles ffdcFiles;
        auto journalFile = addJournalEntries(ffdcFiles,
                                             "openpower-occ-control", 25);

        std::string service = utils::getService(loggingObjectPath,
                                                opLoggingInterface);
        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
                                          opLoggingInterface,
                                          "CreatePELWithFFDCFiles");

        // Set level to Notice (Informational). Error should trigger an OCC
        // reset and if it does not recover, HTMGT/HBRT will create an
        // unrecoverable error.
        auto severity = LoggingServer::convertForMessage(
            LoggingServer::Entry::Level::Notice);

        method.append(path, severity, additionalData, ffdcFiles);
        bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create OCC Reset PEL: {}", e.what())
                .c_str());
    }
}
158
Chris Cain2ccc3f62022-10-05 14:40:07 -0500159// Reads the SBE FFDC file and create an error log
Eddie James2f9f9bb2021-09-20 14:26:31 -0500160void FFDC::analyzeEvent()
161{
162 int tfd = -1;
163 size_t total = 0;
164 auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
165 while (total < max_ffdc_size)
166 {
167 auto r = read(fd, data.get() + total, max_ffdc_size - total);
168 if (r < 0)
169 {
170 elog<ReadFailure>(
171 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
172 CALLOUT_ERRNO(errno),
173 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
174 CALLOUT_DEVICE_PATH(file.c_str()));
175 return;
176 }
177 if (!r)
178 {
179 break;
180 }
181 total += r;
182 }
183
184 lseek(fd, 0, SEEK_SET);
185
Eddie James338748b2021-10-29 10:06:50 -0500186 if (!total)
187 {
188 // no error
189 return;
190 }
191
Eddie James2f9f9bb2021-09-20 14:26:31 -0500192 uint32_t src6 = instance << 16;
193 src6 |= *(data.get() + 2) << 8;
194 src6 |= *(data.get() + 3);
195
196 if (total > sbe_status_header_size)
197 {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500198 std::string templateString = fs::temp_directory_path() /
199 "OCC_FFDC_XXXXXX";
Eddie James2f9f9bb2021-09-20 14:26:31 -0500200 tfd = mkostemp(templateString.data(), O_RDWR);
201 if (tfd < 0)
202 {
203 log<level::ERR>("Couldn't create temporary FFDC file");
204 }
205 else
206 {
207 temporaryFiles.emplace_back(templateString, tfd);
208 size_t written = sbe_status_header_size;
209 while (written < total)
210 {
211 auto r = write(tfd, data.get() + written, total - written);
212 if (r < 0)
213 {
214 close(temporaryFiles.back().second);
215 fs::remove(temporaryFiles.back().first);
216 temporaryFiles.pop_back();
217 tfd = -1;
218 log<level::ERR>("Couldn't write temporary FFDC file");
219 break;
220 }
221 if (!r)
222 {
223 break;
224 }
225 written += r;
226 }
227 }
228 }
229
230 createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
231 "SBE command reported error", tfd);
232}
233
// Create a file with the latest journal entries for the specified executable
// and register it in the given FFDC file list
//
// @param[in,out] fileList - list that receives a JSON entry for the file
// @param[in] executable - executable whose journal entries are collected
// @param[in] lines - maximum number of journal entries to collect
// @return the created file (nullptr when it could not be created); the entry
//         added to fileList refers to this file's descriptor
std::unique_ptr<FFDCFile> FFDC::addJournalEntries(FFDCFiles& fileList,
                                                  const std::string& executable,
                                                  unsigned int lines)
{
    auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));

    if (!journalFile || journalFile->fd() == -1)
    {
        log<level::ERR>(
            std::format(
                "addJournalEntries: Failed to add journal entries for {}",
                executable)
                .c_str());
    }
    else
    {
        log<level::DEBUG>(
            std::format(
                "addJournalEntries: Added up to {} journal entries for {}",
                lines, executable)
                .c_str());
        fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
    }

    return journalFile;
}
259
// Write JSON data into a temporary FFDC file and return the file
//
// The data is serialized and written to a file created with mkostemp() in the
// temporary directory. The write descriptor is closed afterwards and the path
// is handed to FFDCFile, which opens the file again.
//
// @param[in] ffdcData - JSON data to store in the file
// @return FFDCFile for the new file, or nullptr when the file could not be
//         created or not be written completely
std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
{
    std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
    const int fd = mkostemp(tmpFile.data(), O_RDWR);
    if (fd == -1)
    {
        const auto e = errno;
        log<level::ERR>(
            std::format("makeJsonFFDCFile: Failed called to mkostemp, errno={}",
                        e)
                .c_str());
        return nullptr;
    }

    const auto jsonString = ffdcData.dump();

    // write() may store fewer bytes than requested, so keep writing until all
    // data is stored or a real error occurs
    size_t written = 0;
    int writeErrno = 0;
    while (written < jsonString.size())
    {
        const auto rc = write(fd, jsonString.data() + written,
                              jsonString.size() - written);
        if (rc > 0)
        {
            written += static_cast<size_t>(rc);
            continue;
        }
        if ((rc < 0) && (errno == EINTR))
        {
            // Interrupted before anything was written; try again
            continue;
        }
        // Save errno right away; the close() below may overwrite it. A return
        // value of 0 makes no progress and is reported as an I/O error.
        writeErrno = (rc < 0) ? errno : EIO;
        break;
    }
    close(fd);

    if (written < jsonString.size())
    {
        log<level::ERR>(
            std::format(
                "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={}",
                writeErrno)
                .c_str());
        // No FFDCFile object will refer to this file, so remove it here
        // (best effort)
        unlink(tmpFile.c_str());
        return nullptr;
    }

    return std::make_unique<FFDCFile>(fs::path{tmpFile});
}
295
296// Collect the latest journal entries for a specified executable
297nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
298{
299 // Sleep 100ms; otherwise recent journal entries sometimes not available
300 using namespace std::chrono_literals;
301 std::this_thread::sleep_for(100ms);
302
303 std::vector<std::string> entries;
304
305 // Open the journal
306 sd_journal* journal;
307 int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
308 if (rc < 0)
309 {
310 // Build one line string containing field values
311 entries.push_back("[Internal error: sd_journal_open(), rc=" +
312 std::string(strerror(rc)) + "]");
313 return nlohmann::json(entries);
314 }
315
316 // Create object to automatically close journal
317 JournalCloser closer{journal};
318
319 // Add match so we only loop over entries with specified field value
320 std::string field{"SYSLOG_IDENTIFIER"};
321 std::string match{field + '=' + executable};
322 rc = sd_journal_add_match(journal, match.c_str(), 0);
323 if (rc < 0)
324 {
325 // Build one line string containing field values
326 entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
327 std::string(strerror(rc)) + "]");
328 }
329 else
330 {
331 int count{1};
332 entries.reserve(numLines);
333 std::string syslogID, pid, message, timeStamp;
334
335 // Loop through journal entries from newest to oldest
336 SD_JOURNAL_FOREACH_BACKWARDS(journal)
337 {
338 // Get relevant journal entry fields
339 timeStamp = getTimeStamp(journal);
340 syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
341 pid = getFieldValue(journal, "_PID");
342 message = getFieldValue(journal, "MESSAGE");
343
344 // Build one line string containing field values
345 entries.push_back(timeStamp + " " + syslogID + "[" + pid +
346 "]: " + message);
347
348 // Stop after number of lines was read
349 if (count++ >= numLines)
350 {
351 break;
352 }
353 }
354 }
355
356 // put the journal entries in chronological order
357 std::reverse(entries.begin(), entries.end());
358
359 return nlohmann::json(entries);
360}
361
362std::string FFDC::getTimeStamp(sd_journal* journal)
363{
364 // Get realtime (wallclock) timestamp of current journal entry. The
365 // timestamp is in microseconds since the epoch.
366 uint64_t usec{0};
367 int rc = sd_journal_get_realtime_usec(journal, &usec);
368 if (rc < 0)
369 {
370 return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
371 std::string(strerror(rc)) + "]";
372 }
373
374 // Convert to number of seconds since the epoch
375 time_t secs = usec / 1000000;
376
377 // Convert seconds to tm struct required by strftime()
378 struct tm* timeStruct = localtime(&secs);
379 if (timeStruct == nullptr)
380 {
381 return "[Internal error: localtime() returned nullptr]";
382 }
383
384 // Convert tm struct into a date/time string
385 char timeStamp[80];
386 strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);
387
388 return timeStamp;
389}
390
391std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
392{
393 std::string value{};
394
395 // Get field data from current journal entry
396 const void* data{nullptr};
397 size_t length{0};
398 int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
399 if (rc < 0)
400 {
401 if (-rc == ENOENT)
402 {
403 // Current entry does not include this field; return empty value
404 return value;
405 }
406 else
407 {
408 return "[Internal error: sd_journal_get_data() rc=" +
409 std::string(strerror(rc)) + "]";
410 }
411 }
412
413 // Get value from field data. Field data in format "FIELD=value".
414 std::string dataString{static_cast<const char*>(data), length};
415 std::string::size_type pos = dataString.find('=');
416 if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
417 {
418 // Value is substring after the '='
419 value = dataString.substr(pos + 1);
420 }
421
422 return value;
423}
424
// Open the named FFDC file read-only and remember its name
//
// A failure to open the file is only logged; the stored descriptor is then
// -1.
// NOTE(review): the original comment says the file is removed automatically
// on destruction; the destructor is not part of this file - confirm there.
//
// @param[in] name - path of the file to open
FFDCFile::FFDCFile(const fs::path& name) :
    _fd(open(name.c_str(), O_RDONLY)), _name(name)
{
    if (_fd() != -1)
    {
        return;
    }

    const auto openErrno = errno;
    log<level::ERR>(
        std::format("FFDCFile: Could not open FFDC file {}. errno {}",
                    _name.string(), openErrno)
            .c_str());
}
438
Eddie James2f9f9bb2021-09-20 14:26:31 -0500439} // namespace occ
440} // namespace open_power