blob: f83f74a296f5f0733c2970b4eda85ea4ca84bf78 [file] [log] [blame]
Eddie James2f9f9bb2021-09-20 14:26:31 -05001#include "occ_ffdc.hpp"
2
3#include "elog-errors.hpp"
4#include "utils.hpp"
5
6#include <errno.h>
7#include <fcntl.h>
Eddie James9789e712022-05-25 15:43:40 -05008#include <fmt/core.h>
Eddie James2f9f9bb2021-09-20 14:26:31 -05009#include <stdio.h>
10#include <sys/ioctl.h>
11#include <unistd.h>
12
Chris Cain2ccc3f62022-10-05 14:40:07 -050013#include <nlohmann/json.hpp>
Eddie James2f9f9bb2021-09-20 14:26:31 -050014#include <org/open_power/OCC/Device/error.hpp>
15#include <phosphor-logging/elog.hpp>
16#include <phosphor-logging/log.hpp>
17#include <xyz/openbmc_project/Common/error.hpp>
18#include <xyz/openbmc_project/Logging/Create/server.hpp>
19
20namespace open_power
21{
22namespace occ
23{
24
25static constexpr size_t max_ffdc_size = 8192;
26static constexpr size_t sbe_status_header_size = 8;
27
28static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
Eddie James9789e712022-05-25 15:43:40 -050029static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Eddie James2f9f9bb2021-09-20 14:26:31 -050030
31using namespace phosphor::logging;
32using namespace sdbusplus::org::open_power::OCC::Device::Error;
33using InternalFailure =
34 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
35
36uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
37 int fd)
38{
39 uint32_t plid = 0;
40 std::vector<std::tuple<
41 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
42 uint8_t, uint8_t, sdbusplus::message::unix_fd>>
43 pelFFDCInfo;
44
Chris Cainf9fd1e52022-10-04 13:39:24 -050045 log<level::INFO>(
46 fmt::format("Creating PEL for OCC{} with SBE FFDC: {} - SRC6: 0x{:08X}",
47 src6 >> 16, path, src6)
48 .c_str());
Eddie James2f9f9bb2021-09-20 14:26:31 -050049
50 if (fd > 0)
51 {
52 pelFFDCInfo.push_back(std::make_tuple(
53 sdbusplus::xyz::openbmc_project::Logging::server::Create::
54 FFDCFormat::Custom,
55 static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd));
56 }
57
Chris Cain2ccc3f62022-10-05 14:40:07 -050058 // Add journal traces to PEL FFDC
59 auto occJournalFile =
60 addJournalEntries(pelFFDCInfo, "openpower-occ-control", 25);
61
Eddie James2f9f9bb2021-09-20 14:26:31 -050062 std::map<std::string, std::string> additionalData;
63 additionalData.emplace("SRC6", std::to_string(src6));
64 additionalData.emplace("_PID", std::to_string(getpid()));
65 additionalData.emplace("SBE_ERR_MSG", msg);
66
Eddie James2f9f9bb2021-09-20 14:26:31 -050067 auto& bus = utils::getBus();
68
69 try
70 {
Eddie James338748b2021-10-29 10:06:50 -050071 std::string service =
Eddie James9789e712022-05-25 15:43:40 -050072 utils::getService(loggingObjectPath, opLoggingInterface);
Eddie James2f9f9bb2021-09-20 14:26:31 -050073 auto method =
74 bus.new_method_call(service.c_str(), loggingObjectPath,
Eddie James9789e712022-05-25 15:43:40 -050075 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain2ccc3f62022-10-05 14:40:07 -050076
Chris Cainf9fd1e52022-10-04 13:39:24 -050077 // Set level to Notice (Informational). Error should trigger an OCC
78 // reset and if it does not recover, HTMGT/HBRT will create an
79 // unrecoverable error.
Eddie James2f9f9bb2021-09-20 14:26:31 -050080 auto level =
81 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
82 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cainf9fd1e52022-10-04 13:39:24 -050083 Notice);
Chris Cain2ccc3f62022-10-05 14:40:07 -050084
Eddie James2f9f9bb2021-09-20 14:26:31 -050085 method.append(path, level, additionalData, pelFFDCInfo);
86 auto response = bus.call(method);
87 std::tuple<uint32_t, uint32_t> reply = {0, 0};
88
89 response.read(reply);
90 plid = std::get<1>(reply);
91 }
Patrick Williamsaf408082022-07-22 19:26:54 -050092 catch (const sdbusplus::exception_t& e)
Eddie James2f9f9bb2021-09-20 14:26:31 -050093 {
Chris Cainf9fd1e52022-10-04 13:39:24 -050094 log<level::ERR>(
95 fmt::format("Failed to create PEL: {}", e.what()).c_str());
Eddie James2f9f9bb2021-09-20 14:26:31 -050096 }
97
98 return plid;
99}
100
Eddie James9789e712022-05-25 15:43:40 -0500101void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
102 const char* callout)
103{
104 std::map<std::string, std::string> additionalData;
105
106 additionalData.emplace("_PID", std::to_string(getpid()));
107
108 if (err)
109 {
110 additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
111 }
112
113 if (callout)
114 {
115 additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
116 }
117
118 additionalData.emplace("OCC", std::to_string(instance));
119
Chris Cainf9fd1e52022-10-04 13:39:24 -0500120 log<level::INFO>(
121 fmt::format("Creating OCC Reset PEL for OCC{}: {}", instance, path)
122 .c_str());
123
Eddie James9789e712022-05-25 15:43:40 -0500124 auto& bus = utils::getBus();
125
126 try
127 {
Chris Cain2ccc3f62022-10-05 14:40:07 -0500128 FFDCFiles ffdc;
129 // Add journal traces to PEL FFDC
130 auto occJournalFile =
131 addJournalEntries(ffdc, "openpower-occ-control", 25);
132
Eddie James9789e712022-05-25 15:43:40 -0500133 std::string service =
Chris Cain2ccc3f62022-10-05 14:40:07 -0500134 utils::getService(loggingObjectPath, opLoggingInterface);
135 auto method =
136 bus.new_method_call(service.c_str(), loggingObjectPath,
137 opLoggingInterface, "CreatePELWithFFDCFiles");
138
Chris Cainf9fd1e52022-10-04 13:39:24 -0500139 // Set level to Notice (Informational). Error should trigger an OCC
140 // reset and if it does not recover, HTMGT/HBRT will create an
141 // unrecoverable error.
Eddie James9789e712022-05-25 15:43:40 -0500142 auto level =
143 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
144 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cainf9fd1e52022-10-04 13:39:24 -0500145 Notice);
Chris Cain2ccc3f62022-10-05 14:40:07 -0500146
147 method.append(path, level, additionalData, ffdc);
Eddie James9789e712022-05-25 15:43:40 -0500148 bus.call(method);
149 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500150 catch (const sdbusplus::exception_t& e)
Eddie James9789e712022-05-25 15:43:40 -0500151 {
152 log<level::ERR>(
Chris Cainf9fd1e52022-10-04 13:39:24 -0500153 fmt::format("Failed to create OCC Reset PEL: {}", e.what())
154 .c_str());
Eddie James9789e712022-05-25 15:43:40 -0500155 }
156}
157
Chris Cain2ccc3f62022-10-05 14:40:07 -0500158// Reads the SBE FFDC file and create an error log
Eddie James2f9f9bb2021-09-20 14:26:31 -0500159void FFDC::analyzeEvent()
160{
161 int tfd = -1;
162 size_t total = 0;
163 auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
164 while (total < max_ffdc_size)
165 {
166 auto r = read(fd, data.get() + total, max_ffdc_size - total);
167 if (r < 0)
168 {
169 elog<ReadFailure>(
170 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
171 CALLOUT_ERRNO(errno),
172 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
173 CALLOUT_DEVICE_PATH(file.c_str()));
174 return;
175 }
176 if (!r)
177 {
178 break;
179 }
180 total += r;
181 }
182
183 lseek(fd, 0, SEEK_SET);
184
Eddie James338748b2021-10-29 10:06:50 -0500185 if (!total)
186 {
187 // no error
188 return;
189 }
190
Eddie James2f9f9bb2021-09-20 14:26:31 -0500191 uint32_t src6 = instance << 16;
192 src6 |= *(data.get() + 2) << 8;
193 src6 |= *(data.get() + 3);
194
195 if (total > sbe_status_header_size)
196 {
197 std::string templateString =
198 fs::temp_directory_path() / "OCC_FFDC_XXXXXX";
199 tfd = mkostemp(templateString.data(), O_RDWR);
200 if (tfd < 0)
201 {
202 log<level::ERR>("Couldn't create temporary FFDC file");
203 }
204 else
205 {
206 temporaryFiles.emplace_back(templateString, tfd);
207 size_t written = sbe_status_header_size;
208 while (written < total)
209 {
210 auto r = write(tfd, data.get() + written, total - written);
211 if (r < 0)
212 {
213 close(temporaryFiles.back().second);
214 fs::remove(temporaryFiles.back().first);
215 temporaryFiles.pop_back();
216 tfd = -1;
217 log<level::ERR>("Couldn't write temporary FFDC file");
218 break;
219 }
220 if (!r)
221 {
222 break;
223 }
224 written += r;
225 }
226 }
227 }
228
229 createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
230 "SBE command reported error", tfd);
231}
232
Chris Cain2ccc3f62022-10-05 14:40:07 -0500233// Create file with the latest journal entries for specified executable
234std::unique_ptr<FFDCFile> FFDC::addJournalEntries(FFDCFiles& fileList,
235 const std::string& executable,
236 unsigned int lines)
237{
238 auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));
239 if (journalFile && journalFile->fd() != -1)
240 {
241 log<level::DEBUG>(
242 fmt::format(
243 "addJournalEntries: Added up to {} journal entries for {}",
244 lines, executable)
245 .c_str());
246 fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
247 }
248 else
249 {
250 log<level::ERR>(
251 fmt::format(
252 "addJournalEntries: Failed to add journal entries for {}",
253 executable)
254 .c_str());
255 }
256 return journalFile;
257}
258
259// Write JSON data into FFDC file and return the file
260std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
261{
262 std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
263 auto fd = mkostemp(tmpFile.data(), O_RDWR);
264 if (fd != -1)
265 {
266 auto jsonString = ffdcData.dump();
267 auto rc = write(fd, jsonString.data(), jsonString.size());
268 close(fd);
269 if (rc != -1)
270 {
271 fs::path jsonFile{tmpFile};
272 return std::make_unique<FFDCFile>(jsonFile);
273 }
274 else
275 {
276 auto e = errno;
277 log<level::ERR>(
278 fmt::format(
279 "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={}",
280 e)
281 .c_str());
282 }
283 }
284 else
285 {
286 auto e = errno;
287 log<level::ERR>(
288 fmt::format("makeJsonFFDCFile: Failed called to mkostemp, errno={}",
289 e)
290 .c_str());
291 }
292 return nullptr;
293}
294
295// Collect the latest journal entries for a specified executable
296nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
297{
298 // Sleep 100ms; otherwise recent journal entries sometimes not available
299 using namespace std::chrono_literals;
300 std::this_thread::sleep_for(100ms);
301
302 std::vector<std::string> entries;
303
304 // Open the journal
305 sd_journal* journal;
306 int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
307 if (rc < 0)
308 {
309 // Build one line string containing field values
310 entries.push_back("[Internal error: sd_journal_open(), rc=" +
311 std::string(strerror(rc)) + "]");
312 return nlohmann::json(entries);
313 }
314
315 // Create object to automatically close journal
316 JournalCloser closer{journal};
317
318 // Add match so we only loop over entries with specified field value
319 std::string field{"SYSLOG_IDENTIFIER"};
320 std::string match{field + '=' + executable};
321 rc = sd_journal_add_match(journal, match.c_str(), 0);
322 if (rc < 0)
323 {
324 // Build one line string containing field values
325 entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
326 std::string(strerror(rc)) + "]");
327 }
328 else
329 {
330 int count{1};
331 entries.reserve(numLines);
332 std::string syslogID, pid, message, timeStamp;
333
334 // Loop through journal entries from newest to oldest
335 SD_JOURNAL_FOREACH_BACKWARDS(journal)
336 {
337 // Get relevant journal entry fields
338 timeStamp = getTimeStamp(journal);
339 syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
340 pid = getFieldValue(journal, "_PID");
341 message = getFieldValue(journal, "MESSAGE");
342
343 // Build one line string containing field values
344 entries.push_back(timeStamp + " " + syslogID + "[" + pid +
345 "]: " + message);
346
347 // Stop after number of lines was read
348 if (count++ >= numLines)
349 {
350 break;
351 }
352 }
353 }
354
355 // put the journal entries in chronological order
356 std::reverse(entries.begin(), entries.end());
357
358 return nlohmann::json(entries);
359}
360
361std::string FFDC::getTimeStamp(sd_journal* journal)
362{
363 // Get realtime (wallclock) timestamp of current journal entry. The
364 // timestamp is in microseconds since the epoch.
365 uint64_t usec{0};
366 int rc = sd_journal_get_realtime_usec(journal, &usec);
367 if (rc < 0)
368 {
369 return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
370 std::string(strerror(rc)) + "]";
371 }
372
373 // Convert to number of seconds since the epoch
374 time_t secs = usec / 1000000;
375
376 // Convert seconds to tm struct required by strftime()
377 struct tm* timeStruct = localtime(&secs);
378 if (timeStruct == nullptr)
379 {
380 return "[Internal error: localtime() returned nullptr]";
381 }
382
383 // Convert tm struct into a date/time string
384 char timeStamp[80];
385 strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);
386
387 return timeStamp;
388}
389
390std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
391{
392 std::string value{};
393
394 // Get field data from current journal entry
395 const void* data{nullptr};
396 size_t length{0};
397 int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
398 if (rc < 0)
399 {
400 if (-rc == ENOENT)
401 {
402 // Current entry does not include this field; return empty value
403 return value;
404 }
405 else
406 {
407 return "[Internal error: sd_journal_get_data() rc=" +
408 std::string(strerror(rc)) + "]";
409 }
410 }
411
412 // Get value from field data. Field data in format "FIELD=value".
413 std::string dataString{static_cast<const char*>(data), length};
414 std::string::size_type pos = dataString.find('=');
415 if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
416 {
417 // Value is substring after the '='
418 value = dataString.substr(pos + 1);
419 }
420
421 return value;
422}
423
424// Create temporary file that will automatically get removed when destructed
425FFDCFile::FFDCFile(const fs::path& name) :
426 _fd(open(name.c_str(), O_RDONLY)), _name(name)
427{
428 if (_fd() == -1)
429 {
430 auto e = errno;
431 log<level::ERR>(
432 fmt::format("FFDCFile: Could not open FFDC file {}. errno {}",
433 _name.string(), e)
434 .c_str());
435 }
436}
437
Eddie James2f9f9bb2021-09-20 14:26:31 -0500438} // namespace occ
439} // namespace open_power