Add FFDC collection
Add an FFDC class that watches for SBEFIFO FFDC (first-failure data capture)
reported up through the OCC hwmon driver and, when it is present, creates a
PEL with the FFDC data attached.
Signed-off-by: Eddie James <eajames@linux.ibm.com>
Change-Id: Ia8bf6eddb037aec547b72ecce39c1a977d9374bc
diff --git a/meson.build b/meson.build
index 68f2c21..e6b6c6c 100644
--- a/meson.build
+++ b/meson.build
@@ -138,6 +138,7 @@
'occ_status.cpp',
'occ_device.cpp',
'occ_errors.cpp',
+ 'occ_ffdc.cpp',
'occ_presence.cpp',
'occ_command.cpp',
'occ_dbus.cpp',
diff --git a/occ_device.hpp b/occ_device.hpp
index a877c8c..ef2fb06 100644
--- a/occ_device.hpp
+++ b/occ_device.hpp
@@ -4,6 +4,7 @@
#include "occ_errors.hpp"
#include "occ_events.hpp"
+#include "occ_ffdc.hpp"
#include "occ_presence.hpp"
#include <org/open_power/OCC/Device/error.hpp>
@@ -39,13 +40,17 @@
* @param[in] event - Unique ptr reference to sd_event
* @param[in] path - Path to the OCC instance
* @param[in] manager - OCC manager instance
+ * @param[in] status - Status instance
+ * @param[in] instance - OCC instance number
* @param[in] callback - Optional callback on errors
*/
Device(EventPtr& event, const fs::path& path, const Manager& manager,
- Status& status, std::function<void(bool)> callBack = nullptr) :
+ Status& status, unsigned int instance = 0,
+ std::function<void(bool)> callBack = nullptr) :
config(getPathBack(path)),
devPath(path), statusObject(status),
error(event, path / "occ_error", callBack),
+ ffdc(event, path / "ffdc", instance),
presence(event, path / "occs_present", manager, callBack),
throttleProcTemp(
event, path / "occ_dvfs_overtemp",
@@ -111,6 +116,16 @@
throttleProcPower.addWatch(poll);
throttleMemTemp.addWatch(poll);
+
+ try
+ {
+ ffdc.addWatch(poll);
+ }
+ catch (const OpenFailure& e)
+ {
+ // nothing to do if there is no FFDC file
+ }
+
error.addWatch(poll);
}
@@ -119,6 +134,7 @@
{
// we can always safely remove watch even if we don't add it
presence.removeWatch();
+ ffdc.removeWatch();
error.removeWatch();
throttleMemTemp.removeWatch();
throttleProcPower.removeWatch();
@@ -168,6 +184,9 @@
/** Abstraction of error monitoring */
Error error;
+ /** SBE FFDC monitoring */
+ FFDC ffdc;
+
/** Abstraction of OCC presence monitoring */
Presence presence;
diff --git a/occ_ffdc.cpp b/occ_ffdc.cpp
new file mode 100644
index 0000000..bb55ce8
--- /dev/null
+++ b/occ_ffdc.cpp
@@ -0,0 +1,156 @@
+#include "occ_ffdc.hpp"
+
+#include "elog-errors.hpp"
+#include "utils.hpp"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <org/open_power/OCC/Device/error.hpp>
+#include <phosphor-logging/elog.hpp>
+#include <phosphor-logging/log.hpp>
+#include <xyz/openbmc_project/Common/error.hpp>
+#include <xyz/openbmc_project/Logging/Create/server.hpp>
+
+namespace open_power
+{
+namespace occ
+{
+
+static constexpr size_t max_ffdc_size = 8192; // read buffer capacity (bytes)
+static constexpr size_t sbe_status_header_size = 8; // leading bytes skipped
+
+static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
+static constexpr auto loggingInterface = "org.open_power.Logging.PEL";
+
+using namespace phosphor::logging;
+using namespace sdbusplus::org::open_power::OCC::Device::Error;
+using InternalFailure =
+ sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
+
+// Builds the additional data and the optional FFDC file entry, then calls
+// CreatePELWithFFDCFiles on the logging service. Returns the second value of
+// the method reply (kept as the PLID), or 0 when no PEL could be created.
+uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
+                         int fd)
+{
+    uint32_t plid = 0;
+    std::vector<std::tuple<
+        sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
+        uint8_t, uint8_t, sdbusplus::message::unix_fd>>
+        pelFFDCInfo;
+
+    log<level::INFO>("Creating PEL with SBE FFDC", entry("SRC6=%08x", src6));
+
+    // Descriptor 0 is valid; only a negative value (the declared default is
+    // -1) means that there is no FFDC file to attach.
+    if (fd >= 0)
+    {
+        pelFFDCInfo.push_back(std::make_tuple(
+            sdbusplus::xyz::openbmc_project::Logging::server::Create::
+                FFDCFormat::Custom,
+            static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd));
+    }
+
+    std::map<std::string, std::string> additionalData;
+    additionalData.emplace("SRC6", std::to_string(src6));
+    additionalData.emplace("_PID", std::to_string(getpid()));
+    // Constructing a std::string from a null pointer is undefined, so fall
+    // back to an empty message.
+    additionalData.emplace("SBE_ERR_MSG", msg ? msg : "");
+
+    auto& bus = utils::getBus();
+
+    try
+    {
+        // Look the service up inside the try block so that a D-Bus failure
+        // during the lookup is reported like a failed method call instead
+        // of propagating to the caller.
+        // NOTE(review): assumes utils::getService reports failures with
+        // sdbusplus exceptions - confirm.
+        std::string service =
+            utils::getService(loggingObjectPath, loggingInterface);
+        auto method =
+            bus.new_method_call(service.c_str(), loggingObjectPath,
+                                loggingInterface, "CreatePELWithFFDCFiles");
+        auto level =
+            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
+                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
+                    Error);
+        method.append(path, level, additionalData, pelFFDCInfo);
+        auto response = bus.call(method);
+        std::tuple<uint32_t, uint32_t> reply = {0, 0};
+
+        response.read(reply);
+        plid = std::get<1>(reply);
+    }
+    catch (const sdbusplus::exception::exception& e)
+    {
+        // Best effort: record why the PEL could not be created and return 0
+        log<level::ERR>("Failed to create PEL",
+                        entry("ERROR=%s", e.what()));
+    }
+
+    return plid;
+}
+
+// Reads the FFDC file and create an error log
+void FFDC::analyzeEvent()
+{
+ int tfd = -1;
+ size_t total = 0;
+ auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
+ while (total < max_ffdc_size)
+ {
+ auto r = read(fd, data.get() + total, max_ffdc_size - total);
+ if (r < 0)
+ {
+ elog<ReadFailure>(
+ phosphor::logging::org::open_power::OCC::Device::ReadFailure::
+ CALLOUT_ERRNO(errno),
+ phosphor::logging::org::open_power::OCC::Device::ReadFailure::
+ CALLOUT_DEVICE_PATH(file.c_str()));
+ return;
+ }
+ if (!r)
+ {
+ break;
+ }
+ total += r;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ uint32_t src6 = instance << 16;
+ src6 |= *(data.get() + 2) << 8;
+ src6 |= *(data.get() + 3);
+
+ if (total > sbe_status_header_size)
+ {
+ std::string templateString =
+ fs::temp_directory_path() / "OCC_FFDC_XXXXXX";
+ tfd = mkostemp(templateString.data(), O_RDWR);
+ if (tfd < 0)
+ {
+ log<level::ERR>("Couldn't create temporary FFDC file");
+ }
+ else
+ {
+ temporaryFiles.emplace_back(templateString, tfd);
+ size_t written = sbe_status_header_size;
+ while (written < total)
+ {
+ auto r = write(tfd, data.get() + written, total - written);
+ if (r < 0)
+ {
+ close(temporaryFiles.back().second);
+ fs::remove(temporaryFiles.back().first);
+ temporaryFiles.pop_back();
+ tfd = -1;
+ log<level::ERR>("Couldn't write temporary FFDC file");
+ break;
+ }
+ if (!r)
+ {
+ break;
+ }
+ written += r;
+ }
+ }
+ }
+
+ createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
+ "SBE command reported error", tfd);
+}
+
+} // namespace occ
+} // namespace open_power
diff --git a/occ_ffdc.hpp b/occ_ffdc.hpp
new file mode 100644
index 0000000..a4c882d
--- /dev/null
+++ b/occ_ffdc.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "config.h"
+
+#include "occ_errors.hpp"
+
+namespace open_power
+{
+namespace occ
+{
+
+/** @class FFDC
+ *  @brief Monitors for SBE FFDC availability
+ *
+ *  Derives from Error so that the FFDC file is watched like the other
+ *  device files; analyzeEvent() is overridden to turn the file contents
+ *  into a PEL.
+ */
+class FFDC : public Error
+{
+  public:
+    FFDC() = delete;
+    FFDC(const FFDC&) = delete;
+    FFDC& operator=(const FFDC&) = delete;
+    FFDC(FFDC&&) = default;
+    FFDC& operator=(FFDC&&) = default;
+
+    /** @brief Constructs the FFDC object
+     *
+     *  No error callback is registered with the Error base class.
+     *
+     *  @param[in] event    - reference to sd_event unique_ptr
+     *  @param[in] file     - File used by driver to communicate FFDC data
+     *  @param[in] instance - OCC instance number
+     */
+    FFDC(EventPtr& event, const fs::path& file, unsigned int instance) :
+        Error(event, file, nullptr), instance(instance)
+    {
+        // Nothing to do here.
+    }
+
+    /** @brief Closes and removes every temporary FFDC file still tracked */
+    ~FFDC()
+    {
+        for (const auto& tmp : temporaryFiles)
+        {
+            close(tmp.second);
+            // Use the error_code overload: a destructor must not throw, and
+            // a file that cannot be removed is not worth terminating for.
+            std::error_code ec;
+            fs::remove(tmp.first, ec);
+        }
+    }
+
+    /** @brief Helper function to create a PEL with the OpenPower DBus
+     *         interface
+     *
+     *  @param[in] path - the DBus error path
+     *  @param[in] src6 - the SBE error SRC6 word
+     *  @param[in] msg - the error message
+     *  @param[in] fd - the file descriptor for any FFDC; a negative value
+     *                  (the default) means there is none
+     *
+     *  @return The PLID read from the method reply, or 0 if the PEL could
+     *          not be created
+     */
+    static uint32_t createPEL(const char* path, uint32_t src6, const char* msg,
+                              int fd = -1);
+
+  private:
+    /** @brief OCC instance number. Ex, 0,1, etc */
+    unsigned int instance;
+
+    /** @brief Stores the temporary files and file descriptors
+     *         in usage. They will be cleaned up when the class
+     *         is destroyed (when the application exits).
+     */
+    std::vector<std::pair<fs::path, int>> temporaryFiles;
+
+    /** @brief When the FFDC event is received, reads the FFDC file
+     *         and creates a PEL, attaching the FFDC data through a
+     *         temporary file when there is any
+     */
+    void analyzeEvent() override;
+};
+
+} // namespace occ
+} // namespace open_power
diff --git a/occ_status.hpp b/occ_status.hpp
index ceff8f3..606a890 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -86,7 +86,7 @@
fs::path(DEV_PATH) /
fs::path(sysfsName + "." + std::to_string(instance + 1)),
#endif
- manager, *this,
+ manager, *this, instance,
std::bind(std::mem_fn(&Status::deviceErrorHandler), this,
std::placeholders::_1)),
hostControlSignal(
diff --git a/test/meson.build b/test/meson.build
index 4a751c4..8e5da4c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -26,6 +26,7 @@
'../occ_status.cpp',
'../occ_device.cpp',
'../occ_errors.cpp',
+ '../occ_ffdc.cpp',
'../occ_presence.cpp',
'../occ_command.cpp',
'../occ_dbus.cpp',