Log a PEL for communication, presence mismatch, and safe state errors

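Add code to log a PEL when an OCC device read fails, when the OCC
presence count is mismatched, and when an OCC is in safe state. Change
the error-monitoring callbacks to take the integer read from the error
file instead of a bool, so that the return code from the driver can be
included in the PEL.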

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Change-Id: Ifd91cfc063718e484ec8886df8357d115c6b41e3
diff --git a/occ_device.cpp b/occ_device.cpp
index df8683e..0ca4fdc 100644
--- a/occ_device.cpp
+++ b/occ_device.cpp
@@ -79,16 +79,34 @@
     return v == 1;
 }
 
-void Device::errorCallback(bool error)
+void Device::errorCallback(int error)
 {
     if (error)
     {
-        statusObject.deviceError();
+        if (error != -EHOSTDOWN)
+        {
+            fs::path p = devPath;
+            if (fs::is_symlink(p))
+            {
+                p = fs::read_symlink(p);
+            }
+            statusObject.deviceError(Error::Descriptor(
+                "org.open_power.OCC.Device.ReadFailure", error, p.c_str()));
+        }
+        else
+        {
+            statusObject.deviceError(Error::Descriptor(SAFE_ERROR_PATH));
+        }
     }
 }
 
+void Device::presenceCallback(int)
+{
+    statusObject.deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
+}
+
 #ifdef PLDM
-void Device::timeoutCallback(bool error)
+void Device::timeoutCallback(int error)
 {
     if (error)
     {
@@ -97,17 +115,17 @@
 }
 #endif
 
-void Device::throttleProcTempCallback(bool error)
+void Device::throttleProcTempCallback(int error)
 {
     statusObject.throttleProcTemp(error);
 }
 
-void Device::throttleProcPowerCallback(bool error)
+void Device::throttleProcPowerCallback(int error)
 {
     statusObject.throttleProcPower(error);
 }
 
-void Device::throttleMemTempCallback(bool error)
+void Device::throttleMemTempCallback(int error)
 {
     statusObject.throttleMemTemp(error);
 }
diff --git a/occ_device.hpp b/occ_device.hpp
index 873ecfc..b853412 100644
--- a/occ_device.hpp
+++ b/occ_device.hpp
@@ -245,37 +245,44 @@
         return;
     }
 
-    /** @brief callback for OCC error and presence monitoring
+    /** @brief callback for OCC error monitoring
      *
-     * @param[in] error - True if an error is reported, false otherwise
+     * @param[in] error - Errno stored in the error file, 0 if no error
      */
-    void errorCallback(bool error);
+    void errorCallback(int error);
+
+    /** @brief callback for OCC presence monitoring
+     *
+     * @param[in] occsPresent - The number of OCCs indicated in the poll
+     * response
+     */
+    void presenceCallback(int occsPresent);
 
 #ifdef PLDM
     /** @brief callback for SBE timeout monitoring
      *
      * @param[in] error - True if an error is reported, false otherwise
      */
-    void timeoutCallback(bool error);
+    void timeoutCallback(int error);
 #endif
 
     /** @brief callback for the proc temp throttle event
      *
      *  @param[in] error - True if an error is reported, false otherwise
      */
-    void throttleProcTempCallback(bool error);
+    void throttleProcTempCallback(int error);
 
     /** @brief callback for the proc power throttle event
      *
      *  @param[in] error - True if an error is reported, false otherwise
      */
-    void throttleProcPowerCallback(bool error);
+    void throttleProcPowerCallback(int error);
 
     /** @brief callback for the proc temp throttle event
      *
      *  @param[in] error - True if an error is reported, false otherwise
      */
-    void throttleMemTempCallback(bool error);
+    void throttleMemTempCallback(int error);
 
     /** @brief Get the pathname for a file based on a regular expression
      *
diff --git a/occ_errors.cpp b/occ_errors.cpp
index c6f450d..f41173e 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -17,9 +17,6 @@
 namespace occ
 {
 
-// Value in error file indicating success
-constexpr auto NO_ERROR = '0';
-
 using namespace phosphor::logging;
 using namespace sdbusplus::org::open_power::OCC::Device::Error;
 using InternalFailure =
@@ -113,6 +110,7 @@
 void Error::analyzeEvent()
 {
     // Get the number of bytes to read
+    int err = 0;
     int len = -1;
     auto r = ioctl(fd, FIONREAD, &len);
     if (r < 0)
@@ -127,10 +125,11 @@
     // A non-zero data indicates an error condition
     // Let the caller take appropriate action on this
     auto data = readFile(len);
-    bool error = !(data.empty() || data.front() == NO_ERROR);
+    if (!data.empty())
+        err = std::stoi(data, nullptr, 0);
     if (callBack)
     {
-        callBack(error);
+        callBack(err);
     }
     return;
 }
diff --git a/occ_errors.hpp b/occ_errors.hpp
index 6fe0983..ba4610e 100644
--- a/occ_errors.hpp
+++ b/occ_errors.hpp
@@ -15,6 +15,10 @@
 
 namespace fs = std::filesystem;
 
+constexpr auto PRESENCE_ERROR_PATH =
+    "org.open_power.OCC.Firmware.PresenceMismatch";
+constexpr auto SAFE_ERROR_PATH = "org.open_power.OCC.Device.SafeState";
+
 /** @class Error
  *  @brief Monitors for OCC device error condition
  */
@@ -34,7 +38,7 @@
      *  @param[in] callBack - Optional function callback on error condition
      */
     Error(EventPtr& event, const fs::path& file,
-          std::function<void(bool)> callBack = nullptr) :
+          std::function<void(int)> callBack = nullptr) :
         event(event),
         file(file), callBack(callBack)
     {
@@ -49,6 +53,38 @@
         }
     }
 
+    /** @class Descriptor
+     *  @brief Describes an error to log; path and callout are not copied.
+     */
+    class Descriptor
+    {
+      public:
+        Descriptor(const Descriptor&) = default;
+        Descriptor& operator=(const Descriptor&) = default;
+        Descriptor(Descriptor&&) = default;
+        Descriptor& operator=(Descriptor&&) = default;
+
+        Descriptor() : log(false), err(0), callout(nullptr), path(nullptr)
+        {}
+
+        /** @brief Constructs the Descriptor object
+         *
+         *  @param[in] path - the DBus error path
+         *  @param[in] err - Optional error return code
+         *  @param[in] callout - Optional PEL callout path
+         */
+        Descriptor(const char* path, int err = 0,
+                   const char* callout = nullptr) :
+            log(true),
+            err(err), callout(callout), path(path)
+        {}
+
+        bool log;
+        int err;
+        const char* callout;
+        const char* path;
+    };
+
     /** @brief Starts to monitor for error conditions
      *
      *  @param[in] poll - Indicates whether or not the error file should
@@ -109,7 +145,7 @@
     fs::path file;
 
     /** @brief Optional function to call on error scenario */
-    std::function<void(bool)> callBack;
+    std::function<void(int)> callBack;
 
     /** @brief Reads file data
      *
diff --git a/occ_ffdc.cpp b/occ_ffdc.cpp
index 0dc8d3c..3ec42be 100644
--- a/occ_ffdc.cpp
+++ b/occ_ffdc.cpp
@@ -5,6 +5,7 @@
 
 #include <errno.h>
 #include <fcntl.h>
+#include <fmt/core.h>
 #include <stdio.h>
 #include <sys/ioctl.h>
 #include <unistd.h>
@@ -24,7 +25,8 @@
 static constexpr size_t sbe_status_header_size = 8;
 
 static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
-static constexpr auto loggingInterface = "org.open_power.Logging.PEL";
+static constexpr auto loggingInterface = "xyz.openbmc_project.Logging.Create";
+static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
 
 using namespace phosphor::logging;
 using namespace sdbusplus::org::open_power::OCC::Device::Error;
@@ -60,10 +62,10 @@
     try
     {
         std::string service =
-            utils::getService(loggingObjectPath, loggingInterface);
+            utils::getService(loggingObjectPath, opLoggingInterface);
         auto method =
             bus.new_method_call(service.c_str(), loggingObjectPath,
-                                loggingInterface, "CreatePELWithFFDCFiles");
+                                opLoggingInterface, "CreatePELWithFFDCFiles");
         auto level =
             sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
                 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
@@ -83,6 +85,47 @@
     return plid;
 }
 
+void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
+                             const char* callout)
+{
+    std::map<std::string, std::string> additionalData;
+
+    additionalData.emplace("_PID", std::to_string(getpid()));
+
+    if (err)
+    {
+        additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
+    }
+
+    if (callout)
+    {
+        additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
+    }
+
+    additionalData.emplace("OCC", std::to_string(instance));
+
+    auto& bus = utils::getBus();
+
+    try
+    {
+        std::string service =
+            utils::getService(loggingObjectPath, loggingInterface);
+        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
+                                          loggingInterface, "Create");
+        auto level =
+            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
+                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
+                    Error);
+        method.append(path, level, additionalData);
+        bus.call(method);
+    }
+    catch (const sdbusplus::exception::exception& e)
+    {
+        log<level::ERR>(
+            fmt::format("Failed to create PEL: {}", e.what()).c_str());
+    }
+}
+
 // Reads the FFDC file and create an error log
 void FFDC::analyzeEvent()
 {
diff --git a/occ_ffdc.hpp b/occ_ffdc.hpp
index a4c882d..a2b1200 100644
--- a/occ_ffdc.hpp
+++ b/occ_ffdc.hpp
@@ -53,6 +53,17 @@
     static uint32_t createPEL(const char* path, uint32_t src6, const char* msg,
                               int fd = -1);
 
+    /** @brief Helper function to create a PEL for the OCC reset with the
+     * xyz.openbmc_project.Logging.Create DBus interface
+     *
+     * @param[in] instance - the OCC instance id
+     * @param[in] path - the DBus error path
+     * @param[in] err - the error return code, 0 if there is none
+     * @param[in] callout - the PEL callout path, nullptr if there is none
+     */
+    static void createOCCResetPEL(unsigned int instance, const char* path,
+                                  int err, const char* callout);
+
   private:
     /** @brief OCC instance number. Ex, 0,1, etc */
     unsigned int instance;
diff --git a/occ_manager.cpp b/occ_manager.cpp
index 88b0d26..1efb1e5 100644
--- a/occ_manager.cpp
+++ b/occ_manager.cpp
@@ -1288,7 +1288,7 @@
                         masterInstance, instance)
                         .c_str());
                 // request reset
-                obj->deviceError();
+                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
             }
         }
     }
@@ -1300,7 +1300,8 @@
                         statusObjects.size())
                 .c_str());
         // request reset
-        statusObjects.front()->deviceError();
+        statusObjects.front()->deviceError(
+            Error::Descriptor(PRESENCE_ERROR_PATH));
     }
     else
     {
diff --git a/occ_presence.cpp b/occ_presence.cpp
index e06ecb8..dd92725 100644
--- a/occ_presence.cpp
+++ b/occ_presence.cpp
@@ -51,7 +51,7 @@
                             .c_str());
         if (callBack)
         {
-            callBack(true);
+            callBack(occsPresent);
         }
     }
 }
diff --git a/occ_presence.hpp b/occ_presence.hpp
index 9e66acc..ef63c89 100644
--- a/occ_presence.hpp
+++ b/occ_presence.hpp
@@ -28,7 +28,7 @@
      *  @param[in] callBack - Optional function callback on error condition
      */
     Presence(EventPtr& event, const fs::path& file, const Manager& mgr,
-             std::function<void(bool)> callBack = nullptr) :
+             std::function<void(int)> callBack = nullptr) :
         Error(event, file, callBack),
         manager(mgr)
     {
diff --git a/occ_status.cpp b/occ_status.cpp
index 6174c91..14981c7 100644
--- a/occ_status.cpp
+++ b/occ_status.cpp
@@ -116,7 +116,7 @@
 }
 
 // Callback handler when a device error is reported.
-void Status::deviceError()
+void Status::deviceError(Error::Descriptor d)
 {
 #ifdef POWER10
     if (pmode && device.master())
@@ -126,6 +126,11 @@
     }
 #endif
 
+    if (d.log)
+    {
+        FFDC::createOCCResetPEL(instance, d.path, d.err, d.callout);
+    }
+
     // This would deem OCC inactive
     this->occActive(false);
 
@@ -327,7 +332,7 @@
                 instance)
                 .c_str());
         // Disable and reset to try recovering
-        deviceError();
+        deviceError(Error::Descriptor(SAFE_ERROR_PATH));
     }
 }
 #endif // POWER10
diff --git a/occ_status.hpp b/occ_status.hpp
index fa9d2d9..224a534 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -184,8 +184,11 @@
     /** @brief Read OCC state (will trigger kernel to poll the OCC) */
     void readOccState();
 
-    /** @brief Called when device errors are detected */
-    void deviceError();
+    /** @brief Called when device errors are detected
+     *
+     * @param[in] d - the error to log; the default Descriptor logs no PEL
+     */
+    void deviceError(Error::Descriptor d = Error::Descriptor());
 
 #ifdef POWER10
     /** @brief Handle additional tasks when the OCCs reach active state */