pseq: Add UCD90320 specific error handling

Add device-specific handling for voltage and power good failures to the
UCD90320 device subclass.

Signed-off-by: Jim Wright <jlwright@us.ibm.com>
Change-Id: I0ed7657320f4944d64ee434c1d02ce25d5cdc43b
diff --git a/phosphor-power-sequencer/src/power_sequencer_monitor.hpp b/phosphor-power-sequencer/src/power_sequencer_monitor.hpp
index 944e7ae..e8b36fc 100644
--- a/phosphor-power-sequencer/src/power_sequencer_monitor.hpp
+++ b/phosphor-power-sequencer/src/power_sequencer_monitor.hpp
@@ -41,7 +41,7 @@
      * known to be in an error state.  A log will be created.
      * @param[in] timeout if the failure state was determined by timing out
      * @param[in] powerSupplyError The power supply error to log. A default
-     * std:string, i.e. empty string ("") is passed when there is no power
+     * std::string, i.e. empty string (""), is passed when there is no power
      * supply error to log.
      */
     virtual void onFailure(bool timeout, const std::string& powerSupplyError);
diff --git a/phosphor-power-sequencer/src/ucd90320_monitor.cpp b/phosphor-power-sequencer/src/ucd90320_monitor.cpp
index d7ef7bb..f2bb057 100644
--- a/phosphor-power-sequencer/src/ucd90320_monitor.cpp
+++ b/phosphor-power-sequencer/src/ucd90320_monitor.cpp
@@ -24,6 +24,7 @@
 #include <nlohmann/json.hpp>
 #include <phosphor-logging/log.hpp>
 #include <sdbusplus/bus.hpp>
+#include <xyz/openbmc_project/Common/Device/error.hpp>
 
 #include <fstream>
 #include <map>
@@ -33,6 +34,7 @@
 {
 
 using json = nlohmann::json;
+using namespace pmbus;
 using namespace phosphor::logging;
 using namespace phosphor::power;
 
@@ -40,6 +42,8 @@
     "xyz.openbmc_project.Configuration.IBMCompatibleSystem";
 const std::string compatibleNamesProperty = "Names";
 
+namespace device_error = sdbusplus::xyz::openbmc_project::Common::Device::Error;
+
 UCD90320Monitor::UCD90320Monitor(sdbusplus::bus::bus& bus, std::uint8_t i2cBus,
                                  std::uint16_t i2cAddress) :
     PowerSequencerMonitor(bus),
@@ -60,6 +64,103 @@
     findCompatibleSystemTypes();
 }
 
+bool UCD90320Monitor::checkPGOODFaults(
+    std::map<std::string, std::string>& additionalData)
+{
+    // Sample the monitored GPIs; a zero reading is treated as a fault.
+    const std::vector<int> gpiValues = lines.get_values();
+    for (size_t idx = 0; idx < pins.size(); ++idx)
+    {
+        // Skip pins that have no sampled value or that read non-zero.
+        if (idx >= gpiValues.size() || gpiValues[idx])
+        {
+            continue;
+        }
+        // Register snapshots are best effort; a read failure is only traced.
+        try
+        {
+            additionalData.emplace("STATUS_WORD",
+                                   fmt::format("{:#04x}", readStatusWord()));
+            additionalData.emplace("MFR_STATUS",
+                                   fmt::format("{:#04x}", readMFRStatus()));
+        }
+        catch (const device_error::ReadFailure&)
+        {
+            log<level::ERR>("ReadFailure when collecting metadata");
+        }
+        const auto& failedPin = pins[idx];
+        additionalData.emplace("INPUT_NUM", fmt::format("{}", failedPin.line));
+        additionalData.emplace("INPUT_NAME", failedPin.name);
+        additionalData.emplace("INPUT_STATUS",
+                               fmt::format("{}", gpiValues[idx]));
+
+        // Only the first faulted input produces an error log.
+        logError("xyz.openbmc_project.Power.Error.PowerSequencerPGOODFault",
+                 additionalData);
+        return true;
+    }
+    return false;
+}
+
+bool UCD90320Monitor::checkVOUTFaults(
+    std::map<std::string, std::string>& additionalData)
+{
+    // The status_word register has a summary bit to tell us if any page
+    // needs to be checked. Exceptions from this read are not caught here.
+    const auto statusWord = readStatusWord();
+    if (!(statusWord & status_word::VOUT_FAULT))
+    {
+        return false;
+    }
+
+    constexpr size_t numberPages = 24;
+    for (size_t page = 0; page < numberPages; ++page)
+    {
+        auto statusVout = pmbusInterface.insertPageNum(STATUS_VOUT, page);
+        uint8_t vout = pmbusInterface.read(statusVout, Type::Debug);
+
+        // If any bits are on log them, though some are just
+        // warnings so they won't cause errors
+        if (vout)
+        {
+            log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
+                             entry("STATUS_VOUT=0x%X", vout),
+                             entry("PAGE=%zu", page));
+        }
+
+        // Log errors if any non-warning bits on
+        if (vout & ~status_vout::WARNING_MASK)
+        {
+            additionalData.emplace("STATUS_WORD",
+                                   fmt::format("{:#04x}", statusWord));
+            additionalData.emplace("STATUS_VOUT", fmt::format("{:#02x}", vout));
+            try
+            {
+                additionalData.emplace("MFR_STATUS",
+                                       fmt::format("{:#04x}", readMFRStatus()));
+            }
+            catch (const device_error::ReadFailure&)
+            {
+                log<level::ERR>("ReadFailure when collecting MFR_STATUS");
+            }
+            additionalData.emplace("RAIL", fmt::format("{}", page));
+            // rails may hold fewer names than there are pages; indexing past
+            // its end is undefined behavior, so RAIL_NAME is omitted then.
+            if (page < rails.size())
+            {
+                additionalData.emplace("RAIL_NAME", rails[page]);
+            }
+            logError(
+                "xyz.openbmc_project.Power.Error.PowerSequencerVoltageFault",
+                additionalData);
+            // Only the first faulted page produces an error log.
+            return true;
+        }
+    }
+
+    return false;
+}
+
 void UCD90320Monitor::findCompatibleSystemTypes()
 {
     try
@@ -253,6 +354,60 @@
     }
 }
 
+void UCD90320Monitor::onFailure(bool timeout,
+                                const std::string& powerSupplyError)
+{
+    std::map<std::string, std::string> additionalData{};
+    if (!powerSupplyError.empty())
+    {
+        logError(powerSupplyError, additionalData);
+        return;
+    }
+
+    // Becomes true once a device-specific error log has been created.
+    bool isolated = false;
+    try
+    {
+        const bool voutError = checkVOUTFaults(additionalData);
+        const bool pgoodError = checkPGOODFaults(additionalData);
+        isolated = voutError || pgoodError;
+    }
+    catch (const device_error::ReadFailure&)
+    {
+        log<level::ERR>("ReadFailure when collecting metadata");
+    }
+    if (isolated)
+    {
+        return;
+    }
+
+    // Nothing was isolated (or the device could not be read), but we know
+    // something failed, so log the timeout or generic error in both cases.
+    if (timeout)
+    {
+        // Default to timeout error
+        logError("xyz.openbmc_project.Power.Error.PowerOnTimeout",
+                 additionalData);
+    }
+    else
+    {
+        // Default to generic pgood error
+        logError("xyz.openbmc_project.Power.Error.Shutdown",
+                 additionalData);
+    }
+}
+
+uint16_t UCD90320Monitor::readStatusWord()
+{
+    return static_cast<uint16_t>(pmbusInterface.read(STATUS_WORD, Type::Debug));
+}
+
+uint32_t UCD90320Monitor::readMFRStatus()
+{
+    const std::string registerName{"mfr_status"};
+    return pmbusInterface.read(registerName, Type::HwmonDeviceDebug);
+}
+
 void UCD90320Monitor::setUpGpio(const std::vector<unsigned int>& offsets)
 {
     gpiod::chip chip{"ucd90320", gpiod::chip::OPEN_BY_LABEL};
diff --git a/phosphor-power-sequencer/src/ucd90320_monitor.hpp b/phosphor-power-sequencer/src/ucd90320_monitor.hpp
index 97958ce..b4d1916 100644
--- a/phosphor-power-sequencer/src/ucd90320_monitor.hpp
+++ b/phosphor-power-sequencer/src/ucd90320_monitor.hpp
@@ -49,6 +49,9 @@
      */
     void interfacesAddedHandler(sdbusplus::message::message& msg);
 
+    /** @copydoc PowerSequencerMonitor::onFailure() */
+    void onFailure(bool timeout, const std::string& powerSupplyError) override;
+
   private:
     /**
      * Set of GPIO lines to monitor in this UCD chip.
@@ -76,6 +79,20 @@
     std::vector<std::string> rails;
 
     /**
+     * Checks the monitored GPIs for a PGOOD fault; logs the first one found.
+     * @param[in,out] additionalData AdditionalData entries for the error log
+     * @return bool true if an error log was created
+     */
+    bool checkPGOODFaults(std::map<std::string, std::string>& additionalData);
+
+    /**
+     * Checks the device pages for a VOUT fault; logs the first one found.
+     * @param[in,out] additionalData AdditionalData entries for the error log
+     * @return bool true if an error log was created
+     */
+    bool checkVOUTFaults(std::map<std::string, std::string>& additionalData);
+
+    /**
      * Finds the list of compatible system types using D-Bus methods.
      * This list is used to find the correct JSON configuration file for the
      * current system.
@@ -99,6 +116,18 @@
     void parseConfigFile(const std::filesystem::path& pathName);
 
     /**
+     * Reads the mfr_status register through the PMBus interface.
+     * @return uint32_t the register contents
+     */
+    uint32_t readMFRStatus();
+
+    /**
+     * Reads the status_word register through the PMBus interface.
+     * @return uint16_t the register contents
+     */
+    uint16_t readStatusWord();
+
+    /**
      * Set up GPIOs
      * @param[in] offsets the list of pin offsets
      */