pseq: Check STATUS_VOUT first to find pgood fault

Enhance the support for finding the correct voltage rail when a pgood
fault occurs.

First check the PMBus STATUS_VOUT register for all rails.  Check the
rails in power-on-sequence order.

Checking STATUS_VOUT is usually the most accurate method.  For example,
if a pgood fault occurs, the power sequencer device may automatically
shut off related rails.  Ideally the device will only set fault bits in
STATUS_VOUT for the rail with the pgood fault.  However, all the related
rails will likely appear to be faulted by the other methods.

If no fault is found by checking STATUS_VOUT, then check the GPIOs and
output voltage for all rails.  Check the rails in power-on-sequence
order.

Tested:
* Verified all automated test cases run successfully.

Change-Id: Ida8732db573013f1b72edac8ed54e3cfc38da146
Signed-off-by: Shawn McCarney <shawnmm@us.ibm.com>
diff --git a/phosphor-power-sequencer/src/rail.cpp b/phosphor-power-sequencer/src/rail.cpp
index 18866c5..ec97108 100644
--- a/phosphor-power-sequencer/src/rail.cpp
+++ b/phosphor-power-sequencer/src/rail.cpp
@@ -121,46 +121,9 @@
                          const std::vector<int>& gpioValues,
                          std::map<std::string, std::string>& additionalData)
 {
-    // If rail is not present, return false and don't check anything else
-    if (!isPresent(services))
-    {
-        services.logInfoMsg(std::format("Rail {} is not present", name));
-        return false;
-    }
-
-    // Check if STATUS_VOUT indicates a pgood fault occurred
-    bool hasFault = hasPgoodFaultStatusVout(device, services, additionalData);
-
-    // Check if a GPIO value indicates a pgood fault occurred
-    if (!hasFault)
-    {
-        hasFault = hasPgoodFaultGPIO(services, gpioValues, additionalData);
-    }
-
-    // Check if output voltage is below UV limit indicating pgood fault occurred
-    if (!hasFault)
-    {
-        hasFault = hasPgoodFaultOutputVoltage(device, services, additionalData);
-    }
-
-    // If fault detected, store debug data in additional data map
-    if (hasFault)
-    {
-        services.logErrorMsg(
-            std::format("Pgood fault detected in rail {}", name));
-        storePgoodFaultDebugData(device, services, additionalData);
-    }
-
-    return hasFault;
-}
-
-void Rail::verifyHasPage()
-{
-    if (!page)
-    {
-        throw std::runtime_error{
-            std::format("No PAGE number defined for rail {}", name)};
-    }
+    return (hasPgoodFaultStatusVout(device, services, additionalData) ||
+            hasPgoodFaultGPIO(device, services, gpioValues, additionalData) ||
+            hasPgoodFaultOutputVoltage(device, services, additionalData));
 }
 
 bool Rail::hasPgoodFaultStatusVout(
@@ -169,8 +132,8 @@
 {
     bool hasFault{false};
 
-    // If we are checking the value of STATUS_VOUT for the rail
-    if (checkStatusVout)
+    // If rail is present and we are checking the value of STATUS_VOUT
+    if (isPresent(services) && checkStatusVout)
     {
         // Read STATUS_VOUT value from device
         uint8_t statusVout = getStatusVout(device);
@@ -184,6 +147,7 @@
                 statusVout));
             additionalData.emplace("STATUS_VOUT",
                                    std::format("{:#04x}", statusVout));
+            storePgoodFaultDebugData(device, services, additionalData);
         }
         else if (statusVout != 0)
         {
@@ -196,14 +160,14 @@
     return hasFault;
 }
 
-bool Rail::hasPgoodFaultGPIO(Services& services,
+bool Rail::hasPgoodFaultGPIO(PowerSequencerDevice& device, Services& services,
                              const std::vector<int>& gpioValues,
                              std::map<std::string, std::string>& additionalData)
 {
     bool hasFault{false};
 
-    // If a GPIO is defined for checking pgood status
-    if (gpio)
+    // If rail is present and a GPIO is defined for checking pgood status
+    if (isPresent(services) && gpio)
     {
         // Get GPIO value
         unsigned int line = gpio->line;
@@ -225,6 +189,7 @@
                 line, value));
             additionalData.emplace("GPIO_LINE", std::format("{}", line));
             additionalData.emplace("GPIO_VALUE", std::format("{}", value));
+            storePgoodFaultDebugData(device, services, additionalData);
         }
     }
 
@@ -237,8 +202,8 @@
 {
     bool hasFault{false};
 
-    // If we are comparing output voltage to UV limit to check pgood status
-    if (compareVoltageToLimit)
+    // If rail is present and we are comparing output voltage to UV limit
+    if (isPresent(services) && compareVoltageToLimit)
     {
         // Read output voltage and UV fault limit values from device
         double vout = getReadVout(device);
@@ -254,16 +219,27 @@
             additionalData.emplace("READ_VOUT", std::format("{}", vout));
             additionalData.emplace("VOUT_UV_FAULT_LIMIT",
                                    std::format("{}", uvLimit));
+            storePgoodFaultDebugData(device, services, additionalData);
         }
     }
 
     return hasFault;
 }
 
+void Rail::verifyHasPage()
+{
+    if (!page)
+    {
+        throw std::runtime_error{
+            std::format("No PAGE number defined for rail {}", name)};
+    }
+}
+
 void Rail::storePgoodFaultDebugData(
     PowerSequencerDevice& device, Services& services,
     std::map<std::string, std::string>& additionalData)
 {
+    services.logErrorMsg(std::format("Pgood fault detected in rail {}", name));
     additionalData.emplace("RAIL_NAME", name);
     if (page)
     {
diff --git a/phosphor-power-sequencer/src/rail.hpp b/phosphor-power-sequencer/src/rail.hpp
index b4b6f95..ee331ca 100644
--- a/phosphor-power-sequencer/src/rail.hpp
+++ b/phosphor-power-sequencer/src/rail.hpp
@@ -257,14 +257,6 @@
                        const std::vector<int>& gpioValues,
                        std::map<std::string, std::string>& additionalData);
 
-  private:
-    /**
-     * Verifies that a PMBus PAGE number is defined for the rail.
-     *
-     * Throws an exception if a PAGE number is not defined.
-     */
-    void verifyHasPage();
-
     /**
      * Returns whether the PMBus STATUS_VOUT command indicates a pgood fault
      * has occurred on the rail.
@@ -290,12 +282,13 @@
      * status.
      *
      * @param device Power sequencer device that enables and monitors the rail
+     * @param services System services like hardware presence and the journal
      * @param gpioValues GPIO values obtained from the device (if any)
      * @param additionalData Additional data to include in an error log if this
      *                       method returns true
      * @return true if a pgood fault was found on the rail, false otherwise
      */
-    bool hasPgoodFaultGPIO(Services& services,
+    bool hasPgoodFaultGPIO(PowerSequencerDevice& device, Services& services,
                            const std::vector<int>& gpioValues,
                            std::map<std::string, std::string>& additionalData);
 
@@ -316,6 +309,14 @@
         PowerSequencerDevice& device, Services& services,
         std::map<std::string, std::string>& additionalData);
 
+  private:
+    /**
+     * Verifies that a PMBus PAGE number is defined for the rail.
+     *
+     * Throws an exception if a PAGE number is not defined.
+     */
+    void verifyHasPage();
+
     /**
      * Store pgood fault debug data in the specified additional data map.
      *
diff --git a/phosphor-power-sequencer/src/standard_device.cpp b/phosphor-power-sequencer/src/standard_device.cpp
index a11a9cc..5bfbfb3 100644
--- a/phosphor-power-sequencer/src/standard_device.cpp
+++ b/phosphor-power-sequencer/src/standard_device.cpp
@@ -39,32 +39,28 @@
         // obtain, so obtain them once and then pass values to each Rail object.
         std::vector<int> gpioValues = getGPIOValuesIfPossible(services);
 
-        // Loop through all the rails checking if any detected a pgood fault.
-        // The rails are in power-on-sequence order.
-        for (std::unique_ptr<Rail>& rail : rails)
+        // Try to find a voltage rail where a pgood fault occurred
+        Rail* rail = findRailWithPgoodFault(services, gpioValues,
+                                            additionalData);
+        if (rail != nullptr)
         {
-            if (rail->hasPgoodFault(*this, services, gpioValues,
-                                    additionalData))
+            services.logErrorMsg(std::format(
+                "Pgood fault found in rail monitored by device {}", name));
+
+            // If this is a PSU rail and a PSU error was previously detected
+            if (rail->isPowerSupplyRail() && !powerSupplyError.empty())
             {
-                services.logErrorMsg(std::format(
-                    "Pgood fault found in rail monitored by device {}", name));
-
-                // If this is a PSU rail and a PSU error was previously detected
-                if (rail->isPowerSupplyRail() && !powerSupplyError.empty())
-                {
-                    // Return power supply error as root cause
-                    error = powerSupplyError;
-                }
-                else
-                {
-                    // Return pgood fault as root cause
-                    error =
-                        "xyz.openbmc_project.Power.Error.PowerSequencerVoltageFault";
-                }
-
-                storePgoodFaultDebugData(services, gpioValues, additionalData);
-                break;
+                // Return power supply error as root cause
+                error = powerSupplyError;
             }
+            else
+            {
+                // Return pgood fault as root cause
+                error =
+                    "xyz.openbmc_project.Power.Error.PowerSequencerVoltageFault";
+            }
+
+            storePgoodFaultDebugData(services, gpioValues, additionalData);
         }
     }
     catch (const std::exception& e)
@@ -88,6 +84,43 @@
     return values;
 }
 
+Rail* StandardDevice::findRailWithPgoodFault(
+    Services& services, const std::vector<int>& gpioValues,
+    std::map<std::string, std::string>& additionalData)
+{
+    // Look for the first rail in the power on sequence with a pgood fault based
+    // on STATUS_VOUT.  This is usually the most accurate method.  For example,
+    // if a pgood fault occurs, the power sequencer device may automatically
+    // shut off related rails.  Ideally the device will only set fault bits in
+    // STATUS_VOUT for the rail with the pgood fault.  However, all the related
+    // rails will likely appear to be faulted by the other methods.
+    for (std::unique_ptr<Rail>& rail : rails)
+    {
+        if (rail->hasPgoodFaultStatusVout(*this, services, additionalData))
+        {
+            return rail.get();
+        }
+    }
+
+    // Look for the first rail in the power on sequence with a pgood fault based
+    // on either a GPIO or the output voltage.  Both methods check if the rail
+    // is powered off.  If a pgood fault occurs during the power on sequence,
+    // the power sequencer device may stop powering on rails.  As a result, all
+    // rails after the faulted one in the sequence may also be powered off.
+    for (std::unique_ptr<Rail>& rail : rails)
+    {
+        if (rail->hasPgoodFaultGPIO(*this, services, gpioValues,
+                                    additionalData) ||
+            rail->hasPgoodFaultOutputVoltage(*this, services, additionalData))
+        {
+            return rail.get();
+        }
+    }
+
+    // No rail with pgood fault found
+    return nullptr;
+}
+
 void StandardDevice::storePgoodFaultDebugData(
     Services& services, const std::vector<int>& gpioValues,
     std::map<std::string, std::string>& additionalData)
diff --git a/phosphor-power-sequencer/src/standard_device.hpp b/phosphor-power-sequencer/src/standard_device.hpp
index 210494f..2c1cf3b 100644
--- a/phosphor-power-sequencer/src/standard_device.hpp
+++ b/phosphor-power-sequencer/src/standard_device.hpp
@@ -110,6 +110,27 @@
     virtual std::vector<int> getGPIOValuesIfPossible(Services& services);
 
     /**
+     * Checks whether a pgood fault has occurred on one of the rails being
+     * monitored by this device.
+     *
+     * If a pgood fault was found in a rail, a pointer to the Rail object is
+     * returned.
+     *
+     * Throws an exception if an error occurs while trying to obtain the status
+     * of the rails.
+     *
+     * @param services System services like hardware presence and the journal
+     * @param gpioValues GPIO values obtained from the device (if any)
+     * @param additionalData Additional data to include in the error log if
+     *                       a pgood fault was found
+     * @return pointer to Rail object where fault was found, or nullptr if no
+     *         Rail found
+     */
+    virtual Rail* findRailWithPgoodFault(
+        Services& services, const std::vector<int>& gpioValues,
+        std::map<std::string, std::string>& additionalData);
+
+    /**
      * Store pgood fault debug data in the specified additional data map.
      *
      * The default implementation stores the device name and then calls