pseq: Add pgood fault detection to Rail class
Add power good (pgood) fault detection to the Rail class in the
phosphor-power-sequencer application.
Implement the checking defined in the JSON configuration file:
* Check for fault bits set in STATUS_VOUT
* Check for a GPIO with the wrong value
* Check if the output voltage (READ_VOUT) is at or below the
undervoltage limit (VOUT_UV_FAULT_LIMIT)
If a pgood fault is detected, capture the relevant debug information
such as the rail name and STATUS_WORD value.
Tested:
* Added gtests for the new code
* Ran all gtests and verified they passed
Change-Id: I09c3ed6c504fe907a7854a4ac462a2bc4a8b806f
Signed-off-by: Shawn McCarney <shawnmm@us.ibm.com>
diff --git a/phosphor-power-sequencer/src/meson.build b/phosphor-power-sequencer/src/meson.build
index c1fa67c..65146ad 100644
--- a/phosphor-power-sequencer/src/meson.build
+++ b/phosphor-power-sequencer/src/meson.build
@@ -6,6 +6,7 @@
phosphor_power_sequencer_library = static_library(
'phosphor-power-sequencer',
'config_file_parser.cpp',
+ 'rail.cpp',
'services.cpp',
implicit_include_directories: false,
dependencies: [
diff --git a/phosphor-power-sequencer/src/power_sequencer_device.hpp b/phosphor-power-sequencer/src/power_sequencer_device.hpp
index e7ec274..26e7c6a 100644
--- a/phosphor-power-sequencer/src/power_sequencer_device.hpp
+++ b/phosphor-power-sequencer/src/power_sequencer_device.hpp
@@ -19,7 +19,6 @@
#include <cstdint>
#include <map>
-#include <stdexcept>
#include <string>
#include <vector>
@@ -122,20 +121,6 @@
virtual double getVoutUVFaultLimit(uint8_t page) = 0;
/**
- * Returns the value of the PMBus VOUT_OV_FAULT_LIMIT command for the
- * specified PMBus page.
- *
- * The returned value is in Volts.
- *
- * Throws an exception if the value could not be obtained or the device does
- * not support the VOUT_OV_FAULT_LIMIT command.
- *
- * @param page PMBus page
- * @return VOUT_OV_FAULT_LIMIT value in volts
- */
- virtual double getVoutOVFaultLimit(uint8_t page) = 0;
-
- /**
* Returns whether a pgood fault has occurred on one of the rails being
* monitored by this device.
*
diff --git a/phosphor-power-sequencer/src/rail.cpp b/phosphor-power-sequencer/src/rail.cpp
new file mode 100644
index 0000000..7355b9a
--- /dev/null
+++ b/phosphor-power-sequencer/src/rail.cpp
@@ -0,0 +1,284 @@
+/**
+ * Copyright © 2024 IBM Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rail.hpp"
+
+#include "pmbus.hpp"
+
+#include <exception>
+#include <format>
+
+namespace phosphor::power::sequencer
+{
+namespace status_vout = phosphor::pmbus::status_vout;
+
+bool Rail::isPresent(Services& services)
+{
+ // Rail is treated as present unless an inventory path says otherwise
+ bool present{true};
+
+ // Only query presence if an inventory path was configured for this rail
+ if (presence)
+ {
+ const std::string& inventoryPath = *presence;
+ try
+ {
+ present = services.isPresent(inventoryPath);
+ }
+ catch (const std::exception& e) // Rethrow with rail name and path added
+ {
+ throw std::runtime_error{std::format(
+ "Unable to determine presence of rail {} using inventory path {}: {}",
+ name, inventoryPath, e.what())};
+ }
+ }
+
+ return present;
+}
+
+uint16_t Rail::getStatusWord(PowerSequencerDevice& device)
+{
+ uint16_t value{0};
+ try
+ {
+ verifyHasPage(); // Throws if no PAGE number is defined for this rail
+ value = device.getStatusWord(*page);
+ }
+ catch (const std::exception& e) // Rethrow with the rail name added
+ {
+ throw std::runtime_error{
+ std::format("Unable to read STATUS_WORD value for rail {}: {}",
+ name, e.what())};
+ }
+ return value;
+}
+
+uint8_t Rail::getStatusVout(PowerSequencerDevice& device)
+{
+ uint8_t value{0};
+ try
+ {
+ verifyHasPage(); // Throws if no PAGE number is defined for this rail
+ value = device.getStatusVout(*page);
+ }
+ catch (const std::exception& e) // Rethrow with the rail name added
+ {
+ throw std::runtime_error{
+ std::format("Unable to read STATUS_VOUT value for rail {}: {}",
+ name, e.what())};
+ }
+ return value;
+}
+
+double Rail::getReadVout(PowerSequencerDevice& device)
+{
+ double value{0.0};
+ try
+ {
+ verifyHasPage(); // Throws if no PAGE number is defined for this rail
+ value = device.getReadVout(*page);
+ }
+ catch (const std::exception& e) // Rethrow with the rail name added
+ {
+ throw std::runtime_error{std::format(
+ "Unable to read READ_VOUT value for rail {}: {}", name, e.what())};
+ }
+ return value;
+}
+
+double Rail::getVoutUVFaultLimit(PowerSequencerDevice& device)
+{
+ double value{0.0};
+ try
+ {
+ verifyHasPage(); // Throws if no PAGE number is defined for this rail
+ value = device.getVoutUVFaultLimit(*page);
+ }
+ catch (const std::exception& e) // Rethrow with the rail name added
+ {
+ throw std::runtime_error{std::format(
+ "Unable to read VOUT_UV_FAULT_LIMIT value for rail {}: {}", name,
+ e.what())};
+ }
+ return value;
+}
+
+bool Rail::hasPgoodFault(PowerSequencerDevice& device, Services& services,
+ const std::vector<int>& gpioValues,
+ std::map<std::string, std::string>& additionalData)
+{
+ // Rail not present: log that and return false without any other checks
+ if (!isPresent(services))
+ {
+ services.logInfoMsg(std::format("Rail {} is not present", name));
+ return false;
+ }
+
+ // First check: fault bits in STATUS_VOUT
+ bool hasFault = hasPgoodFaultStatusVout(device, services, additionalData);
+
+ // Second check, only if no fault found yet: pgood GPIO value
+ if (!hasFault)
+ {
+ hasFault = hasPgoodFaultGPIO(services, gpioValues, additionalData);
+ }
+
+ // Third check, only if no fault found yet: READ_VOUT at or below UV limit
+ if (!hasFault)
+ {
+ hasFault = hasPgoodFaultOutputVoltage(device, services, additionalData);
+ }
+
+ // Fault found by any check: log it and store common debug data in the map
+ if (hasFault)
+ {
+ services.logErrorMsg(
+ std::format("Pgood fault detected in rail {}", name));
+ storePgoodFaultDebugData(device, services, additionalData);
+ }
+
+ return hasFault;
+}
+
+void Rail::verifyHasPage()
+{
+ if (!page) // No PMBus PAGE number was specified for this rail
+ {
+ throw std::runtime_error{
+ std::format("No PAGE number defined for rail {}", name)};
+ }
+}
+
+bool Rail::hasPgoodFaultStatusVout(
+ PowerSequencerDevice& device, Services& services,
+ std::map<std::string, std::string>& additionalData)
+{
+ bool hasFault{false};
+
+ // Only read STATUS_VOUT if this rail is configured to check it
+ if (checkStatusVout)
+ {
+ // Read STATUS_VOUT value from device; throws if the value cannot be read
+ uint8_t statusVout = getStatusVout(device);
+
+ // Any bit set outside WARNING_MASK is treated as a pgood fault
+ if (statusVout & ~status_vout::WARNING_MASK)
+ {
+ hasFault = true;
+ services.logErrorMsg(std::format(
+ "Rail {} has fault bits set in STATUS_VOUT: {:#04x}", name,
+ statusVout));
+ additionalData.emplace("STATUS_VOUT",
+ std::format("{:#04x}", statusVout));
+ }
+ else if (statusVout != 0) // Only warning bits set: log, but not a fault
+ {
+ services.logInfoMsg(std::format(
+ "Rail {} has warning bits set in STATUS_VOUT: {:#04x}", name,
+ statusVout));
+ }
+ }
+
+ return hasFault;
+}
+
+bool Rail::hasPgoodFaultGPIO(Services& services,
+ const std::vector<int>& gpioValues,
+ std::map<std::string, std::string>& additionalData)
+{
+ bool hasFault{false};
+
+ // Only check a GPIO if one was defined for this rail
+ if (gpio)
+ {
+ // Line offset is the index into gpioValues; throw if it is out of range
+ unsigned int line = gpio->line;
+ bool activeLow = gpio->activeLow;
+ if (line >= gpioValues.size())
+ {
+ throw std::runtime_error{std::format(
+ "Invalid GPIO line offset {} for rail {}: Device only has {} GPIO values",
+ line, name, gpioValues.size())};
+ }
+ int value = gpioValues[line];
+
+ // Inactive pgood is a fault: value 1 if active low, 0 if active high
+ if ((activeLow && (value == 1)) || (!activeLow && (value == 0)))
+ {
+ hasFault = true;
+ services.logErrorMsg(std::format(
+ "Rail {} pgood GPIO line offset {} has inactive value {}", name,
+ line, value));
+ additionalData.emplace("GPIO_LINE", std::format("{}", line));
+ additionalData.emplace("GPIO_VALUE", std::format("{}", value));
+ }
+ }
+
+ return hasFault;
+}
+
+bool Rail::hasPgoodFaultOutputVoltage(
+ PowerSequencerDevice& device, Services& services,
+ std::map<std::string, std::string>& additionalData)
+{
+ bool hasFault{false};
+
+ // Only compare voltage to UV limit if this rail is configured to do so
+ if (compareVoltageToLimit)
+ {
+ // Read both values from device; either read throws if it fails
+ double vout = getReadVout(device);
+ double uvLimit = getVoutUVFaultLimit(device);
+
+ // Fault if output voltage is at or below (<=) the UV fault limit
+ if (vout <= uvLimit)
+ {
+ hasFault = true;
+ services.logErrorMsg(std::format(
+ "Rail {} output voltage {}V is <= UV fault limit {}V", name,
+ vout, uvLimit));
+ additionalData.emplace("READ_VOUT", std::format("{}", vout));
+ additionalData.emplace("VOUT_UV_FAULT_LIMIT",
+ std::format("{}", uvLimit));
+ }
+ }
+
+ return hasFault;
+}
+
+void Rail::storePgoodFaultDebugData(
+ PowerSequencerDevice& device, Services& services,
+ std::map<std::string, std::string>& additionalData)
+{
+ additionalData.emplace("RAIL_NAME", name);
+ if (page) // STATUS_WORD is only read when a PAGE number is defined
+ {
+ try
+ {
+ uint16_t statusWord = getStatusWord(device);
+ services.logInfoMsg(
+ std::format("Rail {} STATUS_WORD: {:#06x}", name, statusWord));
+ additionalData.emplace("STATUS_WORD",
+ std::format("{:#06x}", statusWord));
+ }
+ catch (...)
+ {
+ // Best effort: ignore any error; don't interrupt pgood fault handling
+ }
+ }
+}
+
+} // namespace phosphor::power::sequencer
diff --git a/phosphor-power-sequencer/src/rail.hpp b/phosphor-power-sequencer/src/rail.hpp
index a0115e5..5a49e13 100644
--- a/phosphor-power-sequencer/src/rail.hpp
+++ b/phosphor-power-sequencer/src/rail.hpp
@@ -15,10 +15,15 @@
*/
#pragma once
+#include "power_sequencer_device.hpp"
+#include "services.hpp"
+
#include <cstdint>
+#include <map>
#include <optional>
#include <stdexcept>
#include <string>
+#include <vector>
namespace phosphor::power::sequencer
{
@@ -93,7 +98,7 @@
compareVoltageToLimit{compareVoltageToLimit}, gpio{gpio}
{
// If checking STATUS_VOUT or output voltage, verify PAGE was specified
- if ((checkStatusVout || compareVoltageToLimit) && !page.has_value())
+ if ((checkStatusVout || compareVoltageToLimit) && !page)
{
throw std::invalid_argument{"PMBus PAGE is required"};
}
@@ -172,8 +177,158 @@
return gpio;
}
+ /**
+ * Returns whether the rail is present. Returns true if no inventory path
+ * was specified for presence detection. Throws an exception if presence
+ * could not be determined.
+ *
+ * @param services System services like hardware presence and the journal
+ * @return true if rail is present, false otherwise
+ */
+ bool isPresent(Services& services);
+
+ /**
+ * Returns the value of the PMBus STATUS_WORD command for the rail.
+ *
+ * Reads the value from the specified device. The returned value is in
+ * host-endian order.
+ *
+ * Throws an exception if the value could not be obtained.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @return STATUS_WORD value
+ */
+ uint16_t getStatusWord(PowerSequencerDevice& device);
+
+ /**
+ * Returns the value of the PMBus STATUS_VOUT command for the rail.
+ *
+ * Reads the value from the specified device.
+ *
+ * Throws an exception if the value could not be obtained.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @return STATUS_VOUT value
+ */
+ uint8_t getStatusVout(PowerSequencerDevice& device);
+
+ /**
+ * Returns the value of the PMBus READ_VOUT command for the rail.
+ *
+ * Reads the value from the specified device. The returned value is in
+ * volts.
+ *
+ * Throws an exception if the value could not be obtained.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @return READ_VOUT value in volts
+ */
+ double getReadVout(PowerSequencerDevice& device);
+
+ /**
+ * Returns the value of the PMBus VOUT_UV_FAULT_LIMIT command for the rail.
+ *
+ * Reads the value from the specified device. The returned value is in
+ * volts.
+ *
+ * Throws an exception if the value could not be obtained.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @return VOUT_UV_FAULT_LIMIT value in volts
+ */
+ double getVoutUVFaultLimit(PowerSequencerDevice& device);
+
+ /**
+ * Returns whether a pgood (power good) fault has occurred on the rail.
+ *
+ * Throws an exception if an error occurs while trying to obtain the rail
+ * status.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @param services System services like hardware presence and the journal
+ * @param gpioValues GPIO values obtained from the device (if any)
+ * @param additionalData Additional data to include in an error log if this
+ * method returns true
+ * @return true if a pgood fault was found on the rail, false otherwise
+ */
+ bool hasPgoodFault(PowerSequencerDevice& device, Services& services,
+ const std::vector<int>& gpioValues,
+ std::map<std::string, std::string>& additionalData);
+
private:
/**
+ * Verifies that a PMBus PAGE number is defined for the rail.
+ *
+ * Throws an exception if a PAGE number is not defined.
+ */
+ void verifyHasPage();
+
+ /**
+ * Returns whether the PMBus STATUS_VOUT command indicates a pgood fault
+ * has occurred on the rail.
+ *
+ * Throws an exception if an error occurs while trying to obtain the rail
+ * status.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @param services System services like hardware presence and the journal
+ * @param additionalData Additional data to include in an error log if this
+ * method returns true
+ * @return true if a pgood fault was found on the rail, false otherwise
+ */
+ bool hasPgoodFaultStatusVout(
+ PowerSequencerDevice& device, Services& services,
+ std::map<std::string, std::string>& additionalData);
+
+ /**
+ * Returns whether a GPIO value indicates a pgood fault has occurred on the
+ * rail.
+ *
+ * Throws an exception if an error occurs while trying to obtain the rail
+ * status.
+ *
+ * @param services System services like hardware presence and the journal
+ * @param gpioValues GPIO values obtained from the device (if any)
+ * @param additionalData Additional data to include in an error log if this
+ * method returns true
+ * @return true if a pgood fault was found on the rail, false otherwise
+ */
+ bool hasPgoodFaultGPIO(Services& services,
+ const std::vector<int>& gpioValues,
+ std::map<std::string, std::string>& additionalData);
+
+ /**
+ * Returns whether the output voltage is at or below the undervoltage
+ * limit, indicating a pgood fault has occurred on the rail.
+ *
+ * Throws an exception if an error occurs while trying to obtain the rail
+ * status.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @param services System services like hardware presence and the journal
+ * @param additionalData Additional data to include in an error log if this
+ * method returns true
+ * @return true if a pgood fault was found on the rail, false otherwise
+ */
+ bool hasPgoodFaultOutputVoltage(
+ PowerSequencerDevice& device, Services& services,
+ std::map<std::string, std::string>& additionalData);
+
+ /**
+ * Store pgood fault debug data in the specified additional data map.
+ *
+ * Stores the rail name and, if a PAGE is defined and the read succeeds,
+ * the STATUS_WORD value. Used regardless of how the fault was detected.
+ *
+ * @param device Power sequencer device that enables and monitors the rail
+ * @param services System services like hardware presence and the journal
+ * @param additionalData Additional data to include in an error log
+ */
+ void storePgoodFaultDebugData(
+ PowerSequencerDevice& device, Services& services,
+ std::map<std::string, std::string>& additionalData);
+
+ /**
* Unique name for the rail.
*/
std::string name{};