Add support for power supply off when it should be on
If the POWER_GOOD# (power good negated) bit or the UNIT_IS_OFF bit is set in
STATUS_WORD while the power is on, create an error log and attach the
STATUS_WORD, STATUS_INPUT, STATUS_VOUT, STATUS_IOUT, and MFR_SPECIFIC values
to the metadata. The combination of those PMBus command results should give
an indication as to why the power supply has turned off.
Change-Id: I692a8fdeac3fe208a5eb70964db7b5094cfb587c
Signed-off-by: Brandon Wyman <bjwyman@gmail.com>
diff --git a/elog-errors.hpp b/elog-errors.hpp
index 7309c89..7dd5897 100644
--- a/elog-errors.hpp
+++ b/elog-errors.hpp
@@ -135,6 +135,26 @@
{
namespace openbmc_project
{
+namespace Power
+{
+namespace Fault
+{
+namespace Error
+{
+ struct PowerSupplyShouldBeOn;
+} // namespace Error
+} // namespace Fault
+} // namespace Power
+} // namespace openbmc_project
+} // namespace xyz
+} // namespace sdbusplus
+
+namespace sdbusplus
+{
+namespace xyz
+{
+namespace openbmc_project
+{
namespace Common
{
namespace Callout
@@ -655,6 +675,54 @@
{
namespace Fault
{
+namespace _PowerSupplyShouldBeOn
+{
+
+struct RAW_STATUS
+{
+ static constexpr auto str = "RAW_STATUS=%s";
+ static constexpr auto str_short = "RAW_STATUS";
+ using type = std::tuple<std::decay_t<decltype(str)>,const char*>;
+ explicit constexpr RAW_STATUS(const char* a) : _entry(entry(str, a)) {};
+ type _entry;
+};
+
+} // namespace _PowerSupplyShouldBeOn
+
+struct PowerSupplyShouldBeOn
+{
+ static constexpr auto L = level::ERR;
+ using RAW_STATUS = _PowerSupplyShouldBeOn::RAW_STATUS;
+ using CALLOUT_INVENTORY_PATH = xyz::openbmc_project::Common::Callout::Inventory::CALLOUT_INVENTORY_PATH;
+ using metadata_types = std::tuple<RAW_STATUS, CALLOUT_INVENTORY_PATH>;
+
+};
+
+} // namespace Fault
+} // namespace Power
+} // namespace openbmc_project
+} // namespace xyz
+
+
+namespace details
+{
+
+template <>
+struct map_exception_type<sdbusplus::xyz::openbmc_project::Power::Fault::Error::PowerSupplyShouldBeOn>
+{
+ using type = xyz::openbmc_project::Power::Fault::PowerSupplyShouldBeOn;
+};
+
+}
+
+namespace xyz
+{
+namespace openbmc_project
+{
+namespace Power
+{
+namespace Fault
+{
namespace _Shutdown
{
diff --git a/pmbus.hpp b/pmbus.hpp
index 214040d..6682f00 100644
--- a/pmbus.hpp
+++ b/pmbus.hpp
@@ -11,25 +11,46 @@
namespace fs = std::experimental::filesystem;
-// The file name Linux uses to capture the VIN_UV_FAULT bit from the STATUS_WORD
-constexpr auto VIN_UV_FAULT = "in1_alarm";
-
-// The file name Linux uses to capture the input fault or warning bit from the
-// STATUS_WORD
-constexpr auto INPUT_FAULT_WARN = "power1_alarm";
-
// The file name Linux uses to capture the STATUS_WORD from pmbus.
constexpr auto STATUS_WORD = "status0";
// The file name Linux uses to capture the STATUS_INPUT from pmbus.
constexpr auto STATUS_INPUT = "status0_input";
+// Voltage out status.
+// Overvoltage fault or warning, Undervoltage fault or warning, maximum or
+// minimum warning, ....
// Uses Page substitution
constexpr auto STATUS_VOUT = "statusP_vout";
+// Current output status bits.
+constexpr auto STATUS_IOUT = "status0_iout";
+
+// Manufacturer specific status bits
+constexpr auto STATUS_MFR = "status0_mfr";
+
namespace status_word
{
constexpr auto VOUT_FAULT = 0x8000;
+
+// The IBM CFF power supply driver does map this bit to power1_alarm in the
+// hwmon space, but since the other bits that need to be checked do not have
+// a similar mapping, the code will just read STATUS_WORD and use bit masking
+// to see if the INPUT FAULT OR WARNING bit is on.
+constexpr auto INPUT_FAULT_WARN = 0x2000;
+
+// The bit mask representing the POWER_GOOD Negated bit of the STATUS_WORD.
+constexpr auto POWER_GOOD_NEGATED = 0x0800;
+
+// The bit mask representing the UNIT_IS_OFF bit of the STATUS_WORD.
+constexpr auto UNIT_IS_OFF = 0x0040;
+
+// The IBM CFF power supply driver does map this bit to in1_alarm, however,
+// since a number of the other bits are not mapped that way for STATUS_WORD,
+// this code will just read the entire STATUS_WORD and use bit masking to find
+// out if that fault is on.
+constexpr auto VIN_UV_FAULT = 0x0008;
+
}
/**
@@ -174,7 +195,7 @@
* Finds the path relative to basePath to the hwmon directory
* for the device and stores it in hwmonRelPath.
*/
- void findHwmonDir();
+ void findHwmonDir();
/**
* Returns the path to use for the passed in type.
@@ -183,7 +204,7 @@
*
* @return fs::path - the full path
*/
- fs::path getPath(Type type);
+ fs::path getPath(Type type);
private:
diff --git a/power-supply/power_supply.cpp b/power-supply/power_supply.cpp
index 1510753..467144b 100644
--- a/power-supply/power_supply.cpp
+++ b/power-supply/power_supply.cpp
@@ -90,7 +90,12 @@
{
if (present)
{
- auto curUVFault = pmbusIntf.readBit(VIN_UV_FAULT, Type::Hwmon);
+ std::uint16_t statusWord = 0;
+ std::uint8_t statusInput = 0;
+
+ // Read the 2 byte STATUS_WORD value to check for faults.
+ statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
+
//TODO: 3 consecutive reads should be performed.
// If 3 consecutive reads are seen, log the fault.
// Driver gives cached value, read once a second.
@@ -98,63 +103,96 @@
// If count reaches 3, we have fault. If count reaches 0, fault is
// cleared.
- auto curInputFault = pmbusIntf.readBit(INPUT_FAULT_WARN,
- Type::Hwmon);
-
- if (curUVFault != vinUVFault)
+ if ((statusWord & status_word::VIN_UV_FAULT) && !vinUVFault)
{
- vinUVFault = curUVFault;
+ vinUVFault = true;
- if (curUVFault)
- {
- std::uint16_t statusWord = 0;
- statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
+ util::NamesValues nv;
+ nv.add("STATUS_WORD", statusWord);
- util::NamesValues nv;
- nv.add("STATUS_WORD", statusWord);
+ using metadata = xyz::openbmc_project::Power::Fault::
+ PowerSupplyUnderVoltageFault;
- using metadata = xyz::openbmc_project::Power::Fault::
- PowerSupplyUnderVoltageFault;
+ report<PowerSupplyUnderVoltageFault>(
+ metadata::RAW_STATUS(nv.get().c_str()));
- report<PowerSupplyUnderVoltageFault>(
- metadata::RAW_STATUS(nv.get().c_str()));
-
- vinUVFault = true;
- }
- else
- {
- log<level::INFO>("VIN_UV_FAULT cleared",
- entry("POWERSUPPLY=%s",
- inventoryPath.c_str()));
- }
-
+ vinUVFault = true;
+ }
+ else
+ {
+ vinUVFault = false;
+ log<level::INFO>("VIN_UV_FAULT cleared",
+ entry("POWERSUPPLY=%s",
+ inventoryPath.c_str()));
}
- if (curInputFault != inputFault)
+ if ((statusWord & status_word::INPUT_FAULT_WARN) && !inputFault)
{
- if (curInputFault)
- {
- std::uint16_t statusWord = 0;
- std::uint8_t statusInput = 0;
+ inputFault = true;
- statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
+ statusInput = pmbusIntf.read(STATUS_INPUT, Type::Debug);
+
+ util::NamesValues nv;
+ nv.add("STATUS_WORD", statusWord);
+ nv.add("STATUS_INPUT", statusInput);
+
+ using metadata = xyz::openbmc_project::Power::Fault::
+ PowerSupplyInputFault;
+
+ report<PowerSupplyInputFault>(metadata::RAW_STATUS(
+ nv.get().c_str()));
+ }
+ else
+ {
+ if ((inputFault) &&
+ !(statusWord & status_word::INPUT_FAULT_WARN))
+ {
+ inputFault = false;
+
statusInput = pmbusIntf.read(STATUS_INPUT, Type::Debug);
+ log<level::INFO>("INPUT_FAULT_WARN cleared",
+ entry("POWERSUPPLY=%s",
+ inventoryPath.c_str()),
+ entry("STATUS_WORD=0x%04X", statusWord),
+ entry("STATUS_INPUT=0x%02X", statusInput));
+ }
+ }
+
+ if (powerOn)
+ {
+ // Check PG# and UNIT_IS_OFF
+ if (((statusWord & status_word::POWER_GOOD_NEGATED) ||
+ (statusWord & status_word::UNIT_IS_OFF)) &&
+ !powerOnFault)
+ {
+ std::uint8_t statusVout = 0;
+ std::uint8_t statusIout = 0;
+ std::uint8_t statusMFR = 0;
+
+ statusInput = pmbusIntf.read(STATUS_INPUT, Type::Debug);
+ auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
+ statusVout = pmbusIntf.read(status0Vout, Type::Debug);
+ statusIout = pmbusIntf.read(STATUS_IOUT, Type::Debug);
+ statusMFR = pmbusIntf.read(STATUS_MFR, Type::Debug);
+
util::NamesValues nv;
nv.add("STATUS_WORD", statusWord);
nv.add("STATUS_INPUT", statusInput);
+ nv.add("STATUS_VOUT", statusVout);
+ nv.add("STATUS_IOUT", statusIout);
+ nv.add("MFR_SPECIFIC", statusMFR);
using metadata = xyz::openbmc_project::Power::Fault::
- PowerSupplyInputFault;
+ PowerSupplyShouldBeOn;
- report<PowerSupplyInputFault>(
- metadata::RAW_STATUS(nv.get().c_str()));
+ // A power supply is OFF (or pgood low) but should be on.
+ report<PowerSupplyShouldBeOn>(
+ metadata::RAW_STATUS(nv.get().c_str()),
+ metadata::CALLOUT_INVENTORY_PATH(
+ inventoryPath.c_str()));
- inputFault = true;
- }
- else
- {
- inputFault = false;
+ powerOnFault = true;
}
}
@@ -239,6 +277,7 @@
readFailLogged = false;
vinUVFault = false;
inputFault = false;
+ powerOnFault = false;
powerOnTimer.start(powerOnInterval, Timer::TimerType::oneshot);
}
else
diff --git a/power-supply/power_supply.hpp b/power-supply/power_supply.hpp
index 317c20b..e9bd9f3 100644
--- a/power-supply/power_supply.hpp
+++ b/power-supply/power_supply.hpp
@@ -96,6 +96,9 @@
/** @brief True if the power is on. */
bool powerOn = false;
+ /** @brief True if power on fault has been detected/reported. */
+ bool powerOnFault = false;
+
/** @brief The sd_event structure used by the power on timer. */
event::Event& event;
@@ -119,9 +122,7 @@
/** @brief Used to subscribe to D-Bus power on state changes **/
std::unique_ptr<sdbusplus::bus::match_t> powerOnMatch;
- /**
- * @brief Has a PMBus read failure already been logged?
- */
+ /** @brief Has a PMBus read failure already been logged? */
bool readFailLogged = false;
/**
@@ -140,7 +141,8 @@
*/
bool inputFault = false;
- /** @brief Callback for inventory property changes
+ /**
+ * @brief Callback for inventory property changes
*
* Process change of Present property for power supply.
*
@@ -166,7 +168,9 @@
*/
void updatePowerState();
- /** @brief Callback for power state property changes
+ /**
+ * @brief Callback for power state property changes
+ *
* Process changes to the powered on stat property for the system.
*
* @param[in] msg - Data associated with the power state signal
diff --git a/xyz/openbmc_project/Power/Fault.errors.yaml b/xyz/openbmc_project/Power/Fault.errors.yaml
index 6911753..4f270e4 100644
--- a/xyz/openbmc_project/Power/Fault.errors.yaml
+++ b/xyz/openbmc_project/Power/Fault.errors.yaml
@@ -2,6 +2,8 @@
description: The power supply has indicated an input or under voltage fault condition.
- name: PowerSupplyInputFault
description: The power supply has indicated an input fault or warn condition.
+- name: PowerSupplyShouldBeOn
+ description: The power supply indicated that it is not on when it should be.
- name: Shutdown
description: A power off was issued because a power fault was detected
diff --git a/xyz/openbmc_project/Power/Fault.metadata.yaml b/xyz/openbmc_project/Power/Fault.metadata.yaml
index 184fffa..1615734 100644
--- a/xyz/openbmc_project/Power/Fault.metadata.yaml
+++ b/xyz/openbmc_project/Power/Fault.metadata.yaml
@@ -8,6 +8,13 @@
meta:
- str: "RAW_STATUS=%s"
type: string
+- name: PowerSupplyShouldBeOn
+ level: ERR
+ meta:
+ - str: "RAW_STATUS=%s"
+ type: string
+ inherits:
+ - xyz.openbmc_project.Common.Callout.Inventory
- name: Shutdown
level: ERR