psu-ng: Add DEGLITCH_LIMIT, deglitch pgoodFault
While the power supply should not arbitrarily report a PGOOD fault, and
then turn it back off, there is a perception that this is indeed
possible, a glitch of some sort.
To avoid possibly logging an error for an erroneous fault reporting,
make sure the fault is reported more than once before considering it to
be a true fault (deglitch the signal).
Tested:
Real Rainier 2S2U:
Verify tracing PGOOD faults seen and cleared, no error logged
Verify PGOOD/OFF error logged when manually set ON_OFF_CONFIG & OPERATION.
Verify deglitched PGOOD again on restart service (ON_OFF_CONFIG reset).
Change-Id: I54f775004d2e363cff21ff0512bd9283408f1f72
Signed-off-by: Brandon Wyman <bjwyman@gmail.com>
diff --git a/phosphor-power-supply/power_supply.cpp b/phosphor-power-supply/power_supply.cpp
index 5e697e1..ea0d172 100644
--- a/phosphor-power-supply/power_supply.cpp
+++ b/phosphor-power-supply/power_supply.cpp
@@ -330,7 +330,7 @@
if ((statusWord & status_word::POWER_GOOD_NEGATED) ||
(statusWord & status_word::UNIT_IS_OFF))
{
- if (!pgoodFault)
+ if (pgoodFault < DEGLITCH_LIMIT)
{
log<level::ERR>(
fmt::format("PGOOD fault: "
@@ -338,9 +338,13 @@
"STATUS_MFR_SPECIFIC = {:#02x}",
statusWord, statusMFR)
.c_str());
- }
- pgoodFault = true;
+ pgoodFault++;
+ }
+ }
+ else
+ {
+ pgoodFault = 0;
}
if (statusWord & status_word::MFR_SPECIFIC_FAULT)
@@ -384,12 +388,12 @@
voutUVFault = false;
fanFault = false;
tempFault = false;
- if (pgoodFault)
+ if (pgoodFault > 0)
{
log<level::INFO>(fmt::format("pgoodFault cleared path: {}",
inventoryPath)
.c_str());
- pgoodFault = false;
+ pgoodFault = 0;
}
}
}
@@ -446,7 +450,7 @@
voutUVFault = false;
fanFault = false;
tempFault = false;
- pgoodFault = false;
+ pgoodFault = 0;
readFail = 0;
try
diff --git a/phosphor-power-supply/power_supply.hpp b/phosphor-power-supply/power_supply.hpp
index cf4c2bb..320fb65 100644
--- a/phosphor-power-supply/power_supply.hpp
+++ b/phosphor-power-supply/power_supply.hpp
@@ -34,6 +34,7 @@
#endif
constexpr auto LOG_LIMIT = 3;
+constexpr auto DEGLITCH_LIMIT = 3;
/**
* @class PowerSupply
@@ -207,7 +208,7 @@
{
return (hasCommFault() || vinUVFault || inputFault || voutOVFault ||
ioutOCFault || voutUVFault || fanFault || tempFault ||
- pgoodFault || mfrFault);
+ (pgoodFault >= DEGLITCH_LIMIT) || mfrFault);
}
/**
@@ -296,7 +297,7 @@
*/
bool hasPgoodFault() const
{
- return pgoodFault;
+ return (pgoodFault >= DEGLITCH_LIMIT);
}
/**
@@ -417,10 +418,12 @@
bool tempFault = false;
/**
- * @brief True if bit 11 or 6 of STATUS_WORD is on. PGOOD# is inactive, or
- * the unit is off.
+ * @brief Incremented if bit 11 or 6 of STATUS_WORD is on. PGOOD# is
+ * inactive, or the unit is off.
+ *
+ * Considered faulted if reaches DEGLITCH_LIMIT.
*/
- bool pgoodFault = false;
+ int pgoodFault = 0;
/** @brief Count of the number of read failures. */
size_t readFail = 0;
diff --git a/phosphor-power-supply/test/power_supply_tests.cpp b/phosphor-power-supply/test/power_supply_tests.cpp
index d42d359..222d9c3 100644
--- a/phosphor-power-supply/test/power_supply_tests.cpp
+++ b/phosphor-power-supply/test/power_supply_tests.cpp
@@ -216,16 +216,25 @@
// be present in order to read from the PMBus device(s).
MockedGPIOInterface* mockPresenceGPIO2 =
static_cast<MockedGPIOInterface*>(psu2.getPresenceGPIO());
- ON_CALL(*mockPresenceGPIO2, read()).WillByDefault(Return(1));
+ // Always return 1 to indicate present.
+ // Each analyze() call will trigger a read of the presence GPIO.
+ EXPECT_CALL(*mockPresenceGPIO2, read()).WillRepeatedly(Return(1));
+ // Not present until analyze() does the GPIO read.
EXPECT_EQ(psu2.isPresent(), false);
MockedPMBus& mockPMBus = static_cast<MockedPMBus&>(psu2.getPMBus());
+ // First analyze() call will trigger missing to present, requiring update
+ // to find the new HWMON directory.
+ EXPECT_CALL(mockPMBus, findHwmonDir());
// Presence change from missing to present will trigger write to
// ON_OFF_CONFIG.
EXPECT_CALL(mockPMBus, writeBinary(ON_OFF_CONFIG, _, _));
// Presence change from missing to present will trigger in1_input read
// in an attempt to get CLEAR_FAULTS called.
EXPECT_CALL(mockPMBus, read(READ_VIN, _)).Times(1).WillOnce(Return(206000));
+ // Change from missing to present will trigger call to update Present
+ // property in the inventory
+ EXPECT_CALL(mockedUtil, setPresence(_, _, true, _));
// STATUS_WORD INPUT fault.
{
@@ -477,7 +486,7 @@
EXPECT_EQ(psu2.hasPgoodFault(), false);
}
- // PGOOD/OFF fault.
+ // PGOOD/OFF fault. Deglitched, needs to reach DEGLITCH_LIMIT.
{
// First STATUS_WORD with no bits set.
PMBusExpectations expectations;
@@ -487,22 +496,39 @@
// POWER_GOOD# inactive, and OFF bit on.
expectations.statusWordValue =
((status_word::POWER_GOOD_NEGATED) | (status_word::UNIT_IS_OFF));
- // STATUS_INPUT, STATUS_MFR, STATUS_CML, STATUS_VOUT, and
- // STATUS_TEMPERATURE: Don't care if bits set or not (defaults).
- setPMBusExpectations(mockPMBus, expectations);
- psu2.analyze();
- EXPECT_EQ(psu2.isPresent(), true);
- EXPECT_EQ(psu2.isFaulted(), true);
- EXPECT_EQ(psu2.hasInputFault(), false);
- EXPECT_EQ(psu2.hasMFRFault(), false);
- EXPECT_EQ(psu2.hasVINUVFault(), false);
- EXPECT_EQ(psu2.hasCommFault(), false);
- EXPECT_EQ(psu2.hasVoutOVFault(), false);
- EXPECT_EQ(psu2.hasVoutUVFault(), false);
- EXPECT_EQ(psu2.hasIoutOCFault(), false);
- EXPECT_EQ(psu2.hasFanFault(), false);
- EXPECT_EQ(psu2.hasTempFault(), false);
- EXPECT_EQ(psu2.hasPgoodFault(), true);
+ for (auto x = 1; x <= DEGLITCH_LIMIT; x++)
+ {
+ // STATUS_INPUT, STATUS_MFR, STATUS_CML, STATUS_VOUT, and
+ // STATUS_TEMPERATURE: Don't care if bits set or not (defaults).
+ setPMBusExpectations(mockPMBus, expectations);
+ psu2.analyze();
+ EXPECT_EQ(psu2.isPresent(), true);
+ if (x < DEGLITCH_LIMIT)
+ {
+ EXPECT_EQ(psu2.isFaulted(), false);
+ }
+ else
+ {
+ EXPECT_EQ(psu2.isFaulted(), true);
+ }
+ EXPECT_EQ(psu2.hasInputFault(), false);
+ EXPECT_EQ(psu2.hasMFRFault(), false);
+ EXPECT_EQ(psu2.hasVINUVFault(), false);
+ EXPECT_EQ(psu2.hasCommFault(), false);
+ EXPECT_EQ(psu2.hasVoutOVFault(), false);
+ EXPECT_EQ(psu2.hasVoutUVFault(), false);
+ EXPECT_EQ(psu2.hasIoutOCFault(), false);
+ EXPECT_EQ(psu2.hasFanFault(), false);
+ EXPECT_EQ(psu2.hasTempFault(), false);
+ if (x < DEGLITCH_LIMIT)
+ {
+ EXPECT_EQ(psu2.hasPgoodFault(), false);
+ }
+ else
+ {
+ EXPECT_EQ(psu2.hasPgoodFault(), true);
+ }
+ }
}
// TODO: ReadFailure
@@ -559,12 +585,19 @@
PowerSupply psu{bus, PSUInventoryPath, 13, 0x68, PSUGPIOLineName};
MockedGPIOInterface* mockPresenceGPIO =
static_cast<MockedGPIOInterface*>(psu.getPresenceGPIO());
- // GPIO read return 1 to indicate present.
- ON_CALL(*mockPresenceGPIO, read()).WillByDefault(Return(1));
+ // Always return 1 to indicate present.
+ // Each analyze() call will trigger a read of the presence GPIO.
+ EXPECT_CALL(*mockPresenceGPIO, read()).WillRepeatedly(Return(1));
MockedPMBus& mockPMBus = static_cast<MockedPMBus&>(psu.getPMBus());
+ // Change from missing to present will trigger HWMON directory update.
+ EXPECT_CALL(mockPMBus, findHwmonDir());
+ // Change from missing to present will trigger ON_OFF_CONFIG write.
+ EXPECT_CALL(mockPMBus, writeBinary(ON_OFF_CONFIG, _, _));
// Presence change from missing to present will trigger in1_input read in
// an attempt to get CLEAR_FAULTS called.
EXPECT_CALL(mockPMBus, read(READ_VIN, _)).Times(1).WillOnce(Return(206000));
+ // Missing/present call will update Presence in inventory.
+ EXPECT_CALL(mockedUtil, setPresence(_, _, true, _));
// STATUS_WORD 0x0000 is powered on, no faults.
PMBusExpectations expectations;
setPMBusExpectations(mockPMBus, expectations);
@@ -613,6 +646,14 @@
EXPECT_EQ(psu.hasVoutUVFault(), false);
EXPECT_EQ(psu.hasFanFault(), true);
EXPECT_EQ(psu.hasTempFault(), true);
+ // pgoodFault is deglitched up to DEGLITCH_LIMIT
+ EXPECT_EQ(psu.hasPgoodFault(), false);
+ setPMBusExpectations(mockPMBus, expectations);
+ psu.analyze();
+ EXPECT_EQ(psu.hasPgoodFault(), false);
+ setPMBusExpectations(mockPMBus, expectations);
+ psu.analyze();
+ // DEGLITCH_LIMIT reached for pgoodFault
EXPECT_EQ(psu.hasPgoodFault(), true);
EXPECT_CALL(mockPMBus, read("in1_input", _))
@@ -990,9 +1031,17 @@
static_cast<MockedGPIOInterface*>(psu.getPresenceGPIO());
// Always return 1 to indicate present.
EXPECT_CALL(*mockPresenceGPIO, read()).WillRepeatedly(Return(1));
- psu.analyze();
MockedPMBus& mockPMBus = static_cast<MockedPMBus&>(psu.getPMBus());
- EXPECT_EQ(psu.hasPgoodFault(), false);
+ EXPECT_CALL(mockPMBus, findHwmonDir());
+ // Presence change from missing to present will trigger write to
+ // ON_OFF_CONFIG.
+ EXPECT_CALL(mockPMBus, writeBinary(ON_OFF_CONFIG, _, _));
+ // Missing/present will trigger read of "in1_input" to try CLEAR_FAULTS.
+ EXPECT_CALL(mockPMBus, read("in1_input", _))
+ .Times(1)
+ .WillOnce(Return(207000));
+ // Missing/present call will update Presence in inventory.
+ EXPECT_CALL(mockedUtil, setPresence(_, _, true, _));
// STATUS_WORD 0x0000 is powered on, no faults.
PMBusExpectations expectations;
setPMBusExpectations(mockPMBus, expectations);
@@ -1002,6 +1051,15 @@
expectations.statusWordValue = (status_word::POWER_GOOD_NEGATED);
setPMBusExpectations(mockPMBus, expectations);
psu.analyze();
+ // Expect false until reaches DEGLITCH_LIMIT
+ EXPECT_EQ(psu.hasPgoodFault(), false);
+ setPMBusExpectations(mockPMBus, expectations);
+ psu.analyze();
+ // Expect false until reaches DEGLITCH_LIMIT
+ EXPECT_EQ(psu.hasPgoodFault(), false);
+ setPMBusExpectations(mockPMBus, expectations);
+ psu.analyze();
+ // DEGLITCH_LIMIT reached, expect true.
EXPECT_EQ(psu.hasPgoodFault(), true);
// Back to no fault bits on in STATUS_WORD
expectations.statusWordValue = 0;
@@ -1012,6 +1070,12 @@
expectations.statusWordValue = (status_word::UNIT_IS_OFF);
setPMBusExpectations(mockPMBus, expectations);
psu.analyze();
+ EXPECT_EQ(psu.hasPgoodFault(), false);
+ setPMBusExpectations(mockPMBus, expectations);
+ psu.analyze();
+ EXPECT_EQ(psu.hasPgoodFault(), false);
+ setPMBusExpectations(mockPMBus, expectations);
+ psu.analyze();
EXPECT_EQ(psu.hasPgoodFault(), true);
// Back to no fault bits on in STATUS_WORD
expectations.statusWordValue = 0;