psu-ng: Change detection of brownout errors
Brownout errors can occur such that the power supplies are able to
recover but the chassis power good is lost and the system powers down.
In these cases the power supply application should continue to process
the brownout condition so that the brownout error is logged and the auto
power restart happens correctly. Use the power state and the chassis
pgood properties from the power control service to allow this to happen.
The power supply driver or device may not latch the AC fault. Set an
expiring indication that the AC fault occurred.
A power supply may not indicate the loss of AC and may only indicate the
loss of power good. As long as at least one power supply indicates the
AC loss, tolerate the pgood failure indications.
The net effect of these changes is that a brownout error is issued when
the chassis power good is lost, all present power supplies indicate
either an AC failure or a power good failure, and at least one power
supply indicates an AC failure.
Signed-off-by: Jim Wright <jlwright@us.ibm.com>
Change-Id: I5a11746a036d0a66b11c76ec12784b8870fa306f
diff --git a/phosphor-power-supply/power_supply.cpp b/phosphor-power-supply/power_supply.cpp
index 6908260..c68e9be 100644
--- a/phosphor-power-supply/power_supply.cpp
+++ b/phosphor-power-supply/power_supply.cpp
@@ -506,6 +506,8 @@
}
vinUVFault++;
}
+ // Remember that this PSU has seen an AC fault
+ acFault = AC_FAULT_LIMIT;
}
if (vinUVFault &&
@@ -518,6 +520,11 @@
shortName, statusWord, statusMFR, statusInput)
.c_str());
vinUVFault = 0;
+ // No AC fail, decrement counter
+ if (acFault)
+ {
+ --acFault;
+ }
}
}
@@ -607,6 +614,11 @@
}
clearFaultFlags();
+ // No AC fail, decrement counter
+ if (acFault)
+ {
+ --acFault;
+ }
}
// Save off old inputVoltage value.
@@ -635,8 +647,8 @@
shortName, vinUVFault, actualInputVoltage)
.c_str());
// Do we have a VIN_UV fault latched that can now be cleared
- // due to voltage back in range? Attempt to clear the fault(s),
- // re-check faults on next call.
+ // due to voltage back in range? Attempt to clear the
+ // fault(s), re-check faults on next call.
clearVinUVFault();
}
else if (std::abs(actualInputVoltageOld - actualInputVoltage) >
diff --git a/phosphor-power-supply/power_supply.hpp b/phosphor-power-supply/power_supply.hpp
index ec58bf3..2c61f13 100644
--- a/phosphor-power-supply/power_supply.hpp
+++ b/phosphor-power-supply/power_supply.hpp
@@ -45,6 +45,9 @@
constexpr auto LOG_LIMIT = 3;
constexpr auto DEGLITCH_LIMIT = 3;
constexpr auto PGOOD_DEGLITCH_LIMIT = 5;
+// Number of polls to remember that an AC fault occurred. Should remain greater
+// than PGOOD_DEGLITCH_LIMIT.
+constexpr auto AC_FAULT_LIMIT = 6;
/**
* @class PowerSupply
@@ -380,6 +383,15 @@
}
/**
+ * @brief Returns true while the acFault counter is nonzero, i.e. an AC
+ * fault has been recorded and has not yet expired.
+ */
+ bool hasACFault() const
+ {
+ return acFault != 0;
+ }
+
+ /**
* @brief Returns the device path
*
* This can be used for error call outs.
@@ -685,6 +697,13 @@
size_t psCS12VFault = 0;
/**
+ * @brief Set to AC_FAULT_LIMIT when AC fault is detected, decremented when
+ * AC fault has cleared. Effectively forms a timer since last AC failure.
+ * Zero indicates being outside the window of concern.
+ */
+ size_t acFault = 0;
+
+ /**
* @brief Count of the number of read failures.
*/
size_t readFail = 0;
diff --git a/phosphor-power-supply/psu_manager.cpp b/phosphor-power-supply/psu_manager.cpp
index 47f4372..242632b 100644
--- a/phosphor-power-supply/psu_manager.cpp
+++ b/phosphor-power-supply/psu_manager.cpp
@@ -558,6 +558,49 @@
log<level::INFO>("Synchronize INPUT_HISTORY completed");
}
+bool PSUManager::isBrownout(std::map<std::string, std::string>& additionalData)
+{
+ size_t presentCount = 0;
+ size_t notPresentCount = 0;
+ size_t acFailedCount = 0;
+ size_t pgoodFailedCount = 0;
+ for (const auto& psu : psus)
+ {
+ if (psu->isPresent())
+ {
+ ++presentCount;
+ if (psu->hasACFault())
+ {
+ ++acFailedCount;
+ }
+ else if (psu->hasPgoodFault())
+ {
+ ++pgoodFailedCount;
+ }
+ }
+ else
+ {
+ ++notPresentCount;
+ }
+ }
+
+ // Brownout: at least one present PSU has seen an AC fail, and every
+ // present PSU has either an AC fail or a pgood fail. AC fails are only
+ // counted for present PSUs, so acFailedCount != 0 implies one is present.
+ bool isBrownout =
+ acFailedCount && (presentCount == (acFailedCount + pgoodFailedCount));
+ if (isBrownout)
+ {
+ additionalData.emplace("NOT_PRESENT_COUNT",
+ std::to_string(notPresentCount));
+ additionalData.emplace("VIN_FAULT_COUNT",
+ std::to_string(acFailedCount));
+ additionalData.emplace("PGOOD_FAULT_COUNT",
+ std::to_string(pgoodFailedCount));
+ }
+ return isBrownout;
+}
+
void PSUManager::analyze()
{
auto syncHistoryRequired =
@@ -576,25 +619,10 @@
std::map<std::string, std::string> additionalData;
- auto notPresentCount = decltype(psus.size())(
- std::count_if(psus.begin(), psus.end(),
- [](const auto& psu) { return !psu->isPresent(); }));
-
- auto hasVINUVFaultCount = decltype(psus.size())(
- std::count_if(psus.begin(), psus.end(), [](const auto& psu) {
- return (psu->isPresent() && psu->hasVINUVFault());
- }));
-
- // The PSU D-Bus objects may not be available yet, so ignore if all
- // PSUs are not present or the number of PSUs is still 0.
- if ((psus.size() == (notPresentCount + hasVINUVFaultCount)) &&
- (psus.size() != notPresentCount) && (psus.size() != 0))
+ // Only issue brownout failure if chassis pgood has failed and PSUs indicate
+ // AC failure
+ if (powerFaultOccurring && isBrownout(additionalData))
{
- // Brownout: All PSUs report an AC failure: At least one PSU reports
- // AC loss VIN fault and the rest either report AC loss VIN fault as
- // well or are not present.
- additionalData["NOT_PRESENT_COUNT"] = std::to_string(notPresentCount);
- additionalData["VIN_FAULT_COUNT"] = std::to_string(hasVINUVFaultCount);
setBrownout(additionalData);
}
else
diff --git a/phosphor-power-supply/psu_manager.hpp b/phosphor-power-supply/psu_manager.hpp
index 4b4171a..5d2dce3 100644
--- a/phosphor-power-supply/psu_manager.hpp
+++ b/phosphor-power-supply/psu_manager.hpp
@@ -317,6 +317,13 @@
void setPowerConfigGPIO();
/**
+ * @brief Determine if system is in brownout failure
+ * @param additionalData AdditionalData property of the error log entry
+ * @return true if system is in brownout failure, false otherwise.
+ */
+ bool isBrownout(std::map<std::string, std::string>& additionalData);
+
+ /**
* @brief Indicate that the system is in a brownout condition by creating an
* error log and setting the PowerSystemInputs status property to Fault.
*