pseq: Add UCD90320 specific error handling
Add device-specific handling for voltage and power good failures to the
UCD90320 device subclass.
Signed-off-by: Jim Wright <jlwright@us.ibm.com>
Change-Id: I0ed7657320f4944d64ee434c1d02ce25d5cdc43b
diff --git a/phosphor-power-sequencer/src/power_sequencer_monitor.hpp b/phosphor-power-sequencer/src/power_sequencer_monitor.hpp
index 944e7ae..e8b36fc 100644
--- a/phosphor-power-sequencer/src/power_sequencer_monitor.hpp
+++ b/phosphor-power-sequencer/src/power_sequencer_monitor.hpp
@@ -41,7 +41,7 @@
* known to be in an error state. A log will be created.
* @param[in] timeout if the failure state was determined by timing out
* @param[in] powerSupplyError The power supply error to log. A default
- * std:string, i.e. empty string ("") is passed when there is no power
+ * std::string, i.e. empty string (""), is passed when there is no power
* supply error to log.
*/
virtual void onFailure(bool timeout, const std::string& powerSupplyError);
diff --git a/phosphor-power-sequencer/src/ucd90320_monitor.cpp b/phosphor-power-sequencer/src/ucd90320_monitor.cpp
index d7ef7bb..f2bb057 100644
--- a/phosphor-power-sequencer/src/ucd90320_monitor.cpp
+++ b/phosphor-power-sequencer/src/ucd90320_monitor.cpp
@@ -24,6 +24,7 @@
#include <nlohmann/json.hpp>
#include <phosphor-logging/log.hpp>
#include <sdbusplus/bus.hpp>
+#include <xyz/openbmc_project/Common/Device/error.hpp>
#include <fstream>
#include <map>
@@ -33,6 +34,7 @@
{
using json = nlohmann::json;
+using namespace pmbus;
using namespace phosphor::logging;
using namespace phosphor::power;
@@ -40,6 +42,8 @@
"xyz.openbmc_project.Configuration.IBMCompatibleSystem";
const std::string compatibleNamesProperty = "Names";
+namespace device_error = sdbusplus::xyz::openbmc_project::Common::Device::Error;
+
UCD90320Monitor::UCD90320Monitor(sdbusplus::bus::bus& bus, std::uint8_t i2cBus,
std::uint16_t i2cAddress) :
PowerSequencerMonitor(bus),
@@ -60,6 +64,103 @@
findCompatibleSystemTypes();
}
+/**
+ * Checks the monitored GPIO lines for a power good (PGOOD) fault.
+ *
+ * Reads the current value of the configured lines and logs a
+ * PowerSequencerPGOODFault error for the first pin whose value is 0. At most
+ * one error is logged per call.
+ *
+ * @param[in,out] additionalData AdditionalData property of the error log
+ *                entry. STATUS_WORD and MFR_STATUS (when readable) and the
+ *                INPUT_* keys are added when a fault is found.
+ * @return true if an error log was created
+ */
+bool UCD90320Monitor::checkPGOODFaults(
+    std::map<std::string, std::string>& additionalData)
+{
+    // Check only the GPIs configured on this system.
+    const std::vector<int> values = lines.get_values();
+
+    // Stop at the shorter of the two containers so neither is indexed past
+    // its end.
+    for (size_t pin = 0; pin < pins.size() && pin < values.size(); ++pin)
+    {
+        if (values[pin])
+        {
+            continue;
+        }
+
+        try
+        {
+            // The fmt width includes the "0x" prefix, so a 16-bit register
+            // needs a width of 6 and a 32-bit register needs a width of 10.
+            additionalData.emplace("STATUS_WORD",
+                                   fmt::format("{:#06x}", readStatusWord()));
+            additionalData.emplace("MFR_STATUS",
+                                   fmt::format("{:#010x}", readMFRStatus()));
+        }
+        catch (const device_error::ReadFailure&)
+        {
+            // Register metadata is best effort; the fault is still reported.
+            log<level::ERR>("ReadFailure when collecting metadata");
+        }
+        additionalData.emplace("INPUT_NUM", fmt::format("{}", pins[pin].line));
+        additionalData.emplace("INPUT_NAME", pins[pin].name);
+        additionalData.emplace("INPUT_STATUS", fmt::format("{}", values[pin]));
+
+        logError("xyz.openbmc_project.Power.Error.PowerSequencerPGOODFault",
+                 additionalData);
+
+        // Only the first failing pin is reported.
+        return true;
+    }
+    return false;
+}
+
+/**
+ * Checks the device for an output voltage (VOUT) fault.
+ *
+ * Returns immediately when the VOUT_FAULT summary bit of STATUS_WORD is off.
+ * Otherwise reads STATUS_VOUT for each page and logs a
+ * PowerSequencerVoltageFault error for the first page that has a non-warning
+ * bit on. At most one error is logged per call.
+ *
+ * The STATUS_WORD and STATUS_VOUT reads are not guarded here, so a failure
+ * from them propagates to the caller.
+ *
+ * @param[in,out] additionalData AdditionalData property of the error log
+ *                entry. STATUS_WORD, STATUS_VOUT, MFR_STATUS (when readable),
+ *                RAIL and RAIL_NAME (when known) are added when a fault is
+ *                found.
+ * @return true if an error log was created
+ */
+bool UCD90320Monitor::checkVOUTFaults(
+    std::map<std::string, std::string>& additionalData)
+{
+    // The status_word register has a summary bit to tell us
+    // if each page even needs to be checked
+    const auto statusWord = readStatusWord();
+    if (!(statusWord & status_word::VOUT_FAULT))
+    {
+        return false;
+    }
+
+    constexpr size_t numberPages = 24;
+    for (size_t page = 0; page < numberPages; ++page)
+    {
+        const auto statusVout = pmbusInterface.insertPageNum(STATUS_VOUT, page);
+        const uint8_t vout = pmbusInterface.read(statusVout, Type::Debug);
+
+        // If any bits are on log them, though some are just
+        // warnings so they won't cause errors
+        if (vout)
+        {
+            // %d expects an int, so convert the size_t page explicitly.
+            log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
+                             entry("STATUS_VOUT=0x%X", vout),
+                             entry("PAGE=%d", static_cast<int>(page)));
+        }
+
+        // Only non-warning bits produce an error log
+        if (!(vout & ~status_vout::WARNING_MASK))
+        {
+            continue;
+        }
+
+        // The fmt width includes the "0x" prefix: 16-bit -> 6, 8-bit -> 4,
+        // 32-bit -> 10.
+        additionalData.emplace("STATUS_WORD",
+                               fmt::format("{:#06x}", statusWord));
+        additionalData.emplace("STATUS_VOUT", fmt::format("{:#04x}", vout));
+        try
+        {
+            additionalData.emplace("MFR_STATUS",
+                                   fmt::format("{:#010x}", readMFRStatus()));
+        }
+        catch (const device_error::ReadFailure&)
+        {
+            // MFR_STATUS is best effort; the fault is still reported.
+            log<level::ERR>("ReadFailure when collecting MFR_STATUS");
+        }
+        additionalData.emplace("RAIL", fmt::format("{}", page));
+
+        // rails may hold fewer than numberPages entries; never index past
+        // its end. The rail number above still identifies the page.
+        if (page < rails.size())
+        {
+            additionalData.emplace("RAIL_NAME", rails[page]);
+        }
+
+        logError("xyz.openbmc_project.Power.Error.PowerSequencerVoltageFault",
+                 additionalData);
+
+        // Only the first faulted page is reported.
+        return true;
+    }
+
+    return false;
+}
+
void UCD90320Monitor::findCompatibleSystemTypes()
{
try
@@ -253,6 +354,60 @@
}
}
+/**
+ * Logs an error for a power sequencing failure.
+ *
+ * A supplied power supply error is logged as-is. Otherwise the device is
+ * checked for VOUT and PGOOD faults, each of which logs its own error. When
+ * neither check logs an error, or a read failure interrupts the checks, a
+ * default error is logged: PowerOnTimeout if the failure was detected by
+ * timing out, Shutdown otherwise.
+ *
+ * @param[in] timeout if the failure state was determined by timing out
+ * @param[in] powerSupplyError The power supply error to log, or an empty
+ *            string when there is no power supply error to log
+ */
+void UCD90320Monitor::onFailure(bool timeout,
+                                const std::string& powerSupplyError)
+{
+    std::map<std::string, std::string> additionalData{};
+    if (!powerSupplyError.empty())
+    {
+        logError(powerSupplyError, additionalData);
+        return;
+    }
+
+    bool faultLogged = false;
+    try
+    {
+        // Both checks always run; each one logs its own error if it finds a
+        // fault.
+        const bool voutError = checkVOUTFaults(additionalData);
+        const bool pgoodError = checkPGOODFaults(additionalData);
+        faultLogged = voutError || pgoodError;
+    }
+    catch (const device_error::ReadFailure&)
+    {
+        log<level::ERR>("ReadFailure when collecting metadata");
+    }
+
+    // Not a voltage or PGOOD fault, but we know something
+    // failed so still create an error log.
+    if (!faultLogged)
+    {
+        // Honor the timeout flag on every path, not only after a read failure
+        const char* defaultError =
+            timeout ? "xyz.openbmc_project.Power.Error.PowerOnTimeout"
+                    : "xyz.openbmc_project.Power.Error.Shutdown";
+        logError(defaultError, additionalData);
+    }
+}
+
+/**
+ * Reads STATUS_WORD through pmbusInterface using Type::Debug.
+ *
+ * @return the value read, converted to uint16_t
+ */
+uint16_t UCD90320Monitor::readStatusWord()
+{
+    const auto rawValue = pmbusInterface.read(STATUS_WORD, Type::Debug);
+    return static_cast<uint16_t>(rawValue);
+}
+
+/**
+ * Reads "mfr_status" through pmbusInterface using Type::HwmonDeviceDebug.
+ *
+ * NOTE(review): the value is converted to uint32_t on return; confirm the
+ * register is no wider than 32 bits on this device.
+ *
+ * @return the value read, converted to uint32_t
+ */
+uint32_t UCD90320Monitor::readMFRStatus()
+{
+    const std::string fileName{"mfr_status"};
+    const auto rawValue = pmbusInterface.read(fileName, Type::HwmonDeviceDebug);
+    return static_cast<uint32_t>(rawValue);
+}
+
void UCD90320Monitor::setUpGpio(const std::vector<unsigned int>& offsets)
{
gpiod::chip chip{"ucd90320", gpiod::chip::OPEN_BY_LABEL};
diff --git a/phosphor-power-sequencer/src/ucd90320_monitor.hpp b/phosphor-power-sequencer/src/ucd90320_monitor.hpp
index 97958ce..b4d1916 100644
--- a/phosphor-power-sequencer/src/ucd90320_monitor.hpp
+++ b/phosphor-power-sequencer/src/ucd90320_monitor.hpp
@@ -49,6 +49,9 @@
*/
void interfacesAddedHandler(sdbusplus::message::message& msg);
+ /**
+ * @copydoc PowerSequencerMonitor::onFailure()
+ *
+ * This override also checks the device for VOUT and PGOOD faults when no
+ * power supply error is supplied.
+ */
+ void onFailure(bool timeout, const std::string& powerSupplyError) override;
+
private:
/**
* Set of GPIO lines to monitor in this UCD chip.
@@ -76,6 +79,20 @@
std::vector<std::string> rails;
/**
+ * Checks for PGOOD faults on the device.
+ * Logs an error for the first monitored pin whose line value is 0.
+ * @param[in,out] additionalData AdditionalData property of the error log
+ * entry; entries describing the fault are added to it
+ * @return bool true if an error log was created
+ */
+ bool checkPGOODFaults(std::map<std::string, std::string>& additionalData);
+
+ /**
+ * Checks for VOUT faults on the device.
+ * Logs an error for the first page whose STATUS_VOUT has a non-warning bit
+ * on. The status register reads are not guarded, so a read failure
+ * propagates to the caller.
+ * @param[in,out] additionalData AdditionalData property of the error log
+ * entry; entries describing the fault are added to it
+ * @return bool true if an error log was created
+ */
+ bool checkVOUTFaults(std::map<std::string, std::string>& additionalData);
+
+ /**
* Finds the list of compatible system types using D-Bus methods.
* This list is used to find the correct JSON configuration file for the
* current system.
@@ -99,6 +116,18 @@
void parseConfigFile(const std::filesystem::path& pathName);
/**
+ * Reads the mfr_status register
+ * The value is read through pmbusInterface and is not cached.
+ * @return uint32_t the register contents
+ */
+ uint32_t readMFRStatus();
+
+ /**
+ * Reads the status_word register
+ * The value is read through pmbusInterface and is not cached.
+ * @return uint16_t the register contents
+ */
+ uint16_t readStatusWord();
+
+ /**
* Set up GPIOs
* @param[in] offsets the list of pin offsets
*/