pseq: Refactor on failure method
Refactor onFailure() method to consolidate logic and improve amount
of device information captured as additional data.
Signed-off-by: Jim Wright <jlwright@us.ibm.com>
Change-Id: Id8ab37343ce9c9a98a2e52f360b552c2af7c4848
diff --git a/phosphor-power-sequencer/src/ucd90320_monitor.cpp b/phosphor-power-sequencer/src/ucd90320_monitor.cpp
index f2bb057..c54c4a4 100644
--- a/phosphor-power-sequencer/src/ucd90320_monitor.cpp
+++ b/phosphor-power-sequencer/src/ucd90320_monitor.cpp
@@ -64,103 +64,6 @@
findCompatibleSystemTypes();
}
-bool UCD90320Monitor::checkPGOODFaults(
- std::map<std::string, std::string>& additionalData)
-{
- // Check only the GPIs configured on this system.
- std::vector<int> values = lines.get_values();
-
- bool errorCreated = false;
- for (size_t pin = 0; pin < pins.size(); ++pin)
- {
- if (pin < values.size() && !values[pin])
- {
- try
- {
- additionalData.emplace(
- "STATUS_WORD", fmt::format("{:#04x}", readStatusWord()));
- additionalData.emplace("MFR_STATUS",
- fmt::format("{:#04x}", readMFRStatus()));
- }
- catch (device_error::ReadFailure& e)
- {
- log<level::ERR>("ReadFailure when collecting metadata");
- }
- additionalData.emplace("INPUT_NUM",
- fmt::format("{}", pins[pin].line));
- additionalData.emplace("INPUT_NAME", pins[pin].name);
- additionalData.emplace("INPUT_STATUS",
- fmt::format("{}", values[pin]));
-
- logError("xyz.openbmc_project.Power.Error.PowerSequencerPGOODFault",
- additionalData);
-
- errorCreated = true;
- break;
- }
- }
- return errorCreated;
-}
-
-bool UCD90320Monitor::checkVOUTFaults(
- std::map<std::string, std::string>& additionalData)
-{
- // The status_word register has a summary bit to tell us
- // if each page even needs to be checked
- auto statusWord = readStatusWord();
- if (!(statusWord & status_word::VOUT_FAULT))
- {
- return false;
- }
-
- constexpr size_t numberPages = 24;
- bool errorCreated = false;
- for (size_t page = 0; page < numberPages; page++)
- {
- auto statusVout = pmbusInterface.insertPageNum(STATUS_VOUT, page);
- uint8_t vout = pmbusInterface.read(statusVout, Type::Debug);
-
- // If any bits are on log them, though some are just
- // warnings so they won't cause errors
- if (vout)
- {
- log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
- entry("STATUS_VOUT=0x%X", vout),
- entry("PAGE=%d", page));
- }
-
- // Log errors if any non-warning bits on
- if (vout & ~status_vout::WARNING_MASK)
- {
- auto railName = rails[page];
-
- additionalData.emplace("STATUS_WORD",
- fmt::format("{:#04x}", statusWord));
- additionalData.emplace("STATUS_VOUT", fmt::format("{:#02x}", vout));
- try
- {
- additionalData.emplace("MFR_STATUS",
- fmt::format("{:#04x}", readMFRStatus()));
- }
- catch (device_error::ReadFailure& e)
- {
- log<level::ERR>("ReadFailure when collecting MFR_STATUS");
- }
- additionalData.emplace("RAIL", fmt::format("{}", page));
- additionalData.emplace("RAIL_NAME", railName);
-
- logError(
- "xyz.openbmc_project.Power.Error.PowerSequencerVoltageFault",
- additionalData);
-
- errorCreated = true;
- break;
- }
- }
-
- return errorCreated;
-}
-
void UCD90320Monitor::findCompatibleSystemTypes()
{
try
@@ -357,44 +260,106 @@
void UCD90320Monitor::onFailure(bool timeout,
const std::string& powerSupplyError)
{
+ std::string message;
std::map<std::string, std::string> additionalData{};
- if (!powerSupplyError.empty())
- {
- logError(powerSupplyError, additionalData);
- return;
- }
try
{
- bool voutError = checkVOUTFaults(additionalData);
- bool pgoodError = checkPGOODFaults(additionalData);
-
- // Not a voltage or PGOOD fault, but we know something
- // failed so still create an error log.
- if (!voutError && !pgoodError)
+ auto statusWord = readStatusWord();
+ additionalData.emplace("STATUS_WORD",
+ fmt::format("{:#04x}", statusWord));
+ try
{
- // Default to generic pgood error
- logError("xyz.openbmc_project.Power.Error.Shutdown",
- additionalData);
+ additionalData.emplace("MFR_STATUS",
+ fmt::format("{:#04x}", readMFRStatus()));
+ }
+ catch (device_error::ReadFailure& e)
+ {
+ log<level::ERR>(
+ fmt::format("ReadFailure when collecting MFR_STATUS, error {}",
+ e.what())
+ .c_str());
+ }
+
+ // The status_word register has a summary bit to tell us if each page
+ // even needs to be checked
+ if (statusWord & status_word::VOUT_FAULT)
+ {
+ constexpr size_t numberPages = 24;
+ for (size_t page = 0; page < numberPages; page++)
+ {
+ auto statusVout =
+ pmbusInterface.insertPageNum(STATUS_VOUT, page);
+ uint8_t vout = pmbusInterface.read(statusVout, Type::Debug);
+
+ // If any bits are on log them, though some are just warnings so
+ // they won't cause errors
+ if (vout)
+ {
+ log<level::INFO>(
+ fmt::format("STATUS_VOUT, page: {}, value: {:#02x}",
+ page, vout)
+ .c_str());
+ }
+
+ // Log errors if any non-warning bits on
+ if (vout & ~status_vout::WARNING_MASK)
+ {
+ additionalData.emplace("STATUS_VOUT",
+ fmt::format("{:#02x}", vout));
+ additionalData.emplace("PAGE", fmt::format("{}", page));
+ additionalData.emplace("RAIL_NAME", rails[page]);
+
+ // Use power supply error if set and 12v rail has failed,
+ // else use voltage error
+ message =
+ ((page == 0) && !powerSupplyError.empty())
+ ? powerSupplyError
+ : "xyz.openbmc_project.Power.Error.PowerSequencerVoltageFault";
+ break;
+ }
+ }
+ }
+
+ // Check only the GPIs configured on this system.
+ std::vector<int> values = lines.get_values();
+ additionalData.emplace("GPIO_VALUES", fmt::format("{}", values));
+
+ if (message.empty())
+ {
+ // Didn't find a rail fail, examine pins
+ for (size_t pin = 0; pin < pins.size(); ++pin)
+ {
+ if (pin < values.size() && !values[pin])
+ {
+ additionalData.emplace("INPUT_NUM",
+ fmt::format("{}", pins[pin].line));
+ additionalData.emplace("INPUT_NAME", pins[pin].name);
+ additionalData.emplace("INPUT_STATUS",
+ fmt::format("{}", values[pin]));
+ message =
+ "xyz.openbmc_project.Power.Error.PowerSequencerPGOODFault";
+ break;
+ }
+ }
}
}
catch (device_error::ReadFailure& e)
{
- log<level::ERR>("ReadFailure when collecting metadata");
-
- if (timeout)
- {
- // Default to timeout error
- logError("xyz.openbmc_project.Power.Error.PowerOnTimeout",
- additionalData);
- }
- else
- {
- // Default to generic pgood error
- logError("xyz.openbmc_project.Power.Error.Shutdown",
- additionalData);
- }
+ log<level::ERR>(
+ fmt::format("ReadFailure when collecting metadata, error {}",
+ e.what())
+ .c_str());
}
+
+ if (message.empty())
+ {
+ // Could not isolate, but we know something failed, so issue a timeout
+ // or generic power good error
+ message = timeout ? "xyz.openbmc_project.Power.Error.PowerOnTimeout"
+ : "xyz.openbmc_project.Power.Error.Shutdown";
+ }
+ logError(message, additionalData);
}
uint16_t UCD90320Monitor::readStatusWord()
diff --git a/phosphor-power-sequencer/src/ucd90320_monitor.hpp b/phosphor-power-sequencer/src/ucd90320_monitor.hpp
index b4d1916..938d0a1 100644
--- a/phosphor-power-sequencer/src/ucd90320_monitor.hpp
+++ b/phosphor-power-sequencer/src/ucd90320_monitor.hpp
@@ -79,20 +79,6 @@
std::vector<std::string> rails;
/**
- * Checks for PGOOD faults on the device.
- * @param[in] additionalData AdditionalData property of the error log entry
- * @return bool true if an error log was created
- */
- bool checkPGOODFaults(std::map<std::string, std::string>& additionalData);
-
- /**
- * Checks for VOUT faults on the device.
- * @param[in] additionalData AdditionalData property of the error log entry
- * @return bool true if an error log was created
- */
- bool checkVOUTFaults(std::map<std::string, std::string>& additionalData);
-
- /**
* Finds the list of compatible system types using D-Bus methods.
* This list is used to find the correct JSON configuration file for the
* current system.
@@ -117,13 +103,13 @@
/**
* Reads the mfr_status register
- * @return uint32_t the register contents
+ * @return the register contents
*/
uint32_t readMFRStatus();
/**
* Reads the status_word register
- * @return uint16_t the register contents
+ * @return the register contents
*/
uint16_t readStatusWord();