PEL: Clear deconfig flag after callout replaced
Making use of the previous commit's framework to call a function when a
fan or power supply becomes present, add code to the Manager class to
register a callback that will clear the deconfig flag for all PELs
created with the power-thermal or fan component ID that have the
location code of the replaced fan/PS as a callout.
This way, the degraded mode reporting code will no longer pick up those
PELs in its report, because once the hardware has been replaced those
PELs are no longer relevant.
This is necessary only for fans or power supplies because they're the
only N+1 hardware that can be hot plugged at runtime. And also because
this is what the IBM service team wants.
Tested:
Simulated missing hardware (changed present D-Bus property for fans,
toggled PSU presence GPIO in the simulator for PSs). Saw errors get
created for it, then simulated replacing it and saw those errors have
their deconfig flag cleared, verifying before and after with peltool:
```
// Remove and replace fan
phosphor-fan-monitor: Fan /system/chassis/motherboard/fan0 presence state change to false
phosphor-log-manager: Created PEL 0x50000002 (BMC ID 2) with SRC 110076F1
phosphor-fan-monitor: Fan /system/chassis/motherboard/fan0 presence state change to true
phosphor-log-manager: Detected FRU /xyz/openbmc_project/inventory/system/chassis/motherboard/fan0 (U78DB.ND0.1234567-A0) present
phosphor-log-manager: Clearing deconfig flag in PEL 0x50000002 with SRC 110076F1 because U78DB.ND0.1234567-A0 was replaced
// Remove and replace PS
phosphor-log-manager: Created PEL 0x50000003 (BMC ID 3) with SRC 110015F6
...
phosphor-psu-monitor: Updating inventory present property. present:true invpath:/system/chassis/motherboard/powersupply0 name:powersupply0
phosphor-log-manager: Detected FRU /xyz/openbmc_project/inventory/system/chassis/motherboard/powersupply0 (U78DB.ND0.1234567-E0) present
phosphor-log-manager: Clearing deconfig flag in PEL 0x50000003 with SRC 110015F6 because U78DB.ND0.1234567-E0 was replaced
```
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: Iee05b4a612ca8f438f8c89f37b4e7b529a131a9f
diff --git a/extensions/openpower-pels/manager.cpp b/extensions/openpower-pels/manager.cpp
index 9f2a13e..61eb36b 100644
--- a/extensions/openpower-pels/manager.cpp
+++ b/extensions/openpower-pels/manager.cpp
@@ -54,6 +54,8 @@
} // namespace additional_data
constexpr auto defaultLogMessage = "xyz.openbmc_project.Logging.Error.Default";
+// Component ID used by BMC power/thermal PELs.
+constexpr uint32_t bmcThermalCompID{0x2700};
+// Component ID used by BMC fan PELs.
+constexpr uint32_t bmcFansCompID{0x2800};
Manager::~Manager()
{
@@ -1032,5 +1034,74 @@
_obmcLogDeleteEventSource.reset();
}
+// Clears the deconfig flag in the PEL's primary SRC when the PEL has the
+// power-thermal or fans component ID and calls out the given location
+// code.  Returns true only when the flag was cleared.
+bool Manager::clearPowerThermalDeconfigFlag(const std::string& locationCode,
+                                            openpower::pels::PEL& pel)
+{
+    // The requirements state that only power-thermal or
+    // fan PELs need their deconfig flag cleared.
+    static const std::vector<uint32_t> compIDs{bmcThermalCompID, bmcFansCompID};
+
+    if (std::find(compIDs.begin(), compIDs.end(),
+                  pel.privateHeader().header().componentID) == compIDs.end())
+    {
+        return false;
+    }
+
+    // A PEL without a primary SRC has no callouts to examine, so bail
+    // out instead of dereferencing an empty value.
+    auto src = pel.primarySRC();
+    if (!src)
+    {
+        return false;
+    }
+
+    const auto& callouts = (*src)->callouts();
+    if (!callouts)
+    {
+        return false;
+    }
+
+    for (const auto& callout : callouts->callouts())
+    {
+        // Look for the passed in location code in a callout that
+        // is either a normal HW callout or a symbolic FRU with
+        // a trusted location code callout.
+        const auto& fru = callout->fruIdentity();
+        if ((callout->locationCode() != locationCode) || !fru)
+        {
+            continue;
+        }
+
+        const auto type = fru->failingComponentType();
+        if ((type != src::FRUIdentity::hardwareFRU) &&
+            (type != src::FRUIdentity::symbolicFRUTrustedLocCode))
+        {
+            continue;
+        }
+
+        log<level::INFO>(
+            fmt::format(
+                "Clearing deconfig flag in PEL {:#x} with SRC {} because {} was replaced",
+                pel.id(), (*src)->asciiString().substr(0, 8), locationCode)
+                .c_str());
+        (*src)->clearErrorStatusFlag(SRC::ErrorStatusFlags::deconfigured);
+
+        // One matching callout is enough; the flag is per SRC.
+        return true;
+    }
+    return false;
+}
+
+// Invoked with the location code of hardware that was just detected as
+// present; offers every BMC-created, deconfigured PEL to
+// clearPowerThermalDeconfigFlag() through the repository.
+void Manager::hardwarePresent(const std::string& locationCode)
+{
+    const auto bmcCreator = static_cast<uint8_t>(CreatorID::openBMC);
+
+    // Returns true when it cleared the deconfig flag in the PEL it was given.
+    Repository::PELUpdateFunc clearDeconfig =
+        [locationCode](openpower::pels::PEL& pel) {
+            return Manager::clearPowerThermalDeconfigFlag(locationCode, pel);
+        };
+
+    for (const auto& entry : _repo.getAttributesMap())
+    {
+        const auto& attrs = entry.second;
+
+        // Only PELs created by the BMC that still have the deconfig
+        // flag set are candidates.
+        if ((attrs.creator != bmcCreator) || !attrs.deconfig)
+        {
+            continue;
+        }
+
+        _repo.updatePEL(attrs.path, clearDeconfig);
+    }
+}
+
} // namespace pels
} // namespace openpower
diff --git a/extensions/openpower-pels/manager.hpp b/extensions/openpower-pels/manager.hpp
index 8f439a5..ca7f12e 100644
--- a/extensions/openpower-pels/manager.hpp
+++ b/extensions/openpower-pels/manager.hpp
@@ -70,6 +70,10 @@
std::bind(&Manager::updateResolution, this, std::placeholders::_1));
setupPELDeleteWatch();
+
+ _dataIface->subscribeToFruPresent(
+ "Manager",
+ std::bind(&Manager::hardwarePresent, this, std::placeholders::_1));
}
/**
@@ -479,6 +483,30 @@
void deleteObmcLog(sdeventplus::source::EventBase&, uint32_t obmcLogID);
/**
+ * @brief Clears the deconfig flag in the PEL if necessary.
+ *
+ * If the PEL has the BMC power/thermal or fans component ID and the
+ * passed in location code is in one of its callouts, as either a
+ * hardware FRU or a symbolic FRU with a trusted location code, clear
+ * the deconfig flag in the PEL's primary SRC.
+ *
+ * @param[in] locationCode - The location code to look for
+ * @param[inout] pel - The PEL to check and modify.
+ * @return bool - true if the flag was cleared for this PEL
+ */
+ static bool clearPowerThermalDeconfigFlag(const std::string& locationCode,
+ openpower::pels::PEL& pel);
+
+ /**
+ * @brief Called by DataInterface when the presence of hotpluggable
+ * hardware is detected.
+ *
+ * Goes through the BMC-created PELs that have the 'Deconfig' flag set
+ * and clears that flag in the ones with the BMC power/thermal or fans
+ * component ID that have the location code of the hardware in a
+ * callout.
+ *
+ * @param[in] locationCode - The location code of the hardware.
+ */
+ void hardwarePresent(const std::string& locationCode);
+
+ /**
* @brief Reference to phosphor-logging's Manager class
*/
phosphor::logging::internal::Manager& _logManager;
diff --git a/extensions/openpower-pels/repository.hpp b/extensions/openpower-pels/repository.hpp
index e1c565e..7bb7a11 100644
--- a/extensions/openpower-pels/repository.hpp
+++ b/extensions/openpower-pels/repository.hpp
@@ -312,6 +312,16 @@
getPELAttributes(const LogID& id) const;
/**
+ * @brief Returns the attributes map so that others can traverse PELs.
+ *
+ * @return - A const reference to the LogID to PELAttributes map
+ * (_pelAttributes) held by this object.
+ */
+ const std::map<LogID, PELAttributes>& getAttributesMap() const
+ {
+ return _pelAttributes;
+ }
+
+ /**
* @brief Sets the host transmission state on a PEL file
*
* Writes the host transmission state field in the User Header
@@ -444,7 +454,6 @@
*/
void archivePEL(const PEL& pel);
- private:
using PELUpdateFunc = std::function<bool(PEL&)>;
/**
@@ -459,6 +468,7 @@
*/
void updatePEL(const std::filesystem::path& path, PELUpdateFunc updateFunc);
+ private:
/**
* @brief Finds an entry in the _pelAttributes map.
*
diff --git a/extensions/openpower-pels/src.hpp b/extensions/openpower-pels/src.hpp
index e12cab3..bc73fa2 100644
--- a/extensions/openpower-pels/src.hpp
+++ b/extensions/openpower-pels/src.hpp
@@ -327,6 +327,16 @@
return _hexData[3] & static_cast<uint32_t>(flag);
}
+ /**
+ * @brief Clears an error status flag in the SRC.
+ *
+ * @param[in] flag - The flag to clear
+ */
+ void clearErrorStatusFlag(ErrorStatusFlags flag)
+ {
+ // Mask off only this flag's bits in hex word 3, leaving the rest as is.
+ _hexData[3] &= ~static_cast<uint32_t>(flag);
+ }
+
private:
/**
* @brief Fills in the user defined hex words from the
diff --git a/test/openpower-pels/mocks.hpp b/test/openpower-pels/mocks.hpp
index 103d7e8..5a12c7e 100644
--- a/test/openpower-pels/mocks.hpp
+++ b/test/openpower-pels/mocks.hpp
@@ -76,6 +76,11 @@
{
_hmcManaged = managed;
}
+
+ /**
+ * @brief Test hook that passes the location code on to setFruPresent().
+ *
+ * NOTE(review): presumably setFruPresent() runs the callbacks registered
+ * with subscribeToFruPresent(), simulating a FRU becoming present -
+ * confirm against the base class.
+ *
+ * @param[in] locationCode - The location code of the FRU to report
+ */
+ void fruPresent(const std::string& locationCode)
+ {
+ setFruPresent(locationCode);
+ }
};
/**
diff --git a/test/openpower-pels/pel_manager_test.cpp b/test/openpower-pels/pel_manager_test.cpp
index 5f0e579..ee44c34 100644
--- a/test/openpower-pels/pel_manager_test.cpp
+++ b/test/openpower-pels/pel_manager_test.cpp
@@ -1131,3 +1131,107 @@
// convert the last four chars to spaces
EXPECT_EQ(Manager::sanitizeFieldForDBus(string), base + " ");
}
+
+// Exercises the FRU-present path: creating the fan PEL sets the deconfig
+// flag, reporting a called-out FRU as present clears it, and reporting a
+// FRU that is not in the callout list leaves it set.
+TEST_F(ManagerTest, TestFruPlug)
+{
+    const auto registry = R"(
+{
+ "PELs":
+ [{
+ "Name": "xyz.openbmc_project.Fan.Error.Fault",
+ "Subsystem": "power_fans",
+ "ComponentID": "0x2800",
+ "SRC":
+ {
+ "Type": "11",
+ "ReasonCode": "0x76F0",
+ "Words6To9": {},
+ "DeconfigFlag": true
+ },
+ "Callouts": [{
+ "CalloutList": [
+ {"Priority": "low", "LocCode": "P0"},
+ {"Priority": "high", "LocCode": "A3"}
+ ]
+ }],
+ "Documentation": {
+ "Description": "A Fan Fault",
+ "Message": "Fan had a Fault"
+ }
+ }]
+}
+)";
+
+    // Write the registry where the Manager will look for it.
+    auto path = getPELReadOnlyDataPath();
+    fs::create_directories(path);
+    path /= "message_registry.json";
+
+    std::ofstream registryFile{path};
+    registryFile << registry;
+    registryFile.close();
+
+    std::unique_ptr<DataInterfaceBase> dataIface =
+        std::make_unique<MockDataInterface>();
+
+    // Grab a typed raw pointer before ownership moves into the Manager.
+    // The object really is a MockDataInterface, so static_cast is the
+    // appropriate base-to-derived conversion.
+    MockDataInterface* mockIface =
+        static_cast<MockDataInterface*>(dataIface.get());
+
+    // Set up the mock calls used when building callouts
+    EXPECT_CALL(*mockIface, getInventoryFromLocCode("P0", 0, false))
+        .WillRepeatedly(Return(std::vector<std::string>{"motherboard"}));
+    EXPECT_CALL(*mockIface, expandLocationCode("P0", 0))
+        .WillRepeatedly(Return("U1234-P0"));
+    EXPECT_CALL(*mockIface, getInventoryFromLocCode("U1234-P0", 0, true))
+        .WillRepeatedly(Return(std::vector<std::string>{"motherboard"}));
+
+    EXPECT_CALL(*mockIface, getInventoryFromLocCode("A3", 0, false))
+        .WillRepeatedly(Return(std::vector<std::string>{"fan"}));
+    EXPECT_CALL(*mockIface, expandLocationCode("A3", 0))
+        .WillRepeatedly(Return("U1234-A3"));
+    EXPECT_CALL(*mockIface, getInventoryFromLocCode("U1234-A3", 0, true))
+        .WillRepeatedly(Return(std::vector<std::string>{"fan"}));
+
+    std::unique_ptr<JournalBase> journal = std::make_unique<MockJournal>();
+
+    openpower::pels::Manager manager{
+        logManager, std::move(dataIface),
+        std::bind(std::mem_fn(&TestLogger::log), &logger, std::placeholders::_1,
+                  std::placeholders::_2, std::placeholders::_3),
+        std::move(journal)};
+
+    std::vector<std::string> additionalData;
+    std::vector<std::string> associations;
+
+    // Reads a PEL back from the repository and checks the deconfigured
+    // flag in its primary SRC against the expected value.
+    auto checkDeconfigured = [](bool deconfigured) {
+        auto pelFile = findAnyPELInRepo();
+        ASSERT_TRUE(pelFile);
+
+        auto data = readPELFile(*pelFile);
+        PEL pel(*data);
+        ASSERT_TRUE(pel.valid());
+
+        EXPECT_EQ(pel.primarySRC().value()->getErrorStatusFlag(
+                      SRC::ErrorStatusFlags::deconfigured),
+                  deconfigured);
+    };
+
+    manager.create("xyz.openbmc_project.Fan.Error.Fault", 42, 0,
+                   phosphor::logging::Entry::Level::Error, additionalData,
+                   associations);
+    checkDeconfigured(true);
+
+    // Replace A3 so PEL deconfigured flag should be set to false
+    mockIface->fruPresent("U1234-A3");
+    checkDeconfigured(false);
+
+    // Remove the first PEL so the next check reads the new one.
+    manager.erase(42);
+
+    // Create it again and replace a FRU not in the callout list.
+    // Deconfig flag should stay on.
+    manager.create("xyz.openbmc_project.Fan.Error.Fault", 43, 0,
+                   phosphor::logging::Entry::Level::Error, additionalData,
+                   associations);
+    checkDeconfigured(true);
+    mockIface->fruPresent("U1234-A4");
+    checkDeconfigured(true);
+}