PEL: Implement repository pruning
This adds a prune() public method on the Repository class to remove PELs
down to at most 90% of capacity and then down to 80% of the maximum
number of PELs if there were more than the maximum.
It does the first set of pruning by placing each PEL in one of 4
categories, and then reducing the total size of each category. The
categories are:
* BMC informational PELs - reduced to 15% of max
* BMC non-informational PELs - reduced to 30% of max
* non-BMC informational PELs - reduced to 15% of max
* non-BMC non-informational PELs - reduced to 30% of max
Within each category, PELs are removed oldest first, and also 4 passes
are made through the PELs, only removing PELs that meet a specific
requirement each pass, stopping as soon as the category limit is
reached.
The pass requirements are:
* Pass 1: Only remove HMC acked PELs
* Pass 2: Only remove OS acked PELs
* Pass 3: Only remove host sent PELs
* Pass 4: Remove any PEL
After the 4 passes on the 4 categories are done then the number of PELs
remaining is checked against the maximum number. If it is more than the
maximum, it will remove the PELs down to 80% of that limit using the
same 4 passes as above. This is done to keep the number of PELs down to
a manageable number when there are a lot of small PELs that don't engage
the size based pruning.
The pruning code doesn't just bring the size or number of PELs to just
below their limit, but rather a percentage below, so that it won't get
into a situation where the algorithm has to run on the repository every
single time a PEL is added.
The OpenBMC event logs corresponding to the PELs are not removed. That
is left to other code.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: I24da611c095fd3b22b6b1ffab52d919cac5f68b4
diff --git a/extensions/openpower-pels/repository.cpp b/extensions/openpower-pels/repository.cpp
index 8458fcc..e608927 100644
--- a/extensions/openpower-pels/repository.cpp
+++ b/extensions/openpower-pels/repository.cpp
@@ -499,5 +499,180 @@
}
}
+// Builds a vector holding a reference to every _pelAttributes entry and
+// sorts it by the path stored in each entry's attributes, in the
+// direction requested by the caller.
+std::vector<Repository::AttributesReference>
+    Repository::getAllPELAttributes(SortOrder order) const
+{
+    std::vector<Repository::AttributesReference> sorted;
+
+    for (const auto& entry : _pelAttributes)
+    {
+        sorted.push_back(entry);
+    }
+
+    const bool ascending = (order == SortOrder::ascending);
+
+    std::sort(sorted.begin(), sorted.end(),
+              [ascending](const auto& lhs, const auto& rhs) {
+                  const auto& lhsPath = lhs.get().second.path;
+                  const auto& rhsPath = rhs.get().second.path;
+                  return ascending ? (lhsPath < rhsPath)
+                                   : (lhsPath > rhsPath);
+              });
+
+    return sorted;
+}
+
+// Prunes the repository in two stages and returns the OpenBMC event log
+// IDs of every PEL that was removed:
+//  1) Each of the 4 PEL categories is reduced to its share of
+//     _maxRepoSize (15% / 30% / 15% / 30%).
+//  2) If more than _maxNumPELs PELs remain, PELs of any type are removed
+//     until at most 80% of _maxNumPELs are left.
+std::vector<uint32_t> Repository::prune()
+{
+    std::vector<uint32_t> obmcLogIDs;
+    std::string msg = "Pruning PEL repository that takes up " +
+                      std::to_string(_sizes.total) + " bytes and has " +
+                      std::to_string(_pelAttributes.size()) + " PELs";
+    log<level::INFO>(msg.c_str());
+
+    // Set up the 5 functions to check if the PEL category
+    // is still over its limits.
+
+    // BMC informational PELs should only take up 15%
+    IsOverLimitFunc overBMCInfoLimit = [this]() {
+        return _sizes.bmcInfo > _maxRepoSize * 15 / 100;
+    };
+
+    // BMC non informational PELs should only take up 30%
+    IsOverLimitFunc overBMCNonInfoLimit = [this]() {
+        return _sizes.bmcServiceable > _maxRepoSize * 30 / 100;
+    };
+
+    // Non BMC informational PELs should only take up 15%
+    IsOverLimitFunc overNonBMCInfoLimit = [this]() {
+        return _sizes.nonBMCInfo > _maxRepoSize * 15 / 100;
+    };
+
+    // Non BMC non informational PELs should only take up 30%
+    IsOverLimitFunc overNonBMCNonInfoLimit = [this]() {
+        return _sizes.nonBMCServiceable > _maxRepoSize * 30 / 100;
+    };
+
+    // Bring the total number of PELs down to 80% of the max
+    IsOverLimitFunc tooManyPELsLimit = [this]() {
+        return _pelAttributes.size() > _maxNumPELs * 80 / 100;
+    };
+
+    // Set up the functions to determine which category a PEL is in.
+    // TODO: Return false in these functions if a PEL caused a guard record.
+
+    // A BMC informational PEL
+    IsPELTypeFunc isBMCInfo = [](const PELAttributes& pel) {
+        return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) &&
+               !Repository::isServiceableSev(pel);
+    };
+
+    // A BMC non informational PEL
+    IsPELTypeFunc isBMCNonInfo = [](const PELAttributes& pel) {
+        return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) &&
+               Repository::isServiceableSev(pel);
+    };
+
+    // A non BMC informational PEL
+    IsPELTypeFunc isNonBMCInfo = [](const PELAttributes& pel) {
+        return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) &&
+               !Repository::isServiceableSev(pel);
+    };
+
+    // A non BMC non informational PEL
+    IsPELTypeFunc isNonBMCNonInfo = [](const PELAttributes& pel) {
+        return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) &&
+               Repository::isServiceableSev(pel);
+    };
+
+    // When counting PELs, count every PEL. The parameter is left unnamed
+    // because it is intentionally unused.
+    IsPELTypeFunc isAnyPEL = [](const PELAttributes&) { return true; };
+
+    // Check all 4 categories, which will result in at most 90%
+    // usage (15 + 30 + 15 + 30).
+    removePELs(overBMCInfoLimit, isBMCInfo, obmcLogIDs);
+    removePELs(overBMCNonInfoLimit, isBMCNonInfo, obmcLogIDs);
+    removePELs(overNonBMCInfoLimit, isNonBMCInfo, obmcLogIDs);
+    removePELs(overNonBMCNonInfoLimit, isNonBMCNonInfo, obmcLogIDs);
+
+    // After the above pruning check if there are still too many PELs,
+    // which can happen depending on PEL sizes.
+    if (_pelAttributes.size() > _maxNumPELs)
+    {
+        removePELs(tooManyPELsLimit, isAnyPEL, obmcLogIDs);
+    }
+
+    if (!obmcLogIDs.empty())
+    {
+        // Reuse msg rather than declaring a second variable that would
+        // shadow the one above.
+        msg = "Number of PELs removed to save space: " +
+              std::to_string(obmcLogIDs.size());
+        log<level::INFO>(msg.c_str());
+    }
+
+    return obmcLogIDs;
+}
+
+// Removes PELs selected by isPELType until isOverLimit returns false,
+// appending the OpenBMC event log ID of each removed PEL to
+// removedBMCLogIDs. Does nothing if isOverLimit is already false.
+void Repository::removePELs(IsOverLimitFunc& isOverLimit,
+                            IsPELTypeFunc& isPELType,
+                            std::vector<uint32_t>& removedBMCLogIDs)
+{
+    if (!isOverLimit())
+    {
+        return;
+    }
+
+    auto attributes = getAllPELAttributes(SortOrder::ascending);
+
+    // Make 4 passes on the PELs, stopping as soon as isOverLimit
+    // returns false.
+    // Pass 1: only delete HMC acked PELs
+    // Pass 2: only delete OS acked PELs
+    // Pass 3: only delete PHYP sent PELs
+    // Pass 4: delete all PELs
+    static const std::vector<std::function<bool(const PELAttributes& pel)>>
+        stateChecks{[](const auto& pel) {
+                        return pel.hmcState == TransmissionState::acked;
+                    },
+
+                    [](const auto& pel) {
+                        return pel.hostState == TransmissionState::acked;
+                    },
+
+                    [](const auto& pel) {
+                        return pel.hostState == TransmissionState::sent;
+                    },
+
+                    [](const auto&) { return true; }};
+
+    for (const auto& stateCheck : stateChecks)
+    {
+        for (auto it = attributes.begin(); it != attributes.end();)
+        {
+            const auto& entry = it->get();
+            if (!(isPELType(entry.second) && stateCheck(entry.second)))
+            {
+                ++it;
+                continue;
+            }
+
+            // Copy what is needed out of the entry before it goes away.
+            // remove() is expected to drop the entry from _pelAttributes,
+            // after which 'entry' would dangle.
+            const auto removedID = entry.first.obmcID.id;
+            const auto idToRemove = entry.first;
+
+            // vector::erase invalidates 'it'; continue from the iterator
+            // it returns, which refers to the next candidate.
+            it = attributes.erase(it);
+
+            remove(idToRemove);
+            removedBMCLogIDs.push_back(removedID);
+
+            if (!isOverLimit())
+            {
+                return;
+            }
+        }
+    }
+}
+
} // namespace pels
} // namespace openpower
diff --git a/extensions/openpower-pels/repository.hpp b/extensions/openpower-pels/repository.hpp
index e9b6a27..f33289c 100644
--- a/extensions/openpower-pels/repository.hpp
+++ b/extensions/openpower-pels/repository.hpp
@@ -112,6 +112,9 @@
}
};
+ /**
+ * @brief A const reference wrapper around a <LogID, PELAttributes>
+ * pair, which is the element type of the vector returned
+ * by getAllPELAttributes().
+ */
+ using AttributesReference =
+ std::reference_wrapper<const std::pair<const LogID, PELAttributes>>;
+
/**
* @brief A structure for keeping a breakdown of the sizes of PELs
* of different types in the repository.
@@ -353,6 +356,38 @@
*/
static bool isServiceableSev(const PELAttributes& pel);
+ /**
+ * @brief Deletes PELs to bring the repository size down
+ * to at most 90% full by placing PELs into 4 different
+ * categories and then removing PELs until those categories
+ * only take up certain percentages of the allowed space.
+ *
+ * If the number of PELs is still above the maximum number of
+ * PELs after that, PELs of any category are removed until at
+ * most 80% of that maximum remain.
+ *
+ * This does not delete the corresponding OpenBMC event logs, which
+ * is why those IDs are returned, so they can be deleted later.
+ *
+ * The categories and their rules are:
+ * 1) Informational BMC PELs cannot take up more than 15% of
+ * the allocated space.
+ * 2) Non-informational BMC PELs cannot take up more than 30%
+ * of the allocated space.
+ * 3) Informational non-BMC PELs cannot take up more than 15% of
+ * the allocated space.
+ * 4) Non-informational non-BMC PELs cannot take up more than 30%
+ * of the allocated space.
+ *
+ * While removing PELs in a category, 4 passes will be made, with
+ * PELs being removed oldest first during each pass.
+ *
+ * Pass 1: only delete HMC acked PELs
+ * Pass 2: only delete OS acked PELs
+ * Pass 3: only delete PHYP sent PELs
+ * Pass 4: delete all PELs
+ *
+ * @return std::vector<uint32_t> - The OpenBMC event log IDs of
+ * the PELs that were deleted.
+ */
+ std::vector<uint32_t> prune();
+
private:
using PELUpdateFunc = std::function<void(PEL&)>;
@@ -420,6 +455,48 @@
*/
void updateRepoStats(const PELAttributes& pel, bool pelAdded);
+ /**
+ * @brief The sort direction passed to getAllPELAttributes().
+ */
+ enum class SortOrder
+ {
+ ascending,
+ descending
+ };
+
+ /**
+ * @brief Returns a vector of all the _pelAttributes entries sorted
+ * as specified
+ *
+ * The entries are ordered by comparing the path stored in each
+ * entry's PELAttributes.
+ *
+ * @param[in] order - If the PELs should be returned in ascending
+ * (oldest first) or descending order.
+ *
+ * @return std::vector<AttributesReference> - The sorted vector of
+ * references to the pair<LogID, PELAttributes> entries of
+ * _pelAttributes.
+ */
+ std::vector<AttributesReference> getAllPELAttributes(SortOrder order) const;
+
+ /**
+ * @brief A bool(void) function that returns true while PELs still
+ * need to be removed.
+ */
+ using IsOverLimitFunc = std::function<bool()>;
+ /**
+ * @brief A bool(const PELAttributes&) function used to select the
+ * PELs that a removal operates on.
+ */
+ using IsPELTypeFunc = std::function<bool(const PELAttributes&)>;
+
+ /**
+ * @brief Makes 4 passes on the PELs that meet the IsPELTypeFunc
+ * criteria removing PELs until IsOverLimitFunc returns false.
+ *
+ * Each pass walks the PELs in the ascending order returned by
+ * getAllPELAttributes(). Nothing is removed if IsOverLimitFunc
+ * already returns false on entry.
+ *
+ * Pass 1: only delete HMC acked PELs
+ * Pass 2: only delete OS acked PELs
+ * Pass 3: only delete PHYP sent PELs
+ * Pass 4: delete all PELs
+ *
+ * @param[in] isOverLimit - The bool(void) function that should
+ * return true if PELs still need to be
+ * removed.
+ * @param[in] isPELType - The bool(const PELAttributes&) function
+ * used to select the PELs to operate on.
+ *
+ * @param[out] removedBMCLogIDs - The OpenBMC event log IDs of the
+ * removed PELs.
+ */
+ void removePELs(IsOverLimitFunc& isOverLimit, IsPELTypeFunc& isPELType,
+ std::vector<uint32_t>& removedBMCLogIDs);
/**
* @brief The filesystem path to the PEL logs.
*/
diff --git a/test/openpower-pels/repository_test.cpp b/test/openpower-pels/repository_test.cpp
index ae88551..a4f8bef 100644
--- a/test/openpower-pels/repository_test.cpp
+++ b/test/openpower-pels/repository_test.cpp
@@ -588,3 +588,199 @@
EXPECT_EQ(stats.nonBMCInfo, 0);
}
}
+
+// Prune a repository in which no PEL has been acked by the HMC, OS or PHYP
+TEST_F(RepositoryTest, TestPruneNoAcks)
+{
+    Repository repo{repoPath, 4096 * 20, 100};
+
+    // Ten rounds, each adding one 4096B (on disk) PEL per category:
+    // BMC nonInfo, BMC info, nonBMC nonInfo and nonBMC info.
+    // None of them are acked by PHYP, host, or HMC.
+    for (uint32_t round = 1; round <= 10; round++)
+    {
+        // BMC predictive
+        auto bmcPredictive = pelFactory(round, 'O', 0x20, 0x8800, 500);
+        auto pel = std::make_unique<PEL>(bmcPredictive);
+        repo.add(pel);
+
+        // BMC info
+        auto bmcInfo = pelFactory(round + 100, 'O', 0x0, 0x8800, 500);
+        pel = std::make_unique<PEL>(bmcInfo);
+        repo.add(pel);
+
+        // Hostboot predictive
+        auto hbPredictive = pelFactory(round + 200, 'B', 0x20, 0x8800, 500);
+        pel = std::make_unique<PEL>(hbPredictive);
+        repo.add(pel);
+
+        // Hostboot info
+        auto hbInfo = pelFactory(round + 300, 'B', 0x0, 0x8800, 500);
+        pel = std::make_unique<PEL>(hbInfo);
+        repo.add(pel);
+    }
+
+    const auto& stats = repo.getSizeStats();
+    EXPECT_EQ(stats.total, 4096 * 40);
+
+    // Sanity check that the PELs with IDs 1 to 4 (the first four BMC
+    // predictive PELs added above) are present, so their absence after
+    // the prune shows they were removed.
+    for (uint32_t pelID = 1; pelID <= 4; pelID++)
+    {
+        Repository::LogID id{Repository::LogID::Pel{pelID}};
+        EXPECT_TRUE(repo.getPELAttributes(id));
+    }
+
+    // Prune down to 15%/30%/15%/30% = 90% total
+    auto removedIDs = repo.prune();
+
+    // Check the final sizes
+    EXPECT_EQ(stats.total, 4096 * 18);            // 90% of 20 PELs
+    EXPECT_EQ(stats.bmcInfo, 4096 * 3);           // 15% of 20 PELs
+    EXPECT_EQ(stats.bmcServiceable, 4096 * 6);    // 30% of 20 PELs
+    EXPECT_EQ(stats.nonBMCInfo, 4096 * 3);        // 15% of 20 PELs
+    EXPECT_EQ(stats.nonBMCServiceable, 4096 * 6); // 30% of 20 PELs
+
+    // PELs 1 to 4 must be gone now.
+    for (uint32_t pelID = 1; pelID <= 4; pelID++)
+    {
+        Repository::LogID id{Repository::LogID::Pel{pelID}};
+        EXPECT_FALSE(repo.getPELAttributes(id));
+
+        // Make sure the corresponding OpenBMC event log ID which is
+        // 500 + the PEL ID is in the list.
+        EXPECT_TRUE(std::count(removedIDs.begin(), removedIDs.end(),
+                               500 + pelID) > 0);
+    }
+}
+
+// Pruning must still work when the repository holds only a single
+// type of PEL.
+TEST_F(RepositoryTest, TestPruneInfoOnly)
+{
+    Repository repo{repoPath, 4096 * 22, 100};
+
+    // Fill 4096*23 bytes on disk of BMC info PELs
+    for (uint32_t pelID = 1; pelID <= 23; pelID++)
+    {
+        auto pelData = pelFactory(pelID, 'O', 0, 0x8800, 1000);
+        auto pel = std::make_unique<PEL>(pelData);
+        repo.add(pel);
+    }
+
+    const auto& stats = repo.getSizeStats();
+    EXPECT_EQ(stats.total, 4096 * 23);
+
+    // Pruning to 15% of 4096 * 22 will leave 3 4096B PELs.
+
+    // Confirm the oldest 20 exist before pruning, so that not finding
+    // them afterwards proves they were removed.
+    for (uint32_t pelID = 1; pelID <= 20; pelID++)
+    {
+        Repository::LogID id{Repository::LogID::Pel{pelID}};
+        EXPECT_TRUE(repo.getPELAttributes(id));
+    }
+
+    auto removedIDs = repo.prune();
+
+    // Check the final sizes
+    EXPECT_EQ(stats.total, 4096 * 3);
+    EXPECT_EQ(stats.bmcInfo, 4096 * 3);
+    EXPECT_EQ(stats.bmcServiceable, 0);
+    EXPECT_EQ(stats.nonBMCInfo, 0);
+    EXPECT_EQ(stats.nonBMCServiceable, 0);
+
+    EXPECT_EQ(removedIDs.size(), 20);
+
+    // The oldest 20 PELs are gone, and each one's OpenBMC event log
+    // ID (500 + the PEL ID) was reported.
+    for (uint32_t pelID = 1; pelID <= 20; pelID++)
+    {
+        Repository::LogID id{Repository::LogID::Pel{pelID}};
+        EXPECT_FALSE(repo.getPELAttributes(id));
+        EXPECT_TRUE(std::count(removedIDs.begin(), removedIDs.end(),
+                               500 + pelID) > 0);
+    }
+}
+
+// Test that the HMC/OS/PHYP ack values affect the
+// pruning order.
+TEST_F(RepositoryTest, TestPruneWithAcks)
+{
+    Repository repo{repoPath, 4096 * 20, 100};
+
+    // Fill 30% worth of BMC non-info non-acked PELs
+    constexpr uint32_t numUnackedPELs = 6;
+    for (uint32_t i = 1; i <= numUnackedPELs; i++)
+    {
+        // BMC predictive
+        auto data = pelFactory(i, 'O', 0x20, 0x8800, 500);
+        auto pel = std::make_unique<PEL>(data);
+        repo.add(pel);
+    }
+
+    // Add another PEL to push it over the 30%, each time adding
+    // a different type that should be pruned before the above ones
+    // even though those are older.
+    for (uint32_t i = 1; i <= 3; i++)
+    {
+        // Use an ID that doesn't collide with the PELs added above.
+        auto data = pelFactory(numUnackedPELs + i, 'O', 0x20, 0x8800, 500);
+        auto pel = std::make_unique<PEL>(data);
+        auto idToDelete = pel->obmcLogID();
+        repo.add(pel);
+
+        // One iteration per selective pruning pass:
+        //   1: HMC acked, 2: host (OS) acked, 3: host sent
+        if (1 == i)
+        {
+            repo.setPELHMCTransState(pel->id(), TransmissionState::acked);
+        }
+        else if (2 == i)
+        {
+            repo.setPELHostTransState(pel->id(), TransmissionState::acked);
+        }
+        else
+        {
+            repo.setPELHostTransState(pel->id(), TransmissionState::sent);
+        }
+
+        auto IDs = repo.prune();
+        EXPECT_EQ(repo.getSizeStats().total, 4096 * 6);
+
+        // The newest PEL should be the one deleted
+        ASSERT_EQ(IDs.size(), 1);
+        EXPECT_EQ(IDs[0], idToDelete);
+    }
+}
+
+// The limit on the total number of PELs must be enforced.
+TEST_F(RepositoryTest, TestPruneTooManyPELs)
+{
+    Repository repo{repoPath, 4096 * 100, 10};
+
+    // Add 10, which is the limit and is still OK
+    for (uint32_t pelID = 1; pelID <= 10; pelID++)
+    {
+        auto pelData = pelFactory(pelID, 'O', 0x20, 0x8800, 500);
+        auto pel = std::make_unique<PEL>(pelData);
+        repo.add(pel);
+    }
+
+    auto removedIDs = repo.prune();
+
+    // Nothing pruned yet
+    EXPECT_TRUE(removedIDs.empty());
+
+    // Add 1 more PEL which will be too many.
+    {
+        auto pelData = pelFactory(11, 'O', 0x20, 0x8800, 500);
+        auto pel = std::make_unique<PEL>(pelData);
+        repo.add(pel);
+    }
+
+    // Now that it's over the limit of 10, it will bring it down
+    // to 80%, which is 8 after it removes 3.
+    removedIDs = repo.prune();
+    EXPECT_EQ(repo.getSizeStats().total, 4096 * 8);
+    ASSERT_EQ(removedIDs.size(), 3);
+
+    // Check that it deleted the oldest ones, in order.
+    // The OpenBMC log ID is the PEL ID + 500.
+    for (uint32_t pelID = 1; pelID <= 3; pelID++)
+    {
+        EXPECT_EQ(removedIDs[pelID - 1], 500 + pelID);
+    }
+}