PEL: Capture the journal in UserData sections
If a PEL message registry entry has a 'JournalCapture' section, capture
the listed portions of the journal in UserData sections for that error.
If the JSON looks like:
"JournalCapture": {
"NumLines": 30
}
Then the code will capture the previous 30 lines from the journal into a
single UserData section.
If the JSON looks like:
"JournalCapture":
{
"Sections": [
{
"SyslogID": "phosphor-bmc-state-manager",
"NumLines": 20
},
{
"SyslogID": "phosphor-log-manager",
"NumLines": 15
}
]
}
Then the code will create two UserData sections, the first with the most
recent 20 lines from phosphor-bmc-state-manager, and the second with 15
lines from phosphor-log-manager.
If a section would cause the PEL to exceed its maximum size of 16KB, it
will be dropped. While the UserData class does have a shrink() method,
it prunes data from the end, which would cause the most recent journal
entries to be removed, which could be misleading.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: I2ecbd8002b0e7087eb166a1219c6ab9da14a122a
diff --git a/extensions/openpower-pels/journal.cpp b/extensions/openpower-pels/journal.cpp
index 0a728f0..18359fd 100644
--- a/extensions/openpower-pels/journal.cpp
+++ b/extensions/openpower-pels/journal.cpp
@@ -15,10 +15,11 @@
*/
#include "journal.hpp"
-#include <phosphor-logging/log.hpp>
+#include "util.hpp"
-#include <stdexcept>
-#include <thread>
+#include <fmt/format.h>
+
+#include <phosphor-logging/log.hpp>
namespace openpower::pels
{
@@ -50,6 +51,23 @@
sd_journal* journal{nullptr};
};
+void Journal::sync() const
+{
+ auto start = std::chrono::steady_clock::now();
+
+ util::journalSync();
+
+ auto end = std::chrono::steady_clock::now();
+ auto duration =
+ std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+
+ if (duration.count() > 100)
+ {
+ log<level::INFO>(
+ fmt::format("Journal sync took {}ms", duration.count()).c_str());
+ }
+}
+
std::vector<std::string> Journal::getMessages(const std::string& syslogID,
size_t maxMessages) const
{
diff --git a/extensions/openpower-pels/journal.hpp b/extensions/openpower-pels/journal.hpp
index 1261e32..620ce50 100644
--- a/extensions/openpower-pels/journal.hpp
+++ b/extensions/openpower-pels/journal.hpp
@@ -32,6 +32,11 @@
*/
virtual std::vector<std::string> getMessages(const std::string& syslogID,
size_t maxMessages) const = 0;
+
+ /**
+ * @brief Call journalctl --sync to write unwritten journal data to disk
+ */
+ virtual void sync() const = 0;
};
/**
@@ -60,6 +65,11 @@
std::vector<std::string> getMessages(const std::string& syslogID,
size_t maxMessages) const override;
+ /**
+ * @brief Call journalctl --sync to write unwritten journal data to disk
+ */
+ void sync() const override;
+
private:
/**
* @brief Gets a field from the current journal entry
diff --git a/extensions/openpower-pels/manager.cpp b/extensions/openpower-pels/manager.cpp
index be89bb1..20992b3 100644
--- a/extensions/openpower-pels/manager.cpp
+++ b/extensions/openpower-pels/manager.cpp
@@ -376,7 +376,8 @@
}
auto pel = std::make_unique<openpower::pels::PEL>(
- *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface);
+ *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface,
+ *_journal);
_repo.add(pel);
diff --git a/extensions/openpower-pels/pel.cpp b/extensions/openpower-pels/pel.cpp
index f74eaf3..9fa1747 100644
--- a/extensions/openpower-pels/pel.cpp
+++ b/extensions/openpower-pels/pel.cpp
@@ -56,7 +56,7 @@
PEL::PEL(const message::Entry& regEntry, uint32_t obmcLogID, uint64_t timestamp,
phosphor::logging::Entry::Level severity,
const AdditionalData& additionalData, const PelFFDC& ffdcFilesIn,
- const DataInterfaceBase& dataIface)
+ const DataInterfaceBase& dataIface, const JournalBase& journal)
{
// No changes in input, for non SBE error related requests
PelFFDC ffdcFiles = ffdcFilesIn;
@@ -195,6 +195,8 @@
}
}
+ addJournalSections(regEntry, journal);
+
_ph->setSectionCount(2 + _optionalSections.size());
checkRulesAndFix();
@@ -596,6 +598,111 @@
}
}
+void PEL::addJournalSections(const message::Entry& regEntry,
+ const JournalBase& journal)
+{
+ if (!regEntry.journalCapture)
+ {
+ return;
+ }
+
+ // Write all unwritten journal data to disk.
+ journal.sync();
+
+ const auto& jc = regEntry.journalCapture.value();
+ std::vector<std::vector<std::string>> allMessages;
+
+ if (std::holds_alternative<size_t>(jc))
+ {
+ // Get the previous numLines journal entries
+ const auto& numLines = std::get<size_t>(jc);
+ try
+ {
+ auto messages = journal.getMessages("", numLines);
+ if (!messages.empty())
+ {
+ allMessages.push_back(std::move(messages));
+ }
+ }
+ catch (const std::exception& e)
+ {
+ log<level::ERR>(
+ fmt::format("Failed during journal collection: {}", e.what())
+ .c_str());
+ }
+ }
+ else if (std::holds_alternative<message::AppCaptureList>(jc))
+ {
+ // Get journal entries based on the syslog id field.
+ const auto& sections = std::get<message::AppCaptureList>(jc);
+ for (const auto& [syslogID, numLines] : sections)
+ {
+ try
+ {
+ auto messages = journal.getMessages(syslogID, numLines);
+ if (!messages.empty())
+ {
+ allMessages.push_back(std::move(messages));
+ }
+ }
+ catch (const std::exception& e)
+ {
+ log<level::ERR>(
+ fmt::format("Failed during journal collection: {}",
+ e.what())
+ .c_str());
+ }
+ }
+ }
+
+ // Create the UserData sections
+ for (const auto& messages : allMessages)
+ {
+ auto buffer = util::flattenLines(messages);
+
+ // If the buffer is way too big, it can overflow the uint16_t
+ // PEL section size field that is checked below so do a cursory
+ // check here.
+ if (buffer.size() > _maxPELSize)
+ {
+ log<level::WARNING>(
+ "Journal UserData section does not fit in PEL, dropping");
+ log<level::WARNING>(fmt::format("PEL size = {}, data size = {}",
+ size(), buffer.size())
+ .c_str());
+ continue;
+ }
+
+ // Sections must be 4 byte aligned.
+ while (buffer.size() % 4 != 0)
+ {
+ buffer.push_back(0);
+ }
+
+ auto ud = std::make_unique<UserData>(
+ static_cast<uint16_t>(ComponentID::phosphorLogging),
+ static_cast<uint8_t>(UserDataFormat::text),
+ static_cast<uint8_t>(UserDataFormatVersion::text), buffer);
+
+ if (size() + ud->header().size <= _maxPELSize)
+ {
+ _optionalSections.push_back(std::move(ud));
+ }
+ else
+ {
+ // Don't attempt to shrink here since we'd be dropping the
+ // most recent journal entries which would be confusing.
+ log<level::WARNING>(
+ "Journal UserData section does not fit in PEL, dropping");
+ log<level::WARNING>(fmt::format("PEL size = {}, UserData size = {}",
+ size(), ud->header().size)
+ .c_str());
+ ud.reset();
+ continue;
+ }
+ }
+}
+
namespace util
{
@@ -852,6 +959,23 @@
return std::make_unique<UserData>(compID, subType, version, data);
}
+std::vector<uint8_t> flattenLines(const std::vector<std::string>& lines)
+{
+ std::vector<uint8_t> out;
+
+ for (const auto& line : lines)
+ {
+ out.insert(out.end(), line.begin(), line.end());
+
+ if (out.back() != '\n')
+ {
+ out.push_back('\n');
+ }
+ }
+
+ return out;
+}
+
} // namespace util
} // namespace pels
diff --git a/extensions/openpower-pels/pel.hpp b/extensions/openpower-pels/pel.hpp
index 460d588..d6f76e1 100644
--- a/extensions/openpower-pels/pel.hpp
+++ b/extensions/openpower-pels/pel.hpp
@@ -2,6 +2,7 @@
#include "additional_data.hpp"
#include "data_interface.hpp"
+#include "journal.hpp"
#include "private_header.hpp"
#include "registry.hpp"
#include "src.hpp"
@@ -105,11 +106,12 @@
* @param[in] additionalData - The AdditionalData contents
* @param[in] ffdcFiles - FFCD files that go into UserData sections
* @param[in] dataIface - The data interface object
+ * @param[in] journal - The journal object
*/
PEL(const openpower::pels::message::Entry& entry, uint32_t obmcLogID,
uint64_t timestamp, phosphor::logging::Entry::Level severity,
const AdditionalData& additionalData, const PelFFDC& ffdcFiles,
- const DataInterfaceBase& dataIface);
+ const DataInterfaceBase& dataIface, const JournalBase& journal);
/**
* @brief Convenience function to return the log ID field from the
@@ -381,6 +383,16 @@
void updateTerminateBitInSRCSection();
/**
+ * @brief Adds journal data to the PEL as UserData sections
+ * if specified to in the message registry.
+ *
+ * @param regEntry - The registry entry
+ * @param journal - The journal object
+ */
+ void addJournalSections(const message::Entry& regEntry,
+ const JournalBase& journal);
+
+ /**
* @brief The PEL Private Header section
*/
std::unique_ptr<PrivateHeader> _ph;
@@ -457,6 +469,19 @@
*/
std::unique_ptr<UserData> makeFFDCuserDataSection(uint16_t componentID,
const PelFFDCfile& file);
+
+/**
+ * @brief Flattens a vector of strings into a vector of bytes suitable
+ * for storing in a PEL section.
+ *
+ * Adds a newline character after each string.
+ *
+ * @param lines - The vector of strings to convert
+ *
+ * @return std::vector<uint8_t> - The flattened data
+ */
+std::vector<uint8_t> flattenLines(const std::vector<std::string>& lines);
+
} // namespace util
} // namespace pels
diff --git a/test/openpower-pels/mocks.hpp b/test/openpower-pels/mocks.hpp
index 7a1ee35..5c44727 100644
--- a/test/openpower-pels/mocks.hpp
+++ b/test/openpower-pels/mocks.hpp
@@ -286,6 +286,8 @@
MOCK_METHOD(std::vector<std::string>, getMessages,
(const std::string&, size_t), (const override));
+
+ MOCK_METHOD(void, sync, (), (const override));
};
} // namespace pels
diff --git a/test/openpower-pels/pel_test.cpp b/test/openpower-pels/pel_test.cpp
index e317997..b31704b 100644
--- a/test/openpower-pels/pel_test.cpp
+++ b/test/openpower-pels/pel_test.cpp
@@ -164,6 +164,7 @@
std::vector<std::string> data{"KEY1=VALUE1"};
AdditionalData ad{data};
NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
PelFFDC ffdc;
std::vector<std::string> dumpType{"bmc/entry", "resource/entry",
@@ -172,7 +173,7 @@
.WillRepeatedly(Return(std::vector<bool>{false, false, false}));
PEL pel{regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ ad, ffdc, dataIface, journal};
EXPECT_TRUE(pel.valid());
EXPECT_EQ(pel.privateHeader().obmcLogID(), 42);
@@ -217,7 +218,7 @@
PEL pel2{
regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ ad, ffdc, dataIface, journal};
EXPECT_EQ(pel2.userHeader().actionFlags(), 0xA800);
}
@@ -245,6 +246,7 @@
std::vector<std::string> data{bigAD};
AdditionalData ad{data};
NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
std::vector<std::string> dumpType{"bmc/entry", "resource/entry",
"system/entry"};
@@ -252,7 +254,7 @@
.WillOnce(Return(std::vector<bool>{false, false, false}));
PEL pel{regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ ad, ffdc, dataIface, journal};
EXPECT_TRUE(pel.valid());
EXPECT_EQ(pel.size(), 16384);
@@ -761,6 +763,7 @@
std::vector<std::string> additionalData{"KEY1=VALUE1"};
AdditionalData ad{additionalData};
NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
PelFFDC ffdc;
std::vector<uint8_t> customData{1, 2, 3, 4, 5, 6, 7, 8};
@@ -780,7 +783,7 @@
.WillOnce(Return(std::vector<bool>{false, false, false}));
PEL pel{regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ ad, ffdc, dataIface, journal};
EXPECT_TRUE(pel.valid());
@@ -823,6 +826,7 @@
regEntry.src.reasonCode = 0x1234;
NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
PelFFDC ffdc;
const auto calloutJSON = R"(
@@ -886,7 +890,7 @@
PEL pel{
regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ ad, ffdc, dataIface, journal};
ASSERT_TRUE(pel.primarySRC().value()->callouts());
auto& callouts = pel.primarySRC().value()->callouts()->callouts();
@@ -936,7 +940,7 @@
PEL pel{
regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ ad, ffdc, dataIface, journal};
// no callouts
EXPECT_FALSE(pel.primarySRC().value()->callouts());
@@ -996,6 +1000,7 @@
AdditionalData ad;
NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
EXPECT_CALL(dataIface, expandLocationCode("P0-C1", 0))
.Times(1)
@@ -1021,8 +1026,8 @@
regEntry.src.type = 0xBD;
regEntry.src.reasonCode = 0x1234;
- PEL pel{regEntry, 42, 5, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ PEL pel{regEntry, 42, 5, phosphor::logging::Entry::Level::Error,
+ ad, ffdc, dataIface, journal};
ASSERT_TRUE(pel.valid());
ASSERT_TRUE(pel.primarySRC().value()->callouts());
@@ -1079,6 +1084,7 @@
AdditionalData ad;
NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
std::vector<std::string> dumpType{"bmc/entry", "resource/entry",
"system/entry"};
@@ -1092,8 +1098,8 @@
regEntry.src.type = 0xBD;
regEntry.src.reasonCode = 0x1234;
- PEL pel{regEntry, 42, 5, phosphor::logging::Entry::Level::Error,
- ad, ffdc, dataIface};
+ PEL pel{regEntry, 42, 5, phosphor::logging::Entry::Level::Error,
+ ad, ffdc, dataIface, journal};
ASSERT_TRUE(pel.valid());
ASSERT_TRUE(pel.primarySRC().value()->callouts());
@@ -1110,3 +1116,176 @@
}
fs::remove_all(dir);
}
+
+TEST_F(PELTest, FlattenLinesTest)
+{
+ std::vector<std::string> msgs{"test1 test2", "test3 test4", "test5 test6"};
+
+ auto buffer = util::flattenLines(msgs);
+
+ std::string string{"test1 test2\ntest3 test4\ntest5 test6\n"};
+ std::vector<uint8_t> expected(string.begin(), string.end());
+
+ EXPECT_EQ(buffer, expected);
+}
+
+void checkJournalSection(const std::unique_ptr<Section>& section,
+ const std::string& expected)
+{
+ ASSERT_EQ(SectionID::userData,
+ static_cast<SectionID>(section->header().id));
+ ASSERT_EQ(UserDataFormat::text,
+ static_cast<UserDataFormat>(section->header().subType));
+ ASSERT_EQ(section->header().version,
+ static_cast<uint8_t>(UserDataFormatVersion::text));
+
+ auto ud = static_cast<UserData*>(section.get());
+
+ std::vector<uint8_t> expectedData(expected.begin(), expected.end());
+
+ // PEL sections are 4B aligned so add padding before the compare
+ while (expectedData.size() % 4 != 0)
+ {
+ expectedData.push_back('\0');
+ }
+
+ EXPECT_EQ(ud->data(), expectedData);
+}
+
+TEST_F(PELTest, CaptureJournalTest)
+{
+ message::Entry regEntry;
+ uint64_t timestamp = 5;
+
+ regEntry.name = "test";
+ regEntry.subsystem = 5;
+ regEntry.actionFlags = 0xC000;
+ regEntry.src.type = 0xBD;
+ regEntry.src.reasonCode = 0x1234;
+
+ std::vector<std::string> data;
+ AdditionalData ad{data};
+ NiceMock<MockDataInterface> dataIface;
+ NiceMock<MockJournal> journal;
+ PelFFDC ffdc;
+
+ std::vector<std::string> dumpType{"bmc/entry", "resource/entry",
+ "system/entry"};
+ EXPECT_CALL(dataIface, checkDumpStatus(dumpType))
+ .WillRepeatedly(Return(std::vector<bool>{false, false, false}));
+
+ size_t pelSectsWithOneUD{0};
+
+ {
+ // Capture 5 lines from the journal into a single UD section
+ message::JournalCapture jc = size_t{5};
+ regEntry.journalCapture = jc;
+
+ std::vector<std::string> msgs{"test1 test2", "test3 test4",
+ "test5 test6", "4", "5"};
+
+ EXPECT_CALL(journal, getMessages("", 5)).WillOnce(Return(msgs));
+
+ PEL pel{
+ regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
+ ad, ffdc, dataIface, journal};
+
+ // Check the generated UserData section
+ std::string expected{"test1 test2\ntest3 test4\ntest5 test6\n4\n5\n"};
+
+ checkJournalSection(pel.optionalSections().back(), expected);
+
+ // Save for upcoming testcases
+ pelSectsWithOneUD = pel.privateHeader().sectionCount();
+ }
+
+ {
+ // Attempt to capture too many journal entries so the
+ // section gets dropped.
+ message::JournalCapture jc = size_t{1};
+ regEntry.journalCapture = jc;
+
+ EXPECT_CALL(journal, sync()).Times(1);
+
+ // A 20000 byte line won't fit in a PEL
+ EXPECT_CALL(journal, getMessages("", 1))
+ .WillOnce(
+ Return(std::vector<std::string>{std::string(20000, 'x')}));
+
+ PEL pel{
+ regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
+ ad, ffdc, dataIface, journal};
+
+ // Check for 1 fewer sections than in the previous PEL
+ EXPECT_EQ(pel.privateHeader().sectionCount(), pelSectsWithOneUD - 1);
+ }
+
+ // Capture 3 different journal sections
+ {
+ message::AppCaptureList captureList{
+ message::AppCapture{"app1", 3},
+ message::AppCapture{"app2", 4},
+ message::AppCapture{"app3", 1},
+ };
+ message::JournalCapture jc = captureList;
+ regEntry.journalCapture = jc;
+
+ std::vector<std::string> app1{"A B", "C D", "E F"};
+ std::vector<std::string> app2{"1 2", "3 4", "5 6", "7 8"};
+ std::vector<std::string> app3{"a b c"};
+
+ std::string expected1{"A B\nC D\nE F\n"};
+ std::string expected2{"1 2\n3 4\n5 6\n7 8\n"};
+ std::string expected3{"a b c\n"};
+
+ EXPECT_CALL(journal, sync()).Times(1);
+ EXPECT_CALL(journal, getMessages("app1", 3)).WillOnce(Return(app1));
+ EXPECT_CALL(journal, getMessages("app2", 4)).WillOnce(Return(app2));
+ EXPECT_CALL(journal, getMessages("app3", 1)).WillOnce(Return(app3));
+
+ PEL pel{
+ regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
+ ad, ffdc, dataIface, journal};
+
+ // Two more sections than the 1 extra UD section in the first testcase
+ ASSERT_EQ(pel.privateHeader().sectionCount(), pelSectsWithOneUD + 2);
+
+ const auto& optionalSections = pel.optionalSections();
+ auto numOptSections = optionalSections.size();
+
+ checkJournalSection(optionalSections[numOptSections - 3], expected1);
+ checkJournalSection(optionalSections[numOptSections - 2], expected2);
+ checkJournalSection(optionalSections[numOptSections - 1], expected3);
+ }
+
+ {
+ // One section gets saved, and one is too big and gets dropped
+ message::AppCaptureList captureList{
+ message::AppCapture{"app4", 2},
+ message::AppCapture{"app5", 1},
+ };
+ message::JournalCapture jc = captureList;
+ regEntry.journalCapture = jc;
+
+ std::vector<std::string> app4{"w x", "y z"};
+ std::string expected4{"w x\ny z\n"};
+
+ EXPECT_CALL(journal, sync()).Times(1);
+
+ EXPECT_CALL(journal, getMessages("app4", 2)).WillOnce(Return(app4));
+
+ // A 20000 byte line won't fit in a PEL
+ EXPECT_CALL(journal, getMessages("app5", 1))
+ .WillOnce(
+ Return(std::vector<std::string>{std::string(20000, 'x')}));
+
+ PEL pel{
+ regEntry, 42, timestamp, phosphor::logging::Entry::Level::Error,
+ ad, ffdc, dataIface, journal};
+
+ // The last section should have been dropped, so same as first TC
+ ASSERT_EQ(pel.privateHeader().sectionCount(), pelSectsWithOneUD);
+
+ checkJournalSection(pel.optionalSections().back(), expected4);
+ }
+}