PEL: Handle failing to start a PLDM cmd better
A recent PLDM bug caused the registerReceiveCallback() function, which
is used to setup listening for the PLDM response from the host when
telling them about a new PEL, to throw an exception.
When this happened, the code got stuck in the 'in progress' state, so it
would never try again when the next PEL came in.
Fix that by having startCommand() throw an exception instead of calling
the failure response function callback. With this change, the code will
continue on to call the cleanupCmd() function so everything is ready
when the next PEL comes in.
Tested:
With the bad PLDM code, after the first PEL ran out of retry attempts,
created another PEL and saw the code attempt again to call PLDM. Also,
wrote a new unit test case for it.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: I38034440435d6a86e8dd880eef09499f19dd6e9c
diff --git a/extensions/openpower-pels/pldm_interface.cpp b/extensions/openpower-pels/pldm_interface.cpp
index 502cc7d..3326d82 100644
--- a/extensions/openpower-pels/pldm_interface.cpp
+++ b/extensions/openpower-pels/pldm_interface.cpp
@@ -97,7 +97,7 @@
auto e = errno;
lg2::error("pldm_open failed. errno = {ERRNO}, rc = {RC}", "ERRNO", e,
"RC", _fd);
- throw std::exception{};
+ throw std::runtime_error{"pldm_open failed"};
}
}
@@ -140,7 +140,14 @@
}
else
{
- startCommand();
+ try
+ {
+ startCommand();
+ }
+ catch (const std::exception& e)
+ {
+ callResponseFunc(ResponseStatus::failure);
+ }
}
}
@@ -167,9 +174,11 @@
}
catch (const std::exception& e)
{
+ lg2::error("startCommand exception: {ERROR}", "ERROR", e);
+
cleanupCmd();
- callResponseFunc(ResponseStatus::failure);
+ throw;
}
}
@@ -184,6 +193,7 @@
lg2::error(
"Error calling sd_bus_call_method_async, rc = {RC}, msg = {MSG}",
"RC", rc, "MSG", strerror(-rc));
+ throw std::runtime_error{"sd_bus_call_method_async failed"};
}
}
@@ -237,7 +247,7 @@
if (rc != PLDM_SUCCESS)
{
lg2::error("encode_new_file_req failed, rc = {RC}", "RC", rc);
- throw std::exception{};
+ throw std::runtime_error{"encode_new_file_req failed"};
}
rc = pldm_send(_eid, _fd, requestMsg.data(), requestMsg.size());
@@ -246,8 +256,7 @@
auto e = errno;
lg2::error("pldm_send failed, rc = {RC}, errno = {ERRNO}", "RC", rc,
"ERRNO", e);
-
- throw std::exception{};
+ throw std::runtime_error{"pldm_send failed"};
}
}
diff --git a/test/openpower-pels/host_notifier_test.cpp b/test/openpower-pels/host_notifier_test.cpp
index 6fc235b..971dadd 100644
--- a/test/openpower-pels/host_notifier_test.cpp
+++ b/test/openpower-pels/host_notifier_test.cpp
@@ -449,6 +449,70 @@
EXPECT_EQ(notifier.queueSize(), 2);
}
+// Test that if the command cannot be started it will give
+// up but still try again later
+TEST_F(HostNotifierTest, TestCannotStartCmd)
+{
+ sdeventplus::Event sdEvent{event};
+
+ HostNotifier notifier{repo, dataIface, std::move(hostIface)};
+
+ // Make it behave like startCommand() fails.
+ auto sendFailure = [this](uint32_t /*id*/, uint32_t /*size*/) {
+ this->mockHostIface->cancelCmd();
+ return CmdStatus::failure;
+ };
+
+ auto sendSuccess = [this](uint32_t /*id*/, uint32_t /*size*/) {
+ return this->mockHostIface->send(0);
+ };
+
+ // Fails 16 times (1 fail + 15 retries) and
+ // then start working.
+ EXPECT_CALL(*mockHostIface, sendNewLogCmd(_, _))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillOnce(Invoke(sendFailure))
+ .WillRepeatedly(Invoke(sendSuccess));
+
+ dataIface.changeHostState(true);
+
+ auto pel = makePEL();
+ repo.add(pel);
+
+ // Clock more retries than necessary
+ runEvents(sdEvent, 40, mockHostIface->getReceiveRetryDelay());
+
+ // Didn't get far enough for a cmd to be processed
+ EXPECT_EQ(mockHostIface->numCmdsProcessed(), 0);
+ EXPECT_EQ(notifier.queueSize(), 1);
+
+ // At this point, commands will work again.
+
+ pel = makePEL();
+ repo.add(pel);
+
+ // Run the events to send the PELs
+ runEvents(sdEvent, 5, mockHostIface->getReceiveRetryDelay());
+
+ // All PELs sent
+ EXPECT_EQ(mockHostIface->numCmdsProcessed(), 2);
+ EXPECT_EQ(notifier.queueSize(), 0);
+}
+
// Cancel an in progress command
TEST_F(HostNotifierTest, TestCancelCmd)
{