PEL: Notifier support for 'host full'

The host firmware may have a limited size for its staging area
before it passes the PELs through to the OS, and this area may
fill up with PELs it can't send if there are too many or the OS
isn't up yet.

In this case, it will send down an 'Ack PEL' PLDM command with a
special response indicating this host full condition.  The PLDM
daemon will then call a method on this daemon to let it know.

This command handles the host full condition on the HostNotifier
class.  When this is set:

* The PEL that hit this condition will be put back on the queue
  to be sent again.
* No new PELs will be sent up, except as noted below
* A 60s timer will be started at the end of which another attempt
  will be made to send a PEL, in the hopes the condition went away.
  - If it didn't go away, this process will repeat.
  - If it did go away, a successful ack will be received and things
    will proceed as normal.

Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: Iaeb38f43f7acc595bcff234ba50cedf8188b3d9b
diff --git a/extensions/openpower-pels/host_notifier.cpp b/extensions/openpower-pels/host_notifier.cpp
index b4e92a8..ca30340 100644
--- a/extensions/openpower-pels/host_notifier.cpp
+++ b/extensions/openpower-pels/host_notifier.cpp
@@ -30,7 +30,10 @@
     _repo(repo),
     _dataIface(dataIface), _hostIface(std::move(hostIface)),
     _retryTimer(_hostIface->getEvent(),
-                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this))
+                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
+    _hostFullTimer(
+        _hostIface->getEvent(),
+        std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this))
 {
     // Subscribe to be told about new PELs.
     _repo.subscribeToAdds(subscriptionName,
@@ -155,7 +158,8 @@
 
     _pelQueue.push_back(pel.id());
 
-    if (!_dataIface.isHostUp())
+    // Notify shouldn't happen if host is down or full
+    if (!_dataIface.isHostUp() || _hostFull)
     {
         return;
     }
@@ -194,7 +198,8 @@
 
 void HostNotifier::doNewLogNotify()
 {
-    if (!_dataIface.isHostUp() || _retryTimer.isEnabled())
+    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
+        _hostFullTimer.isEnabled())
     {
         return;
     }
@@ -264,6 +269,7 @@
 void HostNotifier::hostStateChange(bool hostUp)
 {
     _retryCount = 0;
+    _hostFull = false;
 
     if (hostUp && !_pelQueue.empty())
     {
@@ -282,6 +288,11 @@
         }
 
         _sentPELs.clear();
+
+        if (_hostFullTimer.isEnabled())
+        {
+            _hostFullTimer.setEnabled(false);
+        }
     }
 }
 
@@ -298,7 +309,8 @@
 
         _repo.setPELHostTransState(id, TransmissionState::sent);
 
-        if (!_pelQueue.empty())
+        // If the host is full, don't send off the next PEL
+        if (!_hostFull && !_pelQueue.empty())
         {
             doNewLogNotify();
         }
@@ -324,6 +336,11 @@
     }
 }
 
+void HostNotifier::hostFullTimerExpired()
+{
+    doNewLogNotify();
+}
+
 void HostNotifier::stopCommand()
 {
     _retryCount = 0;
@@ -355,6 +372,51 @@
     {
         _sentPELs.erase(sent);
     }
+
+    // An ack means the host is no longer full
+    if (_hostFullTimer.isEnabled())
+    {
+        _hostFullTimer.setEnabled(false);
+    }
+
+    if (_hostFull)
+    {
+        _hostFull = false;
+
+        // Start sending PELs again, from the event loop
+        if (!_pelQueue.empty())
+        {
+            scheduleDispatch();
+        }
+    }
+}
+
+void HostNotifier::setHostFull(uint32_t id)
+{
+    log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));
+
+    _hostFull = true;
+
+    // This PEL needs to get re-sent
+    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
+    if (sent != _sentPELs.end())
+    {
+        _sentPELs.erase(sent);
+        _repo.setPELHostTransState(id, TransmissionState::newPEL);
+
+        if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
+            _pelQueue.end())
+        {
+            _pelQueue.push_front(id);
+        }
+    }
+
+    // The only PELs that will be sent when the
+    // host is full is from this timer callback.
+    if (!_hostFullTimer.isEnabled())
+    {
+        _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
+    }
 }
 
 } // namespace openpower::pels
diff --git a/extensions/openpower-pels/host_notifier.hpp b/extensions/openpower-pels/host_notifier.hpp
index d160dd4..21bd072 100644
--- a/extensions/openpower-pels/host_notifier.hpp
+++ b/extensions/openpower-pels/host_notifier.hpp
@@ -87,10 +87,36 @@
      *
      * This means the PEL never needs to be sent up again.
      *
+     * If the host was previously full, it is also an indication
+     * it no longer is.
+     *
      * @param[in] id - The PEL ID
      */
     void ackPEL(uint32_t id);
 
+    /**
+     * @brief Called when the host does not have room for more
+     *        PELs at this time.
+     *
+     * This can happen when an OS isn't running yet, and the
+     * staging area to hold the PELs before sending them up
+     * to the OS is full.  This will stop future PEls from being
+     * sent up, as explained below.
+     *
+     * The PEL with this ID will need to be sent again, so its
+     * state is set back to 'new', and it is removed from the list
+     * of already sent PELs.
+     *
+     * A timer will be started, if it isn't already running, to
+     * issue another send in the hopes that space has been freed
+     * up by then (Receiving an ackPEL response is also an
+     * indication of this if there happened to have been other
+     * PELs in flight).
+     *
+     * @param[in] id - The PEL ID
+     */
+    void setHostFull(uint32_t id);
+
   private:
     /**
      * @brief This function gets called by the Repository class
@@ -171,6 +197,15 @@
     void retryTimerExpired();
 
     /**
+     * @brief The function called when the 'host full' retry timer
+     *        expires.
+     *
+     * This will re-issue a command to try again with the PEL at
+     * the front of the queue.
+     */
+    void hostFullTimerExpired();
+
+    /**
      * @brief Stops an in progress command
      *
      * In progress meaning after the send but before the response.
@@ -217,11 +252,23 @@
     size_t _retryCount = 0;
 
     /**
+     * @brief Indicates if the host has said it is full and does not
+     *        currently have the space for more PELs.
+     */
+    bool _hostFull = false;
+
+    /**
      * @brief The command retry timer.
      */
     sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic> _retryTimer;
 
     /**
+     * @brief The host full timer, used to retry sending a PEL if the host
+     *        said it is full.
+     */
+    sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic> _hostFullTimer;
+
+    /**
      * @brief The object used to dispatch a new PEL send from the
      *        event loop, so the calling function can be returned from
      *        first.
diff --git a/test/openpower-pels/host_notifier_test.cpp b/test/openpower-pels/host_notifier_test.cpp
index 62c79b2..cd30e8d 100644
--- a/test/openpower-pels/host_notifier_test.cpp
+++ b/test/openpower-pels/host_notifier_test.cpp
@@ -643,3 +643,106 @@
 
     EXPECT_EQ(notifier.queueSize(), 0);
 }
+
+// Test the host full condition
+TEST_F(HostNotifierTest, TestHostFull)
+{
+    // The full interaction with the host is:
+    // BMC:  new PEL available
+    // Host: ReadPELFile  (not modeled here)
+    // Host: Ack(id) (if not full), or HostFull(id)
+    // BMC: if full and any new PELs come in, don't sent them
+    // Start a timer and try again
+    // Host responds with either Ack or full
+    // and repeat
+
+    Repository repo{repoPath};
+    MockDataInterface dataIface;
+
+    sdeventplus::Event sdEvent{event};
+
+    std::unique_ptr<HostInterface> hostIface =
+        std::make_unique<MockHostInterface>(event, dataIface);
+
+    MockHostInterface& mockHostIface =
+        reinterpret_cast<MockHostInterface&>(*hostIface);
+
+    HostNotifier notifier{repo, dataIface, std::move(hostIface)};
+
+    auto send = [&mockHostIface](uint32_t id, uint32_t size) {
+        return mockHostIface.send(0);
+    };
+
+    EXPECT_CALL(mockHostIface, sendNewLogCmd(_, _))
+        .WillRepeatedly(Invoke(send));
+
+    dataIface.changeHostState(true);
+
+    // Add and dispatch/send one PEL
+    auto pel = makePEL();
+    auto id = pel->id();
+    repo.add(pel);
+    runEvents(sdEvent, 2);
+
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 1);
+    EXPECT_EQ(notifier.queueSize(), 0);
+
+    // Host is full
+    notifier.setHostFull(id);
+
+    // It goes back on the queue
+    EXPECT_EQ(notifier.queueSize(), 1);
+
+    // The transmission state goes back to new
+    Repository::LogID i{Repository::LogID::Pel{id}};
+    auto data = repo.getPELData(i);
+    PEL pelFromRepo{*data};
+    EXPECT_EQ(pelFromRepo.hostTransmissionState(), TransmissionState::newPEL);
+
+    // Clock it, nothing should be sent still.
+    runEvents(sdEvent, 1);
+
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 1);
+    EXPECT_EQ(notifier.queueSize(), 1);
+
+    // Add another PEL and clock it, still nothing sent
+    pel = makePEL();
+    repo.add(pel);
+    runEvents(sdEvent, 2);
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 1);
+    EXPECT_EQ(notifier.queueSize(), 2);
+
+    // Let the host full timer expire to trigger a retry.
+    // Add some extra event passes just to be sure nothing new is sent.
+    runEvents(sdEvent, 5, mockHostIface.getHostFullRetryDelay());
+
+    // The timer expiration will send just the 1, not both
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 2);
+    EXPECT_EQ(notifier.queueSize(), 1);
+
+    // Host still full
+    notifier.setHostFull(id);
+
+    // Let the host full timer attempt again
+    runEvents(sdEvent, 2, mockHostIface.getHostFullRetryDelay());
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 3);
+
+    // Add yet another PEL with the retry timer expired.
+    // It shouldn't get sent out.
+    pel = makePEL();
+    repo.add(pel);
+    runEvents(sdEvent, 2);
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 3);
+
+    // The last 2 PELs still on the queue
+    EXPECT_EQ(notifier.queueSize(), 2);
+
+    // Host no longer full, it finally acks the first PEL
+    notifier.ackPEL(id);
+
+    // Now the remaining 2 PELs will be dispatched
+    runEvents(sdEvent, 3);
+
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 5);
+    EXPECT_EQ(notifier.queueSize(), 0);
+}
diff --git a/test/openpower-pels/mocks.hpp b/test/openpower-pels/mocks.hpp
index dcce142..5bec579 100644
--- a/test/openpower-pels/mocks.hpp
+++ b/test/openpower-pels/mocks.hpp
@@ -100,6 +100,18 @@
     }
 
     /**
+     * @brief Returns the amount of time to wait before retrying if the
+     *        host firmware's PEL storage was full and it can't store
+     *        any more logs until it is freed up somehow.
+     *
+     * @return milliseconds - The amount of time to wait
+     */
+    virtual std::chrono::milliseconds getHostFullRetryDelay() const override
+    {
+        return std::chrono::milliseconds(20);
+    }
+
+    /**
      * @brief Returns the number of commands processed
      */
     size_t numCmdsProcessed() const