Add rate limiting

A host CPU can write POST codes much faster than the BMC can handle
them, considering all the D-Bus/IPC work required. Ideally `dbus-broker`
would apply backpressure when it gets full of unhandled signals, but its
quota mechanism uses a simple per-user accounting that doesn't
differentiate between all the connections from OpenBMC daemons running
as root. So there is no way to configure it to prevent just `snoopd`
from sending too many messages - instead it will disconnect arbitrary
services leading to mass chaos.

So without a D-Bus policy mechanism to prevent excess memory usage,
there are 2 different failure cases during a POST code storm:
1. `snoopd` continues to send messages faster than `post-code-manager`
   can process them, leading to `dbus-broker` consuming all the system
   memory.
2. `snoopd` fills up the D-Bus socket buffer. Once sd-bus fails to send
   a message across the socket, it starts queuing messages internally
   leading to `snoopd` consuming all the system memory. This only
   happens because we get stuck in the `snoopd` read loop during a POST
   code storm, and we don't process other events that would allow the
   write queue to drain.

As a workaround, introduce configurable rate limiting to `snoopd`. A new
meson option 'rate-limit' sets the corresponding '--rate-limit'
command-line parameter. Both take an integer value representing the
maximum number of POST codes to process per second. The meson option
defaults to 1000; setting it to 0 omits '--rate-limit' and thereby
disables rate limiting (the command-line value itself must be >= 1).

Tested: Ran a POST code stress test on the host for 30 minutes:
```
[root@sut ~]# stress-ng --ioport 2
```

Watched BMC process memory usage and CPU usage in `top`, verified that
`post-code-manager`, `dbus-broker`, and `snoopd` each used less than 10%
CPU and 2% memory on AST2600 with 512 MiB of DRAM.

Change-Id: If03a01e0cd62366d188109bb4dff52958346e1db
Signed-off-by: Jonathan Doman <jonathan.doman@intel.com>
diff --git a/lpcsnoop/snoop.hpp b/lpcsnoop/snoop.hpp
index dfa11de..f04c090 100644
--- a/lpcsnoop/snoop.hpp
+++ b/lpcsnoop/snoop.hpp
@@ -26,4 +26,5 @@
                          : PostObject::action::emit_object_added)
     {
     }
+    unsigned int rateLimit = 0;
 };
diff --git a/main.cpp b/main.cpp
index 3b76870..5188502 100644
--- a/main.cpp
+++ b/main.cpp
@@ -27,6 +27,7 @@
 #include <systemd/sd-event.h>
 #include <unistd.h>
 
+#include <chrono>
 #include <cstdint>
 #include <exception>
 #include <functional>
@@ -36,6 +37,7 @@
 #include <sdeventplus/source/event.hpp>
 #include <sdeventplus/source/io.hpp>
 #include <sdeventplus/source/signal.hpp>
+#include <sdeventplus/source/time.hpp>
 #include <sdeventplus/utility/sdbus.hpp>
 #include <span>
 #include <stdplus/signal.hpp>
@@ -47,6 +49,7 @@
 
 static size_t codeSize = 1; /* Size of each POST code in bytes */
 const char* defaultHostInstances = "0";
+static bool verbose = false;
 #ifdef ENABLE_IPMI_SNOOP
 const uint8_t minPositionVal = 0;
 const uint8_t maxPositionVal = 5;
@@ -109,15 +112,76 @@
             name, codeSize, defaultHostInstances);
 }
 
+/**
+ * Call once per POST code read. When reporter.rateLimit codes (0 = unlimited)
+ * arrive before the 1 second interval ends, turn ioSource off and arm a timer
+ * that turns it back on at the interval's end. The count and the interval end
+ * time are function-local statics, so they are shared by every caller.
+ * @return true if ioSource was just disabled and the caller should stop.
+ */
+bool rateLimit(PostReporter& reporter, sdeventplus::source::IO& ioSource)
+{
+    if (reporter.rateLimit == 0)
+    {
+        // Rate limiting is disabled.
+        return false;
+    }
+
+    using Clock = sdeventplus::Clock<sdeventplus::ClockId::Monotonic>;
+
+    static constexpr std::chrono::seconds rateLimitInterval(1);
+    static unsigned int rateLimitCount = 0;
+    static Clock::time_point rateLimitEndTime;
+
+    const sdeventplus::Event& event = ioSource.get_event();
+
+    if (rateLimitCount == 0)
+    {
+        // Initialize the end time when we start a new interval
+        rateLimitEndTime = Clock(event).now() + rateLimitInterval;
+    }
+    // Still under this interval's budget: let the caller keep reading.
+    if (++rateLimitCount < reporter.rateLimit)
+    {
+        return false;
+    }
+    // Budget used up: reset so the next code received opens a new interval.
+    rateLimitCount = 0;
+    // Interval already over (codes arrived slowly enough): no throttling.
+    if (rateLimitEndTime < Clock(event).now())
+    {
+        return false;
+    }
+
+    if (verbose)
+    {
+        fprintf(stderr, "Hit POST code rate limit - disabling temporarily\n");
+    }
+    // Turn the IO source off; the floating timer below turns it back on.
+    ioSource.set_enabled(sdeventplus::source::Enabled::Off);
+    sdeventplus::source::Time<sdeventplus::ClockId::Monotonic>(
+        event, rateLimitEndTime, std::chrono::milliseconds(100),
+        [&ioSource](auto&, auto) {
+            if (verbose)
+            {
+                fprintf(stderr, "Reenabling POST code handler\n");
+            }
+            ioSource.set_enabled(sdeventplus::source::Enabled::On);
+        })
+        .set_floating(true);
+    return true;
+}
+
 /*
  * Callback handling IO event from the POST code fd. i.e. there is new
  * POST code available to read.
  */
-void PostCodeEventHandler(PostReporter* reporter, bool verbose,
-                          sdeventplus::source::IO& s, int postFd, uint32_t)
+void PostCodeEventHandler(PostReporter* reporter, sdeventplus::source::IO& s,
+                          int postFd, uint32_t)
 {
     uint64_t code = 0;
     ssize_t readb;
+
     while ((readb = read(postFd, &code, codeSize)) > 0)
     {
         code = le64toh(code);
@@ -134,6 +198,11 @@
         // read depends on old data being cleared since it doens't always read
         // the full code size
         code = 0;
+
+        if (rateLimit(*reporter, s))
+        {
+            return;
+        }
     }
 
     if (readb < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
@@ -226,9 +295,9 @@
 #ifndef ENABLE_IPMI_SNOOP
     int postFd = -1;
 #endif
+    unsigned int rateLimit = 0;
 
     int opt;
-    bool verbose = false;
 
 #ifdef ENABLE_IPMI_SNOOP
     std::vector<std::string> host;
@@ -250,12 +319,14 @@
         #ifndef ENABLE_IPMI_SNOOP
         {"device", optional_argument, NULL, 'd'},
         #endif
+        {"rate-limit", optional_argument, NULL, 'r'},
         {"verbose", no_argument, NULL, 'v'},
         {0, 0, 0, 0}
     };
     // clang-format on
 
-    while ((opt = getopt_long(argc, argv, "h:b:d:v", long_options, NULL)) != -1)
+    while ((opt = getopt_long(argc, argv, "h:b:d:r:v", long_options, NULL)) !=
+           -1)
     {
         switch (opt)
         {
@@ -299,11 +370,34 @@
                 }
                 break;
 #endif
+            case 'r': {
+                // Bare "--rate-limit" (optional_argument) leaves optarg NULL;
+                // guard it so std::stoi and fprintf never get a null pointer.
+                int argVal = -1;
+                try
+                {
+                    argVal = optarg ? std::stoi(optarg) : -1;
+                }
+                catch (...)
+                {
+                }
+                if (argVal < 1)
+                {
+                    fprintf(stderr, "Invalid rate limit '%s'. Must be >= 1.\n",
+                            optarg ? optarg : "");
+                    return EXIT_FAILURE;
+                }
+                rateLimit = static_cast<unsigned int>(argVal);
+                fprintf(stderr, "Rate limiting to %d POST codes per second.\n",
+                        argVal);
+                break;
+            }
             case 'v':
                 verbose = true;
                 break;
             default:
                 usage(argv[0]);
+                return EXIT_FAILURE;
         }
     }
 
@@ -338,9 +432,10 @@
         std::optional<sdeventplus::source::IO> reporterSource;
         if (postFd > 0)
         {
+            reporter.rateLimit = rateLimit;
             reporterSource.emplace(
-                event, postFd, EPOLLIN | EPOLLET,
-                std::bind_front(PostCodeEventHandler, &reporter, verbose));
+                event, postFd, EPOLLIN,
+                std::bind_front(PostCodeEventHandler, &reporter));
         }
         // Enable bus to handle incoming IO and bus events
         auto intCb = [](sdeventplus::source::Signal& source,
diff --git a/meson.build b/meson.build
index 9a0d49a..8573539 100644
--- a/meson.build
+++ b/meson.build
@@ -33,7 +33,12 @@
 
 if get_option('snoop-device') != ''
   snoopd_args += ' -d /dev/' + get_option('snoop-device')
+  rate_limit = get_option('rate-limit')
+  if rate_limit > 0
+    snoopd_args += ' --rate-limit=' + rate_limit.to_string()
+  endif
 endif
+
 conf_data.set('SNOOPD_ARGS', snoopd_args)
 
 configure_file(
diff --git a/meson_options.txt b/meson_options.txt
index fb4460e..9e0ef64 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -32,3 +32,11 @@
 option(
     'tests', type: 'feature', description: 'Build tests.',
 )
+option(
+    'rate-limit',
+    description: 'Maximum number of POST codes to read from snoop device every'
+    + ' second. Value of 0 disables rate limiting.',
+    type: 'integer',
+    min: 0,
+    value: 1000
+)