Add SMI timeout monitoring and logging

This adds SMI timeout monitoring to the host error monitor.  When
the SMI signal stays asserted for more than 90 seconds, the BMC will
log it, trigger a Crashdump, and reset the system if the ResetOnSMI
setting allows it.

Tested:
Manually triggered an SMI timeout and confirmed that the event is
handled and logged correctly.

Change-Id: I0579c96211d8e6abcdc190c154f3671151d5e60d
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
index 2d5d900..bd2ac9c 100644
--- a/src/host_error_monitor.cpp
+++ b/src/host_error_monitor.cpp
@@ -31,6 +31,7 @@
 
 const static constexpr size_t caterrTimeoutMs = 2000;
 const static constexpr size_t err2TimeoutMs = 90000;
+const static constexpr size_t smiTimeoutMs = 90000;
 const static constexpr size_t crashdumpTimeoutS = 300;
 
 // Timers
@@ -38,12 +39,16 @@
 static boost::asio::steady_timer caterrAssertTimer(io);
 // Timer for ERR2 asserted
 static boost::asio::steady_timer err2AssertTimer(io);
+// Timer for SMI asserted
+static boost::asio::steady_timer smiAssertTimer(io);
 
 // GPIO Lines and Event Descriptors
 static gpiod::line caterrLine;
 static boost::asio::posix::stream_descriptor caterrEvent(io);
 static gpiod::line err2Line;
 static boost::asio::posix::stream_descriptor err2Event(io);
+static gpiod::line smiLine;
+static boost::asio::posix::stream_descriptor smiEvent(io);
 //----------------------------------
 // PCH_BMC_THERMTRIP function related definition
 //----------------------------------
@@ -92,6 +97,13 @@
                     "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
 }
 
+static void smiTimeoutLog() // Journal a Redfish CPUError "SMI Timeout" event
+{
+    sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
+                    "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
+                    "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
+}
+
 static void initializeErrorState();
 static void initializeHostState()
 {
@@ -151,6 +163,7 @@
             {
                 caterrAssertTimer.cancel();
                 err2AssertTimer.cancel();
+                smiAssertTimer.cancel();
             }
         });
 }
@@ -708,6 +721,75 @@
                          });
 }
 
+// Start the SMI timeout; on expiry, log it and trigger crashdump/recovery.
+static void smiAssertHandler()
+{
+    smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
+    smiAssertTimer.async_wait([](const boost::system::error_code ec) {
+        if (ec == boost::asio::error::operation_aborted)
+        {
+            return; // Expected: the timer was canceled before it expired
+        }
+        if (ec)
+        {
+            std::cerr << "smi timeout async_wait failed: " << ec.message()
+                      << "\n";
+            return;
+        }
+        std::cerr << "SMI asserted for " << smiTimeoutMs << " ms\n";
+        smiTimeoutLog();
+        conn->async_method_call( // Read the ResetOnSMI setting over D-Bus
+            [](boost::system::error_code ec, const std::variant<bool>& value) {
+                if (ec)
+                {
+                    std::cerr << "Unable to read reset on SMI value: "
+                              << ec.message() << "\n";
+                    return;
+                }
+                const bool* reset = std::get_if<bool>(&value);
+                if (reset == nullptr)
+                {
+                    std::cerr << "Unable to read reset on SMI value\n";
+                    return;
+                }
+                startCrashdumpAndRecovery(*reset);
+            },
+            "xyz.openbmc_project.Settings",
+            "/xyz/openbmc_project/control/bmc_reset_disables",
+            "org.freedesktop.DBus.Properties", "Get",
+            "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
+    });
+}
+
+// SMI GPIO event callback: arm the timeout on assert, cancel on deassert.
+static void smiHandler()
+{
+    // Always consume the event, even while the host is off; otherwise it
+    // stays pending on smiEvent and is handled late as a stale edge.
+    const gpiod::line_event gpioLineEvent = smiLine.event_read();
+    if (!hostOff)
+    {
+        if (gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE)
+        {
+            smiAssertHandler();
+        }
+        else
+        {
+            smiAssertTimer.cancel();
+        }
+    }
+    smiEvent.async_wait(
+        boost::asio::posix::stream_descriptor::wait_read,
+        [](const boost::system::error_code ec) {
+            if (ec)
+            {
+                std::cerr << "smi handler error: " << ec.message() << "\n";
+                return;
+            }
+            smiHandler();
+        });
+}
+
 static void initializeErrorState()
 {
     // Handle CPU_CATERR if it's asserted now
@@ -721,6 +803,12 @@
     {
         err2AssertHandler();
     }
+
+    // Handle SMI if it's asserted now
+    if (smiLine.get_value() == 0)
+    {
+        smiAssertHandler();
+    }
 }
 } // namespace host_error_monitor
 
@@ -759,6 +847,14 @@
         return -1;
     }
 
+    // Request SMI GPIO events
+    if (!host_error_monitor::requestGPIOEvents(
+            "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
+            host_error_monitor::smiEvent))
+    {
+        return -1;
+    }
+
     // Request PCH_BMC_THERMTRIP GPIO events
     if (!host_error_monitor::requestGPIOEvents(
             "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,