Check for initial error state on startup

Since the error monitor is looking at GPIO edge events, an error
won't be detected if it is already asserted when the monitor
starts. This change detects if the host is already on when
monitoring starts and, if so, checks whether an error is already
asserted.

Tested:
Injected an IERR while the error monitor was stopped. After
checking that the IERR pin was asserted, started the error monitor
and confirmed that the error was detected, logged, and handled
correctly.

Change-Id: Ie1e0238c914e6b2cda4b121f579776b813b191c1
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
index 459eb7a..3d1b7e7 100644
--- a/src/host_error_monitor.cpp
+++ b/src/host_error_monitor.cpp
@@ -44,6 +44,7 @@
 static gpiod::line pchThermtripLine;
 static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
 
+static void initializeErrorState();
 static void initializeHostState()
 {
     conn->async_method_call(
@@ -60,6 +61,11 @@
                 return;
             }
             hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
+            // If the system is on, initialize the error state
+            if (!hostOff)
+            {
+                initializeErrorState();
+            }
         },
         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
         "org.freedesktop.DBus.Properties", "Get",
@@ -215,6 +221,46 @@
         "com.intel.crashdump.Stored", "GenerateStoredLog");
 }
 
+static void caterrAssertHandler()
+{
+    std::cout << "CPU CATERR detected, starting timer\n";
+    caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
+    caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
+        if (ec)
+        {
+            // operation_aborted is expected if timer is canceled
+            // before completion.
+            if (ec != boost::asio::error::operation_aborted)
+            {
+                std::cerr << "caterr timeout async_wait failed: "
+                          << ec.message() << "\n";
+            }
+            std::cout << "CATERR assert timer canceled\n";
+            return;
+        }
+        std::cout << "CATERR asset timer completed\n";
+        conn->async_method_call(
+            [](boost::system::error_code ec,
+               const std::variant<bool>& property) {
+                if (ec)
+                {
+                    return;
+                }
+                const bool* reset = std::get_if<bool>(&property);
+                if (reset == nullptr)
+                {
+                    std::cerr << "Unable to read reset on CATERR value\n";
+                    return;
+                }
+                startCrashdumpAndRecovery(*reset);
+            },
+            "xyz.openbmc_project.Settings",
+            "/xyz/openbmc_project/control/processor_error_config",
+            "org.freedesktop.DBus.Properties", "Get",
+            "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
+    });
+}
+
 static void caterrHandler()
 {
     if (!hostOff)
@@ -225,46 +271,7 @@
             gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
         if (caterr)
         {
-            std::cout << "CPU CATERR detected, starting timer\n";
-            caterrAssertTimer.expires_after(
-                std::chrono::milliseconds(caterrTimeoutMs));
-            caterrAssertTimer.async_wait(
-                [](const boost::system::error_code ec) {
-                    if (ec)
-                    {
-                        // operation_aborted is expected if timer is canceled
-                        // before completion.
-                        if (ec != boost::asio::error::operation_aborted)
-                        {
-                            std::cerr << "caterr timeout async_wait failed: "
-                                      << ec.message() << "\n";
-                        }
-                        std::cout << "CATERR assert timer canceled\n";
-                        return;
-                    }
-                    std::cout << "CATERR asset timer completed\n";
-                    conn->async_method_call(
-                        [](boost::system::error_code ec,
-                           const std::variant<bool>& property) {
-                            if (ec)
-                            {
-                                return;
-                            }
-                            const bool* reset = std::get_if<bool>(&property);
-                            if (reset == nullptr)
-                            {
-                                std::cerr
-                                    << "Unable to read reset on CATERR value\n";
-                                return;
-                            }
-                            startCrashdumpAndRecovery(*reset);
-                        },
-                        "xyz.openbmc_project.Settings",
-                        "/xyz/openbmc_project/control/processor_error_config",
-                        "org.freedesktop.DBus.Properties", "Get",
-                        "xyz.openbmc_project.Control.Processor.ErrConfig",
-                        "ResetOnCATERR");
-                });
+            caterrAssertHandler();
         }
         else
         {
@@ -312,6 +319,14 @@
         });
 }
 
+static void initializeErrorState()
+{
+    // Handle CPU_CATERR if it's asserted now
+    if (caterrLine.get_value() == 0)
+    {
+        caterrAssertHandler();
+    }
+}
 } // namespace host_error_monitor
 
 int main(int argc, char* argv[])