Switch to a specific signal for crashdump complete
Matching on the PropertiesChanged event with a timer is unreliable:
crashdump can sometimes take longer than expected and miss the
timeout, but we don't want to keep the match for too long because
a PropertiesChanged event can be emitted for more than just
crashdump completion.
This change switches to a new signal that is specific to crashdump
completion.  Since that signal is only emitted when a crashdump
completes, we no longer need the timer to reset the match, so
this also removes the timer.
Tested:
Injected an IERR and confirmed that when the CrashdumpComplete
signal was received, the host system was reset.
Change-Id: I6c0f27ac3648778763c22fcf8c1fd310a2ae7a79
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
index 873348f..f23d25e 100644
--- a/src/host_error_monitor.cpp
+++ b/src/host_error_monitor.cpp
@@ -44,7 +44,6 @@
 const static constexpr size_t caterrTimeoutMsMax = 600000; // 10 minutes maximum
 const static constexpr size_t errTimeoutMs = 90000;
 const static constexpr size_t smiTimeoutMs = 90000;
-const static constexpr size_t crashdumpTimeoutS = 300;
 
 // Timers
 // Timer for CATERR asserted
@@ -422,14 +421,12 @@
 {
     std::cerr << "Starting crashdump\n";
     static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
-    static boost::asio::steady_timer crashdumpTimer(io);
 
     crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
         *conn,
-        "type='signal',interface='org.freedesktop.DBus.Properties',"
-        "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
+        "type='signal',interface='com.intel.crashdump.Stored',member='"
+        "CrashdumpComplete'",
         [recoverSystem](sdbusplus::message::message& msg) {
-            crashdumpTimer.cancel();
             std::cerr << "Crashdump completed\n";
             if (recoverSystem)
             {
@@ -439,29 +436,11 @@
             crashdumpCompleteMatch.reset();
         });
 
-    crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
-    crashdumpTimer.async_wait([](const boost::system::error_code ec) {
-        if (ec)
-        {
-            // operation_aborted is expected if timer is canceled
-            if (ec != boost::asio::error::operation_aborted)
-            {
-                std::cerr << "Crashdump async_wait failed: " << ec.message()
-                          << "\n";
-            }
-            std::cerr << "Crashdump timer canceled\n";
-            return;
-        }
-        std::cerr << "Crashdump failed to complete before timeout\n";
-        crashdumpCompleteMatch.reset();
-    });
-
     conn->async_method_call(
         [](boost::system::error_code ec) {
             if (ec)
             {
                 std::cerr << "failed to start Crashdump\n";
-                crashdumpTimer.cancel();
             }
         },
         "com.intel.crashdump", "/com/intel/crashdump",