monitor: Use USR1 signal to dump debug data
Similar to what fan control is already doing, this commit adds a handler
for the USR1 signal to write debug data to /tmp/fan_monitor_dump.json.
The data being written is the same data saved in an event log - the
current sensor status plus any of the Logger class's logs.
Example output, which shows fan0 recovering from previous faults:
{
"logs": [
...
[
"Aug 26 17:04:47",
"Setting tach sensor /xyz/openbmc_project/sensors/fan_tach/fan0_0 functional state to false. [target = 18000, input = 3446, allowed range = (10600 - NoMax) owned = true]"
],
[
"Aug 26 17:04:47",
"Starting shutdown action 'EPOW Power Off: 60s/60s' due to cause '2 Nonfunctional Fan Rotors'"
],
[
"Aug 26 17:04:47",
"Action EPOW Power Off: 60s/60s: Starting service mode timer"
],
[
"Aug 26 17:04:47",
"Creating event log for faulted fan /xyz/openbmc_project/inventory/system/chassis/motherboard/fan0 sensor /xyz/openbmc_project/sensors/fan_tach/fan0_0"
]
],
"sensors": {
"sensors": {
"/xyz/openbmc_project/sensors/fan_tach/fan0_0": {
"functional": false,
"in_range": true,
"present": true,
"prev_tachs": "[11829,11867,11829,11867,11829,11867,11718,11467]",
"prev_targets": "[18000,9000,9040,10320,0,0,0,0]",
"tach": 11829.0,
"target": 18000,
"ticks": 18
},
"/xyz/openbmc_project/sensors/fan_tach/fan0_1": {
"functional": false,
"in_range": true,
"present": true,
"prev_tachs": "[17857,17772,17857,17772,17201,17045,16741,16375]",
"tach": 17857.0,
"ticks": 20
},
"/xyz/openbmc_project/sensors/fan_tach/fan1_0": {
"functional": true,
"in_range": true,
"present": true,
"prev_tachs": "[11755,11792,11755,11792,11755,11792,11755,11792]",
"prev_targets": "[18000,9000,9040,10320,0,0,0,0]",
"tach": 11755.0,
"target": 18000,
"ticks": 0
},
...
}
}
}
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: I84179f78ec83ca6bab788052d0bebe677c1fd29f
diff --git a/monitor/main.cpp b/monitor/main.cpp
index 9ab6d15..fbdcd0d 100644
--- a/monitor/main.cpp
+++ b/monitor/main.cpp
@@ -80,6 +80,14 @@
std::bind(&System::sighupHandler,
&system, std::placeholders::_1,
std::placeholders::_2));
+
+ // Enable SIGUSR1 handling to dump debug data
+ stdplus::signal::block(SIGUSR1);
+ sdeventplus::source::Signal sigUsr1(
+ event, SIGUSR1,
+ std::bind(&System::dumpDebugData, &system, std::placeholders::_1,
+ std::placeholders::_2));
+
bus.request_name(THERMAL_ALERT_BUSNAME);
#else
system.start();
diff --git a/monitor/system.cpp b/monitor/system.cpp
index 526d7fe..bcd9e33 100644
--- a/monitor/system.cpp
+++ b/monitor/system.cpp
@@ -46,6 +46,8 @@
using namespace phosphor::logging;
+const std::string System::dumpFile{"/tmp/fan_monitor_dump.json"};
+
System::System(Mode mode, sdbusplus::bus_t& bus,
const sdeventplus::Event& event) :
_mode(mode),
@@ -553,4 +555,30 @@
}
}
+void System::dumpDebugData(sdeventplus::source::Signal&,
+                           const struct signalfd_siginfo*)
+{
+    // Neither callback argument is used; build the JSON and write it out.
+    json dump;
+    if (!_loaded)
+    {
+        dump["error"] = "Fan monitor not loaded yet. Try again later.";
+    }
+    else
+    {
+        dump["logs"] = getLogger().getLogs();
+        dump["sensors"] = captureSensorData();
+    }
+
+    std::ofstream dumpStream{System::dumpFile};
+    if (dumpStream)
+    {
+        // The stream width is used by the JSON library as the indent size.
+        dumpStream << std::setw(4) << dump;
+        return;
+    }
+
+    log<level::ERR>("Could not open file for fan monitor dump");
+}
+
} // namespace phosphor::fan::monitor
diff --git a/monitor/system.hpp b/monitor/system.hpp
index df3e35f..bf6e6b6 100644
--- a/monitor/system.hpp
+++ b/monitor/system.hpp
@@ -26,6 +26,7 @@
#include <nlohmann/json.hpp>
#include <sdbusplus/bus.hpp>
#include <sdeventplus/event.hpp>
+#include <sdeventplus/source/event.hpp>
#include <sdeventplus/source/signal.hpp>
#include <memory>
@@ -123,6 +124,13 @@
*/
void load();
+ /**
+ * @brief Callback function to handle receiving a USR1 signal to dump
+ * debug data (logs and sensor data once loaded) as JSON to dumpFile.
+ */
+ void dumpDebugData(sdeventplus::source::Signal&,
+ const struct signalfd_siginfo*);
+
private:
/**
* @brief Callback from D-Bus when Inventory service comes online
@@ -199,6 +207,11 @@
bool _loaded = false;
/**
+ * @brief The name of the dump file
+ */
+ static const std::string dumpFile;
+
+ /**
* @brief Captures tach sensor data as JSON for use in
* fan fault and fan missing event logs.
*