Add CPU Thermtrip monitoring and logging
This adds a monitor for the CPU Thermtrip signals.
Tested:
Heated each CPU with a heat gun until the system shut down and
verified that the CPU Thermal Trip event was logged.
Change-Id: I6a13c22f1cce917130c2716934b474ea8f5e51f8
Signed-off-by: Jason M. Bills <jason.m.bills@linux.intel.com>
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
index 52c34b4..58231db 100644
--- a/src/host_error_monitor.cpp
+++ b/src/host_error_monitor.cpp
@@ -58,10 +58,13 @@
static boost::asio::posix::stream_descriptor err2Event(io);
static gpiod::line smiLine;
static boost::asio::posix::stream_descriptor smiEvent(io);
+static gpiod::line cpu1ThermtripLine; // GPIO line requested under the name "CPU1_THERMTRIP"
+static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io); // event descriptor handed to requestGPIOEvents together with cpu1ThermtripLine
+static gpiod::line cpu2ThermtripLine; // GPIO line requested under the name "CPU2_THERMTRIP"
+static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io); // event descriptor handed to requestGPIOEvents together with cpu2ThermtripLine
//----------------------------------
// PCH_BMC_THERMTRIP function related definition
//----------------------------------
-// GPIO Lines and Event Descriptors
static gpiod::line pchThermtripLine;
static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
@@ -116,6 +119,16 @@
"REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
}
+static void cpuThermTripLog(const int cpuNum) // Write a "CPU <cpuNum> thermal trip" entry to the journal
+{
+ std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip"; // text substituted into the MESSAGE field
+
+ sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
+ LOG_INFO, "REDFISH_MESSAGE_ID=%s",
+ "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d", // cpuNum is the only Redfish message argument
+ cpuNum, NULL); // NULL ends the sd_journal_send field list
+}
+
static void ssbThermTripLog()
{
sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
@@ -625,6 +638,59 @@
caterrHandler();
});
}
+
+static void cpu1ThermtripHandler() // Process a CPU 1 thermtrip GPIO event, then wait for the next one
+{
+ if (!hostOff) // the event is read and evaluated only while hostOff is false
+ { // NOTE(review): with hostOff true the pending event is not read here - confirm the descriptor does not stay readable
+ gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
+
+ bool cpu1Thermtrip =
+ gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE; // only a falling edge counts as a trip
+ if (cpu1Thermtrip)
+ {
+ cpuThermTripLog(1);
+ }
+ }
+ cpu1ThermtripEvent.async_wait( // wait for the descriptor to become readable again
+ boost::asio::posix::stream_descriptor::wait_read,
+ [](const boost::system::error_code ec) {
+ if (ec)
+ {
+ std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
+ << "\n";
+ return; // on error the handler is not called again, so no further wait is queued
+ }
+ cpu1ThermtripHandler(); // handle the event; the handler queues the next wait itself
+ });
+}
+
+static void cpu2ThermtripHandler() // Process a CPU 2 thermtrip GPIO event, then wait for the next one
+{
+ if (!hostOff) // the event is read and evaluated only while hostOff is false
+ { // NOTE(review): with hostOff true the pending event is not read here - confirm the descriptor does not stay readable
+ gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
+
+ bool cpu2Thermtrip =
+ gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE; // only a falling edge counts as a trip
+ if (cpu2Thermtrip)
+ {
+ cpuThermTripLog(2);
+ }
+ }
+ cpu2ThermtripEvent.async_wait( // wait for the descriptor to become readable again
+ boost::asio::posix::stream_descriptor::wait_read,
+ [](const boost::system::error_code ec) {
+ if (ec)
+ {
+ std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
+ << "\n";
+ return; // on error the handler is not called again, so no further wait is queued
+ }
+ cpu2ThermtripHandler(); // handle the event; the handler queues the next wait itself
+ });
+}
+
static void pchThermtripHandler()
{
if (!hostOff)
@@ -1052,6 +1118,24 @@
return -1;
}
+ // Request CPU1_THERMTRIP GPIO events
+ if (!host_error_monitor::requestGPIOEvents(
+ "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
+ host_error_monitor::cpu1ThermtripLine,
+ host_error_monitor::cpu1ThermtripEvent))
+ {
+ return -1;
+ }
+
+ // Request CPU2_THERMTRIP GPIO events
+ if (!host_error_monitor::requestGPIOEvents(
+ "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
+ host_error_monitor::cpu2ThermtripLine,
+ host_error_monitor::cpu2ThermtripEvent))
+ {
+ return -1;
+ }
+
// Request PCH_BMC_THERMTRIP GPIO events
if (!host_error_monitor::requestGPIOEvents(
"PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,