Memhot event monitor for MEMHOT pins
The MEM_THERM_EVENT pins are tied to the incorrect SGPIO, so
Memory Thermtrip is incorrectly logging based on the MEMHOT
pins.
Define MemhotMonitor. This changes the Memhot monitor to log
based on the MEMHOT pins.
Tested:
1. Booted and confirmed that host-error-monitor correctly starts
2. Verified using the Redfish event log: a ComponentOverTemperature entry
was generated successfully.
GET:
https://<BMC-IP>/redfish/v1/Systems/system/LogServices/EventLog/Entries
{
"@odata.id": "/redfish/v1/Systems/system/LogServices/EventLog/
Entries/1646193775",
"@odata.type": "#LogEntry.v1_8_0.LogEntry",
"Created": "2022-03-02T04:02:55+00:00",
"EntryType": "Event",
"Id": "1646193775",
"Message": "CPU 1 memory over temperature and being throttled.",
"MessageArgs": [
"CPU 1 memory"
],
"MessageId": "OpenBMC.0.1.ComponentOverTemperature",
"Name": "System Event Log Entry",
"Severity": "Critical"
}
Signed-off-by: Hardik Panchal <hardikx.panchal@intel.com>
Signed-off-by: Jayaprakash Mutyala <mutyalax.jayaprakash@intel.com>
Change-Id: I39c68155422ae2c9691f3701946a174a2caae463
diff --git a/include/error_monitors/memhot_monitor.hpp b/include/error_monitors/memhot_monitor.hpp
new file mode 100644
index 0000000..68c93b9
--- /dev/null
+++ b/include/error_monitors/memhot_monitor.hpp
@@ -0,0 +1,56 @@
+/*
+// Copyright (c) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+#pragma once
+#include <error_monitors/base_gpio_monitor.hpp>
+#include <host_error_monitor.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+namespace host_error_monitor::memhot_monitor
+{
+// Monitors one CPU's MEMHOT GPIO line through BaseGPIOMonitor; logEvent()
+// records the event as OpenBMC.0.1.ComponentOverTemperature in the journal.
+class MemhotMonitor :
+    public host_error_monitor::base_gpio_monitor::BaseGPIOMonitor
+{
+    // Assert level handed to the BaseGPIOMonitor constructor.
+    const static host_error_monitor::base_gpio_monitor::AssertValue
+        assertValue =
+            host_error_monitor::base_gpio_monitor::AssertValue::lowAssert;
+    size_t cpuNum; // CPU number printed in the log text
+
+    // Writes the journal entry that carries the Redfish message fields.
+    void logEvent() override
+    {
+        const std::string cpuLabel = "CPU " + std::to_string(cpuNum);
+        const std::string journalText = cpuLabel + " Memhot.";
+        const std::string redfishArg = cpuLabel + " memory";
+        sd_journal_send("MESSAGE=HostError: %s", journalText.c_str(),
+                        "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s",
+                        "OpenBMC.0.1.ComponentOverTemperature",
+                        "REDFISH_MESSAGE_ARGS=%s", redfishArg.c_str(), NULL);
+    }
+
+  public:
+    // Sets up the base monitor for signalName; starts monitoring if valid.
+    MemhotMonitor(boost::asio::io_service& io,
+                  std::shared_ptr<sdbusplus::asio::connection> conn,
+                  const std::string& signalName, const size_t cpuNum) :
+        BaseGPIOMonitor(io, conn, signalName, assertValue), cpuNum(cpuNum)
+    {
+        if (valid) { startMonitoring(); }
+    }
+};
+} // namespace host_error_monitor::memhot_monitor