Add host error event monitor
Add a host error event monitor that listens for events reported by
host-error-monitor and adds an IPMI SEL record to the journal for each.
Tested:
Tested together with the following changes:
https://gerrit.openbmc.org/c/openbmc/host-error-monitor/+/59127
https://gerrit.openbmc.org/c/openbmc/host-error-monitor/+/59129
https://gerrit.openbmc.org/c/openbmc/host-error-monitor/+/59130
Triggered a host error and checked that the "ipmitool sel" output is as expected.
Signed-off-by: JinFuLin <JeffLin2@quantatw.com>
Change-Id: I72a49d62d9c3c4248ed8d748aebe2c8171221078
diff --git a/include/host_error_event_monitor.hpp b/include/host_error_event_monitor.hpp
new file mode 100644
index 0000000..86c8776
--- /dev/null
+++ b/include/host_error_event_monitor.hpp
@@ -0,0 +1,93 @@
+/*
+// Copyright (c) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <boost/container/flat_map.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+#include <sel_logger.hpp>
+#include <sensorutils.hpp>
+
+using sdbusMatch = std::shared_ptr<sdbusplus::bus::match_t>;
+static sdbusMatch thermTripEventMatcher;
+static sdbusMatch ierrEventMatcher; // NOTE(review): only copied into the map
+// Per-event signal matches, then the object paths that are currently asserted.
+static boost::container::flat_map<std::string, sdbusMatch> hostErrorMatches = {
+ {"ThermalTrip", thermTripEventMatcher}, {"IERR", ierrEventMatcher}};
+static boost::container::flat_set<std::string> hostErrorEvents;
+
+/**
+ * @brief Log a SEL record when a host error's "Asserted" property changes.
+ * Marked inline so the header can be included by more than one source file.
+ * Repeated asserts and de-asserts without a recorded assert are not logged.
+ * @param[in] msg  Signal message read as (interface name, property map).
+ */
+inline void hostErrorEventMonitor(sdbusplus::message_t& msg)
+{
+    std::string msgInterface;
+    boost::container::flat_map<std::string, std::variant<bool>> values;
+    try
+    {
+        msg.read(msgInterface, values);
+    }
+    catch (const sdbusplus::exception_t& ec)
+    {
+        std::cerr << "error getting asserted value from " << msg.get_path()
+                  << " ec= " << ec.what() << "\n";
+        return;
+    }
+    const auto findState = values.find("Asserted");
+    if (findState == values.end())
+    {
+        return; // A different property changed; nothing to log.
+    }
+    bool asserted = std::get<bool>(findState->second);
+    std::string objectPath = msg.get_path();
+    // hostErrorEvents holds the currently asserted paths: log only when the
+    // set changes (insert succeeded on assert, erase removed on de-assert).
+    const bool stateChanged = asserted
+                                  ? hostErrorEvents.insert(objectPath).second
+                                  : hostErrorEvents.erase(objectPath) != 0;
+    if (!stateChanged)
+    {
+        return;
+    }
+    // The event name is the last path element (whole path if it has no '/').
+    const std::string eventName =
+        objectPath.substr(objectPath.find_last_of('/') + 1);
+    std::string message = eventName + (asserted ? " Asserted" : " De-Asserted");
+    // First SEL data byte: 0x01 for ThermalTrip interfaces, 0x00 otherwise.
+    uint8_t selType = msgInterface.ends_with("ThermalTrip") ? 0x01 : 0x00;
+    std::vector<uint8_t> selData{selType, 0xff, 0xff};
+    selAddSystemRecord(message, objectPath, selData, asserted, selBMCGenID);
+}
+
+/** @brief Subscribe to PropertiesChanged for every hostErrorMatches entry.
+ *  @param[in] conn  D-Bus connection used to create the matches.
+ */
+inline static void startHostErrorEventMonitor(
+    std::shared_ptr<sdbusplus::asio::connection> conn)
+{
+    for (auto& [name, match] : hostErrorMatches)
+    {
+        // The handler uses no captured state, so the lambda captures nothing.
+        match = std::make_shared<sdbusplus::bus::match_t>(
+            static_cast<sdbusplus::bus_t&>(*conn),
+            "type='signal',interface='org.freedesktop.DBus.Properties',member='"
+            "PropertiesChanged',arg0namespace='xyz.openbmc_project."
+            "HostErrorMonitor.Processor." + name + "'",
+            [](sdbusplus::message_t& msg) { hostErrorEventMonitor(msg); });
+    }
+}
diff --git a/meson.build b/meson.build
index 0c7f34c..8e6ac29 100644
--- a/meson.build
+++ b/meson.build
@@ -28,6 +28,9 @@
if get_option('log-alarm')
cpp_args += '-DSEL_LOGGER_MONITOR_THRESHOLD_ALARM_EVENTS'
endif
+if get_option('log-host')
+ cpp_args += '-DSEL_LOGGER_MONITOR_HOST_ERROR_EVENTS'
+endif
if get_option('send-to-logger')
cpp_args += '-DSEL_LOGGER_SEND_TO_LOGGING_SERVICE'
diff --git a/meson_options.txt b/meson_options.txt
index 1a1cd51..81ab2f6 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -6,6 +6,8 @@
description: 'Automatically log SEL records for watchdog events')
option('log-alarm', type: 'boolean',
description: 'Monitor threshold alarm signals and log SEL records for threshold sensor events')
+option('log-host', type: 'boolean',
+ description: 'Automatically log SEL records for host error events')
option('send-to-logger', type: 'boolean',
description: 'Automatically log events to Redfish for pulse type assert-deassert sensor events')
option('clears-sel', type: 'boolean',
diff --git a/src/sel_logger.cpp b/src/sel_logger.cpp
index 29cbd46..6798682 100644
--- a/src/sel_logger.cpp
+++ b/src/sel_logger.cpp
@@ -27,6 +27,9 @@
#ifdef SEL_LOGGER_MONITOR_THRESHOLD_ALARM_EVENTS
#include <threshold_alarm_event_monitor.hpp>
#endif
+#ifdef SEL_LOGGER_MONITOR_HOST_ERROR_EVENTS
+#include <host_error_event_monitor.hpp>
+#endif
#include <filesystem>
#include <fstream>
@@ -305,6 +308,10 @@
#ifdef SEL_LOGGER_MONITOR_THRESHOLD_ALARM_EVENTS
startThresholdAlarmMonitor(conn);
#endif
+
+#ifdef SEL_LOGGER_MONITOR_HOST_ERROR_EVENTS
+ startHostErrorEventMonitor(conn);
+#endif
io.run();
return 0;