Introduce ignoreFailIfHostOff config setting
Some sensors only provide valid readings when the host is powered on.
This change introduces the `ignoreFailIfHostOff` configuration option so
that an unavailable reading is judged according to the host state:
- Host OFF: Sensor unavailable is acceptable and will not be treated as
a failure.
- Host ON (running): Sensor unavailable is unexpected, and the failsafe
mode will be triggered.
This ensures that sensors dependent on host power state are handled
correctly without causing unnecessary failsafe triggers during host-off.
Tested on Catalina: failsafe mode is not triggered when the host is off.
- config.json example: add `ignoreFailIfHostOff` for sensors only
available when the host is on.
```
{
"sensors": [
......
{
"name": "HDDBOARD_SSD0_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/HDDBOARD_SSD0_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "HDDBOARD_SSD2_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/HDDBOARD_SSD2_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "PDB_P48V_HSC1_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/PDB_P48V_HSC1_TEMP_C",
"timeout": 0,
"ignoreDbusMinMax": true
},
{
"name": "PDB_VR_P12V_N1_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/PDB_VR_P12V_N1_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "PDB_VR_P12V_N2_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/PDB_VR_P12V_N2_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "IOB0_NIC0_OSFP_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/IOB0_NIC0_OSFP_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "IOB0_NIC1_OSFP_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/IOB0_NIC1_OSFP_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "IOB1_NIC0_OSFP_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/IOB1_NIC0_OSFP_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
{
"name": "IOB1_NIC1_OSFP_TEMP_C",
"type": "temp",
"readPath": "/xyz/openbmc_project/sensors/temperature/IOB1_NIC1_OSFP_TEMP_C",
"timeout": 0,
"ignoreFailIfHostOff": true,
"ignoreDbusMinMax": true
},
......
],
"zones": [
......
]
}
```
Change-Id: I5355c453ca3c6d918c197dcd2cc9119e471d615d
Signed-off-by: Potin Lai <potin.lai@quantatw.com>
diff --git a/conf.hpp b/conf.hpp
index 796f80e..9d73cf8 100644
--- a/conf.hpp
+++ b/conf.hpp
@@ -31,6 +31,7 @@
int64_t timeout;
bool ignoreDbusMinMax;
bool unavailableAsFailed;
+ bool ignoreFailIfHostOff;
};
/*
diff --git a/hoststatemonitor.hpp b/hoststatemonitor.hpp
new file mode 100644
index 0000000..4d47705
--- /dev/null
+++ b/hoststatemonitor.hpp
@@ -0,0 +1,180 @@
+#pragma once
+
+#include <boost/container/flat_map.hpp>
+#include <sdbusplus/bus.hpp>
+#include <sdbusplus/bus/match.hpp>
+#include <sdbusplus/message.hpp>
+
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <variant>
+
+// D-Bus coordinates used to query and watch the host0 state object.
+// Declared `inline` so that every translation unit including this header
+// shares a single definition of each constant.
+inline constexpr const char* PROPERTIES_INTERFACE =
+    "org.freedesktop.DBus.Properties";
+inline constexpr const char* HOST_STATE_BUSNAME =
+    "xyz.openbmc_project.State.Host0";
+inline constexpr const char* HOST_STATE_INTERFACE =
+    "xyz.openbmc_project.State.Host";
+inline constexpr const char* HOST_STATE_PATH =
+    "/xyz/openbmc_project/state/host0";
+inline constexpr const char* CURRENT_HOST_STATE_PROPERTY = "CurrentHostState";
+
+/**
+ * Singleton that caches whether host0 is running.
+ *
+ * The constructor reads CurrentHostState once; after startMonitoring() the
+ * cached flag is refreshed from PropertiesChanged signals. The host counts
+ * as powered on only while that state string ends with ".Running".
+ */
+class HostStateMonitor
+{
+  public:
+    /**
+     * Return the singleton, constructing it on a default bus connection if
+     * it does not exist yet.
+     */
+    static HostStateMonitor& getInstance();
+
+    /**
+     * Return the singleton, constructing it on @p bus if it does not exist
+     * yet. An already constructed instance keeps its original bus and
+     * @p bus is ignored.
+     */
+    static HostStateMonitor& getInstance(sdbusplus::bus_t& bus);
+
+    ~HostStateMonitor() = default;
+
+    // Singleton: neither copyable nor movable.
+    HostStateMonitor(const HostStateMonitor&) = delete;
+    HostStateMonitor& operator=(const HostStateMonitor&) = delete;
+    HostStateMonitor(HostStateMonitor&&) = delete;
+    HostStateMonitor& operator=(HostStateMonitor&&) = delete;
+
+    /** Subscribe to host state changes; no-op when already subscribed. */
+    void startMonitoring();
+
+    /** Drop the subscription; the cached state is left as it is. */
+    void stopMonitoring();
+
+    /** @return true if the last state read ended with ".Running". */
+    bool isPowerOn() const
+    {
+        return powerStatusOn;
+    }
+
+  private:
+    explicit HostStateMonitor(sdbusplus::bus_t& bus);
+
+    /** Update the cached state from a PropertiesChanged signal. */
+    void handleStateChange(sdbusplus::message_t& message);
+
+    /** Query CurrentHostState once; falls back to "off" on failure. */
+    void getInitialState();
+
+    sdbusplus::bus_t& bus;
+    bool powerStatusOn;
+    std::unique_ptr<sdbusplus::bus::match_t> hostStateMatch;
+};
+
+// Implementation
+inline HostStateMonitor& HostStateMonitor::getInstance()
+{
+    // Created on the first call to this overload, even when the singleton
+    // was already constructed on a caller-supplied bus.
+    static sdbusplus::bus_t defaultBus = sdbusplus::bus::new_default();
+    return getInstance(defaultBus);
+}
+
+inline HostStateMonitor& HostStateMonitor::getInstance(sdbusplus::bus_t& bus)
+{
+    // Function-local static: only the first call's bus reaches the ctor.
+    static HostStateMonitor instance(bus);
+    return instance;
+}
+
+inline HostStateMonitor::HostStateMonitor(sdbusplus::bus_t& bus) :
+    bus(bus), powerStatusOn(false), hostStateMatch(nullptr)
+{
+    // Seed the cached state before any change signal has been received.
+    getInitialState();
+}
+
+inline void HostStateMonitor::startMonitoring()
+{
+    if (hostStateMatch == nullptr)
+    {
+        using namespace sdbusplus::bus::match::rules;
+
+        // The callback captures `this`; the match is a member of this
+        // object, so it is destroyed together with it.
+        hostStateMatch = std::make_unique<sdbusplus::bus::match_t>(
+            bus,
+            propertiesChangedNamespace(HOST_STATE_PATH, HOST_STATE_INTERFACE),
+            [this](sdbusplus::message_t& message) {
+                handleStateChange(message);
+            });
+    }
+}
+
+inline void HostStateMonitor::stopMonitoring()
+{
+    hostStateMatch.reset();
+}
+
+inline void HostStateMonitor::handleStateChange(sdbusplus::message_t& message)
+{
+    std::string objectName;
+    boost::container::flat_map<std::string, std::variant<std::string>> values;
+
+    try
+    {
+        message.read(objectName, values);
+
+        auto findState = values.find(CURRENT_HOST_STATE_PROPERTY);
+        if (findState != values.end())
+        {
+            const std::string& stateValue =
+                std::get<std::string>(findState->second);
+            powerStatusOn = stateValue.ends_with(".Running");
+        }
+    }
+    catch (const std::exception& e)
+    {
+        // The cached state is left untouched when the signal cannot be
+        // decoded.
+        std::cerr << "Failed to handle host state change: " << e.what()
+                  << std::endl;
+    }
+}
+
+inline void HostStateMonitor::getInitialState()
+{
+    try
+    {
+        auto method = bus.new_method_call(HOST_STATE_BUSNAME, HOST_STATE_PATH,
+                                          PROPERTIES_INTERFACE, "Get");
+        method.append(HOST_STATE_INTERFACE, CURRENT_HOST_STATE_PROPERTY);
+
+        auto reply = bus.call(method);
+        std::variant<std::string> currentState;
+        reply.read(currentState);
+
+        const std::string& stateValue = std::get<std::string>(currentState);
+        powerStatusOn = stateValue.ends_with(".Running");
+    }
+    catch (const std::exception& e)
+    {
+        // Fall back to "off" when the state cannot be read, but log it so a
+        // missing or failing host state service can be diagnosed.
+        // NOTE(review): "off" suppresses failures of ignoreFailIfHostOff
+        // sensors; confirm that is the intended fallback.
+        std::cerr << "Failed to get initial host state: " << e.what()
+                  << std::endl;
+        powerStatusOn = false;
+    }
+}
diff --git a/main.cpp b/main.cpp
index d4bfaae..d50bfbd 100644
--- a/main.cpp
+++ b/main.cpp
@@ -19,6 +19,7 @@
#include "conf.hpp"
#include "dbus/dbusconfiguration.hpp"
#include "failsafeloggers/builder.hpp"
+#include "hoststatemonitor.hpp"
#include "pid/builder.hpp"
#include "pid/buildjson.hpp"
#include "pid/pidloop.hpp"
@@ -94,6 +95,7 @@
static sdbusplus::asio::connection modeControlBus(io);
static sdbusplus::asio::connection hostBus(io, sdbusplus::bus::new_bus());
static sdbusplus::asio::connection passiveBus(io, sdbusplus::bus::new_bus());
+static sdbusplus::asio::connection hostMatchBus(io, sdbusplus::bus::new_bus());
namespace pid_control
{
@@ -426,6 +428,10 @@
pid_control::tryRestartControlLoops();
+ /* setup host state monitor */
+ auto& monitor = HostStateMonitor::getInstance(hostMatchBus);
+ monitor.startMonitoring();
+
io.run();
return 0;
}
diff --git a/sensors/builder.cpp b/sensors/builder.cpp
index e523b26..636e734 100644
--- a/sensors/builder.cpp
+++ b/sensors/builder.cpp
@@ -158,7 +158,8 @@
}
auto sensor = std::make_unique<PluggableSensor>(
- name, info->timeout, std::move(ri), std::move(wi));
+ name, info->timeout, std::move(ri), std::move(wi),
+ info->ignoreFailIfHostOff);
mgmr.addSensor(info->type, name, std::move(sensor));
}
else if (info->type == "temp" || info->type == "margin" ||
@@ -186,7 +187,8 @@
{
wi = std::make_unique<ReadOnlyNoExcept>();
auto sensor = std::make_unique<PluggableSensor>(
- name, info->timeout, std::move(ri), std::move(wi));
+ name, info->timeout, std::move(ri), std::move(wi),
+ info->ignoreFailIfHostOff);
mgmr.addSensor(info->type, name, std::move(sensor));
}
}
diff --git a/sensors/buildjson.cpp b/sensors/buildjson.cpp
index c5ef849..ed6e35e 100644
--- a/sensors/buildjson.cpp
+++ b/sensors/buildjson.cpp
@@ -51,6 +51,7 @@
*/
s.ignoreDbusMinMax = false;
s.unavailableAsFailed = true;
+ s.ignoreFailIfHostOff = false;
s.min = 0;
s.max = 0;
@@ -66,6 +67,12 @@
j.at("unavailableAsFailed").get_to(s.unavailableAsFailed);
}
+ auto findIgnoreIfHostOff = j.find("ignoreFailIfHostOff");
+ if (findIgnoreIfHostOff != j.end())
+ {
+ j.at("ignoreFailIfHostOff").get_to(s.ignoreFailIfHostOff);
+ }
+
/* The min field is optional in a configuration. */
auto min = j.find("min");
if (min != j.end())
diff --git a/sensors/host.cpp b/sensors/host.cpp
index 0bf78e0..f388443 100644
--- a/sensors/host.cpp
+++ b/sensors/host.cpp
@@ -17,6 +17,7 @@
#include "host.hpp"
#include "failsafeloggers/failsafe_logger_utility.hpp"
+#include "hoststatemonitor.hpp"
#include "interfaces.hpp"
#include "sensor.hpp"
@@ -45,10 +46,10 @@
std::unique_ptr<Sensor> HostSensor::createTemp(
const std::string& name, int64_t timeout, sdbusplus::bus_t& bus,
- const char* objPath, bool defer)
+ const char* objPath, bool defer, bool ignoreFailIfHostOff)
{
- auto sensor =
- std::make_unique<HostSensor>(name, timeout, bus, objPath, defer);
+ auto sensor = std::make_unique<HostSensor>(name, timeout, bus, objPath,
+ defer, ignoreFailIfHostOff);
sensor->value(0);
// DegreesC and value of 0 are the defaults at present, therefore testing
@@ -112,6 +113,15 @@
return false;
}
+ if (getIgnoreFailIfHostOff())
+ {
+ auto& hostState = HostStateMonitor::getInstance();
+ if (!hostState.isPowerOn())
+ {
+ return false;
+ }
+ }
+
outputFailsafeLogWithSensor(getName(), true, getName(),
"The sensor has invalid readings.");
return true;
diff --git a/sensors/host.hpp b/sensors/host.hpp
index d3f97ed..0f6eac4 100644
--- a/sensors/host.hpp
+++ b/sensors/host.hpp
@@ -45,11 +45,12 @@
public:
static std::unique_ptr<Sensor> createTemp(
const std::string& name, int64_t timeout, sdbusplus::bus_t& bus,
- const char* objPath, bool defer);
+ const char* objPath, bool defer, bool ignoreFailIfHostOff = false);
HostSensor(const std::string& name, int64_t timeout, sdbusplus::bus_t& bus,
- const char* objPath, bool defer) :
- Sensor(name, timeout),
+ const char* objPath, bool defer,
+ bool ignoreFailIfHostOff = false) :
+ Sensor(name, timeout, ignoreFailIfHostOff),
ValueObject(bus, objPath,
defer ? ValueObject::action::defer_emit
: ValueObject::action::emit_object_added)
diff --git a/sensors/pluggable.cpp b/sensors/pluggable.cpp
index f9844f5..491152f 100644
--- a/sensors/pluggable.cpp
+++ b/sensors/pluggable.cpp
@@ -16,6 +16,7 @@
#include "pluggable.hpp"
+#include "hoststatemonitor.hpp"
#include "interfaces.hpp"
#include <cstdint>
@@ -41,7 +42,18 @@
bool PluggableSensor::getFailed(void)
{
- return _reader->getFailed();
+ bool isFailed = _reader->getFailed();
+
+ if (isFailed && getIgnoreFailIfHostOff())
+ {
+ auto& hostState = HostStateMonitor::getInstance();
+ if (!hostState.isPowerOn())
+ {
+ return false;
+ }
+ }
+
+ return isFailed;
}
std::string PluggableSensor::getFailReason(void)
diff --git a/sensors/pluggable.hpp b/sensors/pluggable.hpp
index b65aefd..f14aa2c 100644
--- a/sensors/pluggable.hpp
+++ b/sensors/pluggable.hpp
@@ -19,8 +19,9 @@
public:
PluggableSensor(const std::string& name, int64_t timeout,
std::unique_ptr<ReadInterface> reader,
- std::unique_ptr<WriteInterface> writer) :
- Sensor(name, timeout), _reader(std::move(reader)),
+ std::unique_ptr<WriteInterface> writer,
+ bool ignoreFailIfHostOff = false) :
+ Sensor(name, timeout, ignoreFailIfHostOff), _reader(std::move(reader)),
_writer(std::move(writer))
{}
diff --git a/sensors/sensor.hpp b/sensors/sensor.hpp
index e319d2b..49489db 100644
--- a/sensors/sensor.hpp
+++ b/sensors/sensor.hpp
@@ -28,8 +28,10 @@
return (type == "fan") ? 0 : 2;
}
- Sensor(const std::string& name, int64_t timeout) :
- _name(name), _timeout(timeout)
+ Sensor(const std::string& name, int64_t timeout,
+ bool ignoreFailIfHostOff = false) :
+ _name(name), _timeout(timeout),
+ _ignoreFailIfHostOff(ignoreFailIfHostOff)
{}
virtual ~Sensor() = default;
@@ -67,9 +69,15 @@
return _timeout;
}
+ bool getIgnoreFailIfHostOff(void) const
+ {
+ return _ignoreFailIfHostOff;
+ }
+
private:
std::string _name;
int64_t _timeout;
+ bool _ignoreFailIfHostOff;
};
} // namespace pid_control
diff --git a/util.cpp b/util.cpp
index bced2c9..61f8d4a 100644
--- a/util.cpp
+++ b/util.cpp
@@ -49,7 +49,8 @@
std::cout << pair.second.min << ", ";
std::cout << pair.second.max << ", ";
std::cout << pair.second.timeout << ", ";
- std::cout << pair.second.unavailableAsFailed << "},\n\t},\n";
+ std::cout << pair.second.unavailableAsFailed << ", ";
+ std::cout << pair.second.ignoreFailIfHostOff << "},\n\t},\n";
}
std::cout << "}\n\n";
std::cout << "ZoneDetailsConfig\n";