monitor: Allow missing D-Bus sensors on startup
Now that phosphor-fan-monitor is starting at the multi-user target, it
may be starting before the fan sensor hwmon daemon is able to put the
tach reading sensors on D-Bus. This was causing the TachSensor class
objects to not get created so even if the hwmon tach sensor values did
show up later on D-Bus fan monitor wouldn't notice them.
To fix this, still create the TachSensor objects if the corresponding
hwmon D-Bus objects aren't there, and still set them to functional in
the inventory so that any other monitoring code, such as
phosphor-dbus-monitor, won't shut down the system before the hwmon tach
sensors get a chance to show up on D-Bus, which was happening on
witherspoon when a reboot was done with the power on.
When the monitor delay timer expires to kick off monitoring, a D-Bus
read is forced, and if the hwmon sensors still aren't on D-Bus then the
corresponding TachSensor objects will be set to nonfunctional to start
down the error paths.
Also, when the power state changes to on, instead of blindly setting all
TachSensor objects to functional, again check if their hwmon sensor
values are on D-Bus before doing so.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: I3e62727296630bf68602b0472328f4613e1a78e3
diff --git a/monitor/fan.cpp b/monitor/fan.cpp
index 5919d92..8a30300 100644
--- a/monitor/fan.cpp
+++ b/monitor/fan.cpp
@@ -71,30 +71,15 @@
auto& sensors = std::get<sensorListField>(def);
for (auto& s : sensors)
{
- try
- {
- _sensors.emplace_back(std::make_shared<TachSensor>(
- mode, bus, *this, std::get<sensorNameField>(s),
- std::get<hasTargetField>(s), std::get<funcDelay>(def),
- std::get<targetInterfaceField>(s), std::get<factorField>(s),
- std::get<offsetField>(s), std::get<methodField>(def),
- std::get<thresholdField>(s), std::get<timeoutField>(def),
- std::get<nonfuncRotorErrDelayField>(def), event));
+ _sensors.emplace_back(std::make_shared<TachSensor>(
+ mode, bus, *this, std::get<sensorNameField>(s),
+ std::get<hasTargetField>(s), std::get<funcDelay>(def),
+ std::get<targetInterfaceField>(s), std::get<factorField>(s),
+ std::get<offsetField>(s), std::get<methodField>(def),
+ std::get<thresholdField>(s), std::get<timeoutField>(def),
+ std::get<nonfuncRotorErrDelayField>(def), event));
- _trustManager->registerSensor(_sensors.back());
- }
- catch (InvalidSensorError& e)
- {
- // Count the number of failed tach sensors, though if
- // _numSensorFailsForNonFunc is zero that means the fan should not
- // be set to nonfunctional.
- if (_numSensorFailsForNonFunc &&
- (++_numFailedSensor >= _numSensorFailsForNonFunc))
- {
- // Mark associated fan as nonfunctional
- updateInventory(false);
- }
- }
+ _trustManager->registerSensor(_sensors.back());
}
#ifndef MONITOR_USE_JSON
@@ -185,7 +170,42 @@
{
_monitorReady = true;
- tachChanged();
+ std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
+ if (_present)
+ {
+ try
+ {
+ // Force a getProperty call to check if the tach sensor is
+ // on D-Bus. If it isn't, now set it to nonfunctional.
+ // This isn't done earlier so that code watching for
+ // nonfunctional tach sensors doesn't take actions before
+ // those sensors show up on D-Bus.
+ sensor->updateTachAndTarget();
+ tachChanged(*sensor);
+ }
+ catch (const util::DBusServiceError& e)
+ {
+ // The tach property still isn't on D-Bus, ensure
+ // sensor is nonfunctional.
+ getLogger().log(fmt::format(
+ "Monitoring starting but {} sensor value not on D-Bus",
+ sensor->name()));
+
+ sensor->setFunctional(false);
+
+ if (_numSensorFailsForNonFunc)
+ {
+ if (_functional && (countNonFunctionalSensors() >=
+ _numSensorFailsForNonFunc))
+ {
+ updateInventory(false);
+ }
+ }
+
+ _system.fanStatusChange(*this);
+ }
+ }
+ });
}
void Fan::tachChanged()
@@ -417,13 +437,47 @@
#ifdef MONITOR_USE_JSON
if (powerStateOn)
{
- // set all fans back to functional to start with
- std::for_each(_sensors.begin(), _sensors.end(),
- [](auto& sensor) { sensor->setFunctional(true); });
-
_monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
- if (!_present)
+ if (_present)
+ {
+ std::for_each(
+ _sensors.begin(), _sensors.end(), [this](auto& sensor) {
+ try
+ {
+ // Force a getProperty call. If sensor is on D-Bus,
+ // then make sure it's functional.
+ sensor->updateTachAndTarget();
+
+ // If not functional, set it back to functional.
+ if (!sensor->functional())
+ {
+ sensor->setFunctional(true);
+ _system.fanStatusChange(*this, true);
+ }
+ }
+ catch (const util::DBusServiceError& e)
+ {
+ // Properties still aren't on D-Bus. Let startMonitor()
+ // deal with it.
+ getLogger().log(fmt::format(
+ "At power on, tach sensor {} value not on D-Bus",
+ sensor->name()));
+ }
+ });
+
+ // If configured to change functional state on the fan itself,
+ // Set it back to true now if necessary.
+ if (_numSensorFailsForNonFunc)
+ {
+ if (!_functional &&
+ (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
+ {
+ updateInventory(true);
+ }
+ }
+ }
+ else
{
getLogger().log(
fmt::format("At power on, fan {} is missing", _name));
diff --git a/monitor/system.cpp b/monitor/system.cpp
index b61e320..ea042cc 100644
--- a/monitor/system.cpp
+++ b/monitor/system.cpp
@@ -169,11 +169,11 @@
std::make_tuple(fan.present(), std::move(sensorStatus));
}
-void System::fanStatusChange(const Fan& fan)
+void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
{
updateFanHealth(fan);
- if (_powerState->isPowerOn())
+ if (_powerState->isPowerOn() && !skipRulesCheck)
{
std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
[this](auto& rule) {
diff --git a/monitor/system.hpp b/monitor/system.hpp
index e01d81a..df03fe2 100644
--- a/monitor/system.hpp
+++ b/monitor/system.hpp
@@ -70,8 +70,9 @@
* fan health map.
*
* @param[in] fan - The fan that changed
+ * @param[in] skipRulesCheck - If the rules checks should be done now.
*/
- void fanStatusChange(const Fan& fan);
+ void fanStatusChange(const Fan& fan, bool skipRulesCheck = false);
/**
* @brief Called when a fan sensor's error timer expires, which
diff --git a/monitor/tach_sensor.cpp b/monitor/tach_sensor.cpp
index 95d52b2..96fc06b 100644
--- a/monitor/tach_sensor.cpp
+++ b/monitor/tach_sensor.cpp
@@ -92,25 +92,12 @@
#endif
try
{
- // Use getProperty directly to allow a missing sensor object
- // to abort construction.
- _tachInput = util::SDBusPlus::getProperty<decltype(_tachInput)>(
- _bus, _name, FAN_SENSOR_VALUE_INTF, FAN_VALUE_PROPERTY);
+ updateTachAndTarget();
}
- catch (std::exception& e)
+ catch (const std::exception& e)
{
- log<level::ERR>(
- fmt::format("Failed to retrieve tach sensor {}", _name)
- .c_str());
- // Mark tach sensor as nonfunctional
- setFunctional(false);
- throw InvalidSensorError();
- }
-
- if (_hasTarget)
- {
- readProperty(_interface, FAN_TARGET_PROPERTY, _name, _bus,
- _tachTarget);
+ // Until the parent Fan's monitor-ready timer expires, the
+ // object can be functional with a missing D-bus sensor.
}
auto match = getMatchString(FAN_SENSOR_VALUE_INTF);
@@ -140,6 +127,17 @@
#endif
}
+void TachSensor::updateTachAndTarget()
+{
+ _tachInput = util::SDBusPlus::getProperty<decltype(_tachInput)>(
+ _bus, _name, FAN_SENSOR_VALUE_INTF, FAN_VALUE_PROPERTY);
+
+ if (_hasTarget)
+ {
+ readProperty(_interface, FAN_TARGET_PROPERTY, _name, _bus, _tachTarget);
+ }
+}
+
std::string TachSensor::getMatchString(const std::string& interface)
{
return sdbusplus::bus::match::rules::propertiesChanged(_name, interface);
diff --git a/monitor/tach_sensor.hpp b/monitor/tach_sensor.hpp
index 3c3bcf5..dc61074 100644
--- a/monitor/tach_sensor.hpp
+++ b/monitor/tach_sensor.hpp
@@ -304,6 +304,12 @@
*/
void resetMethod();
+ /**
+ * @brief Refreshes the tach input and target values by
+ * reading them from D-Bus.
+ */
+ void updateTachAndTarget();
+
private:
/**
* @brief Returns the match string to use for matching