monitor: Trust group cancel/start monitoring
Any configured fan monitoring trust group should cancel the monitoring
of all fan sensors in the group, for every monitoring method (timer
based or count based), when the group is determined not to be
trusted.
Also, when a group becomes trusted again, the trust manager should
restart monitoring of all the sensors in the group by processing each
sensor against its configured monitoring method. This matches how each
sensor is processed on a tach changed signal: only when a tach changed
signal is received is the trust state of the sensor checked, prior to
processing the state of the sensor.
Tested:
Verified that the functional state of fan sensors in a trust group is correct
Signed-off-by: Matthew Barth <msbarth@us.ibm.com>
Change-Id: I7b2ffc538eb1f17017826235353dba85e4f92ebe
diff --git a/monitor/fan.cpp b/monitor/fan.cpp
index 7905b78..18e9d05 100644
--- a/monitor/fan.cpp
+++ b/monitor/fan.cpp
@@ -167,6 +167,11 @@
}
}
+ process(sensor);
+}
+
+void Fan::process(TachSensor& sensor)
+{
// If this sensor is out of range at this moment, start
// its timer, at the end of which the inventory
// for the fan may get updated to not functional.
diff --git a/monitor/fan.hpp b/monitor/fan.hpp
index 62514f6..5e3802e 100644
--- a/monitor/fan.hpp
+++ b/monitor/fan.hpp
@@ -166,6 +166,18 @@
*/
void sensorErrorTimerExpired(const TachSensor& sensor);
+ /**
+ * @brief Process the state of the given tach sensor without checking
+ * any trust groups the sensor may be included in
+ *
+ * @param[in] sensor - Tach sensor to process
+ *
+ * This function is intended to check the current state of a tach sensor
+ * regardless of whether or not the tach sensor is configured to be in any
+ * trust groups.
+ */
+ void process(TachSensor& sensor);
+
private:
/**
* @brief Returns true if the sensor input is not within
diff --git a/monitor/tach_sensor.cpp b/monitor/tach_sensor.cpp
index 605aff5..95d52b2 100644
--- a/monitor/tach_sensor.cpp
+++ b/monitor/tach_sensor.cpp
@@ -167,6 +167,34 @@
return std::make_pair(min, max);
}
+void TachSensor::processState()
+{
+ _fan.process(*this); // processes this sensor without a trust group check
+}
+
+void TachSensor::resetMethod()
+{
+ switch (_method)
+ {
+ case MethodMode::timebased:
+ if (timerRunning())
+ {
+ stopTimer(); // cancel the in-progress timer
+ }
+ break;
+ case MethodMode::count:
+ if (_functional)
+ {
+ _counter = 0; // functional: restart count at zero
+ }
+ else
+ {
+ _counter = _threshold; // nonfunctional: keep at threshold
+ }
+ break;
+ }
+}
+
void TachSensor::setFunctional(bool functional)
{
_functional = functional;
diff --git a/monitor/tach_sensor.hpp b/monitor/tach_sensor.hpp
index 703605a..3c3bcf5 100644
--- a/monitor/tach_sensor.hpp
+++ b/monitor/tach_sensor.hpp
@@ -294,6 +294,16 @@
*/
std::pair<uint64_t, uint64_t> getRange(const size_t deviation) const;
+ /**
+ * @brief Processes the current state of the sensor via Fan::process()
+ */
+ void processState();
+
+ /**
+ * @brief Stops the timer or resets the counter per the monitoring method
+ */
+ void resetMethod();
+
private:
/**
* @brief Returns the match string to use for matching
diff --git a/monitor/trust_group.hpp b/monitor/trust_group.hpp
index e9367e2..53ed610 100644
--- a/monitor/trust_group.hpp
+++ b/monitor/trust_group.hpp
@@ -96,37 +96,28 @@
}
/**
- * Stops the timers on all sensors in the group.
+ * Cancels monitoring on all sensors in the group.
*
* Called when the group just changed to not trusted,
- * so that its sensors' timers can't fire a callback
- * that may cause them to be considered faulted.
+ * so that its sensors' monitoring method does not
+ * cause them to be considered faulted.
*/
- void stopTimers()
+ void cancelMonitoring()
{
std::for_each(_sensors.begin(), _sensors.end(),
- [](const auto& s) { s.sensor->stopTimer(); });
+ [](const auto& s) { s.sensor->resetMethod(); });
}
/**
- * Starts the timers on all functional sensors in the group if
- * their target and input values do not match.
+ * Starts monitoring on all sensors in the group by processing their current
+ * state
*
* Called when the group just changed to trusted.
*/
- void startTimers()
+ void startMonitoring()
{
- std::for_each(_sensors.begin(), _sensors.end(), [](const auto& s) {
- // If a sensor isn't functional, then its timer
- // already expired so don't bother starting it again
- if (s.sensor->functional() &&
- static_cast<uint64_t>(s.sensor->getInput()) !=
- s.sensor->getTarget())
- {
- s.sensor->startTimer(
- phosphor::fan::monitor::TimerMode::nonfunc);
- }
- });
+ std::for_each(_sensors.begin(), _sensors.end(),
+ [](const auto& s) { s.sensor->processState(); });
}
/**
@@ -226,7 +217,7 @@
* determining trust for this group
*/
const std::vector<GroupDefinition> _names;
-};
+};
} // namespace trust
} // namespace fan
diff --git a/monitor/trust_manager.hpp b/monitor/trust_manager.hpp
index 1925003..871d9be 100644
--- a/monitor/trust_manager.hpp
+++ b/monitor/trust_manager.hpp
@@ -69,10 +69,9 @@
*
* While checking group trust, the code will also check
* if the trust status has just changed. If the status
- * just changed to false, it will stop the tach error
- * timers for that group so these untrusted sensors won't
- * cause errors. If changed to true, it will start those timers
- * back up again.
+ * just changed to false, it will cancel monitoring for that
+ * group so these untrusted sensors won't cause errors. If
+ * changed to true, it will start monitoring the group again.
*
* Note this means groups should be designed such that
* in the same call to this function a sensor shouldn't
@@ -99,14 +98,14 @@
if (changed)
{
- group->stopTimers();
+ group->cancelMonitoring();
}
}
else
{
if (changed)
{
- group->startTimers();
+ group->startMonitoring();
}
}
}