monitor: Add up/down count fault detection
Create an up/down count fault determination algorithm that
could be used in place of the current timer based outOfRange()
function.
The up/down count is a different method for determining when
a fan is faulted by counting up each iteration a rotor is
out of spec and removing those counts when the rotor
returns within spec.
Tested:
1. Remove a fan and run on Mihawk: the counter adds 1 when the sensor
is out of spec. Put the fan back before the counter hits the
threshold, and the counter decrements back to 0.
2. Remove a fan: the counter adds 1 and the removed fan is marked
nonfunctional when the counter reaches the threshold. Put the
fan back, and the counter decrements back to 0 and the fan
returns to functional.
Change-Id: I632dd2c7553b007beb7ae6bb694a590d2cfc2a1c
Signed-off-by: Jolie Ku <jolie_ku@wistron.com>
Signed-off-by: Matthew Barth <msbarth@us.ibm.com>
diff --git a/monitor/tach_sensor.hpp b/monitor/tach_sensor.hpp
index 814df69..76a800d 100644
--- a/monitor/tach_sensor.hpp
+++ b/monitor/tach_sensor.hpp
@@ -42,6 +42,17 @@
};
/**
+ * The mode that the out of range method is running in:
+ * - timebased - Timer based detection using a percentage based deviation
+ * - count - Run up/down count fault detection
+ */
+enum MethodMode
+{
+ timebased = 0,
+ count
+};
+
+/**
* @class TachSensor
*
* This class represents the sensor that reads a tach value.
@@ -78,6 +89,8 @@
* @param[in] interface - the interface of the target
* @param[in] factor - the factor of the sensor target
* @param[in] offset - the offset of the sensor target
+ * @param[in] method - the out of range detection method
+ * @param[in] threshold - the fault count threshold of the count method
* @param[in] timeout - Normal timeout value to use
* @param[in] errorDelay - Delay in seconds before creating an error
* or std::nullopt if no errors.
@@ -87,7 +100,8 @@
TachSensor(Mode mode, sdbusplus::bus::bus& bus, Fan& fan,
const std::string& id, bool hasTarget, size_t funcDelay,
const std::string& interface, double factor, int64_t offset,
- size_t timeout, const std::optional<size_t>& errorDelay,
+ size_t method, size_t threshold, size_t timeout,
+ const std::optional<size_t>& errorDelay,
const sdeventplus::Event& event);
/**
@@ -136,6 +150,35 @@
}
/**
+ * @brief Returns the out of range detection method (presumably a MethodMode value - confirm)
+ */
+ inline size_t getMethod() const
+ {
+ return _method;
+ }
+
+ /**
+ * @brief Returns the threshold used by the count method
+ */
+ inline size_t getThreshold() const
+ {
+ return _threshold;
+ }
+
+ /**
+ * @brief Set the sensor faulted counter (count: presumably true to count up, false to count down - confirm)
+ */
+ void setCounter(bool count);
+
+ /**
+ * @brief Returns the current sensor faulted count
+ */
+ inline size_t getCounter() const
+ {
+ return _counter;
+ }
+
+ /**
* Returns true if the hardware behind this
* sensor is considered working OK/functional.
*/
@@ -290,6 +333,21 @@
const int64_t _offset;
/**
+ * @brief The out of range detection method, fixed at construction
+ */
+ const size_t _method;
+
+ /**
+ * @brief The threshold for the count method, fixed at construction
+ */
+ const size_t _threshold;
+
+ /**
+ * @brief The sensor faulted counter for the count method, initially 0
+ */
+ size_t _counter = 0;
+
+ /**
* @brief The input speed, from the Value dbus property
*/
double _tachInput = 0;