Make specific unavailable sensors not trigger failsafe

By convention, sensors in certain states, such as 'not present' or
'power state not matching', are marked as 'unavailable' on D-Bus.
In such states, some specific sensors should not be considered
failed and should not trigger the PID 'failsafe'.

A typical example is a powered-off system: its CPU/DIMM temp sensors
are 'unavailable', but these sensors should not be treated as 'failed'
and should not trigger the PID 'failsafe'. This is necessary for
systems whose Fans keep working when the CPU is off.

This feature is configurable per sensor (valid on thermal sensors). It
can be enabled by setting the PID controller option
"InputUnavailableAsFailed" to 'false' when configuring the PID module
via entity-manager, or by setting the sensor option "unavailableAsFailed"
to 'false' when configuring the PID module via JSON. (These options are
optional and default to 'true'.)

Tested:
1. On a Fan 'always-on' system, enable this feature on CPU temp sensors
and power off the system; 'unavailable' CPU temp sensors do not trigger
the failsafe mode.
2. 'Unavailable' Fans still trigger the failsafe mode.
3. 'Unfunctional' or 'failed' sensors still trigger the failsafe mode.

Signed-off-by: Zheng Song <zheng.song@intel.com>
Change-Id: I1dd1d76466f43e7dcf51c161c96714f1bcfae88d
diff --git a/dbus/dbusconfiguration.cpp b/dbus/dbusconfiguration.cpp
index e9a7991..1af938a 100644
--- a/dbus/dbusconfiguration.cpp
+++ b/dbus/dbusconfiguration.cpp
@@ -599,6 +599,15 @@
                         getPIDAttribute(base, "Outputs"));
                 }
 
+                bool unavailableAsFailed = true;
+                auto findUnavailableAsFailed =
+                    base.find("InputUnavailableAsFailed");
+                if (findUnavailableAsFailed != base.end())
+                {
+                    unavailableAsFailed =
+                        std::get<bool>(findUnavailableAsFailed->second);
+                }
+
                 std::vector<SensorInterfaceType> inputSensorInterfaces;
                 std::vector<SensorInterfaceType> outputSensorInterfaces;
                 /* populate an interface list for different sensor direction
@@ -640,6 +649,7 @@
                     {
                         config.timeout = 0;
                         config.ignoreDbusMinMax = true;
+                        config.unavailableAsFailed = unavailableAsFailed;
                     }
                     if (dbusInterface != sensorInterface)
                     {
@@ -788,6 +798,15 @@
                 std::vector<std::string> sensorNames =
                     std::get<std::vector<std::string>>(base.at("Inputs"));
 
+                bool unavailableAsFailed = true;
+                auto findUnavailableAsFailed =
+                    base.find("InputUnavailableAsFailed");
+                if (findUnavailableAsFailed != base.end())
+                {
+                    unavailableAsFailed =
+                        std::get<bool>(findUnavailableAsFailed->second);
+                }
+
                 bool sensorFound = false;
                 for (const std::string& sensorName : sensorNames)
                 {
@@ -811,6 +830,7 @@
                         config.readPath = sensorPathIfacePair.first;
                         config.type = "temp";
                         config.ignoreDbusMinMax = true;
+                        config.unavailableAsFailed = unavailableAsFailed;
                         // todo: maybe un-hardcode this if we run into slower
                         // timeouts with sensors
 
diff --git a/dbus/dbusconfiguration.hpp b/dbus/dbusconfiguration.hpp
index aef3622..318da68 100644
--- a/dbus/dbusconfiguration.hpp
+++ b/dbus/dbusconfiguration.hpp
@@ -28,7 +28,7 @@
 #include <vector>
 
 using DbusVariantType =
-    std::variant<uint64_t, int64_t, double, std::string,
+    std::variant<uint64_t, int64_t, double, std::string, bool,
                  std::vector<std::string>, std::vector<double>>;
 
 using ManagedObjectType = std::unordered_map<
diff --git a/dbus/dbushelper.cpp b/dbus/dbushelper.cpp
index 0e4ba67..d08f537 100644
--- a/dbus/dbushelper.cpp
+++ b/dbus/dbushelper.cpp
@@ -130,6 +130,17 @@
 
     prop->value = std::visit(VariantToDoubleVisitor(), propMap["Value"]);
 
+    bool available = true;
+    try
+    {
+        getProperty(service, path, availabilityIntf, "Available", available);
+    }
+    catch (const sdbusplus::exception::exception& ex)
+    {
+        // unsupported Available property, leaving reading at 'True'
+    }
+    prop->available = available;
+
     return;
 }
 
diff --git a/dbus/dbushelper.hpp b/dbus/dbushelper.hpp
index ffda223..3f3e688 100644
--- a/dbus/dbushelper.hpp
+++ b/dbus/dbushelper.hpp
@@ -18,6 +18,8 @@
     static constexpr char propertiesintf[] = "org.freedesktop.DBus.Properties";
     static constexpr char criticalThreshInf[] =
         "xyz.openbmc_project.Sensor.Threshold.Critical";
+    static constexpr char availabilityIntf[] =
+        "xyz.openbmc_project.State.Decorator.Availability";
 
     explicit DbusHelper(sdbusplus::bus::bus bus) : _bus(std::move(bus))
     {}
diff --git a/dbus/dbushelper_interface.hpp b/dbus/dbushelper_interface.hpp
index 3f7d744..6d7e506 100644
--- a/dbus/dbushelper_interface.hpp
+++ b/dbus/dbushelper_interface.hpp
@@ -13,6 +13,8 @@
     double min;
     double max;
     std::string unit;
+    bool available;
+    bool unavailableAsFailed;
 };
 
 class DbusHelperInterface
diff --git a/dbus/dbuspassive.cpp b/dbus/dbuspassive.cpp
index 0c73db2..a5da100 100644
--- a/dbus/dbuspassive.cpp
+++ b/dbus/dbuspassive.cpp
@@ -72,6 +72,8 @@
         settings.max = 0;
     }
 
+    settings.unavailableAsFailed = info->unavailableAsFailed;
+
     return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
                                          settings, failed, path, redundancy);
 }
@@ -90,9 +92,12 @@
     _scale = settings.scale;
     _min = settings.min * std::pow(10.0, _scale);
     _max = settings.max * std::pow(10.0, _scale);
+    _available = settings.available;
+    _unavailableAsFailed = settings.unavailableAsFailed;
 
     // Cache this type knowledge, to avoid repeated string comparison
     _typeMargin = (type == "margin");
+    _typeFan = (type == "fan");
 
     // Force value to be stored, otherwise member would be uninitialized
     updateValue(settings.value, true);
@@ -126,6 +131,19 @@
         }
     }
 
+    /*
+     * Unavailable thermal sensors, i.e. those that are not present or whose
+     * power state does not match, should not trigger the failSafe mode. For
+     * example, when a system stays in a powered-off state, its CPU Temp
+     * sensors will be unavailable; these unavailable sensors should not be
+     * treated as failed and should not trigger failSafe.
+     * This is important for systems whose Fans are always on.
+     */
+    if (!_typeFan && !_available && !_unavailableAsFailed)
+    {
+        return false;
+    }
+
     // If a reading has came in,
     // but its value bad in some way (determined by sensor type),
     // indicate this sensor has failed,
@@ -146,7 +164,7 @@
         return true;
     }
 
-    return _failed || !_functional;
+    return _failed || !_available || !_functional;
 }
 
 void DbusPassive::setFailed(bool value)
@@ -159,6 +177,11 @@
     _functional = value;
 }
 
+void DbusPassive::setAvailable(bool value)
+{
+    _available = value; // mirrors the D-Bus 'Available' property
+}
+
 int64_t DbusPassive::getScale(void)
 {
     return _scale;
@@ -267,6 +290,24 @@
         }
         owner->setFailed(asserted);
     }
+    else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
+    {
+        auto available = msgData.find("Available");
+        if (available == msgData.end())
+        {
+            return 0;
+        }
+        bool asserted = std::get<bool>(available->second);
+        owner->setAvailable(asserted);
+        if (!asserted)
+        {
+            // A thermal controller will continue its PID calculation and not
+            // trigger a 'failsafe' when some inputs are unavailable.
+            // So, force-clear the value here to prevent a stale
+            // value from participating in a later PID calculation.
+            owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
+        }
+    }
     else if (msgSensor ==
              "xyz.openbmc_project.State.Decorator.OperationalStatus")
     {
diff --git a/dbus/dbuspassive.hpp b/dbus/dbuspassive.hpp
index 346986b..bc16719 100644
--- a/dbus/dbuspassive.hpp
+++ b/dbus/dbuspassive.hpp
@@ -61,6 +61,7 @@
 
     void setFailed(bool value);
     void setFunctional(bool value);
+    void setAvailable(bool value);
 
     int64_t getScale(void);
     std::string getID(void);
@@ -79,8 +80,11 @@
     double _min = 0;
     bool _failed = false;
     bool _functional = true;
+    bool _available = true;
+    bool _unavailableAsFailed = true;
 
     bool _typeMargin = false;
+    bool _typeFan = false;
     bool _badReading = false;
     bool _marginHot = false;