Print the zone failsafe reason when entering failsafe mode
Tested:
```
Jun 24 01:52:45 bmc sel-logger[640]: cpu0_nbm critical high threshold assert. Reading=49.000000 Threshold=45.000000.
Jun 24 01:52:46 bmc swampd[2944]: Zone 2 fans, entering failsafe mode, output pwm: 100
Jun 24 01:52:46 bmc swampd[2944]: Fail sensor: cpu0_nbm, reason: Sensor threshold asserted
Jun 24 01:52:46 bmc swampd[2944]: Zone 0 fans, entering failsafe mode, output pwm: 100
Jun 24 01:52:46 bmc swampd[2944]: Fail sensor: cpu0_nbm, reason: Sensor threshold asserted
```
Signed-off-by: Harvey Wu <Harvey.Wu@quantatw.com>
Change-Id: I607d01b6bed11b00a40153db6521a9c9d23da519
diff --git a/dbus/dbuspassive.cpp b/dbus/dbuspassive.cpp
index 1d88b60..8e37d8e 100644
--- a/dbus/dbuspassive.cpp
+++ b/dbus/dbuspassive.cpp
@@ -248,6 +248,31 @@
return false;
}
+std::string DbusPassive::getFailReason(void) const
+{
+    // Only one reason is reported: the first flag that matches, tested in
+    // the order below, decides the text even if several flags are set.
+    const char* reason = "Unknown";
+    if (_badReading)
+    {
+        reason = "Sensor reading bad";
+    }
+    else if (_marginHot)
+    {
+        reason = "Margin hot";
+    }
+    else if (_failed)
+    {
+        reason = "Sensor threshold asserted";
+    }
+    else if (!_available)
+    {
+        reason = "Sensor unavailable";
+    }
+    else if (!_functional)
+    {
+        reason = "Sensor not functional";
+    }
+    return reason;
+}
+
void DbusPassive::setFailed(bool value)
{
_failed = value;
diff --git a/dbus/dbuspassive.hpp b/dbus/dbuspassive.hpp
index 3813622..156be13 100644
--- a/dbus/dbuspassive.hpp
+++ b/dbus/dbuspassive.hpp
@@ -55,6 +55,7 @@
ReadReturn read(void) override;
bool getFailed(void) const override;
+ std::string getFailReason(void) const override;
void updateValue(double value, bool force);
void setValue(double value, double unscaled);
diff --git a/interfaces.hpp b/interfaces.hpp
index bbd5b4a..013822c 100644
--- a/interfaces.hpp
+++ b/interfaces.hpp
@@ -44,6 +44,11 @@
{
return false;
}
+
+    /** Fallback reason text, returned unless a derived class overrides it. */
+    virtual std::string getFailReason(void) const
+    {
+        return std::string("Unimplemented");
+    }
};
/*
diff --git a/pid/fancontroller.cpp b/pid/fancontroller.cpp
index 378ef9b..11538be 100644
--- a/pid/fancontroller.cpp
+++ b/pid/fancontroller.cpp
@@ -176,6 +176,14 @@
<< (failsafeCurrState ? "entering failsafe"
: "returning to normal")
<< " mode, output pwm: " << percent << "\n";
+
+ std::map<std::string, std::pair<std::string, double>>
+ failSensorList = _owner->getFailSafeSensors();
+ for (const auto& it : failSensorList)
+ {
+ std::cerr << "Fail sensor: " << it.first
+ << ", reason: " << it.second.first << "\n";
+ }
}
}
}
diff --git a/pid/zone.cpp b/pid/zone.cpp
index d9fe3c0..6402c79 100644
--- a/pid/zone.cpp
+++ b/pid/zone.cpp
@@ -97,7 +97,13 @@
return !_failSafeSensors.empty();
}
-void DbusPidZone::markSensorMissing(const std::string& name)
+FailSafeSensorsMap DbusPidZone::getFailSafeSensors(void) const
+{
+    // Returned by value: the caller receives its own copy of the table.
+    FailSafeSensorsMap snapshot(_failSafeSensors);
+    return snapshot;
+}
+
+void DbusPidZone::markSensorMissing(const std::string& name,
+ const std::string& failReason)
{
if (_missingAcceptable.find(name) != _missingAcceptable.end())
{
@@ -109,11 +115,12 @@
if (_sensorFailSafePercent[name] == 0)
{
- _failSafeSensors[name] = _zoneFailSafePercent;
+ _failSafeSensors[name] = std::pair(failReason, _zoneFailSafePercent);
}
else
{
- _failSafeSensors[name] = _sensorFailSafePercent[name];
+ _failSafeSensors[name] =
+ std::pair(failReason, _sensorFailSafePercent[name]);
}
if (debugEnabled)
@@ -183,24 +190,24 @@
double DbusPidZone::getFailSafePercent(void)
{
- std::map<std::string, double>::iterator maxData = std::max_element(
+ FailSafeSensorsMap::iterator maxData = std::max_element(
_failSafeSensors.begin(), _failSafeSensors.end(),
- [](const std::pair<std::string, double> firstData,
- const std::pair<std::string, double> secondData) {
- return firstData.second < secondData.second;
+ // Take the map's own element type by reference so that comparing two
+ // entries does not copy their name and reason strings.
+ [](const FailSafeSensorsMap::value_type& firstData,
+ const FailSafeSensorsMap::value_type& secondData) {
+ return firstData.second.second < secondData.second.second;
});
// In dbus/dbusconfiguration.cpp, the default sensor failsafepercent is 0 if
// there is no setting in json.
// Therfore, if the max failsafe duty in _failSafeSensors is 0, set final
// failsafe duty to _zoneFailSafePercent.
- if ((*maxData).second == 0)
+ // An empty map makes max_element return end(), which must not be
+ // dereferenced; fall back to the zone default in that case as well.
+ if (maxData == _failSafeSensors.end() || maxData->second.second == 0)
{
return _zoneFailSafePercent;
}
else
{
- return (*maxData).second;
+ return (*maxData).second.second;
}
}
@@ -505,7 +512,7 @@
_cachedFanOutputs[f] = {nan, nan};
// Start all fans in fail-safe mode.
- markSensorMissing(f);
+ markSensorMissing(f, "");
}
for (const auto& t : _thermalInputs)
@@ -513,7 +520,7 @@
_cachedValuesByName[t] = {nan, nan};
// Start all sensors in fail-safe mode.
- markSensorMissing(t);
+ markSensorMissing(t, "");
}
}
diff --git a/pid/zone.hpp b/pid/zone.hpp
index 14017b8..325eb6c 100644
--- a/pid/zone.hpp
+++ b/pid/zone.hpp
@@ -36,6 +36,10 @@
sdbusplus::xyz::openbmc_project::Debug::Pid::server::ThermalPower;
using ProcessObject =
ServerObject<ProcessInterface, DebugThermalPowerInterface>;
+// Key: sensor name. Value: {fail reason, failsafe percent}.
+using FailSafeSensorsMap =
+    std::map<std::string, std::pair<std::string, double>>;
+// Must be the map's exact element type (the key is const); with any other
+// pair type a const reference would bind to a converted temporary copy.
+using FailSafeSensorPair = FailSafeSensorsMap::value_type;
namespace pid_control
{
@@ -74,7 +78,8 @@
bool getRedundantWrite(void) const override;
void setManualMode(bool mode);
bool getFailSafeMode(void) const override;
- void markSensorMissing(const std::string& name);
+ void markSensorMissing(const std::string& name,
+ const std::string& failReason);
bool getAccSetPoint(void) const override;
int64_t getZoneID(void) const override;
@@ -84,6 +89,7 @@
void clearSetPoints(void) override;
void clearRPMCeilings(void) override;
double getFailSafePercent(void) override;
+ FailSafeSensorsMap getFailSafeSensors(void) const override;
double getMinThermalSetPoint(void) const;
uint64_t getCycleIntervalTime(void) const override;
uint64_t getUpdateThermalsCycle(void) const override;
@@ -171,7 +177,7 @@
// check if fan fail.
if (sensor->getFailed())
{
- markSensorMissing(sensorInput);
+ markSensorMissing(sensorInput, sensor->getFailReason());
if (debugEnabled)
{
@@ -180,7 +186,7 @@
}
else if (timeout != 0 && duration >= period)
{
- markSensorMissing(sensorInput);
+ markSensorMissing(sensorInput, "Sensor timeout");
if (debugEnabled)
{
@@ -225,7 +231,10 @@
const double _zoneFailSafePercent;
const conf::CycleTime _cycleTime;
- std::map<std::string, double> _failSafeSensors;
+ /*
+ * <map key = sensor name, value = sensor fail reason and failsafe percent>
+ */
+ FailSafeSensorsMap _failSafeSensors;
std::set<std::string> _missingAcceptable;
std::map<std::string, double> _SetPoints;
diff --git a/pid/zone_interface.hpp b/pid/zone_interface.hpp
index 33f0a6f..808b80f 100644
--- a/pid/zone_interface.hpp
+++ b/pid/zone_interface.hpp
@@ -2,6 +2,7 @@
#include "sensors/sensor.hpp"
+#include <map>
#include <string>
namespace pid_control
@@ -93,6 +94,10 @@
*/
virtual double getFailSafePercent() = 0;
+ /** Return failsafe sensor list */
+ virtual std::map<std::string, std::pair<std::string, double>>
+ getFailSafeSensors() const = 0;
+
/** Return the zone's cycle time settings */
virtual uint64_t getCycleIntervalTime(void) const = 0;
virtual uint64_t getUpdateThermalsCycle(void) const = 0;
diff --git a/sensors/pluggable.cpp b/sensors/pluggable.cpp
index 530737d..da0b2f8 100644
--- a/sensors/pluggable.cpp
+++ b/sensors/pluggable.cpp
@@ -39,4 +39,9 @@
return _reader->getFailed();
}
+std::string PluggableSensor::getFailReason(void)
+{
+    // Delegate to the reader this sensor wraps and pass its text through.
+    std::string reason = _reader->getFailReason();
+    return reason;
+}
+
} // namespace pid_control
diff --git a/sensors/pluggable.hpp b/sensors/pluggable.hpp
index bd5bc1f..d139876 100644
--- a/sensors/pluggable.hpp
+++ b/sensors/pluggable.hpp
@@ -26,6 +26,7 @@
void write(double value) override;
void write(double value, bool force, int64_t* written) override;
bool getFailed(void) override;
+ std::string getFailReason(void) override;
private:
std::unique_ptr<ReadInterface> _reader;
diff --git a/sensors/sensor.hpp b/sensors/sensor.hpp
index a135c50..9bd39b9 100644
--- a/sensors/sensor.hpp
+++ b/sensors/sensor.hpp
@@ -48,6 +48,11 @@
return false;
};
+    /** Fallback reason text, returned unless a derived class overrides it. */
+    virtual std::string getFailReason(void)
+    {
+        return std::string("Unimplemented");
+    }
+
std::string getName(void) const
{
return _name;
diff --git a/test/pid_zone_unittest.cpp b/test/pid_zone_unittest.cpp
index 47b3f97..ea8ac91 100644
--- a/test/pid_zone_unittest.cpp
+++ b/test/pid_zone_unittest.cpp
@@ -317,9 +317,15 @@
zone->addPidFailSafePercent(input2, values[1]);
zone->addPidFailSafePercent(input3, values[2]);
- zone->markSensorMissing("temp1");
+ zone->markSensorMissing("temp1", "Sensor threshold asserted");
EXPECT_EQ(failSafePercent, zone->getFailSafePercent());
+
+ std::map<std::string, std::pair<std::string, double>> failSensorList =
+ zone->getFailSafeSensors();
+ EXPECT_EQ(1, failSensorList.size());
+ EXPECT_EQ("Sensor threshold asserted", failSensorList["temp1"].first);
+ EXPECT_EQ(failSafePercent, failSensorList["temp1"].second);
}
TEST_F(PidZoneTest, GetFailSafePercent_MultiFailedReturnsExpected)
@@ -336,11 +342,21 @@
zone->addPidFailSafePercent(input2, values[1]);
zone->addPidFailSafePercent(input3, values[2]);
- zone->markSensorMissing("temp1");
- zone->markSensorMissing("temp2");
- zone->markSensorMissing("temp3");
+ zone->markSensorMissing("temp1", "Sensor threshold asserted");
+ zone->markSensorMissing("temp2", "Sensor reading bad");
+ zone->markSensorMissing("temp3", "Sensor unavailable");
EXPECT_EQ(80, zone->getFailSafePercent());
+
+ std::map<std::string, std::pair<std::string, double>> failSensorList =
+ zone->getFailSafeSensors();
+ EXPECT_EQ(3, failSensorList.size());
+ EXPECT_EQ("Sensor threshold asserted", failSensorList["temp1"].first);
+ EXPECT_EQ(60, failSensorList["temp1"].second);
+ EXPECT_EQ("Sensor reading bad", failSensorList["temp2"].first);
+ EXPECT_EQ(80, failSensorList["temp2"].second);
+ EXPECT_EQ("Sensor unavailable", failSensorList["temp3"].first);
+ EXPECT_EQ(70, failSensorList["temp3"].second);
}
TEST_F(PidZoneTest, ThermalInputs_FailsafeToValid_ReadsSensors)
diff --git a/test/zone_mock.hpp b/test/zone_mock.hpp
index 885a2c4..7d08f63 100644
--- a/test/zone_mock.hpp
+++ b/test/zone_mock.hpp
@@ -43,6 +43,9 @@
MOCK_CONST_METHOD0(getManualMode, bool());
MOCK_CONST_METHOD0(getFailSafeMode, bool());
MOCK_METHOD0(getFailSafePercent, double());
+ MOCK_CONST_METHOD0(
+ getFailSafeSensors,
+ std::map<std::string, std::pair<std::string, double>>(void));
MOCK_CONST_METHOD0(getZoneID, int64_t());
MOCK_CONST_METHOD0(getCycleIntervalTime, uint64_t());