Print zone failsafe reason when entering failsafe mode
Tested:
```
Jun 24 01:52:45 bmc sel-logger[640]: cpu0_nbm critical high threshold assert. Reading=49.000000 Threshold=45.000000.
Jun 24 01:52:46 bmc swampd[2944]: Zone 2 fans, entering failsafe mode, output pwm: 100
Jun 24 01:52:46 bmc swampd[2944]: Fail sensor: cpu0_nbm, reason: Sensor threshold asserted
Jun 24 01:52:46 bmc swampd[2944]: Zone 0 fans, entering failsafe mode, output pwm: 100
Jun 24 01:52:46 bmc swampd[2944]: Fail sensor: cpu0_nbm, reason: Sensor threshold asserted
```
Signed-off-by: Harvey Wu <Harvey.Wu@quantatw.com>
Change-Id: I607d01b6bed11b00a40153db6521a9c9d23da519
diff --git a/pid/fancontroller.cpp b/pid/fancontroller.cpp
index 378ef9b..11538be 100644
--- a/pid/fancontroller.cpp
+++ b/pid/fancontroller.cpp
@@ -176,6 +176,14 @@
<< (failsafeCurrState ? "entering failsafe"
: "returning to normal")
<< " mode, output pwm: " << percent << "\n";
+
+ std::map<std::string, std::pair<std::string, double>>
+ failSensorList = _owner->getFailSafeSensors();
+ for (const auto& it : failSensorList)
+ {
+ std::cerr << "Fail sensor: " << it.first
+ << ", reason: " << it.second.first << "\n";
+ }
}
}
}
diff --git a/pid/zone.cpp b/pid/zone.cpp
index d9fe3c0..6402c79 100644
--- a/pid/zone.cpp
+++ b/pid/zone.cpp
@@ -97,7 +97,13 @@
return !_failSafeSensors.empty();
}
-void DbusPidZone::markSensorMissing(const std::string& name)
+FailSafeSensorsMap DbusPidZone::getFailSafeSensors(void) const
+{
+ return _failSafeSensors;
+}
+
+void DbusPidZone::markSensorMissing(const std::string& name,
+ const std::string& failReason)
{
if (_missingAcceptable.find(name) != _missingAcceptable.end())
{
@@ -109,11 +115,12 @@
if (_sensorFailSafePercent[name] == 0)
{
- _failSafeSensors[name] = _zoneFailSafePercent;
+ _failSafeSensors[name] = std::pair(failReason, _zoneFailSafePercent);
}
else
{
- _failSafeSensors[name] = _sensorFailSafePercent[name];
+ _failSafeSensors[name] =
+ std::pair(failReason, _sensorFailSafePercent[name]);
}
if (debugEnabled)
@@ -183,24 +190,24 @@
double DbusPidZone::getFailSafePercent(void)
{
- std::map<std::string, double>::iterator maxData = std::max_element(
+ FailSafeSensorsMap::iterator maxData = std::max_element(
_failSafeSensors.begin(), _failSafeSensors.end(),
- [](const std::pair<std::string, double> firstData,
- const std::pair<std::string, double> secondData) {
- return firstData.second < secondData.second;
+ [](const FailSafeSensorPair firstData,
+ const FailSafeSensorPair secondData) {
+ return firstData.second.second < secondData.second.second;
});
// In dbus/dbusconfiguration.cpp, the default sensor failsafepercent is 0 if
// there is no setting in json.
// Therfore, if the max failsafe duty in _failSafeSensors is 0, set final
// failsafe duty to _zoneFailSafePercent.
- if ((*maxData).second == 0)
+ if ((*maxData).second.second == 0)
{
return _zoneFailSafePercent;
}
else
{
- return (*maxData).second;
+ return (*maxData).second.second;
}
}
@@ -505,7 +512,7 @@
_cachedFanOutputs[f] = {nan, nan};
// Start all fans in fail-safe mode.
- markSensorMissing(f);
+ markSensorMissing(f, "");
}
for (const auto& t : _thermalInputs)
@@ -513,7 +520,7 @@
_cachedValuesByName[t] = {nan, nan};
// Start all sensors in fail-safe mode.
- markSensorMissing(t);
+ markSensorMissing(t, "");
}
}
diff --git a/pid/zone.hpp b/pid/zone.hpp
index 14017b8..325eb6c 100644
--- a/pid/zone.hpp
+++ b/pid/zone.hpp
@@ -36,6 +36,10 @@
sdbusplus::xyz::openbmc_project::Debug::Pid::server::ThermalPower;
using ProcessObject =
ServerObject<ProcessInterface, DebugThermalPowerInterface>;
+using FailSafeSensorsMap =
+ std::map<std::string, std::pair<std::string, double>>;
+using FailSafeSensorPair =
+ std::pair<std::string, std::pair<std::string, double>>;
namespace pid_control
{
@@ -74,7 +78,8 @@
bool getRedundantWrite(void) const override;
void setManualMode(bool mode);
bool getFailSafeMode(void) const override;
- void markSensorMissing(const std::string& name);
+ void markSensorMissing(const std::string& name,
+ const std::string& failReason);
bool getAccSetPoint(void) const override;
int64_t getZoneID(void) const override;
@@ -84,6 +89,7 @@
void clearSetPoints(void) override;
void clearRPMCeilings(void) override;
double getFailSafePercent(void) override;
+ FailSafeSensorsMap getFailSafeSensors(void) const override;
double getMinThermalSetPoint(void) const;
uint64_t getCycleIntervalTime(void) const override;
uint64_t getUpdateThermalsCycle(void) const override;
@@ -171,7 +177,7 @@
// check if fan fail.
if (sensor->getFailed())
{
- markSensorMissing(sensorInput);
+ markSensorMissing(sensorInput, sensor->getFailReason());
if (debugEnabled)
{
@@ -180,7 +186,7 @@
}
else if (timeout != 0 && duration >= period)
{
- markSensorMissing(sensorInput);
+ markSensorMissing(sensorInput, "Sensor timeout");
if (debugEnabled)
{
@@ -225,7 +231,10 @@
const double _zoneFailSafePercent;
const conf::CycleTime _cycleTime;
- std::map<std::string, double> _failSafeSensors;
+ /*
+ * Map of sensor name to a pair of (fail reason, failsafe percent).
+ */
+ FailSafeSensorsMap _failSafeSensors;
std::set<std::string> _missingAcceptable;
std::map<std::string, double> _SetPoints;
diff --git a/pid/zone_interface.hpp b/pid/zone_interface.hpp
index 33f0a6f..808b80f 100644
--- a/pid/zone_interface.hpp
+++ b/pid/zone_interface.hpp
@@ -2,6 +2,7 @@
#include "sensors/sensor.hpp"
+#include <map>
#include <string>
namespace pid_control
@@ -93,6 +94,10 @@
*/
virtual double getFailSafePercent() = 0;
+ /** Return failsafe sensor list */
+ virtual std::map<std::string, std::pair<std::string, double>>
+ getFailSafeSensors() const = 0;
+
/** Return the zone's cycle time settings */
virtual uint64_t getCycleIntervalTime(void) const = 0;
virtual uint64_t getUpdateThermalsCycle(void) const = 0;