regulators: Retry failed sensor monitoring
If a failure occurs while trying to read voltage regulator sensors,
retry the operation 5 times before logging an error.
This provides "de-glitching" to ignore transient hardware problems.
Signed-off-by: Shawn McCarney <shawnmm@us.ibm.com>
Change-Id: I310c15eb0f0d36d938057d6280a12b5aef854d20
diff --git a/phosphor-regulators/src/sensor_monitoring.cpp b/phosphor-regulators/src/sensor_monitoring.cpp
index b5e31ef..04a0547 100644
--- a/phosphor-regulators/src/sensor_monitoring.cpp
+++ b/phosphor-regulators/src/sensor_monitoring.cpp
@@ -31,6 +31,15 @@
namespace phosphor::power::regulators
{
+/**
+ * Maximum number of consecutive errors before an error log entry is created.
+ * This provides "de-glitching" to ignore transient hardware problems.
+ *
+ * Also the maximum number of consecutive errors that will be logged to the
+ * journal.
+ */
+constexpr unsigned short maxErrorCount{6};
+
void SensorMonitoring::execute(Services& services, System& system,
Chassis& chassis, Device& device, Rail& rail)
{
@@ -40,7 +49,6 @@
chassis.getInventoryPath());
// Read all sensors defined for this rail
- bool errorOccurred{false};
try
{
// Create ActionEnvironment
@@ -49,27 +57,32 @@
// Execute the actions
action_utils::execute(actions, environment);
+
+ // Reset consecutive error count since sensors were read successfully
+ errorCount = 0;
}
catch (const std::exception& e)
{
- // Set flag to notify sensors service that an error occurred
- errorOccurred = true;
-
- // Log error messages in journal for the first 3 errors
- if (++errorCount <= 3)
+ // If we haven't hit the maximum consecutive error count yet
+ if (errorCount < maxErrorCount)
{
+ // Log error messages in journal
services.getJournal().logError(exception_utils::getMessages(e));
services.getJournal().logError(
"Unable to monitor sensors for rail " + rail.getID());
- }
- // Create error log entry if this type hasn't already been logged
- error_logging_utils::logError(std::current_exception(),
- Entry::Level::Warning, services,
- errorHistory);
+ // Increment error count. If now at max, create error log entry.
+ if (++errorCount >= maxErrorCount)
+ {
+ error_logging_utils::logError(std::current_exception(),
+ Entry::Level::Warning, services,
+ errorHistory);
+ }
+ }
}
// Notify sensors service that monitoring has ended for this rail
+ bool errorOccurred = (errorCount > 0);
sensors.endRail(errorOccurred);
}
diff --git a/phosphor-regulators/src/sensor_monitoring.hpp b/phosphor-regulators/src/sensor_monitoring.hpp
index fcb02a5..47db47e 100644
--- a/phosphor-regulators/src/sensor_monitoring.hpp
+++ b/phosphor-regulators/src/sensor_monitoring.hpp
@@ -117,9 +117,9 @@
ErrorHistory errorHistory{};
/**
- * Number of errors that have occurred.
+ * Number of consecutive errors that have occurred.
*/
- unsigned int errorCount{0};
+ unsigned short errorCount{0};
};
} // namespace phosphor::power::regulators
diff --git a/phosphor-regulators/test/chassis_tests.cpp b/phosphor-regulators/test/chassis_tests.cpp
index 40c1f77..7963430 100644
--- a/phosphor-regulators/test/chassis_tests.cpp
+++ b/phosphor-regulators/test/chassis_tests.cpp
@@ -212,40 +212,51 @@
devices.emplace_back(std::move(device));
Chassis chassis{1, defaultInventoryPath, std::move(devices)};
- // Create mock services
- MockServices services{};
+ // Create lambda that sets MockServices expectations. The lambda allows
+ // us to set expectations multiple times without duplicate code.
+ auto setExpectations = [](MockServices& services) {
+ // Expect Sensors service to be called 10 times
+ MockSensors& sensors = services.getMockSensors();
+ EXPECT_CALL(sensors, startRail).Times(10);
+ EXPECT_CALL(sensors, setValue).Times(0);
+ EXPECT_CALL(sensors, endRail).Times(10);
- // Expect Sensors service to be called 5+5=10 times
- MockSensors& sensors = services.getMockSensors();
- EXPECT_CALL(sensors, startRail).Times(10);
- EXPECT_CALL(sensors, setValue).Times(0);
- EXPECT_CALL(sensors, endRail).Times(10);
+ // Expect Journal service to be called 6 times to log error messages
+ MockJournal& journal = services.getMockJournal();
+ EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
+ .Times(6);
+ EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
- // Expect Journal service to be called 3+3=6 times to log error messages
- MockJournal& journal = services.getMockJournal();
- EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
- .Times(6);
- EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
+ // Expect ErrorLogging service to be called once to log a DBus error
+ MockErrorLogging& errorLogging = services.getMockErrorLogging();
+ EXPECT_CALL(errorLogging, logDBusError).Times(1);
+ };
- // Expect ErrorLogging service to be called 1+1=2 times to log a DBus error
- MockErrorLogging& errorLogging = services.getMockErrorLogging();
- EXPECT_CALL(errorLogging, logDBusError).Times(2);
-
- // Monitor sensors 5 times. Should fail every time, write to journal 3
- // times, and log one error.
- for (int i = 1; i <= 5; ++i)
+ // Monitor sensors 10 times. Verify errors logged.
{
- chassis.monitorSensors(services, *system);
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
+ for (int i = 1; i <= 10; ++i)
+ {
+ chassis.monitorSensors(services, *system);
+ }
}
// Clear error history
chassis.clearErrorHistory();
- // Monitor sensors 5 times again. Should fail every time, write to journal
- // 3 times, and log one error.
- for (int i = 1; i <= 5; ++i)
+ // Monitor sensors 10 more times. Verify errors logged again.
{
- chassis.monitorSensors(services, *system);
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
+ for (int i = 1; i <= 10; ++i)
+ {
+ chassis.monitorSensors(services, *system);
+ }
}
}
diff --git a/phosphor-regulators/test/device_tests.cpp b/phosphor-regulators/test/device_tests.cpp
index 5d9a0e4..0c43b1e 100644
--- a/phosphor-regulators/test/device_tests.cpp
+++ b/phosphor-regulators/test/device_tests.cpp
@@ -284,18 +284,18 @@
std::move(phaseFaultDetection),
std::move(rails)};
- // Create lambda that sets MockServices expectations. The lambda allows us
- // to set the same expectations twice without duplicate code.
+ // Create lambda that sets MockServices expectations. The lambda allows
+ // us to set expectations multiple times without duplicate code.
auto setExpectations = [](MockServices& services) {
// Set Journal service expectations:
- // - 3 error messages for D-Bus errors
- // - 3 error messages for inability to monitor sensors
+ // - 6 error messages for D-Bus errors
+ // - 6 error messages for inability to monitor sensors
// - 2 error messages for the N phase fault
MockJournal& journal = services.getMockJournal();
EXPECT_CALL(journal, logError(std::vector<std::string>{"DBus Error"}))
- .Times(3);
+ .Times(6);
EXPECT_CALL(journal, logError("Unable to monitor sensors for rail vdd"))
- .Times(3);
+ .Times(6);
EXPECT_CALL(
journal,
logError("n phase fault detected in regulator reg2: count=1"))
@@ -313,19 +313,19 @@
EXPECT_CALL(errorLogging, logPhaseFault).Times(1);
// Set Sensors service expections:
- // - startRail() and endRail() called 5 times
+ // - startRail() and endRail() called 10 times
MockSensors& sensors = services.getMockSensors();
- EXPECT_CALL(sensors, startRail).Times(5);
- EXPECT_CALL(sensors, endRail).Times(5);
+ EXPECT_CALL(sensors, startRail).Times(10);
+ EXPECT_CALL(sensors, endRail).Times(10);
};
- // Monitor sensors and detect phase faults 5 times. Verify errors logged.
+ // Monitor sensors and detect phase faults 10 times. Verify errors logged.
{
// Create mock services. Set expectations via lambda.
MockServices services{};
setExpectations(services);
- for (int i = 1; i <= 5; ++i)
+ for (int i = 1; i <= 10; ++i)
{
device.monitorSensors(services, *system, *chassis);
device.detectPhaseFaults(services, *system, *chassis);
@@ -335,14 +335,14 @@
// Clear error history
device.clearErrorHistory();
- // Monitor sensors and detect phase faults 5 more times. Verify errors
+ // Monitor sensors and detect phase faults 10 more times. Verify errors
// logged again.
{
// Create mock services. Set expectations via lambda.
MockServices services{};
setExpectations(services);
- for (int i = 1; i <= 5; ++i)
+ for (int i = 1; i <= 10; ++i)
{
device.monitorSensors(services, *system, *chassis);
device.detectPhaseFaults(services, *system, *chassis);
diff --git a/phosphor-regulators/test/rail_tests.cpp b/phosphor-regulators/test/rail_tests.cpp
index db4294f..82bbbac 100644
--- a/phosphor-regulators/test/rail_tests.cpp
+++ b/phosphor-regulators/test/rail_tests.cpp
@@ -136,40 +136,51 @@
chassisVec.emplace_back(std::move(chassis));
System system{std::move(rules), std::move(chassisVec)};
- // Create mock services
- MockServices services{};
+ // Create lambda that sets MockServices expectations. The lambda allows
+ // us to set expectations multiple times without duplicate code.
+ auto setExpectations = [](MockServices& services) {
+ // Expect Sensors service to be called 10 times
+ MockSensors& sensors = services.getMockSensors();
+ EXPECT_CALL(sensors, startRail).Times(10);
+ EXPECT_CALL(sensors, setValue).Times(0);
+ EXPECT_CALL(sensors, endRail(true)).Times(10);
- // Expect Sensors service to be called 5+5=10 times
- MockSensors& sensors = services.getMockSensors();
- EXPECT_CALL(sensors, startRail).Times(10);
- EXPECT_CALL(sensors, setValue).Times(0);
- EXPECT_CALL(sensors, endRail).Times(10);
+ // Expect Journal service to be called 6 times to log error messages
+ MockJournal& journal = services.getMockJournal();
+ EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
+ .Times(6);
+ EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
- // Expect Journal service to be called 3+3=6 times to log error messages
- MockJournal& journal = services.getMockJournal();
- EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
- .Times(6);
- EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
+ // Expect ErrorLogging service to be called once to log a DBus error
+ MockErrorLogging& errorLogging = services.getMockErrorLogging();
+ EXPECT_CALL(errorLogging, logDBusError).Times(1);
+ };
- // Expect ErrorLogging service to be called 1+1=2 times to log a DBus error
- MockErrorLogging& errorLogging = services.getMockErrorLogging();
- EXPECT_CALL(errorLogging, logDBusError).Times(2);
-
- // Monitor sensors 5 times. Should fail every time, write to journal 3
- // times, and log one error.
- for (int i = 1; i <= 5; ++i)
+ // Monitor sensors 10 times. Verify errors logged.
{
- railPtr->monitorSensors(services, system, *chassisPtr, *devicePtr);
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
+ for (int i = 1; i <= 10; ++i)
+ {
+ railPtr->monitorSensors(services, system, *chassisPtr, *devicePtr);
+ }
}
// Clear error history
railPtr->clearErrorHistory();
- // Monitor sensors 5 times again. Should fail every time, write to journal
- // 3 times, and log one error.
- for (int i = 1; i <= 5; ++i)
+ // Monitor sensors 10 more times. Verify errors logged again.
{
- railPtr->monitorSensors(services, system, *chassisPtr, *devicePtr);
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
+ for (int i = 1; i <= 10; ++i)
+ {
+ railPtr->monitorSensors(services, system, *chassisPtr, *devicePtr);
+ }
}
}
diff --git a/phosphor-regulators/test/sensor_monitoring_tests.cpp b/phosphor-regulators/test/sensor_monitoring_tests.cpp
index b8d56ff..f879442 100644
--- a/phosphor-regulators/test/sensor_monitoring_tests.cpp
+++ b/phosphor-regulators/test/sensor_monitoring_tests.cpp
@@ -152,31 +152,32 @@
.WillRepeatedly(Throw(
i2c::I2CException{"Failed to read word data", "/dev/i2c-1", 0x70}));
- // Perform sensor monitoring 10 times to set error history data members
- {
- // Create mock services
- MockServices services{};
-
- // Set Sensors service expectations. SensorMonitoring will be executed
- // 10 times.
+ // Create lambda that sets MockServices expectations. The lambda allows us
+ // to set expectations multiple times without duplicate code.
+ auto setExpectations = [](MockServices& services) {
+ // Expect Sensors service to be called 10 times
MockSensors& sensors = services.getMockSensors();
EXPECT_CALL(sensors, startRail).Times(10);
EXPECT_CALL(sensors, setValue).Times(0);
EXPECT_CALL(sensors, endRail(true)).Times(10);
- // Set Journal service expectations. SensorMonitoring should log error
- // messages 3 times and then stop.
+ // Expect Journal service to be called 6 times to log error messages
MockJournal& journal = services.getMockJournal();
EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
- .Times(3);
- EXPECT_CALL(journal, logError(A<const std::string&>())).Times(3);
+ .Times(6);
+ EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
- // Set ErrorLogging service expectations. SensorMonitoring should log
- // an error only once.
+ // Expect ErrorLogging service to be called once to log an I2C error
MockErrorLogging& errorLogging = services.getMockErrorLogging();
EXPECT_CALL(errorLogging, logI2CError).Times(1);
+ };
- // Execute SensorMonitoring 10 times
+ // Call execute() 10 times to set error history data members
+ {
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
for (int i = 1; i <= 10; ++i)
{
monitoring->execute(services, *system, *chassis, *device, *rail);
@@ -186,32 +187,16 @@
// Clear error history
monitoring->clearErrorHistory();
- // Perform sensor monitoring one more time. Should log errors again.
+ // Call execute() 10 more times. Should log errors again.
{
- // Create mock services
+ // Create mock services. Set expectations via lambda.
MockServices services{};
+ setExpectations(services);
- // Set Sensors service expectations. SensorMonitoring will be executed
- // 1 time.
- MockSensors& sensors = services.getMockSensors();
- EXPECT_CALL(sensors, startRail).Times(1);
- EXPECT_CALL(sensors, setValue).Times(0);
- EXPECT_CALL(sensors, endRail(true)).Times(1);
-
- // Set Journal service expectations. SensorMonitoring should log error
- // messages 1 time.
- MockJournal& journal = services.getMockJournal();
- EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
- .Times(1);
- EXPECT_CALL(journal, logError(A<const std::string&>())).Times(1);
-
- // Set ErrorLogging server expectations. SensorMonitoring should log an
- // error.
- MockErrorLogging& errorLogging = services.getMockErrorLogging();
- EXPECT_CALL(errorLogging, logI2CError).Times(1);
-
- // Execute SensorMonitoring
- monitoring->execute(services, *system, *chassis, *device, *rail);
+ for (int i = 1; i <= 10; ++i)
+ {
+ monitoring->execute(services, *system, *chassis, *device, *rail);
+ }
}
}
@@ -279,54 +264,85 @@
auto [system, chassis, device, i2cInterface, rail] =
createParentObjects(std::unique_ptr<SensorMonitoring>{monitoring});
- // Set I2CInterface expectations. Should read register 0x8C 4 times.
- EXPECT_CALL(*i2cInterface, isOpen)
- .Times(4)
- .WillRepeatedly(Return(true));
+ // Set I2CInterface expectations
+ EXPECT_CALL(*i2cInterface, isOpen).WillRepeatedly(Return(true));
EXPECT_CALL(*i2cInterface, read(TypedEq<uint8_t>(0x8C), A<uint16_t&>()))
- .Times(4)
.WillRepeatedly(Throw(i2c::I2CException{"Failed to read word data",
"/dev/i2c-1", 0x70}));
- // Create mock services
- MockServices services{};
+ // Create lambda that sets MockServices expectations. The lambda allows
+ // us to set expectations multiple times without duplicate code.
+ auto setExpectations = [](MockServices& services, int executeCount,
+ int journalCount, int errorLogCount) {
+ // Set Sensors service expectations
+ MockSensors& sensors = services.getMockSensors();
+ EXPECT_CALL(
+ sensors,
+ startRail("vdd",
+ "/xyz/openbmc_project/inventory/system/chassis/"
+ "motherboard/reg2",
+ "/xyz/openbmc_project/inventory/system/chassis"))
+ .Times(executeCount);
+ EXPECT_CALL(sensors, setValue).Times(0);
+ EXPECT_CALL(sensors, endRail(true)).Times(executeCount);
- // Set Sensors service expectations. SensorMonitoring will be executed
- // 4 times, and all should fail.
- MockSensors& sensors = services.getMockSensors();
- EXPECT_CALL(sensors,
- startRail("vdd",
- "/xyz/openbmc_project/inventory/system/chassis/"
- "motherboard/reg2",
- "/xyz/openbmc_project/inventory/system/chassis"))
- .Times(4);
- EXPECT_CALL(sensors, setValue).Times(0);
- EXPECT_CALL(sensors, endRail(true)).Times(4);
+ // Set Journal service expectations
+ MockJournal& journal = services.getMockJournal();
+ std::vector<std::string> expectedErrMessagesException{
+ "I2CException: Failed to read word data: bus /dev/i2c-1, addr 0x70",
+ "ActionError: pmbus_read_sensor: { type: iout, command: 0x8C, "
+ "format: linear_11 }"};
+ EXPECT_CALL(journal, logError(expectedErrMessagesException))
+ .Times(journalCount);
+ EXPECT_CALL(journal,
+ logError("Unable to monitor sensors for rail vdd"))
+ .Times(journalCount);
- // Set Journal service expectations. SensorMonitoring should log error
- // messages 3 times and then stop.
- MockJournal& journal = services.getMockJournal();
- std::vector<std::string> expectedErrMessagesException{
- "I2CException: Failed to read word data: bus /dev/i2c-1, addr 0x70",
- "ActionError: pmbus_read_sensor: { type: iout, command: 0x8C, "
- "format: linear_11 }"};
- EXPECT_CALL(journal, logError(expectedErrMessagesException)).Times(3);
- EXPECT_CALL(journal, logError("Unable to monitor sensors for rail vdd"))
- .Times(3);
+ // Set ErrorLogging service expectations
+ MockErrorLogging& errorLogging = services.getMockErrorLogging();
+ EXPECT_CALL(errorLogging,
+ logI2CError(Entry::Level::Warning, Ref(journal),
+ "/dev/i2c-1", 0x70, 0))
+ .Times(errorLogCount);
+ };
- // Set ErrorLogging service expectations. SensorMonitoring should log
- // an error only once.
- MockErrorLogging& errorLogging = services.getMockErrorLogging();
- EXPECT_CALL(errorLogging,
- logI2CError(Entry::Level::Warning, Ref(journal),
- "/dev/i2c-1", 0x70, 0))
- .Times(1);
-
- // Execute SensorMonitoring 4 times
- for (int i = 1; i <= 4; ++i)
+ // Call execute() 5 times. Should log 5 journal messages and create 0
+ // error logs.
{
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services, 5, 5, 0);
+
+ for (int i = 1; i <= 5; ++i)
+ {
+ monitoring->execute(services, *system, *chassis, *device,
+ *rail);
+ }
+ }
+
+ // Call execute() 1 more time. Should log 1 journal message and create
+ // 1 error log.
+ {
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services, 1, 1, 1);
+
monitoring->execute(services, *system, *chassis, *device, *rail);
}
+
+ // Call execute() 5 more times. Should log 0 journal messages and
+ // create 0 error logs.
+ {
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services, 5, 0, 0);
+
+ for (int i = 1; i <= 5; ++i)
+ {
+ monitoring->execute(services, *system, *chassis, *device,
+ *rail);
+ }
+ }
}
}
diff --git a/phosphor-regulators/test/system_tests.cpp b/phosphor-regulators/test/system_tests.cpp
index 720620d..1ea34b3 100644
--- a/phosphor-regulators/test/system_tests.cpp
+++ b/phosphor-regulators/test/system_tests.cpp
@@ -166,40 +166,51 @@
chassisVec.emplace_back(std::move(chassis));
System system{std::move(rules), std::move(chassisVec)};
- // Create mock services
- MockServices services{};
+ // Create lambda that sets MockServices expectations. The lambda allows
+ // us to set expectations multiple times without duplicate code.
+ auto setExpectations = [](MockServices& services) {
+ // Expect Sensors service to be called 10 times
+ MockSensors& sensors = services.getMockSensors();
+ EXPECT_CALL(sensors, startRail).Times(10);
+ EXPECT_CALL(sensors, setValue).Times(0);
+ EXPECT_CALL(sensors, endRail).Times(10);
- // Expect Sensors service to be called 5+5=10 times
- MockSensors& sensors = services.getMockSensors();
- EXPECT_CALL(sensors, startRail).Times(10);
- EXPECT_CALL(sensors, setValue).Times(0);
- EXPECT_CALL(sensors, endRail).Times(10);
+ // Expect Journal service to be called 6 times to log error messages
+ MockJournal& journal = services.getMockJournal();
+ EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
+ .Times(6);
+ EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
- // Expect Journal service to be called 3+3=6 times to log error messages
- MockJournal& journal = services.getMockJournal();
- EXPECT_CALL(journal, logError(A<const std::vector<std::string>&>()))
- .Times(6);
- EXPECT_CALL(journal, logError(A<const std::string&>())).Times(6);
+ // Expect ErrorLogging service to be called once to log a DBus error
+ MockErrorLogging& errorLogging = services.getMockErrorLogging();
+ EXPECT_CALL(errorLogging, logDBusError).Times(1);
+ };
- // Expect ErrorLogging service to be called 1+1=2 times to log a DBus error
- MockErrorLogging& errorLogging = services.getMockErrorLogging();
- EXPECT_CALL(errorLogging, logDBusError).Times(2);
-
- // Monitor sensors 5 times. Should fail every time, write to journal 3
- // times, and log one error.
- for (int i = 1; i <= 5; ++i)
+ // Monitor sensors 10 times. Verify errors logged.
{
- system.monitorSensors(services);
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
+ for (int i = 1; i <= 10; ++i)
+ {
+ system.monitorSensors(services);
+ }
}
// Clear error history
system.clearErrorHistory();
- // Monitor sensors 5 times again. Should fail every time, write to journal
- // 3 times, and log one error.
- for (int i = 1; i <= 5; ++i)
+ // Monitor sensors 10 more times. Verify errors logged again.
{
- system.monitorSensors(services);
+ // Create mock services. Set expectations via lambda.
+ MockServices services{};
+ setExpectations(services);
+
+ for (int i = 1; i <= 10; ++i)
+ {
+ system.monitorSensors(services);
+ }
}
}