Fix for cpusensor going into D state.
When the PECI driver returns the error code EAGAIN, async_read_until
keeps retrying in a loop, because async_read_until does not handle
EAGAIN reliably.
The same issue was identified and fixed for PSUSensor:
https://github.com/openbmc/dbus-sensors/commit/bcf76717697238e380be785bd28150963ecf0e9e
The fix is to use async_wait instead of async_read_until.
Tested:
On an Intel system, verified that the cpusensor daemon does not get into
D state when there is a PECI error EAGAIN.
Verified that sensors are populated as expected when there is no PECI
error.
Signed-off-by: Arun Lal K M <arun.lal@intel.com>
Signed-off-by: Arun P. Mohanan <arun.p.m@linux.intel.com>
Change-Id: I52e35075e522d0ae0d99e1c893db76156e299871
diff --git a/src/CPUSensor.cpp b/src/CPUSensor.cpp
index 0fdb2f4..b95c76a 100644
--- a/src/CPUSensor.cpp
+++ b/src/CPUSensor.cpp
@@ -110,56 +110,59 @@
}
}
-void CPUSensor::setupRead(void)
+void CPUSensor::restartRead(void)
{
std::weak_ptr<CPUSensor> weakRef = weak_from_this();
+ waitTimer.expires_from_now(boost::posix_time::milliseconds(pollTime));
+ waitTimer.async_wait([weakRef](const boost::system::error_code& ec) {
+ if (ec == boost::asio::error::operation_aborted)
+ {
+ std::cerr << "Failed to reschedule\n";
+ return;
+ }
+ std::shared_ptr<CPUSensor> self = weakRef.lock();
+ if (self)
+ {
+ self->setupRead();
+ }
+ });
+}
+
+void CPUSensor::setupRead(void)
+{
if (readingStateGood())
{
inputDev.close();
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
- int fd = open(path.c_str(), O_RDONLY);
- if (fd >= 0)
- {
- inputDev.assign(fd);
-
- boost::asio::async_read_until(
- inputDev, readBuf, '\n',
- [weakRef](const boost::system::error_code& ec,
- std::size_t /*bytes_transfered*/) {
- std::shared_ptr<CPUSensor> self = weakRef.lock();
- if (!self)
- {
- return;
- }
- self->handleResponse(ec);
- });
- }
- else
+ fd = open(path.c_str(), O_RDONLY | O_NONBLOCK);
+ if (fd < 0)
{
std::cerr << name << " unable to open fd!\n";
- pollTime = sensorFailedPollTimeMs;
+ return;
}
+
+ inputDev.assign(fd);
}
else
{
- pollTime = sensorFailedPollTimeMs;
markAvailable(false);
+ updateValue(std::numeric_limits<double>::quiet_NaN());
+ restartRead();
+ return;
}
- waitTimer.expires_from_now(boost::posix_time::milliseconds(pollTime));
- waitTimer.async_wait([weakRef](const boost::system::error_code& ec) {
- if (ec == boost::asio::error::operation_aborted)
- {
- return; // we're being canceled
- }
- std::shared_ptr<CPUSensor> self = weakRef.lock();
- if (!self)
- {
- return;
- }
- self->setupRead();
- });
+
+ std::weak_ptr<CPUSensor> weakRef = weak_from_this();
+ inputDev.async_wait(boost::asio::posix::descriptor_base::wait_read,
+ [weakRef](const boost::system::error_code& ec) {
+ std::shared_ptr<CPUSensor> self = weakRef.lock();
+
+ if (self)
+ {
+ self->handleResponse(ec);
+ }
+ });
}
void CPUSensor::updateMinMaxValues(void)
@@ -214,8 +217,8 @@
void CPUSensor::handleResponse(const boost::system::error_code& err)
{
-
- if (err == boost::system::errc::bad_file_descriptor)
+ if ((err == boost::system::errc::bad_file_descriptor) ||
+ (err == boost::asio::error::misc_errors::not_found))
{
return; // we're being destroyed
}
@@ -234,16 +237,30 @@
return;
}
loggedInterfaceDown = false;
- pollTime = CPUSensor::sensorPollMs;
- std::istream responseStream(&readBuf);
- if (!err)
+
+ if (err)
{
- std::string response;
+ pollTime = sensorFailedPollTimeMs;
+ incrementError();
+ return;
+ }
+
+ static constexpr uint32_t bufLen = 128;
+ std::string response;
+ response.resize(bufLen);
+ int rdLen = 0;
+
+ if (fd >= 0)
+ {
+ rdLen = pread(fd, response.data(), bufLen, 0);
+ }
+
+ if (rdLen > 0)
+ {
+
try
{
- std::getline(responseStream, response);
rawValue = std::stod(response);
- responseStream.clear();
double nvalue = rawValue / CPUSensor::sensorScaleFactor;
if (show)
@@ -302,8 +319,7 @@
pollTime = sensorFailedPollTimeMs;
incrementError();
}
-
- responseStream.clear();
+ restartRead();
}
void CPUSensor::checkThresholds(void)