Bug fix: pidControlLoop may loop infinitely.
boost::basic_waitable_timer::cancel is a non-blocking function and only
sends cancellation to the pending operations. For other operations:
```
If the timer has already expired when cancel() is called, then the
handlers for asynchronous wait operations will:
* have already been invoked; or
* have been queued for invocation in the near future.
These handlers can no longer be cancelled, and therefore are passed an
error code that indicates the successful completion of the wait
operation.
```
In our case, if pidControlLoop() has been invoked or in the invoke
queue while the timer cancellation, it will ignore the cancal ec and
infinitely call the pidControlLoop() chain.
Thus an extra cancel variable is introduced to break the chain.
Signed-off-by: Hao Jiang <jianghao@google.com>
Change-Id: Ie4e53454ee326bdf612abb511990610a6b528300
diff --git a/main.cpp b/main.cpp
index c49c522..37418e6 100644
--- a/main.cpp
+++ b/main.cpp
@@ -81,11 +81,13 @@
static SensorManager mgmr;
static std::unordered_map<int64_t, std::shared_ptr<ZoneInterface>> zones;
static std::vector<std::shared_ptr<boost::asio::steady_timer>> timers;
+ static bool isCanceling = false;
for (const auto timer : timers)
{
timer->cancel();
}
+ isCanceling = true;
timers.clear();
if (zones.size() > 0 && zones.begin()->second.use_count() > 1)
@@ -93,6 +95,7 @@
throw std::runtime_error("wait for count back to 1");
}
zones.clear();
+ isCanceling = false;
const std::string& path =
(configPath.length() > 0) ? configPath : jsonConfigurationPath;
@@ -140,7 +143,7 @@
std::shared_ptr<boost::asio::steady_timer> timer = timers.emplace_back(
std::make_shared<boost::asio::steady_timer>(io));
std::cerr << "pushing zone " << i.first << "\n";
- pidControlLoop(i.second, timer);
+ pidControlLoop(i.second, timer, &isCanceling);
}
}
diff --git a/pid/pidloop.cpp b/pid/pidloop.cpp
index 711ba2f..a5d0daf 100644
--- a/pid/pidloop.cpp
+++ b/pid/pidloop.cpp
@@ -48,8 +48,11 @@
void pidControlLoop(std::shared_ptr<ZoneInterface> zone,
std::shared_ptr<boost::asio::steady_timer> timer,
- bool first, int ms100cnt)
+ const bool* isCanceling, bool first, int ms100cnt)
{
+ if (*isCanceling)
+ return;
+
if (first)
{
if (loggingEnabled)
@@ -62,71 +65,71 @@
}
timer->expires_after(std::chrono::milliseconds(100));
- timer->async_wait(
- [zone, timer, ms100cnt](const boost::system::error_code& ec) mutable {
- if (ec == boost::asio::error::operation_aborted)
- {
- return; // timer being canceled, stop loop
- }
+ timer->async_wait([zone, timer, ms100cnt, isCanceling](
+ const boost::system::error_code& ec) mutable {
+ if (ec == boost::asio::error::operation_aborted)
+ {
+ return; // timer being canceled, stop loop
+ }
- /*
- * This should sleep on the conditional wait for the listen thread
- * to tell us it's in sync. But then we also need a timeout option
- * in case phosphor-hwmon is down, we can go into some weird failure
- * more.
- *
- * Another approach would be to start all sensors in worst-case
- * values, and fail-safe mode and then clear out of fail-safe mode
- * once we start getting values. Which I think it is a solid
- * approach.
- *
- * For now this runs before it necessarily has any sensor values.
- * For the host sensors they start out in fail-safe mode. For the
- * fans, they start out as 0 as input and then are adjusted once
- * they have values.
- *
- * If a fan has failed, it's value will be whatever we're told or
- * however we retrieve it. This program disregards fan values of 0,
- * so any code providing a fan speed can set to 0 on failure and
- * that fan value will be effectively ignored. The PID algorithm
- * will be unhappy but nothing bad will happen.
- *
- * TODO(venture): If the fan value is 0 should that loop just be
- * skipped? Right now, a 0 value is ignored in
- * FanController::inputProc()
- */
+ /*
+ * This should sleep on the conditional wait for the listen thread
+ * to tell us it's in sync. But then we also need a timeout option
+ * in case phosphor-hwmon is down, we can go into some weird failure
+ * more.
+ *
+ * Another approach would be to start all sensors in worst-case
+ * values, and fail-safe mode and then clear out of fail-safe mode
+ * once we start getting values. Which I think it is a solid
+ * approach.
+ *
+ * For now this runs before it necessarily has any sensor values.
+ * For the host sensors they start out in fail-safe mode. For the
+ * fans, they start out as 0 as input and then are adjusted once
+ * they have values.
+ *
+ * If a fan has failed, it's value will be whatever we're told or
+ * however we retrieve it. This program disregards fan values of 0,
+ * so any code providing a fan speed can set to 0 on failure and
+ * that fan value will be effectively ignored. The PID algorithm
+ * will be unhappy but nothing bad will happen.
+ *
+ * TODO(venture): If the fan value is 0 should that loop just be
+ * skipped? Right now, a 0 value is ignored in
+ * FanController::inputProc()
+ */
- // Check if we should just go back to sleep.
- if (zone->getManualMode())
- {
- pidControlLoop(zone, timer, false, ms100cnt);
- return;
- }
+ // Check if we should just go back to sleep.
+ if (zone->getManualMode())
+ {
+ pidControlLoop(zone, timer, isCanceling, false, ms100cnt);
+ return;
+ }
- // Get the latest fan speeds.
- zone->updateFanTelemetry();
+ // Get the latest fan speeds.
+ zone->updateFanTelemetry();
- if (10 <= ms100cnt)
- {
- ms100cnt = 0;
+ if (10 <= ms100cnt)
+ {
+ ms100cnt = 0;
- processThermals(zone);
- }
+ processThermals(zone);
+ }
- // Run the fan PIDs every iteration.
- zone->processFans();
+ // Run the fan PIDs every iteration.
+ zone->processFans();
- if (loggingEnabled)
- {
- std::ostringstream out;
- out << "," << zone->getFailSafeMode() << std::endl;
- zone->writeLog(out.str());
- }
+ if (loggingEnabled)
+ {
+ std::ostringstream out;
+ out << "," << zone->getFailSafeMode() << std::endl;
+ zone->writeLog(out.str());
+ }
- ms100cnt += 1;
+ ms100cnt += 1;
- pidControlLoop(zone, timer, false, ms100cnt);
- });
+ pidControlLoop(zone, timer, isCanceling, false, ms100cnt);
+ });
}
} // namespace pid_control
diff --git a/pid/pidloop.hpp b/pid/pidloop.hpp
index b7d1b34..f9b78b3 100644
--- a/pid/pidloop.hpp
+++ b/pid/pidloop.hpp
@@ -14,11 +14,14 @@
*
* @param[in] zone - ptr to the ZoneInterface implementation for this loop.
* @param[in] timer - boost timer used for async callback.
+ * @param[in] isCanceling - bool ptr to indicate whether pidControlLoop is being
+ * canceled.
* @param[in] first - boolean to denote if initialization needs to be run.
* @param[in] ms100cnt - loop timer counter.
*/
void pidControlLoop(std::shared_ptr<ZoneInterface> zone,
std::shared_ptr<boost::asio::steady_timer> timer,
- bool first = true, int ms100cnt = 0);
+ const bool* isCanceling, bool first = true,
+ int ms100cnt = 0);
} // namespace pid_control