Add timer expiration feature to ExternalSensor
ExternalSensor now functions as intended, wholly within dbus-sensors,
without requiring any modification to the IPMI or Redfish servers,
to provide the feature of timeout expiration of external data, so that
stale/lost external connections can be properly indicated as such.
A "Timeout" parameter is added, in decimal seconds, providing a
watchdog for the arrival of external data. The expectation is that the
external source will provide D-Bus updates, to the sensor Value
property, at regular intervals, repeating indefinitely.
If this external source stops doing this, the watchdog barks, and
the Value of this Sensor will be set to "NaN". This indicates to
consumers of this Sensor that the Value of this sensor has become
stale/disconnected.
A practical application of this is fan control. Upon loss of external
temperature notification, the fans could be thrown into failsafe
mode, instead of risking the system overheating by wrongly continuing
to believe an old temperature value that has become stale.
Tested: Works for me. I started an external data source, and data
arrived into the Value of the sensor. I stopped that external data
source and, after the Timeout period, the Value became "NaN". I started
the external source again, and the Value became correct again as soon
as external data started to arrive again. I repeated this stop and
start procedure a few times, verifying that it operated as intended.
Signed-off-by: Josh Lehan <krellan@google.com>
Change-Id: I53b9ff4c0aa771aff4aaf3449fcab23c07afa296
diff --git a/include/ExternalSensor.hpp b/include/ExternalSensor.hpp
index fd36bb0..02110fe 100644
--- a/include/ExternalSensor.hpp
+++ b/include/ExternalSensor.hpp
@@ -5,6 +5,7 @@
#include <sdbusplus/asio/object_server.hpp>
+#include <chrono>
#include <string>
#include <vector>
@@ -13,19 +14,49 @@
public std::enable_shared_from_this<ExternalSensor>
{
public:
- ExternalSensor(const std::string& objectType,
- sdbusplus::asio::object_server& objectServer,
- std::shared_ptr<sdbusplus::asio::connection>& conn,
- const std::string& sensorName,
- const std::string& sensorMeasure,
- std::vector<thresholds::Threshold>&& thresholds,
- const std::string& sensorConfiguration,
- const double& maxReading, const double& minReading,
- const PowerState& powerState);
+ ExternalSensor(
+ const std::string& objectType,
+ sdbusplus::asio::object_server& objectServer,
+ std::shared_ptr<sdbusplus::asio::connection>& conn,
+ const std::string& sensorName, const std::string& sensorMeasure,
+ std::vector<thresholds::Threshold>&& thresholdsIn,
+ const std::string& sensorConfiguration, double maxReading,
+ double minReading, double timeoutSecs, const PowerState& powerState,
+ std::function<void(std::chrono::steady_clock::time_point now)>&&
+ writeHookIn);
virtual ~ExternalSensor();
+ // Returns true if sensor has external Value that is subject to timeout
+ bool isAliveAndPerishable(void) const;
+
+ // Returns true if AliveAndPerishable and timeout has not yet happened
+ bool
+ isAliveAndFresh(const std::chrono::steady_clock::time_point& now) const;
+
+ // Marks the time when Value successfully received from external source
+ void writeBegin(const std::chrono::steady_clock::time_point& now);
+
+ // Marks sensor as timed out, replacing Value with floating-point "NaN"
+ void writeInvalidate(void);
+
+ // Returns amount of time elapsed since last writeBegin() happened
+ std::chrono::steady_clock::duration
+ ageElapsed(const std::chrono::steady_clock::time_point& now) const;
+
+ // Returns amount of time remaining until sensor timeout will happen
+ std::chrono::steady_clock::duration
+ ageRemaining(const std::chrono::steady_clock::time_point& now) const;
+
private:
sdbusplus::asio::object_server& objServer;
+ std::chrono::steady_clock::time_point writeLast;
+ std::chrono::steady_clock::duration writeTimeout;
+ bool writeAlive;
+ bool writePerishable;
+ std::function<void(const std::chrono::steady_clock::time_point& now)>
+ writeHook;
+
void checkThresholds(void) override;
+ void externalSetTrigger(void);
};
diff --git a/src/ExternalSensor.cpp b/src/ExternalSensor.cpp
index c6db174..13082e3 100644
--- a/src/ExternalSensor.cpp
+++ b/src/ExternalSensor.cpp
@@ -10,6 +10,7 @@
#include <sdbusplus/asio/connection.hpp>
#include <sdbusplus/asio/object_server.hpp>
+#include <chrono>
#include <iostream>
#include <istream>
#include <limits>
@@ -17,13 +18,17 @@
#include <string>
#include <vector>
+static constexpr bool debug = false;
+
ExternalSensor::ExternalSensor(
const std::string& objectType, sdbusplus::asio::object_server& objectServer,
std::shared_ptr<sdbusplus::asio::connection>& conn,
const std::string& sensorName, const std::string& sensorUnits,
std::vector<thresholds::Threshold>&& thresholdsIn,
- const std::string& sensorConfiguration, const double& maxReading,
- const double& minReading, const PowerState& powerState) :
+ const std::string& sensorConfiguration, double maxReading,
+ double minReading, double timeoutSecs, const PowerState& powerState,
+ std::function<void(std::chrono::steady_clock::time_point now)>&&
+ writeHookIn) :
// TODO(): When the Mutable feature is integrated,
// make sure all ExternalSensor instances are mutable,
// because that is the entire point of ExternalSensor,
@@ -31,7 +36,13 @@
Sensor(boost::replace_all_copy(sensorName, " ", "_"),
std::move(thresholdsIn), sensorConfiguration, objectType, maxReading,
minReading, conn, powerState),
- std::enable_shared_from_this<ExternalSensor>(), objServer(objectServer)
+ std::enable_shared_from_this<ExternalSensor>(), objServer(objectServer),
+ writeLast(std::chrono::steady_clock::now()),
+ writeTimeout(
+ std::chrono::duration_cast<std::chrono::steady_clock::duration>(
+ std::chrono::duration<double>(timeoutSecs))),
+ writeAlive(false), writePerishable(timeoutSecs > 0.0),
+ writeHook(std::move(writeHookIn))
{
// The caller must specify what physical characteristic
// an external sensor is expected to be measuring, such as temperature,
@@ -63,17 +74,121 @@
association =
objectServer.add_interface(objectPath, association::interface);
setInitialProperties(conn);
+
+ externalSetHook = [weakThis = weak_from_this()]() {
+ auto lockThis = weakThis.lock();
+ if (lockThis)
+ {
+ lockThis->externalSetTrigger();
+ }
+ };
+
+ if constexpr (debug)
+ {
+ std::cerr << "ExternalSensor " << name << " constructed: path "
+ << configurationPath << ", type " << objectType << ", min "
+ << minReading << ", max " << maxReading << ", timeout "
+ << std::chrono::duration_cast<std::chrono::microseconds>(
+ writeTimeout)
+ .count()
+ << " us\n";
+ }
}
ExternalSensor::~ExternalSensor()
{
+ // Make sure the write hook does not reference this object anymore
+ externalSetHook = nullptr;
+
objServer.remove_interface(association);
objServer.remove_interface(thresholdInterfaceCritical);
objServer.remove_interface(thresholdInterfaceWarning);
objServer.remove_interface(sensorInterface);
+
+ if constexpr (debug)
+ {
+ std::cerr << "ExternalSensor " << name << " destructed\n";
+ }
}
void ExternalSensor::checkThresholds(void)
{
thresholds::checkThresholds(this);
}
+
+bool ExternalSensor::isAliveAndPerishable(void) const
+{
+ return (writeAlive && writePerishable);
+}
+
+bool ExternalSensor::isAliveAndFresh(
+ const std::chrono::steady_clock::time_point& now) const
+{
+ // Must be alive and perishable, to have possibility of being fresh
+ if (!isAliveAndPerishable())
+ {
+ return false;
+ }
+
+ // If age, as of now, is less than timeout, it is deemed fresh
+ return (ageElapsed(now) < writeTimeout);
+}
+
+void ExternalSensor::writeBegin(
+ const std::chrono::steady_clock::time_point& now)
+{
+ if (!writeAlive)
+ {
+ std::cerr << "ExternalSensor " << name
+ << " online, receiving first value " << value << "\n";
+ }
+
+ writeLast = now;
+ writeAlive = true;
+}
+
+void ExternalSensor::writeInvalidate(void)
+{
+ writeAlive = false;
+
+ std::cerr << "ExternalSensor " << name << " offline, timed out\n";
+
+ // Take back control of this sensor from the external override,
+ // as the external source has timed out.
+ // This allows sensor::updateValue() to work normally,
+ // as it would do for internal sensors with values from hardware.
+ overriddenState = false;
+
+ // Invalidate the existing Value, similar to what internal sensors do,
+ // when they encounter errors trying to read from hardware.
+ updateValue(std::numeric_limits<double>::quiet_NaN());
+}
+
+std::chrono::steady_clock::duration ExternalSensor::ageElapsed(
+ const std::chrono::steady_clock::time_point& now) const
+{
+ // Comparing 2 time_point will return duration
+ return (now - writeLast);
+}
+
+std::chrono::steady_clock::duration ExternalSensor::ageRemaining(
+ const std::chrono::steady_clock::time_point& now) const
+{
+ // Comparing duration will return another duration
+ return (writeTimeout - ageElapsed(now));
+}
+
+void ExternalSensor::externalSetTrigger(void)
+{
+ if constexpr (debug)
+ {
+ std::cerr << "ExternalSensor " << name << " received " << value << "\n";
+ }
+
+ auto now = std::chrono::steady_clock::now();
+
+ writeBegin(now);
+
+ // Tell the owner to recalculate the expiration timer
+ writeHook(now);
+}
diff --git a/src/ExternalSensorMain.cpp b/src/ExternalSensorMain.cpp
index 19b8f60..0712b00 100644
--- a/src/ExternalSensorMain.cpp
+++ b/src/ExternalSensorMain.cpp
@@ -27,39 +27,138 @@
// The ExternalSensor is a sensor whose value is intended to be writable
// by something external to the BMC, so that the host (or something else)
-// can write to it, perhaps by using an IPMI connection.
+// can write to it, perhaps by using an IPMI or Redfish connection.
// Unlike most other sensors, an external sensor does not correspond
-// to a hwmon file or other kernel/hardware interface,
+// to a hwmon file or any other kernel/hardware interface,
// so, after initialization, this module does not have much to do,
// but it handles reinitialization and thresholds, similar to the others.
+// The main work of this module is to provide backing storage for a
+// sensor that exists only virtually, and to provide an optional
+// timeout service for detecting loss of timely updates.
// As there is no corresponding driver or hardware to support,
// all configuration of this sensor comes from the JSON parameters:
-// MinValue, MaxValue, PowerState, Measure, Name
+// MinValue, MaxValue, Timeout, PowerState, Units, Name
-// The purpose of "Measure" is to specify the physical characteristic
+// The purpose of "Units" is to specify the physical characteristic
// the external sensor is measuring, because with an external sensor
// there is no other way to tell, and it will be used for the object path
-// here: /xyz/openbmc_project/sensors/<Measure>/<Name>
+// here: /xyz/openbmc_project/sensors/<Units>/<Name>
+
+// For more information, see external-sensor.md design document:
+// https://gerrit.openbmc-project.xyz/c/openbmc/docs/+/41452
+// https://github.com/openbmc/docs/tree/master/designs/
static constexpr bool debug = false;
static const char* sensorType =
"xyz.openbmc_project.Configuration.ExternalSensor";
+void updateReaper(boost::container::flat_map<
+ std::string, std::shared_ptr<ExternalSensor>>& sensors,
+ boost::asio::steady_timer& timer,
+ const std::chrono::steady_clock::time_point& now)
+{
+ // First pass, reap all stale sensors
+ for (auto& sensor : sensors)
+ {
+ if (!sensor.second)
+ {
+ continue;
+ }
+
+ if (!sensor.second->isAliveAndPerishable())
+ {
+ continue;
+ }
+
+ if (!sensor.second->isAliveAndFresh(now))
+ {
+ // Mark sensor as dead, no longer alive
+ sensor.second->writeInvalidate();
+ }
+ }
+
+ std::chrono::steady_clock::duration nextCheck;
+ bool needCheck = false;
+
+ // Second pass, determine timer interval to next check
+ for (auto& sensor : sensors)
+ {
+ if (!sensor.second)
+ {
+ continue;
+ }
+
+ if (!sensor.second->isAliveAndPerishable())
+ {
+ continue;
+ }
+
+ auto expiration = sensor.second->ageRemaining(now);
+
+ if (needCheck)
+ {
+ nextCheck = std::min(nextCheck, expiration);
+ }
+ else
+ {
+ // Initialization
+ nextCheck = expiration;
+ needCheck = true;
+ }
+ }
+
+ if (!needCheck)
+ {
+ if constexpr (debug)
+ {
+ std::cerr << "Next ExternalSensor timer idle\n";
+ }
+
+ return;
+ }
+
+ timer.expires_at(now + nextCheck);
+
+ timer.async_wait([&sensors, &timer](const boost::system::error_code& err) {
+ if (err != boost::system::errc::success)
+ {
+ // Cancellation is normal, as timer is dynamically rescheduled
+ if (err != boost::system::errc::operation_canceled)
+ {
+ std::cerr << "ExternalSensor timer scheduling problem: "
+ << err.message() << "\n";
+ }
+ return;
+ }
+ updateReaper(sensors, timer, std::chrono::steady_clock::now());
+ });
+
+ if constexpr (debug)
+ {
+ std::cerr << "Next ExternalSensor timer "
+ << std::chrono::duration_cast<std::chrono::microseconds>(
+ nextCheck)
+ .count()
+ << " us\n";
+ }
+}
+
void createSensors(
boost::asio::io_service& io, sdbusplus::asio::object_server& objectServer,
boost::container::flat_map<std::string, std::shared_ptr<ExternalSensor>>&
sensors,
std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
const std::shared_ptr<boost::container::flat_set<std::string>>&
- sensorsChanged)
+ sensorsChanged,
+ boost::asio::steady_timer& reaperTimer)
{
auto getter = std::make_shared<GetSensorConfiguration>(
dbusConnection,
- [&io, &objectServer, &sensors, &dbusConnection,
- sensorsChanged](const ManagedObjectType& sensorConfigurations) {
+ [&io, &objectServer, &sensors, &dbusConnection, sensorsChanged,
+ &reaperTimer](const ManagedObjectType& sensorConfigurations) {
bool firstScan = (sensorsChanged == nullptr);
for (const std::pair<sdbusplus::message::object_path, SensorData>&
@@ -116,10 +215,26 @@
continue;
}
- std::string sensorName;
- std::string sensorMeasure;
+ double timeoutSecs = 0.0;
- // Name and Measure are mandatory string parameters
+ // Timeout is an optional numeric parameter
+ auto timeoutFound = baseConfigMap.find("Timeout");
+ if (timeoutFound != baseConfigMap.end())
+ {
+ timeoutSecs = std::visit(VariantToDoubleVisitor(),
+ timeoutFound->second);
+ }
+ if (!(std::isfinite(timeoutSecs) && (timeoutSecs >= 0.0)))
+ {
+ std::cerr << "Timeout parameter not parsed for "
+ << interfacePath << "\n";
+ continue;
+ }
+
+ std::string sensorName;
+ std::string sensorUnits;
+
+ // Name and Units are mandatory string parameters
auto nameFound = baseConfigMap.find("Name");
if (nameFound == baseConfigMap.end())
{
@@ -136,18 +251,18 @@
continue;
}
- auto measureFound = baseConfigMap.find("Units");
- if (measureFound == baseConfigMap.end())
+ auto unitsFound = baseConfigMap.find("Units");
+ if (unitsFound == baseConfigMap.end())
{
std::cerr << "Units parameter not found for "
<< interfacePath << "\n";
continue;
}
- sensorMeasure =
- std::visit(VariantToStringVisitor(), measureFound->second);
- if (sensorMeasure.empty())
+ sensorUnits =
+ std::visit(VariantToStringVisitor(), unitsFound->second);
+ if (sensorUnits.empty())
{
- std::cerr << "Measure parameter not parsed for "
+ std::cerr << "Units parameter not parsed for "
<< interfacePath << "\n";
continue;
}
@@ -199,8 +314,18 @@
sensorEntry = std::make_shared<ExternalSensor>(
sensorType, objectServer, dbusConnection, sensorName,
- sensorMeasure, std::move(sensorThresholds), interfacePath,
- maxValue, minValue, readState);
+ sensorUnits, std::move(sensorThresholds), interfacePath,
+ maxValue, minValue, timeoutSecs, readState,
+ [&sensors, &reaperTimer](
+ const std::chrono::steady_clock::time_point& now) {
+ updateReaper(sensors, reaperTimer, now);
+ });
+
+ if constexpr (debug)
+ {
+ std::cerr << "ExternalSensor " << sensorName
+ << " created\n";
+ }
}
});
@@ -209,6 +334,11 @@
int main()
{
+ if constexpr (debug)
+ {
+ std::cerr << "ExternalSensor service starting up\n";
+ }
+
boost::asio::io_service io;
auto systemBus = std::make_shared<sdbusplus::asio::connection>(io);
systemBus->request_name("xyz.openbmc_project.ExternalSensor");
@@ -218,15 +348,17 @@
std::vector<std::unique_ptr<sdbusplus::bus::match::match>> matches;
auto sensorsChanged =
std::make_shared<boost::container::flat_set<std::string>>();
+ boost::asio::steady_timer reaperTimer(io);
- io.post([&io, &objectServer, &sensors, &systemBus]() {
- createSensors(io, objectServer, sensors, systemBus, nullptr);
+ io.post([&io, &objectServer, &sensors, &systemBus, &reaperTimer]() {
+ createSensors(io, objectServer, sensors, systemBus, nullptr,
+ reaperTimer);
});
boost::asio::deadline_timer filterTimer(io);
std::function<void(sdbusplus::message::message&)> eventHandler =
[&io, &objectServer, &sensors, &systemBus, &sensorsChanged,
- &filterTimer](sdbusplus::message::message& message) {
+ &filterTimer, &reaperTimer](sdbusplus::message::message& message) {
if (message.is_method_error())
{
std::cerr << "callback method error\n";
@@ -237,7 +369,7 @@
filterTimer.expires_from_now(boost::posix_time::seconds(1));
filterTimer.async_wait([&io, &objectServer, &sensors, &systemBus,
- &sensorsChanged](
+ &sensorsChanged, &reaperTimer](
const boost::system::error_code& ec) {
if (ec)
{
@@ -248,7 +380,7 @@
return;
}
createSensors(io, objectServer, sensors, systemBus,
- sensorsChanged);
+ sensorsChanged, reaperTimer);
});
};
@@ -259,5 +391,10 @@
eventHandler);
matches.emplace_back(std::move(match));
+ if constexpr (debug)
+ {
+ std::cerr << "ExternalSensor service entering main loop\n";
+ }
+
io.run();
}