Improve initialization of I2C sensors
After an AC cycle validation has witnessed some systems sensors are
missing. As Entity Manager begins the process of scanning for
sesnsors, and creating the hardware monitoring nodes, there are
occassionally some failures to correctly create the node. This
manifests itself by the 'dd' kernel driver emitting an -EBUSY error
message. Unfortunately the 'dd' driver also eats the error code, and
continues. This is by design.
This commit modifies how the nodes are initialized. The steps taken:
1. Determine if the node is already present
2. Create the node if it is not
3. Set a timer, to give the kernel time to create the node
4. For those sensors that create a "hwmon" subdir, search for that
directory after the timer elapses.
5. If the subdir is not present, delete the device, and try again by
initiating another timer.
6. Continue until the subdir exists, or a retry count expires.
Tested:
Ran AC cycles via a script.
After each cycle, wait for the SUT to DC on, and arrive at the EFI
Shell> prompt.
Issue "ipmitool sensor list", capturing the results
Search the list for all of the sensors that have been reported as
missing after AC cycles.
Change-Id: I118df674162677d66e7d211b089430fce384086b
Signed-off-by: Johnathan Mantey <johnathanx.mantey@intel.com>
diff --git a/src/Overlay.cpp b/src/Overlay.cpp
index cb6ed10..1ced2d8 100644
--- a/src/Overlay.cpp
+++ b/src/Overlay.cpp
@@ -21,6 +21,8 @@
#include "devices.hpp"
#include <boost/algorithm/string/predicate.hpp>
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/steady_timer.hpp>
#include <boost/container/flat_map.hpp>
#include <boost/container/flat_set.hpp>
#include <boost/process/child.hpp>
@@ -113,16 +115,170 @@
}
}
+static int deleteDevice(const std::string& devicePath,
+ std::shared_ptr<uint64_t> address,
+ const std::string& destructor)
+{
+ if (!address)
+ {
+ return -1;
+ }
+ std::filesystem::path deviceDestructor(devicePath);
+ deviceDestructor /= destructor;
+ std::ofstream deviceFile(deviceDestructor);
+ if (!deviceFile.good())
+ {
+ std::cerr << "Error writing " << deviceDestructor << "\n";
+ return -1;
+ }
+ deviceFile << std::to_string(*address);
+ deviceFile.close();
+ return 0;
+}
+
+static int createDevice(const std::string& devicePath,
+ const std::string& parameters,
+ const std::string& constructor)
+{
+ std::filesystem::path deviceConstructor(devicePath);
+ deviceConstructor /= constructor;
+ std::ofstream deviceFile(deviceConstructor);
+ if (!deviceFile.good())
+ {
+ std::cerr << "Error writing " << deviceConstructor << "\n";
+ return -1;
+ }
+ deviceFile << parameters;
+ deviceFile.close();
+
+ return 0;
+}
+
+static bool deviceIsCreated(const std::string& devicePath,
+ std::shared_ptr<uint64_t> bus,
+ std::shared_ptr<uint64_t> address,
+ const bool retrying)
+{
+ if (!bus || !address)
+ {
+ return false;
+ }
+
+ std::ostringstream hex;
+ hex << std::hex << std::setw(4) << std::setfill('0') << *address;
+ std::string addressHex = hex.str();
+ std::string busStr = std::to_string(*bus);
+
+ for (auto path = std::filesystem::recursive_directory_iterator(devicePath);
+ path != std::filesystem::recursive_directory_iterator(); path++)
+ {
+ if (!std::filesystem::is_directory(*path))
+ {
+ continue;
+ }
+
+ const std::string directoryName = path->path().filename();
+ if (directoryName == busStr + "-" + addressHex)
+ {
+ // The first time the BMC boots the kernel has creates a
+ // filesystem enumerating the I2C devices. The I2C device has not
+ // been initialized for use. This requires a call to a device
+ // node, such as "new_device". The first pass through this
+ // function is only confirming the filesystem contains the device
+ // entry of interest (i.e. i2c4-0050).
+ //
+ // An upper level function performs the device creation
+ // action. This action may fail. The device driver (dd) used to
+ // create the I2C filesystem substructure eats any error codes,
+ // and always returns 0. This is by design. It is also possible
+ // for the new_device action to fail because the device is not
+ // actually in the system, i.e. optional equipment.
+ //
+ // The 'retrying' pass of this function is used to confirm the
+ // 'dd' device driver succeeded. Success is measured by finding
+ // the 'hwmon' subdirectory in the filesystem. The first attempt
+ // is delayed by an arbitrary amount, in order to permit the
+ // kernel time to create the filesystem entries. The upper level
+ // function determines the number of times to retry calling this
+ // function.
+ if (retrying)
+ {
+ std::error_code ec;
+ std::filesystem::path hwmonDir(devicePath);
+ hwmonDir /= directoryName;
+ hwmonDir /= "hwmon";
+ return std::filesystem::is_directory(hwmonDir, ec);
+ }
+ return true;
+ }
+ else
+ {
+ path.disable_recursion_pending();
+ }
+ }
+ return false;
+}
+
+static int buildDevice(const std::string& devicePath,
+ const std::string& parameters,
+ std::shared_ptr<uint64_t> bus,
+ std::shared_ptr<uint64_t> address,
+ const std::string& constructor,
+ const std::string& destructor, const bool createsHWMon,
+ const size_t retries = 5)
+{
+ bool tryAgain = false;
+ if (!retries)
+ {
+ return -1;
+ }
+
+ if (!deviceIsCreated(devicePath, bus, address, false))
+ {
+ createDevice(devicePath, parameters, constructor);
+ tryAgain = true;
+ }
+ else if (createsHWMon && !deviceIsCreated(devicePath, bus, address, true))
+ {
+ // device is present, hwmon subdir missing
+ deleteDevice(devicePath, address, destructor);
+ tryAgain = true;
+ }
+
+ if (tryAgain)
+ {
+ std::shared_ptr<boost::asio::steady_timer> createTimer =
+ std::make_shared<boost::asio::steady_timer>(io);
+ createTimer->expires_after(std::chrono::milliseconds(500));
+ createTimer->async_wait([createTimer, devicePath, parameters, bus,
+ address, constructor, destructor, createsHWMon,
+ retries](const boost::system::error_code& ec) {
+ if (ec)
+ {
+ std::cerr << "Timer error: " << ec << "\n";
+ return -2;
+ }
+ return buildDevice(devicePath, parameters, bus, address,
+ constructor, destructor, createsHWMon,
+ retries - 1);
+ });
+ }
+ return 0;
+}
+
void exportDevice(const std::string& type,
const devices::ExportTemplate& exportTemplate,
const nlohmann::json& configuration)
{
std::string parameters = exportTemplate.parameters;
- std::string device = exportTemplate.device;
+ std::string devicePath = exportTemplate.devicePath;
+ std::string constructor = exportTemplate.add;
+ std::string destructor = exportTemplate.remove;
+ bool createsHWMon = exportTemplate.createsHWMon;
std::string name = "unknown";
- const uint64_t* bus = nullptr;
- const uint64_t* address = nullptr;
+ std::shared_ptr<uint64_t> bus = nullptr;
+ std::shared_ptr<uint64_t> address = nullptr;
const nlohmann::json::array_t* channels = nullptr;
for (auto keyPair = configuration.begin(); keyPair != configuration.end();
@@ -144,11 +300,13 @@
if (keyPair.key() == "Bus")
{
- bus = keyPair.value().get_ptr<const uint64_t*>();
+ bus = std::make_shared<uint64_t>(
+ *keyPair.value().get_ptr<const uint64_t*>());
}
else if (keyPair.key() == "Address")
{
- address = keyPair.value().get_ptr<const uint64_t*>();
+ address = std::make_shared<uint64_t>(
+ *keyPair.value().get_ptr<const uint64_t*>());
}
else if (keyPair.key() == "ChannelNames")
{
@@ -157,49 +315,14 @@
}
boost::replace_all(parameters, TEMPLATE_CHAR + keyPair.key(),
subsituteString);
- boost::replace_all(device, TEMPLATE_CHAR + keyPair.key(),
+ boost::replace_all(devicePath, TEMPLATE_CHAR + keyPair.key(),
subsituteString);
}
- // if we found bus and address we can attempt to prevent errors
- if (bus != nullptr && address != nullptr)
- {
- std::ostringstream hex;
- hex << std::hex << *address;
- const std::string& addressHex = hex.str();
- std::string busStr = std::to_string(*bus);
+ int err = buildDevice(devicePath, parameters, bus, address, constructor,
+ destructor, createsHWMon);
- std::filesystem::path devicePath(device);
- std::filesystem::path parentPath = devicePath.parent_path();
- if (std::filesystem::is_directory(parentPath))
- {
- for (const auto& path :
- std::filesystem::directory_iterator(parentPath))
- {
- if (!std::filesystem::is_directory(path))
- {
- continue;
- }
-
- const std::string& directoryName = path.path().filename();
- if (boost::starts_with(directoryName, busStr) &&
- boost::ends_with(directoryName, addressHex))
- {
- return; // already exported
- }
- }
- }
- }
-
- std::ofstream deviceFile(device);
- if (!deviceFile.good())
- {
- std::cerr << "Error writing " << device << "\n";
- return;
- }
- deviceFile << parameters;
- deviceFile.close();
- if (boost::ends_with(type, "Mux") && bus && address && channels)
+ if (!err && boost::ends_with(type, "Mux") && bus && address && channels)
{
linkMux(name, static_cast<size_t>(*bus), static_cast<size_t>(*address),
*channels);