Handle file errors when device path gets unbound
occ-control was asserting when the hwmon path would get removed due to
device being unbound. This change will gracefully handle the path
getting removed / added back.
Tested on Raininer by unbind/bind of devices:
echo occ-hwmon.2 > /sys/bus/platform/drivers/occ-hwmon/unbind
echo occ-hwmon.2 > /sys/bus/platform/drivers/occ-hwmon/bind
Change-Id: I46fd2c2c54868ffb8183d3dc49cd0c2751165d3b
Signed-off-by: Chris Cain <cjcain@us.ibm.com>
diff --git a/occ_device.cpp b/occ_device.cpp
index c95c0f3..5cadf14 100644
--- a/occ_device.cpp
+++ b/occ_device.cpp
@@ -3,6 +3,9 @@
#include "occ_manager.hpp"
#include "occ_status.hpp"
+#include <phosphor-logging/log.hpp>
+
+#include <filesystem>
#include <iostream>
namespace open_power
@@ -10,6 +13,8 @@
namespace occ
{
+using namespace phosphor::logging;
+
fs::path Device::bindPath = fs::path(OCC_HWMON_PATH) / "bind";
fs::path Device::unBindPath = fs::path(OCC_HWMON_PATH) / "unbind";
@@ -82,5 +87,31 @@
statusObject.throttleMemTemp(error);
}
+fs::path Device::getFilenameByRegex(fs::path basePath,
+ const std::regex& expr) const
+{
+ try
+ {
+ for (auto& file : fs::directory_iterator(basePath))
+ {
+ if (std::regex_search(file.path().string(), expr))
+ {
+ // Found match
+ return file;
+ }
+ }
+ }
+ catch (const fs::filesystem_error& e)
+ {
+ log<level::ERR>(
+ fmt::format("getFilenameByRegex: Failed to get filename: {}",
+ e.what())
+ .c_str());
+ }
+
+ // Return empty path
+ return fs::path{};
+}
+
} // namespace occ
} // namespace open_power
diff --git a/occ_device.hpp b/occ_device.hpp
index 97bdceb..3c11d13 100644
--- a/occ_device.hpp
+++ b/occ_device.hpp
@@ -283,17 +283,8 @@
*
* @return path to the file or empty path if not found
*/
- fs::path getFilenameByRegex(fs::path basePath, const std::regex& expr) const
- {
- for (auto& file : fs::directory_iterator(basePath))
- {
- if (std::regex_search(file.path().string(), expr))
- {
- return file;
- }
- }
- return fs::path{};
- }
+ fs::path getFilenameByRegex(fs::path basePath,
+ const std::regex& expr) const;
};
} // namespace occ
diff --git a/occ_manager.cpp b/occ_manager.cpp
index 78ac9f2..a3f246c 100644
--- a/occ_manager.cpp
+++ b/occ_manager.cpp
@@ -844,16 +844,33 @@
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
- const fs::path fileName = occ->getHwmonPath();
+ static bool tracedError[8] = {0};
+ const fs::path sensorPath = occ->getHwmonPath();
const uint32_t id = occ->getOccInstanceID();
- // Read temperature sensors
- readTempSensors(fileName, id);
-
- if (occ->isMasterOcc())
+ if (fs::exists(sensorPath))
{
- // Read power sensors
- readPowerSensors(fileName, id);
+ // Read temperature sensors
+ readTempSensors(sensorPath, id);
+
+ if (occ->isMasterOcc())
+ {
+ // Read power sensors
+ readPowerSensors(sensorPath, id);
+ }
+ tracedError[id] = false;
+ }
+ else
+ {
+ if (!tracedError[id])
+ {
+ log<level::ERR>(
+ fmt::format(
+ "Manager::getSensorValues: OCC{} sensor path missing: {}",
+ id, sensorPath.c_str())
+ .c_str());
+ tracedError[id] = true;
+ }
}
return;
diff --git a/occ_status.cpp b/occ_status.cpp
index 95a7729..7c3658c 100644
--- a/occ_status.cpp
+++ b/occ_status.cpp
@@ -12,6 +12,8 @@
#endif
#include <phosphor-logging/log.hpp>
+#include <filesystem>
+
namespace open_power
{
namespace occ
@@ -385,18 +387,67 @@
}
#endif // POWER10
-fs::path Status::getHwmonPath() const
+fs::path Status::getHwmonPath()
{
using namespace std::literals::string_literals;
- // Build the base HWMON path
- fs::path prefixPath = fs::path{OCC_HWMON_PATH + "occ-hwmon."s +
- std::to_string(instance + 1) + "/hwmon/"s};
- // Get the hwmonXX directory name, there better only be 1 dir
- assert(std::distance(fs::directory_iterator(prefixPath),
- fs::directory_iterator{}) == 1);
+ if (!fs::exists(hwmonPath))
+ {
+ static bool tracedFail[8] = {0};
- return *fs::directory_iterator(prefixPath);
+ if (!hwmonPath.empty())
+ {
+ log<level::ERR>(
+ fmt::format("Status::getHwmonPath(): path no longer exists: {}",
+ hwmonPath.c_str())
+ .c_str());
+ hwmonPath.clear();
+ }
+
+ // Build the base HWMON path
+ fs::path prefixPath =
+ fs::path{OCC_HWMON_PATH + "occ-hwmon."s +
+ std::to_string(instance + 1) + "/hwmon/"s};
+
+ // Get the hwmonXX directory name
+ try
+ {
+ // there should only be one directory
+ const int numDirs = std::distance(
+ fs::directory_iterator(prefixPath), fs::directory_iterator{});
+ if (numDirs == 1)
+ {
+ hwmonPath = *fs::directory_iterator(prefixPath);
+ tracedFail[instance] = false;
+ }
+ else
+ {
+ if (!tracedFail[instance])
+ {
+ log<level::ERR>(
+ fmt::format(
+ "Status::getHwmonPath(): Found multiple ({}) hwmon paths!",
+ numDirs)
+ .c_str());
+ tracedFail[instance] = true;
+ }
+ }
+ }
+ catch (const fs::filesystem_error& e)
+ {
+ if (!tracedFail[instance])
+ {
+ log<level::ERR>(
+ fmt::format(
+ "Status::getHwmonPath(): error accessing {}: {}",
+ prefixPath.c_str(), e.what())
+ .c_str());
+ tracedFail[instance] = true;
+ }
+ }
+ }
+
+ return hwmonPath;
}
} // namespace occ
diff --git a/occ_status.hpp b/occ_status.hpp
index 6a00676..6376b6c 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -204,7 +204,7 @@
*
* @return path or empty path if not found
*/
- fs::path getHwmonPath() const;
+ fs::path getHwmonPath();
private:
/** @brief OCC dbus object path */
@@ -251,6 +251,9 @@
sdeventplus::Event sdpEvent;
#endif
+ /** @brief hwmon path for this OCC */
+ fs::path hwmonPath;
+
/** @brief Callback function on host control signals
*
* @param[in] msg - Data associated with subscribed signal
diff --git a/powercap.cpp b/powercap.cpp
index 682131c..cac7465 100644
--- a/powercap.cpp
+++ b/powercap.cpp
@@ -6,6 +6,7 @@
#include <powercap.hpp>
#include <cassert>
+#include <filesystem>
namespace open_power
{
@@ -141,23 +142,28 @@
{
if (pcapBasePathname.empty())
{
- static bool tracedError = false;
pcapBasePathname = occStatus.getHwmonPath();
- if (!fs::exists(pcapBasePathname) && !tracedError)
- {
- log<level::ERR>(fmt::format("Power Cap base filename not found: {}",
- pcapBasePathname.c_str())
- .c_str());
- tracedError = true;
- }
}
- for (auto& file : fs::directory_iterator(pcapBasePathname))
+
+ if (fs::exists(pcapBasePathname))
{
- if (std::regex_search(file.path().string(), expr))
+ // Search for pcap file based on the supplied expr
+ for (auto& file : fs::directory_iterator(pcapBasePathname))
{
- return file;
+ if (std::regex_search(file.path().string(), expr))
+ {
+ // Found match
+ return file;
+ }
}
}
+ else
+ {
+ log<level::ERR>(fmt::format("Power Cap base filename not found: {}",
+ pcapBasePathname.c_str())
+ .c_str());
+ }
+
// return empty path
return fs::path{};
}
@@ -219,10 +225,10 @@
}
// Open the sysfs file and read the power cap
- uint64_t cap;
std::ifstream file(userCapName, std::ios::in);
if (file)
{
+ uint64_t cap;
file >> cap;
file.close();
// Convert to Watts