monitor: Shut down if no readings at power on
If there are no tach sensors on D-Bus when the power state changes to
on, then create an event log and shut down the system. This is done
because in this case the code cannot determine the fan state, i.e.
whether any fans are present or spinning.
The most likely reason there are no sensors (aside from a glaring error
in the config file) is that the fan controller device driver failed its
probe and was unable to detect the device, maybe because the device
didn't have power or there was an I2C problem. To aid in root cause analysis
if this were to occur in the field, the code adds the following FFDC
(First Failure Data Capture) to the event log:
* All of the loaded hwmon drivers, taken from /sys/class/hwmon/*/name
* Failure-related lines in dmesg, which is where driver errors would
show up.
Tested: Unbound the fan device driver and then powered on the system.
Also disabled I2C to the fan controller device in simulation and tried a
power on.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: Ic0b80d67ec79c9401f59324fe1134ff12084112a
diff --git a/hwmon_ffdc.cpp b/hwmon_ffdc.cpp
new file mode 100644
index 0000000..d8069e4
--- /dev/null
+++ b/hwmon_ffdc.cpp
@@ -0,0 +1,136 @@
+#include "hwmon_ffdc.hpp"
+
+#include "logging.hpp"
+
+#include <fmt/format.h>
+
+#include <array>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+namespace phosphor::fan::monitor
+{
+
+namespace util
+{
+
+namespace fs = std::filesystem;
+
+/**
+ * @brief Runs a shell command and returns what it wrote to stdout.
+ *
+ * The command is started with popen() in read mode and its output is
+ * read with fgets() until the end of the stream. Each returned entry is
+ * one complete line of output, including its trailing newline when the
+ * command emitted one.
+ *
+ * @param[in] command - The command line handed to popen()
+ *
+ * @return The output lines. Empty if popen() failed (which is also
+ *         logged) or if the command produced no output.
+ */
+inline std::vector<std::string> executeCommand(const std::string& command)
+{
+    std::vector<std::string> output;
+    std::array<char, 128> buffer;
+
+    std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(command.c_str(), "r"),
+                                                  pclose);
+    if (!pipe)
+    {
+        getLogger().log(
+            fmt::format("popen() failed when running command: {}", command));
+        return output;
+    }
+
+    // fgets() hands back at most buffer.size() - 1 characters per call,
+    // so a long line arrives in several pieces. Stitch the pieces back
+    // together so that callers that filter line by line see whole lines.
+    std::string line;
+    while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr)
+    {
+        line += buffer.data();
+        if (!line.empty() && line.back() == '\n')
+        {
+            output.push_back(line);
+            line.clear();
+        }
+    }
+
+    // The last line of the output may not be newline terminated.
+    if (!line.empty())
+    {
+        output.push_back(line);
+    }
+
+    return output;
+}
+
+/**
+ * @brief Collects the contents of the 'name' file found in each
+ *        directory entry under /sys/class/hwmon.
+ *
+ * This is a best effort collection: problems are logged instead of
+ * being propagated to the caller, and entries whose 'name' file is
+ * missing or cannot be read are skipped.
+ *
+ * @return The names that were read. Empty if the base directory does
+ *         not exist or if nothing could be read.
+ */
+std::vector<std::string> getHwmonNameFFDC()
+{
+    const fs::path hwmonBaseDir{"/sys/class/hwmon"};
+    std::vector<std::string> hwmonNames;
+
+    try
+    {
+        // The throwing overload of fs::exists() is used, so this check
+        // lives inside the try block along with the traversal.
+        if (!fs::exists(hwmonBaseDir))
+        {
+            getLogger().log(fmt::format("Hwmon base directory {} doesn't exist",
+                                        hwmonBaseDir.native()));
+            return hwmonNames;
+        }
+
+        for (const auto& path : fs::directory_iterator(hwmonBaseDir))
+        {
+            if (!path.is_directory())
+            {
+                continue;
+            }
+
+            // Opening fails if the file isn't there, which leaves the
+            // stream in a failed state and makes the extraction below
+            // fail as well.
+            std::ifstream f{path.path() / "name"};
+            std::string name;
+
+            // Only record a name that was actually read, so a failed
+            // read doesn't add an empty entry to the FFDC.
+            if (f >> name)
+            {
+                hwmonNames.push_back(name);
+            }
+        }
+    }
+    catch (const std::exception& e)
+    {
+        getLogger().log(
+            fmt::format("Error traversing hwmon directories: {}", e.what()));
+    }
+
+    return hwmonNames;
+}
+
+/**
+ * @brief Runs 'dmesg' and keeps only the lines that look failure
+ *        related.
+ *
+ * A line is kept when it contains at least one of the keywords below.
+ * A trailing newline is removed from every kept line.
+ *
+ * @return The matching lines, in the order dmesg printed them
+ */
+std::vector<std::string> getDmesgFFDC()
+{
+    // Keywords that make a line worth keeping. An example line is:
+    // [ 16.390603] max31785: probe of 7-0052 failed with error -110
+    // The leading space in ' probe' keeps 'modprobe' from matching.
+    const std::array<const char*, 2> keywords{" probe", "failed"};
+
+    std::vector<std::string> filtered;
+
+    for (const auto& line : executeCommand("dmesg"))
+    {
+        bool interesting = false;
+        for (const auto* keyword : keywords)
+        {
+            if (line.find(keyword) != std::string::npos)
+            {
+                interesting = true;
+                break;
+            }
+        }
+
+        if (!interesting)
+        {
+            continue;
+        }
+
+        // A matching line can't be empty, so back() is safe here.
+        const auto length =
+            (line.back() == '\n') ? line.size() - 1 : line.size();
+        filtered.emplace_back(line, 0, length);
+    }
+
+    return filtered;
+}
+
+} // namespace util
+
+/**
+ * @brief Gathers the hwmon related FFDC into a JSON value.
+ *
+ * The hwmon names from util::getHwmonNameFFDC() are stored under the
+ * "hwmonNames" key and the filtered dmesg lines from
+ * util::getDmesgFFDC() under the "dmesg" key. A key is only added when
+ * its list is not empty.
+ *
+ * @return The collected FFDC, or a default constructed json value when
+ *         both lists were empty
+ */
+nlohmann::json collectHwmonFFDC()
+{
+    nlohmann::json result;
+
+    if (auto names = util::getHwmonNameFFDC(); !names.empty())
+    {
+        result["hwmonNames"] = std::move(names);
+    }
+
+    if (auto dmesgLines = util::getDmesgFFDC(); !dmesgLines.empty())
+    {
+        result["dmesg"] = std::move(dmesgLines);
+    }
+
+    return result;
+}
+
+} // namespace phosphor::fan::monitor