Add OCC present count detection and watch
Add a Presence child class of Error to handle detecting the number of
OCCs available. Add an instance of this Presence class if the Device
detects that it is the master OCC, since the number of present OCCs is
only reported by the master OCC. When a change to the number of OCCs
reported is detected, compare with the number of OCCs determined to be
active by the Manager, and if there is a mismatch, follow the usual
error path (reset OCC, etc).
Partially resolves openbmc/openbmc#2285
See https://gerrit.openbmc-project.xyz/#/c/7843/
Change-Id: Idbaca52b307992d9b01fe15439ab746ef6d64397
Signed-off-by: Edward A. James <eajames@us.ibm.com>
diff --git a/Makefile.am b/Makefile.am
index a2b263e..3044e38 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,6 +6,7 @@
occ_errors.hpp \
occ_events.hpp \
occ_finder.hpp \
+ occ_presence.hpp \
utils.hpp
noinst_LTLIBRARIES = libocc_control.la
@@ -21,6 +22,7 @@
occ_status.cpp \
occ_device.cpp \
occ_errors.cpp \
+ occ_presence.cpp \
powercap.cpp \
org/open_power/OCC/Device/error.cpp \
occ_finder.cpp \
diff --git a/occ_device.cpp b/occ_device.cpp
index 865aef2..8129502 100644
--- a/occ_device.cpp
+++ b/occ_device.cpp
@@ -1,3 +1,4 @@
+#include <iostream>
#include "occ_device.hpp"
namespace open_power
@@ -8,5 +9,21 @@
fs::path Device::bindPath = fs::path(OCC_HWMON_PATH) / "bind";
fs::path Device::unBindPath = fs::path(OCC_HWMON_PATH) / "unbind";
+bool Device::master() const
+{
+ int master;
+ auto masterFile = fs::path(DEV_PATH) / config / "occ_master";
+ std::ifstream file(masterFile, std::ios::in);
+
+ if (!file)
+ {
+ return false;
+ }
+
+ file >> master;
+ file.close();
+ return (master != 0);
+}
+
} // namespace occ
} // namespace open_power
diff --git a/occ_device.hpp b/occ_device.hpp
index 3f7dff4..7ffb0ef 100644
--- a/occ_device.hpp
+++ b/occ_device.hpp
@@ -4,6 +4,7 @@
#include <experimental/filesystem>
#include "occ_events.hpp"
#include "occ_errors.hpp"
+#include "occ_presence.hpp"
#include "config.h"
namespace open_power
@@ -11,6 +12,7 @@
namespace occ
{
+class Manager;
namespace fs = std::experimental::filesystem;
/** @class Device
@@ -30,10 +32,12 @@
*
* @param[in] event - Unique ptr reference to sd_event
* @param[in] name - OCC instance name
+ * @param[in] manager - OCC manager instance
* @param[in] callback - Optional callback on errors
*/
Device(EventPtr& event,
const std::string& name,
+ const Manager& manager,
std::function<void()> callBack = nullptr) :
#ifdef I2C_OCC
config(name),
@@ -41,7 +45,11 @@
config(name + '-' + "dev0"),
#endif
errorFile(fs::path(config) / "occ_error"),
- error(event, errorFile, callBack)
+ error(event, errorFile, callBack),
+ presence(event,
+ fs::path(config) / "occs_present",
+ manager,
+ callBack)
{
// Nothing to do here
}
@@ -76,13 +84,20 @@
/** @brief Starts to monitor for errors */
inline void addErrorWatch()
{
- return error.addWatch();
+ if (master())
+ {
+ presence.addWatch();
+ }
+
+ error.addWatch();
}
/** @brief stops monitoring for errors */
inline void removeErrorWatch()
{
- return error.removeWatch();
+ // we can always safely remove watch even if we don't add it
+ presence.removeWatch();
+ error.removeWatch();
}
private:
@@ -106,6 +121,9 @@
/** Abstraction of error monitoring */
Error error;
+ /** Abstraction of OCC presence monitoring */
+ Presence presence;
+
/** @brief file writer to achieve bind and unbind
*
* @param[in] filename - Name of file to be written
@@ -120,6 +138,9 @@
file.close();
return;
}
+
+ /** @brief Returns if device represents the master OCC */
+ bool master() const;
};
} // namespace occ
diff --git a/occ_errors.cpp b/occ_errors.cpp
index bc01c91..fc24310 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -93,7 +93,6 @@
int Error::processEvents(sd_event_source* es, int fd,
uint32_t revents, void* userData)
{
- log<level::INFO>("Error file updated");
auto error = static_cast<Error*>(userData);
error->analyzeEvent();
@@ -118,6 +117,8 @@
// A non-zero data indicates an error condition
// Let the caller take appropriate action on this
auto data = readFile(len);
+ log<level::INFO>("Error file updated",
+ entry("ERROR=%s", data.c_str()));
if (data.empty() ||
data.front() == NO_ERROR)
{
diff --git a/occ_errors.hpp b/occ_errors.hpp
index e3820d1..070e72c 100644
--- a/occ_errors.hpp
+++ b/occ_errors.hpp
@@ -61,15 +61,6 @@
/** @brief event source wrapped in unique_ptr */
EventSourcePtr eventSource;
- /** Error file */
- const fs::path file;
-
- /** @brief Optional function to call on error scenario */
- std::function<void()> callBack;
-
- /** @brief File descriptor to watch for errors */
- int fd = -1;
-
/** @brief Current state of error watching */
bool watching = false;
@@ -79,13 +70,6 @@
/** @brief Opens the file and populates fd */
void openFile();
- /** @brief Reads file data
- *
- * @return data read. Since its a /sysfs entry,
- * it would be a string
- */
- std::string readFile(int) const;
-
/** @brief Callback handler when the FD has some activity on it
*
* @param[in] es - Populated event source
@@ -103,7 +87,24 @@
* and makes a callback to error handler if the
* content denotes an error condition
*/
- void analyzeEvent();
+ virtual void analyzeEvent();
+
+ protected:
+ /** @brief File descriptor to watch for errors */
+ int fd = -1;
+
+ /** Error file */
+ const fs::path file;
+
+ /** @brief Optional function to call on error scenario */
+ std::function<void()> callBack;
+
+ /** @brief Reads file data
+ *
+ * @return data read. Since its a /sysfs entry,
+ * it would be a string
+ */
+ std::string readFile(int) const;
};
} // namespace occ
diff --git a/occ_manager.cpp b/occ_manager.cpp
index 8b99b1f..7d51490 100644
--- a/occ_manager.cpp
+++ b/occ_manager.cpp
@@ -70,6 +70,7 @@
bus,
event,
path.c_str(),
+ *this,
std::bind(std::mem_fn(&Manager::statusCallBack),
this, std::placeholders::_1)));
@@ -132,7 +133,8 @@
std::make_unique<Status>(
bus,
event,
- path.c_str()));
+ path.c_str(),
+ *this));
}
}
#endif
diff --git a/occ_manager.hpp b/occ_manager.hpp
index 1aaffc8..542c7d9 100644
--- a/occ_manager.hpp
+++ b/occ_manager.hpp
@@ -23,8 +23,8 @@
Manager() = delete;
Manager(const Manager&) = delete;
Manager& operator=(const Manager&) = delete;
- Manager(Manager&&) = default;
- Manager& operator=(Manager&&) = default;
+ Manager(Manager&&) = delete;
+ Manager& operator=(Manager&&) = delete;
~Manager() = default;
/** @brief Adds OCC pass-through and status objects on the bus
@@ -46,6 +46,11 @@
#endif
}
+ inline auto getNumOCCs() const
+ {
+ return activeCount;
+ }
+
private:
/** @brief Checks if the CPU inventory is present and if so, creates
* the occ D-Bus objects. Else, registers a handler to be
diff --git a/occ_presence.cpp b/occ_presence.cpp
new file mode 100644
index 0000000..f91521a
--- /dev/null
+++ b/occ_presence.cpp
@@ -0,0 +1,57 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <phosphor-logging/log.hpp>
+#include <phosphor-logging/elog.hpp>
+#include <xyz/openbmc_project/Common/error.hpp>
+#include <org/open_power/OCC/Device/error.hpp>
+#include "occ_presence.hpp"
+#include "occ_manager.hpp"
+#include "elog-errors.hpp"
+
+namespace open_power
+{
+namespace occ
+{
+
+// Reads the occs_present file and analyzes the data
+void Presence::analyzeEvent()
+{
+ using namespace phosphor::logging;
+ using namespace sdbusplus::org::open_power::OCC::Device::Error;
+
+ // Get the number of bytes to read
+ int len = -1;
+ auto r = ioctl(fd, FIONREAD, &len);
+ if (r < 0)
+ {
+ elog<ConfigFailure>(
+ phosphor::logging::org::open_power::OCC::Device::
+ ConfigFailure::CALLOUT_ERRNO(errno),
+ phosphor::logging::org::open_power::OCC::Device::
+ ConfigFailure::CALLOUT_DEVICE_PATH(file.c_str()));
+ }
+
+ auto data = readFile(len);
+ if (data.empty())
+ {
+ return;
+ }
+
+ // Let stoi determine the base
+ auto occsPresent = std::stoi(data, nullptr, 0);
+ if (manager.getNumOCCs() != occsPresent)
+ {
+ log<level::INFO>("OCC presence mismatch",
+ entry("BMC_OCCS=%d", manager.getNumOCCs(),
+ entry("OCC_OCCS=%d", occsPresent)));
+ if (callBack)
+ {
+ callBack();
+ }
+ }
+}
+
+} // namespace occ
+} // namespace open_power
diff --git a/occ_presence.hpp b/occ_presence.hpp
new file mode 100644
index 0000000..321dacb
--- /dev/null
+++ b/occ_presence.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "occ_errors.hpp"
+namespace open_power
+{
+namespace occ
+{
+
+class Manager;
+
+/** @class Presence
+ * @brief Monitors the number of OCCs present
+ */
+class Presence : public Error
+{
+ public:
+ Presence() = delete;
+ Presence(const Presence&) = delete;
+ Presence& operator=(const Presence&) = delete;
+ Presence(Presence&&) = default;
+ Presence& operator=(Presence&&) = default;
+
+ /** @brief Constructs the Presence object
+ *
+ * @param[in] event - Reference to sd_event unique_ptr
+ * @param[in] file - File used by driver to communicate errors
+ * @param[in] mgr - OCC manager instance
+ * @param[in] callBack - Optional function callback on error condition
+ */
+ Presence(EventPtr& event,
+ const fs::path& file,
+ const Manager& mgr,
+ std::function<void()> callBack = nullptr) :
+ Error(event, file, callBack),
+ manager(mgr)
+ {
+ // Nothing to do here.
+ }
+
+ private:
+ /** Store the manager instance to enable getting number of OCCs */
+ const Manager& manager;
+
+ /** @brief When the error event is received, analyzes it
+ * and makes a callback to error handler if the
+ * content denotes an error condition
+ */
+ void analyzeEvent() override;
+};
+
+} // namespace occ
+} // namespace open_power
diff --git a/occ_status.hpp b/occ_status.hpp
index a334a2b..55ee539 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -14,6 +14,7 @@
namespace occ
{
+class Manager;
namespace Base = sdbusplus::org::open_power::OCC::server;
using Interface = sdbusplus::server::object::object<Base::Status>;
@@ -48,12 +49,14 @@
* @param[in] bus - DBus bus to attach to
* @param[in] event - sd_event unique pointer reference
* @param[in] path - DBus object path
+ * @param[in] manager - OCC manager instance
* @param[in] callBack - Callback handler to invoke during
* property change
*/
Status(sdbusplus::bus::bus& bus,
EventPtr& event,
const char* path,
+ const Manager& manager,
std::function<void(bool)> callBack = nullptr)
: Interface(bus, path, true),
bus(bus),
@@ -66,6 +69,7 @@
#else
name + std::to_string(instance + 1),
#endif
+ manager,
std::bind(&Status::deviceErrorHandler, this)),
hostControlSignal(
bus,
diff --git a/test/utest.cpp b/test/utest.cpp
index 9731209..dc80cc2 100644
--- a/test/utest.cpp
+++ b/test/utest.cpp
@@ -1,5 +1,6 @@
#include <gtest/gtest.h>
#include <occ_events.hpp>
+#include <occ_manager.hpp>
#include "powercap.hpp"
using namespace open_power::occ;
@@ -11,7 +12,8 @@
bus(sdbusplus::bus::new_default()),
rc(sd_event_default(&event)),
eventP(event),
- occStatus(bus, eventP, "/test/path/occ1"),
+ manager(bus, eventP),
+ occStatus(bus, eventP, "/test/path/occ1", manager),
pcap(bus,occStatus)
{
EXPECT_GE(rc, 0);
@@ -25,6 +27,7 @@
int rc;
open_power::occ::EventPtr eventP;
+ Manager manager;
Status occStatus;
powercap::PowerCap pcap;
};