Enable OCC error monitoring
Fixes openbmc/openbmc#2165
Change-Id: I93f317a32c910f279003fa0fce6ae2d597f90312
Signed-off-by: Vishwanatha Subbanna <vishwa@linux.vnet.ibm.com>
diff --git a/occ_errors.cpp b/occ_errors.cpp
index f2da49f..fe81f17 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -56,25 +56,37 @@
// Starts to watch for errors
void Error::addWatch()
{
- // Open the file
- openFile();
+ if (!watching)
+ {
+ // Open the file
+ openFile();
- // register the callback handler
- registerCallBack();
+ // register the callback handler
+ registerCallBack();
+
+ // Set we are watching the error
+ watching = true;
+ }
}
// Stops watching for errors
void Error::removeWatch()
{
- // Close the file
- if (fd >= 0)
+ if (watching)
{
- close(fd);
- }
+ // Close the file
+ if (fd >= 0)
+ {
+ close(fd);
+ }
- // Reduce the reference count. Since there is only one instances
- // of add_io, this will result empty loop
- eventSource.reset();
+ // Reduce the reference count. Since there is only one instances
+ // of add_io, this will result empty loop
+ eventSource.reset();
+
+ // We are no more watching the error
+ watching = false;
+ }
}
// Callback handler when there is an activity on the FD
diff --git a/occ_errors.hpp b/occ_errors.hpp
index 1e66ffc..e3820d1 100644
--- a/occ_errors.hpp
+++ b/occ_errors.hpp
@@ -70,6 +70,9 @@
/** @brief File descriptor to watch for errors */
int fd = -1;
+ /** @brief Current state of error watching */
+ bool watching = false;
+
/** @brief attaches FD to events and sets up callback handler */
void registerCallBack();
diff --git a/occ_manager.hpp b/occ_manager.hpp
index 7b76703..df6f199 100644
--- a/occ_manager.hpp
+++ b/occ_manager.hpp
@@ -5,6 +5,8 @@
#include <experimental/filesystem>
#include <functional>
#include <sdbusplus/bus.hpp>
+#include <phosphor-logging/log.hpp>
+#include <phosphor-logging/elog.hpp>
#include <powercap.hpp>
#include "occ_pass_through.hpp"
#include "occ_status.hpp"
@@ -12,7 +14,6 @@
#include "config.h"
namespace sdbusRule = sdbusplus::bus::match::rules;
-
namespace open_power
{
namespace occ
@@ -68,6 +69,7 @@
}
}
+ private:
/** @brief Callback that responds to cpu creation in the inventory -
* by creating the needed objects.
*
@@ -92,7 +94,6 @@
return 0;
}
- private:
/** @brief Create child OCC objects.
*
* @param[in] occ - the occ name, such as occ0.
@@ -110,7 +111,9 @@
std::make_unique<Status>(
bus,
event,
- path.c_str()));
+ path.c_str(),
+ std::bind(std::mem_fn(&Manager::statusCallBack),
+ this, std::placeholders::_1)));
// Create the power cap monitor object for master occ (0)
if (!pcap)
@@ -121,6 +124,49 @@
}
}
+ /** @brief Callback handler invoked by Status object when the OccActive
+ * property is changed. This is needed to make sure that the
+ * error detection is started only after all the OCCs are bound.
+ * Similarly, when one of the OCC gets its OccActive property
+ * un-set, then the OCC error detection needs to be stopped on
+ * all the OCCs
+ *
+ * @param[in] status - OccActive status
+ */
+ void statusCallBack(bool status)
+ {
+ using namespace phosphor::logging;
+ using InternalFailure = sdbusplus::xyz::openbmc_project::Common::
+ Error::InternalFailure;
+
+ // At this time, it won't happen but keeping it
+ // here just in case something changes in the future
+ if ((activeCount == 0) && (!status))
+ {
+ log<level::ERR>("Invalid update on OCCActive");
+ elog<InternalFailure>();
+ }
+
+ activeCount += status ? 1 : -1;
+
+ // If all the OCCs are bound, then start error detection
+ if (activeCount == statusObjects.size())
+ {
+ for (const auto& occ: statusObjects)
+ {
+ occ->addErrorWatch();
+ }
+ }
+ else if (!status)
+ {
+ // If some OCCs are not bound yet, those will be a NO-OP
+ for (const auto& occ: statusObjects)
+ {
+ occ->removeErrorWatch();
+ }
+ }
+ }
+
/** @brief reference to the bus */
sdbusplus::bus::bus& bus;
@@ -138,6 +184,9 @@
/** @brief sbdbusplus match objects */
std::vector<sdbusplus::bus::match_t> cpuMatches;
+
+ /** @brief Number of OCCs that are bound */
+ uint8_t activeCount = 0;
};
} // namespace occ
diff --git a/occ_status.cpp b/occ_status.cpp
index 94b8c4c..15ffbc8 100644
--- a/occ_status.cpp
+++ b/occ_status.cpp
@@ -28,17 +28,21 @@
// Bind the device
device.bind();
- // And watch for errors
- // Commenting until we solve the occ error monitoring issue
- // TODO: openbmc/openbmc#2126
- // device.addErrorWatch();
+ // Call into Manager to let know that we have bound
+ if (this->callBack)
+ {
+ this->callBack(value);
+ }
}
else
{
- // Stop watching for errors
- // Commenting until we solve the occ error monitoring issue
- // TODO: openbmc/openbmc#2126
- // device.removeErrorWatch();
+ // Call into Manager to let know that we will unbind.
+ // Need to do this before doing un-bind since it will
+ // result in slave error if Master is un-bound
+ if (this->callBack)
+ {
+ this->callBack(value);
+ }
// Do the unbind.
device.unBind();
diff --git a/occ_status.hpp b/occ_status.hpp
index 74839d0..2e12806 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -1,5 +1,6 @@
#pragma once
+#include <functional>
#include <sdbusplus/bus.hpp>
#include <sdbusplus/server/object.hpp>
#include <org/open_power/OCC/Status/server.hpp>
@@ -42,13 +43,20 @@
/** @brief Constructs the Status object and
* the underlying device object
*
- * @param[in] bus - DBus bus to attach to
- * @param[in] path - DBus object path
+ * @param[in] bus - DBus bus to attach to
+ * @param[in] event - sd_event unique pointer reference
+ * @param[in] path - DBus object path
+ * @param[in] callBack - Callback handler to invoke during
+ * property change
*/
- Status(sdbusplus::bus::bus& bus, EventPtr& event, const char* path)
+ Status(sdbusplus::bus::bus& bus,
+ EventPtr& event,
+ const char* path,
+ std::function<void(bool)> callBack = nullptr)
: Interface(bus, path),
bus(bus),
path(path),
+ callBack(callBack),
instance(((this->path.back() - '0'))),
device(event,
name + std::to_string(instance + 1),
@@ -81,6 +89,18 @@
*/
bool occActive(bool value) override;
+ /** @brief Starts OCC error detection */
+ inline void addErrorWatch()
+ {
+ return device.addErrorWatch();
+ }
+
+ /** @brief Stops OCC error detection */
+ inline void removeErrorWatch()
+ {
+ return device.removeErrorWatch();
+ }
+
private:
/** @brief sdbus handle */
@@ -89,6 +109,11 @@
/** @brief OCC dbus object path */
std::string path;
+ /** @brief Callback handler to be invoked during property change.
+ * This is a handler in Manager class
+ */
+ std::function<void(bool)> callBack;
+
/** @brief occ name prefix */
std::string name = OCC_NAME;