Wait for OCC Active PDR to become available
occ-control was attempting to communicate with OCCs before the OCC
Active sensor PDR was available. This commit will wait until all PDRs
are available before allowing communication with the OCCs.
It will also read the OCC Active sensor to determine the state before
allowing communication.
When Hostboot moves a system to safe mode, it sets the PLDM state of
the OCC Active sensor to PLDM_STATE_SET_OPERATIONAL_RUNNING_STATUS_DORMANT.
This commit will also handle processing of that DORMANT status.
Change-Id: I348cf14455d9373898deec2ac4a04b3752e81c4a
Signed-off-by: Chris Cain <cjcain@us.ibm.com>
diff --git a/occ_manager.cpp b/occ_manager.cpp
index a77d49b..e621a21 100644
--- a/occ_manager.cpp
+++ b/occ_manager.cpp
@@ -67,29 +67,71 @@
#else
if (!fs::exists(HOST_ON_FILE))
{
- // Create the OCCs based on on the /dev/occX devices
- auto occs = findOCCsInDev();
-
- if (occs.empty() || (prevOCCSearch.size() != occs.size()))
+ static bool statusObjCreated = false;
+ if (!statusObjCreated)
{
- // Something changed or no OCCs yet, try again in 10s.
- // Note on the first pass prevOCCSearch will be empty,
- // so there will be at least one delay to give things
- // a chance to settle.
- prevOCCSearch = occs;
+            // Create the OCCs based on the /dev/occX devices
+ auto occs = findOCCsInDev();
- discoverTimer->restartOnce(10s);
- }
- else
- {
- discoverTimer.reset();
-
- // createObjects requires OCC0 first.
- std::sort(occs.begin(), occs.end());
-
- for (auto id : occs)
+ if (occs.empty() || (prevOCCSearch.size() != occs.size()))
{
- createObjects(std::string(OCC_NAME) + std::to_string(id));
+ // Something changed or no OCCs yet, try again in 10s.
+ // Note on the first pass prevOCCSearch will be empty,
+ // so there will be at least one delay to give things
+ // a chance to settle.
+ prevOCCSearch = occs;
+
+ log<level::INFO>(
+ fmt::format(
+ "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
+ occs.size())
+ .c_str());
+
+ discoverTimer->restartOnce(10s);
+ }
+ else
+ {
+ // All OCCs appear to be available, create status objects
+
+ // createObjects requires OCC0 first.
+ std::sort(occs.begin(), occs.end());
+
+ log<level::INFO>(
+ fmt::format(
+ "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
+ occs.size())
+ .c_str());
+ for (auto id : occs)
+ {
+ createObjects(std::string(OCC_NAME) + std::to_string(id));
+ }
+ statusObjCreated = true;
+ }
+ }
+
+ if (statusObjCreated)
+ {
+ static bool tracedHostWait = false;
+ if (utils::isHostRunning())
+ {
+ if (tracedHostWait)
+ {
+ log<level::INFO>(
+ "Manager::findAndCreateObjects(): Host is running");
+ tracedHostWait = false;
+ }
+ waitingForAllOccActiveSensors = true;
+ checkAllActiveSensors();
+ }
+ else
+ {
+ if (!tracedHostWait)
+ {
+ log<level::INFO>(
+ "Manager::findAndCreateObjects(): Waiting for host to start");
+ tracedHostWait = true;
+ }
+ discoverTimer->restartOnce(30s);
}
}
}
@@ -105,6 +147,59 @@
#endif
}
+#ifdef POWER10
+/**
+ * Check whether every OCC status object reports occActive().
+ *
+ * For the first OCC that is not active, a read of its OCC Active sensor is
+ * started through PLDM and the discovery timer is re-armed for 30 seconds.
+ * Only that one OCC is queried per call.
+ *
+ * When all OCCs are active the discovery timer is released and
+ * waitingForAllOccActiveSensors is cleared.
+ */
+void Manager::checkAllActiveSensors()
+{
+    // Persists across calls so the "waiting" trace is only logged once
+    static bool tracedSensorWait = false;
+
+    for (auto& obj : statusObjects)
+    {
+        // If active sensor is already true, then no need to query sensor
+        if (obj->occActive())
+        {
+            continue;
+        }
+
+        if (!tracedSensorWait)
+        {
+            log<level::INFO>(
+                fmt::format(
+                    "Manager::checkAllActiveSensors(): Waiting on OCC{} Active sensor",
+                    obj->getOccInstanceID())
+                    .c_str());
+            tracedSensorWait = true;
+        }
+
+        // Kick off the sensor read for this OCC; the loop stops here, so
+        // any remaining OCCs are examined on a later call.
+        pldmHandle->checkActiveSensor(obj->getOccInstanceID());
+
+        // Not all sensors were available, so keep waiting
+        discoverTimer->restartOnce(30s);
+        return;
+    }
+
+    // All sensors were found, disable the discovery timer
+    discoverTimer.reset();
+    waitingForAllOccActiveSensors = false;
+
+    log<level::INFO>(
+        "Manager::checkAllActiveSensors(): OCC Active sensors are available");
+    tracedSensorWait = false;
+}
+#endif
+
std::vector<int> Manager::findOCCsInDev()
{
std::vector<int> occs;
@@ -287,6 +382,13 @@
setSensorValueToNonFunctional(instance);
#endif
}
+
+#ifdef POWER10
+ if (waitingForAllOccActiveSensors)
+ {
+ checkAllActiveSensors();
+ }
+#endif
}
#ifdef I2C_OCC
@@ -326,8 +428,9 @@
if (obj != statusObjects.end() && (*obj)->occActive())
{
- log<level::INFO>("SBE timeout, requesting HRESET",
- entry("SBE=%d", instance));
+ log<level::INFO>(
+ fmt::format("SBE timeout, requesting HRESET (OCC{})", instance)
+ .c_str());
setSBEState(instance, SBE_STATE_NOT_USABLE);
@@ -361,7 +464,8 @@
{
if (success)
{
- log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
+ log<level::INFO>(
+ fmt::format("HRESET succeeded (OCC{})", instance).c_str());
setSBEState(instance, SBE_STATE_BOOTED);
@@ -372,8 +476,9 @@
if (sbeCanDump(instance))
{
- log<level::INFO>("HRESET failed, triggering SBE dump",
- entry("SBE=%d", instance));
+ log<level::INFO>(
+ fmt::format("HRESET failed (OCC{}), triggering SBE dump", instance)
+ .c_str());
auto& bus = utils::getBus();
uint32_t src6 = instance << 16;
@@ -1066,6 +1171,34 @@
int masterInstance = -1;
for (auto& obj : statusObjects)
{
+#ifdef POWER10
+ if (!obj->occActive())
+ {
+ if (utils::isHostRunning())
+ {
+ // OCC does not appear to be active yet, check active sensor
+ pldmHandle->checkActiveSensor(obj->getOccInstanceID());
+ if (obj->occActive())
+ {
+ log<level::INFO>(
+ fmt::format(
+ "validateOccMaster: OCC{} is ACTIVE after reading sensor",
+ obj->getOccInstanceID())
+ .c_str());
+ }
+ }
+ else
+ {
+ log<level::WARNING>(
+ fmt::format(
+ "validateOccMaster: HOST is not running (OCC{})",
+ obj->getOccInstanceID())
+ .c_str());
+ return;
+ }
+ }
+#endif // POWER10
+
if (obj->isMasterOcc())
{
obj->addPresenceWatchMaster();
@@ -1086,9 +1219,13 @@
}
}
}
+
if (masterInstance < 0)
{
- log<level::ERR>("validateOccMaster: Master OCC not found!");
+ log<level::ERR>(
+ fmt::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
+ statusObjects.size())
+ .c_str());
// request reset
statusObjects.front()->deviceError();
}
diff --git a/occ_manager.hpp b/occ_manager.hpp
index d636976..d3388e8 100644
--- a/occ_manager.hpp
+++ b/occ_manager.hpp
@@ -94,7 +94,8 @@
std::bind(std::mem_fn(&Manager::updateOCCActive), this,
std::placeholders::_1, std::placeholders::_2),
std::bind(std::mem_fn(&Manager::sbeHRESETResult), this,
- std::placeholders::_1, std::placeholders::_2)))
+ std::placeholders::_1, std::placeholders::_2),
+ event))
#endif
#ifdef POWER10
,
@@ -219,6 +220,10 @@
/** @brief Poll timer event */
sdeventplus::Event sdpEvent;
+ /** @brief Flags to indicate if waiting for all of the OCC active sensors to
+ * come online */
+ bool waitingForAllOccActiveSensors = false;
+
/**
* @brief The timer to be used once the OCC goes active. When it expires,
* a POLL command will be sent to the OCC and then timer restarted.
@@ -318,6 +323,11 @@
* Manager).
*/
void occsNotAllRunning();
+
+ /** @brief Check if all of the OCC Active sensors are available and if not
+ * restart the discoverTimer
+ */
+ void checkAllActiveSensors();
#endif
/**
diff --git a/occ_status.cpp b/occ_status.cpp
index b79fff4..789afba 100644
--- a/occ_status.cpp
+++ b/occ_status.cpp
@@ -46,6 +46,9 @@
manager.updatePcapBounds();
}
+ // Update the OCC active sensor before notifying Manager
+ Base::Status::occActive(value);
+
// Call into Manager to let know that we have bound
if (this->managerCallBack)
{
@@ -66,7 +69,6 @@
safeStateDelayTimer.setEnabled(false);
}
#endif
-
// Call into Manager to let know that we will unbind.
if (this->managerCallBack)
{
@@ -520,6 +522,19 @@
}
else
{
+#ifdef POWER10
+ if (!stateValid && occActive())
+ {
+ if (!safeStateDelayTimer.isEnabled())
+ {
+ log<level::ERR>(
+ "Starting 60 sec delay timer before requesting a reset");
+ // start safe delay timer (before requesting reset)
+ using namespace std::literals::chrono_literals;
+ safeStateDelayTimer.restartOnce(60s);
+ }
+ }
+#else
// State could not be determined, set it to NO State.
lastState = 0;
@@ -529,6 +544,7 @@
// Disable and reset to try recovering
deviceError();
+#endif
}
}
}
diff --git a/occ_status.hpp b/occ_status.hpp
index 342c9f5..ef8fad0 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -129,12 +129,6 @@
resetCallBack(resetCallBack)
#endif
{
- // Check to see if we have OCC already bound. If so, just set it
- if (device.bound())
- {
- this->occActive(true);
- }
-
// Announce that we are ready
this->emit_object_added();
}
diff --git a/pldm.cpp b/pldm.cpp
index b703618..bff995c 100644
--- a/pldm.cpp
+++ b/pldm.cpp
@@ -9,17 +9,31 @@
#include <libpldm/state_set_oem_ibm.h>
#include <phosphor-logging/log.hpp>
+#include <sdbusplus/bus.hpp>
+#include <sdeventplus/clock.hpp>
+#include <sdeventplus/exception.hpp>
+#include <sdeventplus/source/io.hpp>
+#include <sdeventplus/source/time.hpp>
+
+#include <algorithm>
namespace pldm
{
using namespace phosphor::logging;
+using namespace sdeventplus;
+using namespace sdeventplus::source;
+constexpr auto clockId = sdeventplus::ClockId::RealTime;
+using Clock = sdeventplus::Clock<clockId>;
+using Timer = Time<clockId>;
+
void Interface::fetchSensorInfo(uint16_t stateSetId,
SensorToInstance& sensorInstanceMap,
SensorOffset& sensorOffset)
{
PdrList pdrs{};
+ static bool tracedError = false;
auto& bus = open_power::occ::utils::getBus();
try
@@ -27,23 +41,46 @@
auto method = bus.new_method_call(
"xyz.openbmc_project.PLDM", "/xyz/openbmc_project/pldm",
"xyz.openbmc_project.PLDM.PDR", "FindStateSensorPDR");
- method.append(tid, (uint16_t)PLDM_ENTITY_PROC, stateSetId);
+ method.append(tid, static_cast<uint16_t>(PLDM_ENTITY_PROC), stateSetId);
auto responseMsg = bus.call(method);
responseMsg.read(pdrs);
}
catch (const sdbusplus::exception::exception& e)
{
- log<level::ERR>("pldm: Failed to fetch the state sensor PDRs",
- entry("ERROR=%s", e.what()));
+ if (!tracedError)
+ {
+ log<level::ERR>(
+ fmt::format(
+ "fetchSensorInfo: Failed to find stateSetID:{} PDR: {}",
+ stateSetId, e.what())
+ .c_str());
+ tracedError = true;
+ }
}
if (pdrs.empty())
{
- log<level::ERR>("pldm: state sensor PDRs not present");
+ if (!tracedError)
+ {
+ log<level::ERR>(
+ fmt::format(
+ "fetchSensorInfo: state sensor PDRs ({}) not present",
+ stateSetId)
+ .c_str());
+ tracedError = true;
+ }
return;
}
+ // Found PDR
+ if (tracedError)
+ {
+ log<level::INFO>(
+ fmt::format("fetchSensorInfo: found {} PDRs", pdrs.size()).c_str());
+ tracedError = false;
+ }
+
bool offsetFound = false;
auto stateSensorPDR =
reinterpret_cast<const pldm_state_sensor_pdr*>(pdrs.front().data());
@@ -139,6 +176,24 @@
.c_str());
callBack(sensorEntry->second, false);
}
+ else if (eventState ==
+ static_cast<EventState>(
+ PLDM_STATE_SET_OPERATIONAL_RUNNING_STATUS_DORMANT))
+ {
+ log<level::INFO>(
+ fmt::format(
+ "PLDM: OCC{} has now STOPPED and system is in SAFE MODE",
+ sensorEntry->second)
+ .c_str());
+ callBack(sensorEntry->second, false);
+ }
+ else
+ {
+ log<level::INFO>(
+ fmt::format("PLDM: Unexpected PLDM state {} for OCC{}",
+ eventState, sensorEntry->second)
+ .c_str());
+ }
return;
}
@@ -152,8 +207,10 @@
{
if (eventState == static_cast<EventState>(SBE_HRESET_NOT_READY))
{
- log<level::INFO>("pldm: HRESET is NOT READY",
- entry("SBE=%d", sensorEntry->second));
+ log<level::INFO>(
+ fmt::format("pldm: HRESET is NOT READY (OCC{})",
+ sensorEntry->second)
+ .c_str());
}
else if (eventState == static_cast<EventState>(SBE_HRESET_READY))
{
@@ -179,15 +236,20 @@
auto propVal = std::get<std::string>(stateEntryValue);
if (propVal == "xyz.openbmc_project.State.Host.HostState.Off")
{
- sensorToOCCInstance.clear();
- occInstanceToEffecter.clear();
-
- sensorToSBEInstance.clear();
- sbeInstanceToEffecter.clear();
+ clearData();
}
}
}
+void Interface::clearData()
+{
+    // Drop every cached sensor/effecter mapping (SBE and OCC alike)
+    sensorToSBEInstance.clear();
+    sbeInstanceToEffecter.clear();
+    sensorToOCCInstance.clear();
+    occInstanceToEffecter.clear();
+}
+
void Interface::fetchEffecterInfo(uint16_t stateSetId,
InstanceToEffecter& instanceToEffecterMap,
CompositeEffecterCount& effecterCount,
@@ -201,7 +263,7 @@
auto method = bus.new_method_call(
"xyz.openbmc_project.PLDM", "/xyz/openbmc_project/pldm",
"xyz.openbmc_project.PLDM.PDR", "FindStateEffecterPDR");
- method.append(tid, (uint16_t)PLDM_ENTITY_PROC, stateSetId);
+ method.append(tid, static_cast<uint16_t>(PLDM_ENTITY_PROC), stateSetId);
auto responseMsg = bus.call(method);
responseMsg.read(pdrs);
@@ -301,84 +363,104 @@
void Interface::resetOCC(open_power::occ::instanceID occInstanceId)
{
- if (!isPDREffecterCacheValid())
+ if (open_power::occ::utils::isHostRunning())
{
- fetchEffecterInfo(PLDM_STATE_SET_BOOT_RESTART_CAUSE,
- occInstanceToEffecter, OCCEffecterCount,
- bootRestartPosition);
- }
+ if (!isPDREffecterCacheValid())
+ {
+ fetchEffecterInfo(PLDM_STATE_SET_BOOT_RESTART_CAUSE,
+ occInstanceToEffecter, OCCEffecterCount,
+ bootRestartPosition);
+ }
- // Find the matching effecter for the OCC instance
- auto effecterEntry = occInstanceToEffecter.find(occInstanceId);
- if (effecterEntry == occInstanceToEffecter.end())
+ // Find the matching effecter for the OCC instance
+ auto effecterEntry = occInstanceToEffecter.find(occInstanceId);
+ if (effecterEntry == occInstanceToEffecter.end())
+ {
+ log<level::ERR>(
+ fmt::format(
+ "pldm: Failed to find a matching effecter for OCC instance {}",
+ occInstanceId)
+ .c_str());
+
+ return;
+ }
+
+ if (!getMctpInstanceId(mctpInstance))
+ {
+ return;
+ }
+
+ // Prepare the SetStateEffecterStates request to reset the OCC
+ auto request = prepareSetEffecterReq(
+ mctpInstance, effecterEntry->second, OCCEffecterCount,
+ bootRestartPosition, PLDM_STATE_SET_BOOT_RESTART_CAUSE_WARM_RESET);
+
+ if (request.empty())
+ {
+ log<level::ERR>(
+ "pldm: SetStateEffecterStates OCC reset request empty");
+ return;
+ }
+
+ // Send request to reset the OCCs/PM Complex (ignore response)
+ sendPldm(request, occInstanceId, false);
+ }
+ else
{
log<level::ERR>(
- fmt::format(
- "pldm: Failed to find a matching effecter for OCC instance {}",
- occInstanceId)
+ fmt::format("resetOCC: HOST is not running (OCC{})", occInstanceId)
.c_str());
-
- return;
+ clearData();
}
-
- uint8_t instanceId{};
- if (!getMctpInstanceId(instanceId))
- {
- return;
- }
-
- // Prepare the SetStateEffecterStates request to reset the OCC
- auto request = prepareSetEffecterReq(
- instanceId, effecterEntry->second, OCCEffecterCount,
- bootRestartPosition, PLDM_STATE_SET_BOOT_RESTART_CAUSE_WARM_RESET);
-
- if (request.empty())
- {
- log<level::ERR>("pldm: SetStateEffecterStates OCC reset request empty");
- return;
- }
-
- // Make asynchronous call to reset the OCCs/PM Complex
- sendPldm(request, true);
}
void Interface::sendHRESET(open_power::occ::instanceID sbeInstanceId)
{
- if (sbeInstanceToEffecter.empty())
+ if (open_power::occ::utils::isHostRunning())
{
- fetchEffecterInfo(PLDM_OEM_IBM_SBE_MAINTENANCE_STATE,
- sbeInstanceToEffecter, SBEEffecterCount,
- sbeMaintenanceStatePosition);
- }
+ if (sbeInstanceToEffecter.empty())
+ {
+ fetchEffecterInfo(PLDM_OEM_IBM_SBE_MAINTENANCE_STATE,
+ sbeInstanceToEffecter, SBEEffecterCount,
+ sbeMaintenanceStatePosition);
+ }
- auto effecterEntry = sbeInstanceToEffecter.find(sbeInstanceId);
- if (effecterEntry == sbeInstanceToEffecter.end())
+ auto effecterEntry = sbeInstanceToEffecter.find(sbeInstanceId);
+ if (effecterEntry == sbeInstanceToEffecter.end())
+ {
+ log<level::ERR>(
+ "pldm: Failed to find a matching effecter for SBE instance",
+ entry("SBE=%d", sbeInstanceId));
+ return;
+ }
+
+ if (!getMctpInstanceId(mctpInstance))
+ {
+ return;
+ }
+
+ // Prepare the SetStateEffecterStates request to HRESET the SBE
+ auto request = prepareSetEffecterReq(
+ mctpInstance, effecterEntry->second, SBEEffecterCount,
+ sbeMaintenanceStatePosition, SBE_RETRY_REQUIRED);
+
+ if (request.empty())
+ {
+ log<level::ERR>(
+ "pldm: SetStateEffecterStates HRESET request empty");
+ return;
+ }
+
+ // Send request to issue HRESET of SBE (ignore response)
+ sendPldm(request, sbeInstanceId, false);
+ }
+ else
{
- log<level::ERR>(
- "pldm: Failed to find a matching effecter for SBE instance",
- entry("SBE=%d", sbeInstanceId));
- return;
+ log<level::ERR>(fmt::format("sendHRESET: HOST is not running (OCC{})",
+ sbeInstanceId)
+ .c_str());
+ clearData();
}
-
- uint8_t instanceId{};
- if (!getMctpInstanceId(instanceId))
- {
- return;
- }
-
- // Prepare the SetStateEffecterStates request to HRESET the SBE
- auto request = prepareSetEffecterReq(
- instanceId, effecterEntry->second, SBEEffecterCount,
- sbeMaintenanceStatePosition, SBE_RETRY_REQUIRED);
-
- if (request.empty())
- {
- log<level::ERR>("pldm: SetStateEffecterStates HRESET request empty");
- return;
- }
-
- // Make asynchronous call to do the reset
- sendPldm(request, true);
}
bool Interface::getMctpInstanceId(uint8_t& instanceId)
@@ -403,66 +485,314 @@
return true;
}
-void Interface::sendPldm(const std::vector<uint8_t>& request, const bool async)
+void Interface::sendPldm(const std::vector<uint8_t>& request,
+ const uint8_t instance, const bool rspExpected)
{
// Connect to MCTP scoket
- int fd = pldm_open();
- if (fd == -1)
+ pldmFd = pldm_open();
+ auto openErrno = errno;
+ if (pldmFd == PLDM_REQUESTER_OPEN_FAIL)
{
log<level::ERR>(
- fmt::format("sendPldm: Failed to connect to MCTP socket, errno={}",
- errno)
+ fmt::format(
+ "sendPldm: Failed to connect to MCTP socket, errno={}/{}",
+ openErrno, strerror(openErrno))
.c_str());
return;
}
- open_power::occ::FileDescriptor fileFd(fd);
-
// Send the PLDM request message to HBRT
- if (async == false)
+ if (rspExpected)
{
- uint8_t* response = nullptr;
- size_t responseSize{};
- auto rc = pldm_send_recv(mctpEid, fileFd(), request.data(),
- request.size(), &response, &responseSize);
- std::unique_ptr<uint8_t, decltype(std::free)*> responsePtr{response,
- std::free};
+ // Register callback when response is available
+ registerPldmRspCallback();
+
+ // Send PLDM request
+ log<level::INFO>(
+ fmt::format(
+ "sendPldm: calling pldm_send(OCC{}, instance:{}, {} bytes)",
+ instance, mctpInstance, request.size())
+ .c_str());
+ pldmResponseReceived = false;
+ pldmResponseTimeout = false;
+ pldmResponseOcc = instance;
+ auto pldmRc =
+ pldm_send(mctpEid, pldmFd, request.data(), request.size());
+ auto sendErrno = errno;
+ if (pldmRc != PLDM_REQUESTER_SUCCESS)
+ {
+ log<level::ERR>(
+ fmt::format(
+ "sendPldm: pldm_send failed with rc={} and errno={}/{}",
+ pldmRc, sendErrno, strerror(sendErrno))
+ .c_str());
+ pldmClose();
+ return;
+ }
+
+ // start timer waiting for the response
+ using namespace std::literals::chrono_literals;
+ pldmRspTimer.restartOnce(10s);
+
+ // Wait for response/timeout
+ }
+ else // not expecting the response
+ {
+ log<level::INFO>(
+ fmt::format(
+ "sendPldm: calling pldm_send(mctpID:{}, fd:{}, {} bytes) for OCC{}",
+ mctpEid, pldmFd, request.size(), instance)
+ .c_str());
+ auto rc = pldm_send(mctpEid, pldmFd, request.data(), request.size());
+ auto sendErrno = errno;
if (rc)
{
log<level::ERR>(
fmt::format(
- "sendPldm: pldm_send_recv({},{},req,{},...) failed with rc={} and errno={}",
- mctpEid, fileFd(), request.size(), rc, errno)
+ "sendPldm: pldm_send(mctpID:{}, fd:{}, {} bytes) failed with rc={} and errno={}/{}",
+ mctpEid, pldmFd, request.size(), rc, sendErrno,
+ strerror(sendErrno))
.c_str());
}
+ pldmClose();
+ }
+}
- uint8_t completionCode{};
- auto responseMsg = reinterpret_cast<const pldm_msg*>(responsePtr.get());
- auto rcDecode = decode_set_state_effecter_states_resp(
- responseMsg, responseSize - sizeof(pldm_msg_hdr), &completionCode);
- if (rcDecode || completionCode)
- {
- log<level::ERR>(
- fmt::format(
- "sendPldm: decode_set_state_effecter_states_resp failed with rc={} and compCode={}",
- rcDecode, completionCode)
- .c_str());
- }
+// Attaches the FD to event loop and registers the callback handler
+void Interface::registerPldmRspCallback()
+{
+ decltype(eventSource.get()) sourcePtr = nullptr;
+ auto rc = sd_event_add_io(event.get(), &sourcePtr, pldmFd, EPOLLIN,
+ pldmRspCallback, this);
+ if (rc < 0)
+ {
+ log<level::ERR>(
+ fmt::format(
+ "registerPldmRspCallback: sd_event_add_io: Error({})={} : fd={}",
+ rc, strerror(-rc), pldmFd)
+ .c_str());
}
else
{
- log<level::INFO>(fmt::format("sendPldm: calling pldm_send({}, {})",
- mctpEid, fileFd())
- .c_str());
- auto rc = pldm_send(mctpEid, fileFd(), request.data(), request.size());
- if (rc)
+ // puts sourcePtr in the event source.
+ eventSource.reset(sourcePtr);
+ }
+}
+
+// Timer callback: no PLDM response arrived in time; flag it and close the fd
+void Interface::pldmRspExpired()
+{
+    if (!pldmResponseReceived)
+    {
+        log<level::ERR>(
+            fmt::format(
+                "pldmRspExpired: timerCallback - timeout waiting for pldm response for OCC{}",
+                pldmResponseOcc)
+                .c_str());
+        pldmResponseTimeout = true;
+        // pldmFd is -1 (truthy) when no socket is open, so test explicitly
+        if (pldmFd >= 0)
+        {
+            pldmClose();
+        }
+    }
+}
+
+void Interface::pldmClose()
+{
+    if (pldmRspTimer.isEnabled())
+    {
+        pldmRspTimer.setEnabled(false); // stop PLDM response timer
+    }
+    // Release the io event source before closing the fd it is watching
+    eventSource.reset();
+    close(pldmFd);
+    pldmFd = -1;
+}
+
+// sd_event io callback: read and decode the GetStateSensorReadings response
+int Interface::pldmRspCallback(sd_event_source* /*es*/, int fd,
+                               uint32_t revents, void* userData)
+{
+    if (!(revents & EPOLLIN))
+    {
+        log<level::INFO>(
+            fmt::format("pldmRspCallback - revents={:08X}", revents).c_str());
+        return -1;
+    }
+
+    auto pldmIface = static_cast<Interface*>(userData);
+    uint8_t* responseMsg = nullptr;
+    size_t responseMsgSize{};
+
+    log<level::INFO>(
+        fmt::format("pldmRspCallback: calling pldm_recv() instance:{}",
+                    pldmIface->mctpInstance)
+            .c_str());
+    auto rc = pldm_recv(mctpEid, fd, pldmIface->mctpInstance, &responseMsg,
+                        &responseMsgSize);
+    int lastErrno = errno;
+    if (rc)
+    {
+        log<level::ERR>(
+            fmt::format(
+                "pldmRspCallback: pldm_recv failed with rc={}, errno={}/{}", rc,
+                lastErrno, strerror(lastErrno))
+                .c_str());
+        return -1;
+    }
+    log<level::INFO>(
+        fmt::format("pldmRspCallback: pldm_recv() rsp was {} bytes",
+                    responseMsgSize)
+            .c_str());
+
+    if (pldmIface->pldmRspTimer.isEnabled())
+    {
+        // stop PLDM response timer
+        pldmIface->pldmRspTimer.setEnabled(false);
+    }
+
+    // Set pointer to autodelete
+    std::unique_ptr<uint8_t, decltype(std::free)*> responseMsgPtr{responseMsg,
+                                                                  std::free};
+
+    // The first payload byte of a PLDM response is its completion code;
+    // when the responder reports a failure there is nothing to decode, so
+    // close the socket and give up on this request.
+    auto response = reinterpret_cast<pldm_msg*>(responseMsgPtr.get());
+    if (response->payload[0] != PLDM_SUCCESS)
+    {
+        log<level::ERR>(
+            fmt::format("pldmRspCallback: payload[0] was not success: {}",
+                        response->payload[0])
+                .c_str());
+        pldmIface->pldmClose();
+        return -1;
+    }
+
+    // Decode the response; only field[0] (the OCC Active state) is used
+    uint8_t compCode = 0, sensorCount = 1;
+    get_sensor_state_field field[8]; // a response may carry up to 8 fields
+    responseMsgSize -= sizeof(pldm_msg_hdr);
+    auto msgRc = decode_get_state_sensor_readings_resp(
+        response, responseMsgSize, &compCode, &sensorCount, field);
+    if ((msgRc != PLDM_SUCCESS) || (compCode != PLDM_SUCCESS))
+    {
+        log<level::ERR>(
+            fmt::format(
+                "pldmRspCallback: decode_get_state_sensor_readings failed with rc={} and compCode={}",
+                msgRc, compCode)
+                .c_str());
+        pldmIface->pldmClose();
+        return -1;
+    }
+
+    pldmIface->pldmClose();
+
+    const uint8_t instance = pldmIface->pldmResponseOcc;
+    const uint8_t occSensorState = field[0].present_state;
+    pldmIface->pldmResponseReceived = true;
+
+    if (occSensorState == PLDM_STATE_SET_OPERATIONAL_RUNNING_STATUS_IN_SERVICE)
+    {
+        log<level::INFO>(
+            fmt::format("pldmRspCallback: OCC{} is RUNNING", instance).c_str());
+        pldmIface->callBack(instance, true);
+    }
+    else
+    {
+        log<level::INFO>(
+            fmt::format("pldmRspCallback: OCC{} is not running (state:{})",
+                        instance, occSensorState)
+                .c_str());
+        pldmIface->callBack(instance, false);
+    }
+
+    return 0;
+}
+
+std::vector<uint8_t> Interface::encodeGetStateSensorRequest(uint8_t instance,
+                                                            uint16_t sensorId)
+{
+    bitfield8_t sRearm = {0};
+    std::vector<uint8_t> request(sizeof(pldm_msg_hdr) +
+                                 PLDM_GET_STATE_SENSOR_READINGS_REQ_BYTES);
+    auto msg = reinterpret_cast<pldm_msg*>(request.data());
+    auto msgRc = encode_get_state_sensor_readings_req(mctpInstance, sensorId,
+                                                      sRearm, 0, msg);
+    if (msgRc != PLDM_SUCCESS)
+    {
+        log<level::ERR>(
+            fmt::format(
+                "encodeGetStateSensorRequest: Failed to encode sensorId:0x{:08X} for OCC{} (rc={})",
+                sensorId, instance, msgRc)
+                .c_str());
+        request.clear(); // empty result signals failure to the caller
+    }
+    return request;
+}
+
+// Initiate query of the specified OCC Active Sensor
+void Interface::checkActiveSensor(uint8_t instance)
+{
+ static bool tracedOnce = false;
+ if (pldmFd > 0)
+ {
+ if (!tracedOnce)
{
log<level::ERR>(
fmt::format(
- "sendPldm: pldm_send({},{},req,{}) failed with rc={} and errno={}",
- mctpEid, fileFd(), request.size(), rc, errno)
+ "checkActiveSensor: already waiting on OCC{} (fd={})",
+ pldmResponseOcc, pldmFd)
.c_str());
+ tracedOnce = true;
}
+ return;
+ }
+ tracedOnce = false;
+
+ if (!isOCCSensorCacheValid())
+ {
+ fetchSensorInfo(PLDM_STATE_SET_OPERATIONAL_RUNNING_STATUS,
+ sensorToOCCInstance, OCCSensorOffset);
+ }
+
+ // look up sensor id (key) based on instance
+ auto entry = std::find_if(
+ sensorToOCCInstance.begin(), sensorToOCCInstance.end(),
+ [instance](const auto& entry) { return instance == entry.second; });
+ if (entry != sensorToOCCInstance.end())
+ {
+ // Query the OCC Active Sensor state for this instance
+ // SensorID sID = entry->first;
+ log<level::INFO>(
+ fmt::format("checkActiveSensor: OCC{} / sensorID: 0x{:04X}",
+ instance, entry->first)
+ .c_str());
+
+ if (!getMctpInstanceId(mctpInstance))
+ {
+ log<level::ERR>("checkActiveSensor: failed to getMctpInstanceId");
+ return;
+ }
+
+ // Encode GetStateSensorReadings PLDM message
+ auto request = encodeGetStateSensorRequest(instance, entry->first);
+ if (request.empty())
+ {
+ return;
+ }
+
+ // Send request to PLDM and setup callback for response
+ sendPldm(request, instance, true);
+ }
+ else
+ {
+ log<level::ERR>(
+ fmt::format(
+ "checkActiveSensor: Unable to find PLDM sensor for OCC{}",
+ instance)
+ .c_str());
}
}
diff --git a/pldm.hpp b/pldm.hpp
index 6e3db09..1ccdfa6 100644
--- a/pldm.hpp
+++ b/pldm.hpp
@@ -1,16 +1,19 @@
#pragma once
-
+#include "occ_events.hpp"
#include "occ_status.hpp"
#include "utils.hpp"
#include <libpldm/pldm.h>
#include <sdbusplus/bus/match.hpp>
+#include <sdeventplus/event.hpp>
+#include <sdeventplus/utility/timer.hpp>
namespace pldm
{
namespace MatchRules = sdbusplus::bus::match::rules;
+using namespace open_power::occ;
using CompositeEffecterCount = uint8_t;
using EffecterID = uint16_t;
@@ -54,9 +57,10 @@
*/
explicit Interface(
std::function<bool(open_power::occ::instanceID, bool)> callBack,
- std::function<void(open_power::occ::instanceID, bool)> sbeCallBack) :
+ std::function<void(open_power::occ::instanceID, bool)> sbeCallBack,
+ EventPtr& event) :
callBack(callBack),
- sbeCallBack(sbeCallBack),
+ sbeCallBack(sbeCallBack), event(event),
pldmEventSignal(
open_power::occ::utils::getBus(),
MatchRules::type::signal() +
@@ -70,7 +74,11 @@
MatchRules::propertiesChanged("/xyz/openbmc_project/state/host0",
"xyz.openbmc_project.State.Host"),
std::bind(std::mem_fn(&Interface::hostStateEvent), this,
- std::placeholders::_1))
+ std::placeholders::_1)),
+ sdpEvent(sdeventplus::Event::get_default()),
+ pldmRspTimer(
+ sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>(
+ sdpEvent, std::bind(&Interface::pldmRspExpired, this)))
{}
/** @brief Fetch the state sensor PDRs and populate the cache with
@@ -126,6 +134,14 @@
*/
void sendHRESET(open_power::occ::instanceID sbeInstanceId);
+ /** @brief Check if the OCC active sensor is available
+ * On successful read, the Manager callback will be called to update
+ * the status
+ *
+ * @param[in] instance - OCC instance to check
+ */
+ void checkActiveSensor(uint8_t instance);
+
private:
/** @brief Callback handler to be invoked when the state of the OCC
* changes
@@ -138,6 +154,12 @@
std::function<void(open_power::occ::instanceID, bool)> sbeCallBack =
nullptr;
+ /** @brief reference to sd_event wrapped in unique_ptr */
+ EventPtr& event;
+
+ /** @brief event source wrapped in unique_ptr */
+ EventSourcePtr eventSource;
+
/** @brief Used to subscribe to D-Bus PLDM StateSensorEvent signal and
* processes if the event corresponds to OCC state change.
*/
@@ -188,6 +210,39 @@
*/
uint8_t sbeMaintenanceStatePosition = 0;
+ /** @brief OCC instance number for the PLDM message */
+ uint8_t pldmResponseOcc = 0;
+
+ /** @brief File descriptor for PLDM messages */
+ int pldmFd = -1;
+
+ /** @brief MCTP instance number used in PLDM requests
+ */
+ uint8_t mctpInstance{};
+
+ /** @brief The response for the PLDM request msg is received flag.
+ */
+ bool pldmResponseReceived = false;
+
+ /** @brief The response for the PLDM request has timed out.
+ */
+ bool pldmResponseTimeout = false;
+
+ /** @brief timer event */
+ sdeventplus::Event sdpEvent;
+
+ /** @brief Timer that is started when PLDM command is sent
+ */
+ sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic> pldmRspTimer;
+
+ /** @brief Callback when PLDM response has not been received within the
+ * timeout period.
+ */
+ void pldmRspExpired();
+
+ /** @brief Close the MCTP file */
+ void pldmClose();
+
/** @brief When the OCC state changes host sends PlatformEventMessage
* StateSensorEvent, this function processes the D-Bus signal
* with the sensor event information and invokes the callback
@@ -205,6 +260,11 @@
*/
void hostStateEvent(sdbusplus::message::message& msg);
+ /** @brief Called when it is determined that the Host is not running.
+ * The cache of OCC sensors and effecters mapping is cleared.
+ */
+ void clearData();
+
/** @brief Check if the PDR cache for PLDM OCC sensors is valid
*
* @return true if cache is populated and false if the cache is not
@@ -233,13 +293,38 @@
*/
bool getMctpInstanceId(uint8_t& instanceId);
+ /** @brief Encode a GetStateSensor command into a PLDM request
+ * @param[in] instance - OCC instance number
+ * @param[in] sensorId - OCC Active sensor ID number
+ *
+     * @return request - The encoded PLDM message to be sent
+ */
+ std::vector<uint8_t> encodeGetStateSensorRequest(uint8_t instance,
+ uint16_t sensorId);
/** @brief Send the PLDM request
*
* @param[in] request - the request data
- * @param[in] async - false: wait for response, true: return immediately
+ * @param[in] rspExpected - false: no need to wait for the response
+ * true: will need to process response in callback
*/
- void sendPldm(const std::vector<uint8_t>& request,
- const bool async = false);
+ void sendPldm(const std::vector<uint8_t>& request, const uint8_t instance,
+ const bool rspExpected = false);
+
+ /** @brief Register a callback function to handle the PLDM response */
+ void registerPldmRspCallback();
+
+ /** @brief callback for the PLDM response event
+ *
+ * @param[in] es - Populated event source
+ * @param[in] fd - Associated File descriptor
+ * @param[in] revents - Type of event
+ * @param[in] userData - User data that was passed during registration
+ *
+ * @return - 0 or positive number on success and negative
+ * errno otherwise
+ */
+ static int pldmRspCallback(sd_event_source* es, int fd, uint32_t revents,
+ void* userData);
};
} // namespace pldm
diff --git a/utils.cpp b/utils.cpp
index e79bc5a..78b4234 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -1,10 +1,14 @@
#include "utils.hpp"
#include <fmt/core.h>
+#include <systemd/sd-event.h>
+#include <unistd.h>
#include <phosphor-logging/elog-errors.hpp>
#include <sdbusplus/bus.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
+#include <xyz/openbmc_project/State/Boot/Progress/server.hpp>
+#include <xyz/openbmc_project/State/Host/server.hpp>
#include <string>
namespace open_power
@@ -18,6 +22,10 @@
using InternalFailure =
sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
+using BootProgress = sdbusplus::xyz::openbmc_project::State::Boot::server::
+ Progress::ProgressStages;
+constexpr auto HOST_STATE_OBJ_PATH = "/xyz/openbmc_project/state/host0";
+
const std::string getService(const std::string& path,
const std::string& interface)
{
@@ -179,6 +187,79 @@
return service;
}
+/**
+ * @brief Read a string D-Bus property via org.freedesktop.DBus.Properties.Get
+ * @param[in] intf/objPath/state - interface, object path and property name
+ * @return the property value
+ * @throws std::runtime_error if no service is found, the call fails, or the
+ *         property is not a string
+ */
+std::string getStateValue(const std::string& intf, const std::string& objPath,
+                          const std::string& state)
+{
+    std::string value;
+    try
+    {
+        auto& dbus = getBus();
+        const auto service = getService(objPath, intf);
+        if (service.empty())
+        {
+            throw std::runtime_error("getStateValue: Failed to get service");
+        }
+        auto getCall =
+            dbus.new_method_call(service.c_str(), objPath.c_str(),
+                                 "org.freedesktop.DBus.Properties", "Get");
+        getCall.append(intf, state);
+        auto response = dbus.call(getCall);
+        std::variant<std::string> property;
+        response.read(property);
+        value = std::get<std::string>(property);
+    }
+    catch (const sdbusplus::exception::exception& e)
+    {
+        log<level::ERR>(fmt::format("D-Bus call exception, OBJPATH({}), "
+                                    "INTERFACE({}), PROPERTY({}) EXCEPTION({})",
+                                    objPath, intf, state, e.what())
+                            .c_str());
+        throw std::runtime_error("Failed to get host state property");
+    }
+    catch (const std::bad_variant_access& e)
+    {
+        log<level::ERR>(
+            fmt::format("Exception raised while read host state({}) property "
+                        "value, OBJPATH({}), INTERFACE({}), EXCEPTION({})",
+                        state, objPath, intf, e.what())
+                .c_str());
+        throw std::runtime_error("Failed to get host state property");
+    }
+    return value;
+}
+
+// Fetch the BootProgress property of the host state object and convert it
+// to the ProgressStages enum.
+// std::runtime_error from getStateValue() propagates to the caller.
+BootProgress getBootProgress()
+{
+    constexpr auto progressIntf = "xyz.openbmc_project.State.Boot.Progress";
+    const std::string stage =
+        getStateValue(progressIntf, HOST_STATE_OBJ_PATH, "BootProgress");
+    return sdbusplus::xyz::openbmc_project::State::Boot::server::Progress::
+        convertProgressStagesFromString(stage);
+}
+
+// Report whether the host boot has progressed far enough to be treated
+// as running (one of the four BootProgress stages tested below).
+bool isHostRunning()
+{
+    const BootProgress stage = getBootProgress();
+    const bool running = (stage == BootProgress::SystemInitComplete) ||
+                         (stage == BootProgress::SystemSetup) ||
+                         (stage == BootProgress::OSStart) ||
+                         (stage == BootProgress::OSRunning);
+
+    return running;
+}
+
} // namespace utils
} // namespace occ
} // namespace open_power
diff --git a/utils.hpp b/utils.hpp
index 5033222..3a6bad5 100644
--- a/utils.hpp
+++ b/utils.hpp
@@ -89,6 +89,13 @@
std::string getServiceUsingSubTree(const std::string& interface,
std::string& path);
+/**
+ * @brief Get status of the host
+ *
+ * @return true if the host is running, else false
+ */
+bool isHostRunning();
+
} // namespace utils
} // namespace occ
} // namespace open_power