Add SBE hreset support to SBE attention handler
When an SBE attention is being handled we will
request an SBE hreset and try to clear the SBE
attentions. If this is not successful we will
request an SBE dump and host re-IPL.
Signed-off-by: Ben Tyner <ben.tyner@ibm.com>
Change-Id: I5eb8e8728e92af54673cd3e37529eca9f6b03857
diff --git a/attn/attn_handler.cpp b/attn/attn_handler.cpp
index a0e428e..bfe70d1 100644
--- a/attn/attn_handler.cpp
+++ b/attn/attn_handler.cpp
@@ -52,9 +52,6 @@
*/
int handleSpecial(Attention* i_attention);
-/** @brief Determine if attention is active and not masked */
-bool activeAttn(uint32_t i_val, uint32_t i_mask, uint32_t i_attn);
-
#ifdef CONFIG_PHAL_API
/** @brief Handle phal sbe exception */
void phalSbeExceptionHandler(openpower::phal::exception::SbeError& e,
@@ -67,9 +64,6 @@
/** @brief Check if TI info data is valid */
bool tiInfoValid(uint8_t* tiInfo);
-/** @brief Clear attention interrupts */
-void clearAttnInterrupts();
-
/**
* @brief The main attention handler logic
*
@@ -396,19 +390,7 @@
return rc;
}
-/**
- * @brief Determine if attention is active and not masked
- *
- * Determine whether an attention needs to be handled and trace details of
- * attention type and whether it is masked or not.
- *
- * @param i_val attention status register
- * @param i_mask attention true mask register
- * @param i_attn attention type
- * @param i_proc processor associated with registers
- *
- * @return true if attention is active and not masked, otherwise false
- */
+/** @brief Determine if attention is active and not masked */
bool activeAttn(uint32_t i_val, uint32_t i_mask, uint32_t i_attn)
{
bool rc = false; // assume attn masked and/or inactive
@@ -564,13 +546,7 @@
return tiInfoValid;
}
-/**
- * @brief Clear attention interrupts
- *
- * The attention interrupts are sticky and may still be set (MPIPL) even if
- * there are no active attentions. If there is an active attention then
- * clearing the associated interrupt will have no effect.
- */
+/** @brief Clear attention interrupts */
void clearAttnInterrupts()
{
trace::inf("Clearing attention interrupts");
diff --git a/attn/attn_handler.hpp b/attn/attn_handler.hpp
index 6099135..9feacb2 100644
--- a/attn/attn_handler.hpp
+++ b/attn/attn_handler.hpp
@@ -4,7 +4,6 @@
namespace attn
{
-
/** @brief Attention global status bits */
constexpr uint32_t SBE_ATTN = 0x00000002;
constexpr uint32_t ANY_ATTN = 0x80000000;
@@ -13,6 +12,15 @@
constexpr uint32_t RECOVERABLE_ATTN = 0x10000000;
/**
+ * @brief Clear attention interrupts
+ *
+ * The attention interrupts are sticky and may still be set (MPIPL) even if
+ * there are no active attentions. If there is an active attention then
+ * clearing the associated interrupt will have no effect.
+ */
+void clearAttnInterrupts();
+
+/**
* @brief The main attention handler logic
*
* Check each processor for active attentions of type SBE Vital (vital),
@@ -33,4 +41,19 @@
*/
void attnHandler(Config* i_config);
+/**
+ * @brief Determine if attention is active and not masked
+ *
+ * Determine whether an attention needs to be handled and trace details of
+ * attention type and whether it is masked or not.
+ *
+ * @param i_val  attention status register
+ * @param i_mask attention true mask register
+ * @param i_attn attention type to check for in the status register
+ *               (e.g. CHECKSTOP_ATTN)
+ *
+ * @return true if attention is active and not masked, otherwise false
+ */
+bool activeAttn(uint32_t i_val, uint32_t i_mask, uint32_t i_attn);
+
} // namespace attn
diff --git a/attn/attn_logging.cpp b/attn/attn_logging.cpp
index 7f621df..e3571dd 100644
--- a/attn/attn_logging.cpp
+++ b/attn/attn_logging.cpp
@@ -13,7 +13,6 @@
namespace attn
{
-
/** @brief Tuple containing information about ffdc files */
using FFDCTuple =
std::tuple<util::FFDCFormat, uint8_t, uint8_t, sdbusplus::message::unix_fd>;
@@ -311,11 +310,13 @@
* @param i_event - The event type
* @param i_additional - Additional PEL data
* @param i_ffdc - FFDC PEL data
+ * @param i_severity - Severity level
* @return Event log Id (0 if no event log generated)
*/
uint32_t event(EventType i_event,
std::map<std::string, std::string>& i_additional,
- const std::vector<util::FFDCFile>& i_ffdc)
+ const std::vector<util::FFDCFile>& i_ffdc,
+ std::string i_severity = levelPelError)
{
uint32_t pelId = 0; // assume no event log generated
@@ -356,7 +357,7 @@
{
// Create PEL with additional data and FFDC data. The newly created
// PEL's platform log-id will be returned.
- pelId = util::dbus::createPel(eventName, levelPelError, i_additional,
+ pelId = util::dbus::createPel(eventName, i_severity, i_additional,
createFFDCTuples(i_ffdc));
// If this is a TI event we will create an additional PEL that is
@@ -482,13 +483,14 @@
}
/** @brief Commit SBE vital event to log, returns event log ID */
-uint32_t eventVital()
+uint32_t eventVital(std::string severity)
{
// Additional data for log
std::map<std::string, std::string> additionalData;
// Create log event with additional data and FFDC data
- return event(EventType::Vital, additionalData, createFFDCFiles(nullptr, 0));
+ return event(EventType::Vital, additionalData, createFFDCFiles(nullptr, 0),
+ severity);
}
/**
diff --git a/attn/attn_logging.hpp b/attn/attn_logging.hpp
index efbabf7..096a785 100644
--- a/attn/attn_logging.hpp
+++ b/attn/attn_logging.hpp
@@ -9,9 +9,10 @@
namespace attn
{
-
constexpr auto pathLogging = "/xyz/openbmc_project/logging";
constexpr auto levelPelError = "xyz.openbmc_project.Logging.Entry.Level.Error";
+constexpr auto levelPelInfo =
+ "xyz.openbmc_project.Logging.Entry.Level.Informational";
constexpr auto eventPelTerminate = "xyz.open_power.Attn.Error.Terminate";
/** @brief Logging event types */
@@ -29,8 +30,13 @@
void eventTerminate(std::map<std::string, std::string> i_additionalData,
char* i_tiInfoData);
-/** @brief Commit SBE vital event to log, returns event log Id */
-uint32_t eventVital();
+/** @brief Commit SBE vital event to log
+ *
+ * @param[in] severity - the PEL severity level of the event
+ *
+ * @return platform event log (PEL) ID, 0 if no event log was generated
+ */
+uint32_t eventVital(std::string severity);
/** @brief Commit attention handler failure event to log */
void eventAttentionFail(int i_error);
diff --git a/attn/vital_handler.cpp b/attn/vital_handler.cpp
index c076256..b4389df 100644
--- a/attn/vital_handler.cpp
+++ b/attn/vital_handler.cpp
@@ -1,53 +1,170 @@
#include <attn/attention.hpp>
#include <attn/attn_common.hpp>
#include <attn/attn_dump.hpp>
+#include <attn/attn_handler.hpp>
#include <attn/attn_logging.hpp>
#include <sdbusplus/bus.hpp>
#include <util/dbus.hpp>
+#include <util/pdbg.hpp>
+#include <util/pldm.hpp>
#include <util/trace.hpp>
namespace attn
{
+/**
+ * @brief Request SBE hreset and try to clear sbe attentions
+ *
+ * @param[in] sbeInstance - sbe instance to hreset (0 based)
+ * @return true if the hreset succeeded and no sbe attention remains on any
+ *         enabled processor, false otherwise (including cfam read errors)
+ */
+bool attemptSbeRecovery(int sbeInstance)
+{
+    // request the sbe hreset, nothing more to do if the request fails
+    if (!util::pldm::hresetSbe(sbeInstance))
+    {
+        return false;
+    }
+
+    trace::inf("hreset completed");
+
+    // try to clear attention interrupts
+    clearAttnInterrupts();
+
+    // check all processors, any remaining sbe attention means not recovered
+    bool recovered = true;
+    pdbg_target* procTarget;
+    pdbg_for_each_class_target("proc", procTarget)
+    {
+        // active processors only
+        if (PDBG_TARGET_ENABLED !=
+            pdbg_target_probe(util::pdbg::getPibTrgt(procTarget)))
+        {
+            continue;
+        }
+
+        // cfam registers are read through the fsi target of the processor
+        pdbg_target* fsiTarget = util::pdbg::getFsiTrgt(procTarget);
+        uint32_t int_val;
+
+        // read attention interrupts (cfam 0x100b) on this processor
+        if (RC_SUCCESS == fsi_read(fsiTarget, 0x100b, &int_val))
+        {
+            if (int_val & SBE_ATTN)
+            {
+                trace::err("sbe attention did not clear");
+                recovered = false;
+                break;
+            }
+        }
+        else
+        {
+            // a failed cfam read is treated as not recovered
+            trace::err("cfam read error");
+            recovered = false;
+            break;
+        }
+    }
+
+    if (recovered)
+    {
+        trace::inf("sbe attention cleared");
+    }
+
+    return recovered;
+}
+
+/**
+ * @brief Check for active checkstop attention
+ *
+ * @param procInstance - proc to check for attentions
+ *
+ * @pre pdbg target associated with proc instance is enabled for fsi access
+ *
+ * @return true if checkstop active, false otherwise (incl. on read errors)
+ */
+bool checkstopActive(int procInstance)
+{
+    // get fsi target (buffer holds any int value, snprintf bounds the write)
+    char path[32];
+    snprintf(path, sizeof(path), "/proc%d/fsi", procInstance);
+    pdbg_target* fsiTarget = pdbg_target_from_path(nullptr, path);
+    if (nullptr == fsiTarget)
+    {
+        trace::inf("fsi path or target not found");
+        return false;
+    }
+
+    // check for active checkstop attention
+    int r;
+    uint32_t isr_val, isr_mask;
+
+    isr_val = 0xffffffff; // all ones after the read is treated as an error
+    r = fsi_read(fsiTarget, 0x1007, &isr_val);
+    if ((RC_SUCCESS != r) || (0xffffffff == isr_val))
+    {
+        trace::err("cfam 1007 read error");
+        return false;
+    }
+
+    isr_mask = 0xffffffff; // all ones after the read is treated as an error
+    r = fsi_read(fsiTarget, 0x100d, &isr_mask);
+    if ((RC_SUCCESS != r) || (0xffffffff == isr_mask))
+    {
+        trace::err("cfam 100d read error");
+        return false;
+    }
+
+    return activeAttn(isr_val, isr_mask, CHECKSTOP_ATTN);
+}
/**
* @brief Handle SBE vital attention
*
- * @param i_attention Attention object
- * @return 0 indicates that the vital attention was successfully handled
- * 1 indicates that the vital attention was NOT successfully handled
+ * @param i_attention - attention object
+ *
+ * @return non-zero if attention was not successfully handled
*/
int handleVital(Attention* i_attention)
{
- int rc = RC_SUCCESS; // assume vital handled
-
trace::inf("vital handler started");
- // if vital handling enabled, handle vital attention
+ // if vital handling disabled
if (false == (i_attention->getConfig()->getFlag(enVital)))
{
trace::inf("vital handling disabled");
- rc = RC_NOT_HANDLED;
+ return RC_NOT_HANDLED;
}
- else
+
+    // wait for power fault handling, nothing to do if a power fault occurred
+ sleepSeconds(POWER_FAULT_WAIT);
+ if (util::dbus::powerFault())
{
- // wait for power fault handling before starting analyses
- sleepSeconds(POWER_FAULT_WAIT);
-
- // generate pel
- auto pelId = eventVital();
-
- // conditionally request dump
- if ((0 != pelId) && (util::dbus::HostRunningState::NotStarted ==
- util::dbus::hostRunningState()))
- {
- requestDump(pelId, DumpParameters{0, DumpType::SBE});
- }
-
- // transition host
- util::dbus::transitionHost(util::dbus::HostState::Quiesce);
+ trace::inf("power fault was reported");
+ return RC_SUCCESS;
}
- return rc;
+    // attempt sbe recovery only if no checkstop is active and host is running
+ int instance =
+ pdbg_target_index(i_attention->getTarget()); // get processor number
+
+ if (!checkstopActive(instance) &&
+ util::dbus::HostRunningState::Started == util::dbus::hostRunningState())
+ {
+ // attempt to recover the sbe
+ if (attemptSbeRecovery(instance))
+ {
+ eventVital(levelPelInfo);
+ return RC_SUCCESS;
+ }
+ }
+
+ // host not running, checkstop active or recovery failed
+ auto pelId = eventVital(levelPelError);
+ requestDump(pelId, DumpParameters{0, DumpType::SBE});
+ util::dbus::transitionHost(util::dbus::HostState::Quiesce);
+
+ return RC_SUCCESS;
}
} // namespace attn