Add SBE hreset support to SBE attention handler

When an SBE attention is being handled we will
request an SBE hreset and try to clear the SBE
attentions. If this is not successful we will
request an SBE dump and a host re-IPL.

Signed-off-by: Ben Tyner <ben.tyner@ibm.com>
Change-Id: I5eb8e8728e92af54673cd3e37529eca9f6b03857
diff --git a/attn/attn_handler.cpp b/attn/attn_handler.cpp
index a0e428e..bfe70d1 100644
--- a/attn/attn_handler.cpp
+++ b/attn/attn_handler.cpp
@@ -52,9 +52,6 @@
  */
 int handleSpecial(Attention* i_attention);
 
-/** @brief Determine if attention is active and not masked */
-bool activeAttn(uint32_t i_val, uint32_t i_mask, uint32_t i_attn);
-
 #ifdef CONFIG_PHAL_API
 /** @brief Handle phal sbe exception */
 void phalSbeExceptionHandler(openpower::phal::exception::SbeError& e,
@@ -67,9 +64,6 @@
 /** @brief Check if TI info data is valid */
 bool tiInfoValid(uint8_t* tiInfo);
 
-/** @brief Clear attention interrupts */
-void clearAttnInterrupts();
-
 /**
  * @brief The main attention handler logic
  *
@@ -396,19 +390,7 @@
     return rc;
 }
 
-/**
- * @brief Determine if attention is active and not masked
- *
- * Determine whether an attention needs to be handled and trace details of
- * attention type and whether it is masked or not.
- *
- * @param i_val attention status register
- * @param i_mask attention true mask register
- * @param i_attn attention type
- * @param i_proc processor associated with registers
- *
- * @return true if attention is active and not masked, otherwise false
- */
+/** @brief Determine if attention is active and not masked */
 bool activeAttn(uint32_t i_val, uint32_t i_mask, uint32_t i_attn)
 {
     bool rc = false; // assume attn masked and/or inactive
@@ -564,13 +546,7 @@
     return tiInfoValid;
 }
 
-/**
- * @brief Clear attention interrupts
- *
- * The attention interrupts are sticky and may still be set (MPIPL) even if
- * there are no active attentions. If there is an active attention then
- * clearing the associated interrupt will have no effect.
- */
+/** @brief Clear attention interrupts */
 void clearAttnInterrupts()
 {
     trace::inf("Clearing attention interrupts");
diff --git a/attn/attn_handler.hpp b/attn/attn_handler.hpp
index 6099135..9feacb2 100644
--- a/attn/attn_handler.hpp
+++ b/attn/attn_handler.hpp
@@ -4,7 +4,6 @@
 
 namespace attn
 {
-
 /** @brief Attention global status bits */
 constexpr uint32_t SBE_ATTN         = 0x00000002;
 constexpr uint32_t ANY_ATTN         = 0x80000000;
@@ -13,6 +12,15 @@
 constexpr uint32_t RECOVERABLE_ATTN = 0x10000000;
 
 /**
+ * @brief Clear attention interrupts
+ *
+ * The attention interrupts are sticky and may still be set (MPIPL) even if
+ * there are no active attentions. If there is an active attention then
+ * clearing the associated interrupt will have no effect.
+ */
+void clearAttnInterrupts();
+
+/**
  * @brief The main attention handler logic
  *
  * Check each processor for active attentions of type SBE Vital (vital),
@@ -33,4 +41,19 @@
  */
 void attnHandler(Config* i_config);
 
+/**
+ * @brief Determine if attention is active and not masked
+ *
+ * Determine whether an attention needs to be handled and trace details of
+ * attention type and whether it is masked or not.
+ *
+ * @param i_val attention status register
+ * @param i_mask attention true mask register
+ * @param i_attn attention type
+ * @param i_proc processor associated with registers
+ *
+ * @return true if attention is active and not masked, otherwise false
+ */
+bool activeAttn(uint32_t i_val, uint32_t i_mask, uint32_t i_attn);
+
 } // namespace attn
diff --git a/attn/attn_logging.cpp b/attn/attn_logging.cpp
index 7f621df..e3571dd 100644
--- a/attn/attn_logging.cpp
+++ b/attn/attn_logging.cpp
@@ -13,7 +13,6 @@
 
 namespace attn
 {
-
 /** @brief Tuple containing information about ffdc files */
 using FFDCTuple =
     std::tuple<util::FFDCFormat, uint8_t, uint8_t, sdbusplus::message::unix_fd>;
@@ -311,11 +310,13 @@
  * @param  i_event - The event type
  * @param  i_additional - Additional PEL data
  * @param  i_ffdc - FFDC PEL data
+ * @param  i_severity - Severity level
  * @return Event log Id (0 if no event log generated)
  */
 uint32_t event(EventType i_event,
                std::map<std::string, std::string>& i_additional,
-               const std::vector<util::FFDCFile>& i_ffdc)
+               const std::vector<util::FFDCFile>& i_ffdc,
+               std::string i_severity = levelPelError)
 {
     uint32_t pelId = 0; // assume no event log generated
 
@@ -356,7 +357,7 @@
     {
         // Create PEL with additional data and FFDC data. The newly created
         // PEL's platform log-id will be returned.
-        pelId = util::dbus::createPel(eventName, levelPelError, i_additional,
+        pelId = util::dbus::createPel(eventName, i_severity, i_additional,
                                       createFFDCTuples(i_ffdc));
 
         // If this is a TI event we will create an additional PEL that is
@@ -482,13 +483,14 @@
 }
 
 /** @brief Commit SBE vital event to log, returns event log ID */
-uint32_t eventVital()
+uint32_t eventVital(std::string severity)
 {
     // Additional data for log
     std::map<std::string, std::string> additionalData;
 
     // Create log event with additional data and FFDC data
-    return event(EventType::Vital, additionalData, createFFDCFiles(nullptr, 0));
+    return event(EventType::Vital, additionalData, createFFDCFiles(nullptr, 0),
+                 severity);
 }
 
 /**
diff --git a/attn/attn_logging.hpp b/attn/attn_logging.hpp
index efbabf7..096a785 100644
--- a/attn/attn_logging.hpp
+++ b/attn/attn_logging.hpp
@@ -9,9 +9,10 @@
 
 namespace attn
 {
-
 constexpr auto pathLogging   = "/xyz/openbmc_project/logging";
 constexpr auto levelPelError = "xyz.openbmc_project.Logging.Entry.Level.Error";
+constexpr auto levelPelInfo =
+    "xyz.openbmc_project.Logging.Entry.Level.Informational";
 constexpr auto eventPelTerminate = "xyz.open_power.Attn.Error.Terminate";
 
 /** @brief Logging event types */
@@ -29,8 +30,13 @@
 void eventTerminate(std::map<std::string, std::string> i_additionalData,
                     char* i_tiInfoData);
 
-/** @brief Commit SBE vital event to log, returns event log Id */
-uint32_t eventVital();
+/** @brief Commit SBE vital event to log
+ *
+ *  @param[in] severity - the PEL severity level of the event
+ *
+ *  @return platform event log (PEL) ID
+ * */
+uint32_t eventVital(std::string severity);
 
 /** @brief Commit attention handler failure event to log */
 void eventAttentionFail(int i_error);
diff --git a/attn/vital_handler.cpp b/attn/vital_handler.cpp
index c076256..b4389df 100644
--- a/attn/vital_handler.cpp
+++ b/attn/vital_handler.cpp
@@ -1,53 +1,170 @@
 #include <attn/attention.hpp>
 #include <attn/attn_common.hpp>
 #include <attn/attn_dump.hpp>
+#include <attn/attn_handler.hpp>
 #include <attn/attn_logging.hpp>
 #include <sdbusplus/bus.hpp>
 #include <util/dbus.hpp>
+#include <util/pdbg.hpp>
+#include <util/pldm.hpp>
 #include <util/trace.hpp>
 
 namespace attn
 {
+/**
+ * @brief Request SBE hreset, then clear and re-check the attention interrupts
+ *
+ * @param[in] sbeInstance - sbe instance to hreset (0 based)
+ *
+ * @return true only if hreset succeeds and no enabled proc shows SBE_ATTN
+ */
+bool attemptSbeRecovery(int sbeInstance)
+{
+    // request the hreset; nothing further is attempted if it fails
+    if (!util::pldm::hresetSbe(sbeInstance))
+    {
+        return false;
+    }
+
+    trace::inf("hreset completed");
+
+    // try to clear attention interrupts
+    clearAttnInterrupts();
+
+    // check every proc: one SBE_ATTN still set or one failed read ends the scan
+    bool recovered = true;
+    pdbg_target* procTarget;
+    pdbg_for_each_class_target("proc", procTarget)
+    {
+        // skip processors whose pib target does not probe as enabled
+        if (PDBG_TARGET_ENABLED !=
+            pdbg_target_probe(util::pdbg::getPibTrgt(procTarget)))
+        {
+            continue;
+        }
+
+        // get cfam is an fsi read
+        pdbg_target* fsiTarget = util::pdbg::getFsiTrgt(procTarget);
+        uint32_t int_val;
+
+        // read cfam 0x100b and test the SBE_ATTN bit of the value read
+        if (RC_SUCCESS == fsi_read(fsiTarget, 0x100b, &int_val))
+        {
+            if (int_val & SBE_ATTN)
+            {
+                trace::err("sbe attention did not clear");
+                recovered = false;
+                break;
+            }
+        }
+        else
+        {
+            // a failed read is treated the same as an attention still set
+            trace::err("cfam read error");
+            recovered = false;
+            break;
+        }
+    }
+
+    if (recovered)
+    {
+        trace::inf("sbe attention cleared");
+    }
+
+    return recovered;
+}
+
+/**
+ * @brief Check for active checkstop attention
+ *
+ * @param procInstance - proc to check for attentions
+ *
+ * @pre pdbg target associated with proc instance is enabled for fsi access
+ *
+ * @return true if checkstop active, false otherwise (including read errors)
+ */
+bool checkstopActive(int procInstance)
+{
+    // get fsi target; bounded write so a large instance cannot overrun path
+    char path[16];
+    snprintf(path, sizeof(path), "/proc%d/fsi", procInstance);
+    pdbg_target* fsiTarget = pdbg_target_from_path(nullptr, path);
+    if (nullptr == fsiTarget)
+    {
+        trace::inf("fsi path or target not found");
+        return false;
+    }
+
+    // read status (0x1007) and mask (0x100d); all-ones counts as a read error
+    int r;
+    uint32_t isr_val, isr_mask;
+
+    isr_val = 0xffffffff;
+    r       = fsi_read(fsiTarget, 0x1007, &isr_val);
+    if ((RC_SUCCESS != r) || (0xffffffff == isr_val))
+    {
+        trace::err("cfam 1007 read error");
+        return false;
+    }
+
+    isr_mask = 0xffffffff;
+    r        = fsi_read(fsiTarget, 0x100d, &isr_mask);
+    if ((RC_SUCCESS != r) || (0xffffffff == isr_mask))
+    {
+        trace::err("cfam 100d read error");
+        return false;
+    }
+
+    return activeAttn(isr_val, isr_mask, CHECKSTOP_ATTN);
+}
 
 /**
  * @brief Handle SBE vital attention
  *
- * @param i_attention Attention object
- * @return 0 indicates that the vital attention was successfully handled
- *         1 indicates that the vital attention was NOT successfully handled
+ * @param i_attention - attention object
+ *
+ * @return non-zero if attention was not successfully handled
  */
 int handleVital(Attention* i_attention)
 {
-    int rc = RC_SUCCESS; // assume vital handled
-
     trace::inf("vital handler started");
 
-    // if vital handling enabled, handle vital attention
+    // if vital handling disabled
     if (false == (i_attention->getConfig()->getFlag(enVital)))
     {
         trace::inf("vital handling disabled");
-        rc = RC_NOT_HANDLED;
+        return RC_NOT_HANDLED;
     }
-    else
+
+    // if power fault then we don't do anything
+    sleepSeconds(POWER_FAULT_WAIT);
+    if (util::dbus::powerFault())
     {
-        // wait for power fault handling before starting analyses
-        sleepSeconds(POWER_FAULT_WAIT);
-
-        // generate pel
-        auto pelId = eventVital();
-
-        // conditionally request dump
-        if ((0 != pelId) && (util::dbus::HostRunningState::NotStarted ==
-                             util::dbus::hostRunningState()))
-        {
-            requestDump(pelId, DumpParameters{0, DumpType::SBE});
-        }
-
-        // transition host
-        util::dbus::transitionHost(util::dbus::HostState::Quiesce);
+        trace::inf("power fault was reported");
+        return RC_SUCCESS;
     }
 
-    return rc;
+    // if no checkstop and host is running
+    int instance =
+        pdbg_target_index(i_attention->getTarget()); // get processor number
+
+    if (!checkstopActive(instance) &&
+        util::dbus::HostRunningState::Started == util::dbus::hostRunningState())
+    {
+        // attempt to recover the sbe
+        if (attemptSbeRecovery(instance))
+        {
+            eventVital(levelPelInfo);
+            return RC_SUCCESS;
+        }
+    }
+
+    // host not running, checkstop active or recovery failed
+    auto pelId = eventVital(levelPelError);
+    requestDump(pelId, DumpParameters{0, DumpType::SBE});
+    util::dbus::transitionHost(util::dbus::HostState::Quiesce);
+
+    return RC_SUCCESS;
 }
 
 } // namespace attn