Patch to attempt to reset the OCC when it fails to activate
diff --git a/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch b/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch
new file mode 100644
index 0000000..4b666b0
--- /dev/null
+++ b/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch
@@ -0,0 +1,301 @@
+From 98fc2914b15e89c2324c1636af62225c653e45f9 Mon Sep 17 00:00:00 2001
+From: Doug Gilbert <dgilbert@us.ibm.com>
+Date: Tue, 3 Mar 2015 16:00:29 -0600
+Subject: [PATCH] HTMGT add attempt to reset OCC when OCC Activate fails
+
+Change-Id: I964d2b68216c3ddabae73ce3b851bbc468ec96a7
+RTC: 123180
+---
+ src/include/usr/htmgt/htmgt_reasoncodes.H | 1 +
+ src/usr/htmgt/htmgt.C | 123 ++++++++++++++++++------------
+ src/usr/htmgt/htmgt_activate.C | 9 +++
+ src/usr/htmgt/htmgt_occ.C | 32 +++++++-
+ src/usr/htmgt/htmgt_occ.H | 1 +
+ 5 files changed, 116 insertions(+), 50 deletions(-)
+
+diff --git a/src/include/usr/htmgt/htmgt_reasoncodes.H b/src/include/usr/htmgt/htmgt_reasoncodes.H
+index ade192d..6fe269d 100644
+--- a/src/include/usr/htmgt/htmgt_reasoncodes.H
++++ b/src/include/usr/htmgt/htmgt_reasoncodes.H
+@@ -48,6 +48,7 @@ namespace HTMGT
+ HTMGT_MOD_CHECK_OCC_RSP = 0x92,
+ HTMGT_MOD_PARSE_OCC_RSP = 0x94,
+ HTMGT_MOD_HANLDE_OCC_EXCEPTION = 0xE0,
++ HTMGT_MOD_ENABLE_OCC_ACTUATION = 0xE1,
+ };
+
+ enum htmgtReasonCode
+diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C
+index aff2500..a2f556f 100644
+--- a/src/usr/htmgt/htmgt.C
++++ b/src/usr/htmgt/htmgt.C
+@@ -184,42 +184,30 @@ namespace HTMGT
+
+ if (NULL != l_err)
+ {
+- TMGT_ERR("OCCs not all active. System will stay in safe mode");
++ TMGT_ERR("OCCs not all active. Attempting OCC Reset");
+ TMGT_CONSOLE("OCCs are not active (rc=0x%04X). "
+- "System will remain in safe mode",
++ "Attempting OCC Reset",
+ l_err->reasonCode());
+- TMGT_INF("Calling HBOCC::stopAllOCCs");
+- errlHndl_t err2 = HBOCC::stopAllOCCs();
++ TMGT_INF("Calling resetOccs");
++ errlHndl_t err2 = OccManager::resetOccs(NULL);
+ if(err2)
+ {
+- TMGT_ERR("stopAllOCCs() failed with 0x%04X",
++ TMGT_ERR("OccManager::resetOccs failed with 0x%04X",
++ err2->reasonCode());
+- ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+- }
+-
+- // Update error log to unrecoverable and set SRC
+- // to indicate the system will remain in safe mode
+- /*@
+- * @errortype
+- * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
+- * @moduleid HTMGT_MOD_LOAD_START_STATUS
+- * @userdata1[0:7] load/start completed
+- * @devdesc OCCs did not all reach active state,
+- * system will be in Safe Mode
+- */
+- bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS,
+- HTMGT_RC_OCC_CRIT_FAILURE,
+- i_startCompleted, 0, 0, 1,
+- ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+
+- // Add level 2 support callout
+- l_err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
+- HWAS::SRCI_PRIORITY_MED);
+- // Add HB firmware callout
+- l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+- HWAS::SRCI_PRIORITY_MED);
++ // Set original error log as unrecoverable and commit
++ l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
++ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+
+- ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
++ // Commit occReset error
++ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++ }
++ else
++ {
++ // retry worked - commit original error as informational
++ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
++ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
++ }
+ }
+
+ } // end processOccStartStatus()
+@@ -229,6 +217,19 @@ namespace HTMGT
+ // Notify HTMGT that an OCC has an error to report
+ void processOccError(TARGETING::Target * i_procTarget)
+ {
++ TARGETING::Target* sys = NULL;
++ TARGETING::targetService().getTopLevelTarget(sys);
++ uint8_t safeMode = 0;
++
++ // If the system is in safemode then can't talk to OCCs -
++ // ignore call to processOccError
++ if(sys &&
++ sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
++ safeMode)
++ {
++ return;
++ }
++
+ bool polledOneOcc = false;
+ OccManager::buildOccs();
+
+@@ -347,29 +348,57 @@ namespace HTMGT
+ // Set the OCC state
+ errlHndl_t enableOccActuation(bool i_occActivation)
+ {
+- occStateId targetState = OCC_STATE_ACTIVE;
+- if (false == i_occActivation)
+- {
+- targetState = OCC_STATE_OBSERVATION;
+- }
++ errlHndl_t l_err = NULL;
++ TARGETING::Target* sys = NULL;
++
++ TARGETING::targetService().getTopLevelTarget(sys);
++ uint8_t safeMode = 0;
+
+- // Set state for all OCCs
+- errlHndl_t l_err = OccManager::setOccState(targetState);
+- if (NULL == l_err)
++ // If the system is in safemode then can't talk to OCCs -
++ // ignore call to enableOccActuation
++ if(sys &&
++ sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
++ safeMode)
+ {
+- TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
+- targetState);
++ /*@
++ * @errortype
++ * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
++ * @moduleid HTMGT_MOD_ENABLE_OCC_ACTUATION
++ * @userdata1[0:7] OCC activate [1==true][0==false]
++ * @devdesc Invalid operation when OCCs are in safemode
++ */
++ bldErrLog(l_err,
++ HTMGT_MOD_ENABLE_OCC_ACTUATION,
++ HTMGT_RC_OCC_CRIT_FAILURE,
++ i_occActivation, 0, 0, 1,
++ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ }
+-
+- if (OccManager::occNeedsReset())
++ else
+ {
+- TMGT_ERR("enableOccActuation(): OCCs need to be reset");
+- // Don't pass failed target as OCC should have already
+- // been marked as failed during the poll.
+- errlHndl_t err2 = OccManager::resetOccs(NULL);
+- if(err2)
++ occStateId targetState = OCC_STATE_ACTIVE;
++ if (false == i_occActivation)
+ {
+- ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++ targetState = OCC_STATE_OBSERVATION;
++ }
++
++ // Set state for all OCCs
++ l_err = OccManager::setOccState(targetState);
++ if (NULL == l_err)
++ {
++ TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
++ targetState);
++ }
++
++ if (OccManager::occNeedsReset())
++ {
++ TMGT_ERR("enableOccActuation(): OCCs need to be reset");
++ // Don't pass failed target as OCC should have already
++ // been marked as failed during the poll.
++ errlHndl_t err2 = OccManager::resetOccs(NULL);
++ if(err2)
++ {
++ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++ }
+ }
+ }
+
+diff --git a/src/usr/htmgt/htmgt_activate.C b/src/usr/htmgt/htmgt_activate.C
+index 7f54d6d..4cb46f0 100644
+--- a/src/usr/htmgt/htmgt_activate.C
++++ b/src/usr/htmgt/htmgt_activate.C
+@@ -39,6 +39,7 @@
+
+ #include <ipmi/ipmisensor.H>
+ #include <sys/time.h>
++#include <console/consoleif.H>
+
+ using namespace TARGETING;
+
+@@ -163,6 +164,14 @@ namespace HTMGT
+ l_err = occ->ipmiSensor(i_activate);
+ if( l_err )
+ {
++ TMGT_ERR("setOccActiveSensors failed. (OCC%d state:%d)",
++ occ->getInstance(),
++ i_activate);
++
++ TMGT_CONSOLE("setOccActiveSensors failed. (OCC%d state:%d)",
++ occ->getInstance(),
++ i_activate);
++
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ }
+ }
+diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C
+index 8a539f4..bd95987 100644
+--- a/src/usr/htmgt/htmgt_occ.C
++++ b/src/usr/htmgt/htmgt_occ.C
+@@ -248,7 +248,8 @@ namespace HTMGT
+ OccManager::OccManager()
+ :iv_occMaster(NULL),
+ iv_state(OCC_STATE_UNKNOWN),
+- iv_targetState(OCC_STATE_ACTIVE)
++ iv_targetState(OCC_STATE_ACTIVE),
++ iv_resetCount(0)
+ {
+ }
+
+@@ -590,6 +591,19 @@ namespace HTMGT
+ ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+ }
+
++ if(NULL == i_failedOccTarget)
++ {
++ ++iv_resetCount; // increment system reset count
++
++ TMGT_INF("resetOCCs: Incrementing system OCC reset count to %d",
++ iv_resetCount);
++
++ if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD)
++ {
++ atThreshold = true;
++ }
++ }
++
+ for(occList_t::const_iterator occ = iv_occArray.begin();
+ occ != iv_occArray.end();
+ ++occ)
+@@ -663,7 +677,7 @@ namespace HTMGT
+ */
+ bldErrLog(err,
+ HTMTG_MOD_OCC_RESET,
+- HTMGT_RC_OCC_RESET_THREHOLD,
++ HTMGT_RC_OCC_CRIT_FAILURE,
+ 0, 0, 0, 0,
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ }
+@@ -673,6 +687,13 @@ namespace HTMGT
+ {
+ err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+
++ // Add level 2 support callout
++ err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
++ HWAS::SRCI_PRIORITY_MED);
++ // Add HB firmware callout
++ err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
++ HWAS::SRCI_PRIORITY_MED);
++
+ TARGETING::Target* sys = NULL;
+ TARGETING::targetService().getTopLevelTarget(sys);
+ uint8_t safeMode = 1;
+@@ -683,8 +704,13 @@ namespace HTMGT
+ sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode);
+ }
+
+- TMGT_ERR("_resetOccs: Safe Mode RC: 0x%04X (OCC%d)",
++ TMGT_ERR("_resetOccs: Safe Mode (RC: 0x%04X OCC%d)",
+ cv_safeReturnCode, cv_safeOccInstance);
++
++ TMGT_CONSOLE("OCCs are not active. The system will remain in "
++ "safe mode (RC: 0x%04x for OCC%d)",
++ cv_safeReturnCode,
++ cv_safeOccInstance);
+ }
+
+ return err;
+diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H
+index dec19b8..5ac545a 100644
+--- a/src/usr/htmgt/htmgt_occ.H
++++ b/src/usr/htmgt/htmgt_occ.H
+@@ -507,6 +507,7 @@ namespace HTMGT
+ occList_t iv_occArray;
+ occStateId iv_state;
+ occStateId iv_targetState;
++ uint8_t iv_resetCount;
+
+ /**
+ * @brief SRC that caused system to enter safe mode
+--
+1.8.2.2
+