Merge pull request #78 from ploetzma/master
eSel to Sel and xScom OCC Patch. Update Habanero XML for CPU freq sensor fix
diff --git a/openpower/package/habanero-xml/habanero-xml.mk b/openpower/package/habanero-xml/habanero-xml.mk
index b2e759b..e1abd28 100644
--- a/openpower/package/habanero-xml/habanero-xml.mk
+++ b/openpower/package/habanero-xml/habanero-xml.mk
@@ -4,7 +4,7 @@
#
################################################################################
-HABANERO_XML_VERSION ?= f98000504ec8fafb4f8547c0427f0a8056e7b8b7
+HABANERO_XML_VERSION ?= 4c1e936c0329384020b126efe69d2148cfe48960
HABANERO_XML_SITE ?= $(call github,open-power,habanero-xml,$(HABANERO_XML_VERSION))
HABANERO_XML_LICENSE = Apache-2.0
diff --git a/openpower/package/hostboot/hostboot-0009-Sel-instead-of-eSel.patch b/openpower/package/hostboot/hostboot-0009-Sel-instead-of-eSel.patch
new file mode 100644
index 0000000..6b4c82d
--- /dev/null
+++ b/openpower/package/hostboot/hostboot-0009-Sel-instead-of-eSel.patch
@@ -0,0 +1,201 @@
+From 03029acd024ac886296f8ed0cbc711d9b81b26da Mon Sep 17 00:00:00 2001
+From: Brian Horton <brianh@linux.ibm.com>
+Date: Mon, 2 Mar 2015 12:12:28 -0600
+Subject: [PATCH] change error log to SEL processing
+
+for hostboot runtime, do not send eSEL (AMI bug)
+for hostboot ipl, send down SEL following eSEL
+
+Change-Id: I86ee9766e27548c3f7f72fbdbfd76c8a8be7da73
+RTC: 124971
+---
+ src/include/usr/ipmi/ipmisel.H | 18 ++++++++++--
+ src/usr/errl/errlmanager_common.C | 4 ++-
+ src/usr/ipmi/ipmisel.C | 61 ++++++++++++++++++++++++++++++++-------
+ 3 files changed, 70 insertions(+), 13 deletions(-)
+
+diff --git a/src/include/usr/ipmi/ipmisel.H b/src/include/usr/ipmi/ipmisel.H
+index de2dd32..91e3651 100644
+--- a/src/include/usr/ipmi/ipmisel.H
++++ b/src/include/usr/ipmi/ipmisel.H
+@@ -65,11 +65,13 @@ namespace IPMISEL
+ * @param[in] size of eSEL data
+ * @param[in] eid of errorlog for this eSEL (for ack)
+ * @param[in] event_dir_type for this eSEL
++ * @param[in] event_offset for this eSEL
+ * @param[in] sensorType that caused the error/eSEL
+ * @param[in] sensorNumber that caused the error/eSEL
+ */
+ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
+- uint32_t i_eid, uint8_t i_eventDirType,
++ uint32_t i_eid,
++ uint8_t i_eventDirType, uint8_t i_eventOffset,
+ uint8_t i_sensorType, uint8_t i_sensorNumber);
+
+ // per IPMI Spec, section 32.1 SEL Event Records
+@@ -85,6 +87,7 @@ namespace IPMISEL
+ format_ipmi_version_2_0 = 0x04,
+ };
+
++ // event_type, per section 42.1 of the IPMI spec
+ enum sel_event_dir_type
+ {
+ event_unspecified = 0x00,
+@@ -93,11 +96,22 @@ namespace IPMISEL
+ event_predictive = 0x04,
+ event_limit = 0x05,
+ event_permformance = 0x06,
++ event_transition = 0x07,
++ event_OEM = 0x70,
+ };
+
+ enum sel_event_data
+ {
+- event_data1_ami = 0xAA,
++ event_data1_ami = 0xAA,
++ event_data1_trans_to_ok = 0x00,
++ event_data1_trans_to_noncrit_from_ok = 0x01,
++ event_data1_trans_to_crit_from_less = 0x02,
++ event_data1_trans_to_non_recv_from_less = 0x03,
++ event_data1_trans_to_non_crit_from_more = 0x04,
++ event_data1_trans_to_crit_from_non_r = 0x05,
++ event_data1_trans_to_non_recoverable = 0x06,
++ event_data1_trans_monitor = 0x07,
++ event_data1_trans_informational = 0x08,
+ };
+
+ enum sel_generator_id
+diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C
+index a64ed3b..6c68818 100644
+--- a/src/usr/errl/errlmanager_common.C
++++ b/src/usr/errl/errlmanager_common.C
+@@ -135,7 +135,9 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
+ "sendErrLogToBmc: sensor %.2x/%.2x, size %d",
+ l_sensorType, l_sensorNumber, l_pelSize);
+ IPMISEL::sendESEL(l_pelData, l_pelSize,
+- io_err->eid(), IPMISEL::event_unspecified,
++ io_err->eid(),
++ IPMISEL::event_transition,
++ IPMISEL::event_data1_trans_to_non_recoverable,
+ l_sensorType, l_sensorNumber);
+
+ // free the buffer
+diff --git a/src/usr/ipmi/ipmisel.C b/src/usr/ipmi/ipmisel.C
+index c05c60f..49dcee6 100644
+--- a/src/usr/ipmi/ipmisel.C
++++ b/src/usr/ipmi/ipmisel.C
+@@ -82,7 +82,8 @@ enum esel_retry
+ namespace IPMISEL
+ {
+ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
+- uint32_t i_eid, uint8_t i_eventDirType,
++ uint32_t i_eid,
++ uint8_t i_eventDirType, uint8_t i_eventOffset,
+ uint8_t i_sensorType, uint8_t i_sensorNumber)
+ {
+ IPMI_TRAC(ENTER_MRK "sendESEL()");
+@@ -100,13 +101,13 @@ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
+
+ // create the sel record of information
+ selRecord l_sel;
+- l_sel.record_type = record_type_ami_esel;
++ l_sel.record_type = record_type_system_event;
+ l_sel.generator_id = generator_id_ami;
+ l_sel.evm_format_version = format_ipmi_version_2_0;
+ l_sel.sensor_type = i_sensorType;
+ l_sel.sensor_number = i_sensorNumber;
+ l_sel.event_dir_type = i_eventDirType;
+- l_sel.event_data1 = event_data1_ami;
++ l_sel.event_data1 = i_eventOffset;
+
+ eselInitData *eselData =
+ new eselInitData(&l_sel, i_eselData, i_dataSize);
+@@ -216,13 +217,16 @@ void send_esel(eselInitData * i_data,
+ {
+ IPMI_TRAC(ENTER_MRK "send_esel");
+ uint8_t* data = NULL;
+- const size_t l_eSELlen = i_data->dataSize;
+
+ size_t len = 0;
+- uint8_t reserveID[2] = {0,0};
+ uint8_t esel_recordID[2] = {0,0};
++ uint8_t sel_recordID[2] = {0,0};
+
++#ifndef __HOSTBOOT_RUNTIME
++// TODO RTC: 124972 take this out when runtime supports the eSEL
+ do{
++ const size_t l_eSELlen = i_data->dataSize;
++ uint8_t reserveID[2] = {0,0};
+ // we need to send down the extended sel data (eSEL), which is
+ // longer than the protocol buffer, so we need to do a reservation and
+ // call the AMI partial_add_esel command multiple times
+@@ -258,6 +262,9 @@ void send_esel(eselInitData * i_data,
+ // copy in the SEL event record data
+ memcpy(&data[PARTIAL_ADD_ESEL_REQ], i_data->eSel,
+ sizeof(selRecord));
++ // update to make this what AMI eSEL wants
++ data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] = record_type_ami_esel;
++ data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,event_data1)] = event_data1_ami;
+
+ o_cc = IPMI::CC_UNKBAD;
+ TRACFBIN( g_trac_ipmi, INFO_MRK"1st partial_add_esel:", data, len);
+@@ -338,17 +345,51 @@ void send_esel(eselInitData * i_data,
+ // BMC returns the recordID, it's always the same (unless
+ // there's a major BMC bug...)
+ storeReserveRecord(esel_recordID,data);
++ } // while eSELindex
++ }while(0);
++#endif
++
++ // if eSEL wasn't created due to an error, we don't want to continue
++ if(o_err == NULL)
++ {
++ // if the eSEL wasn't created due to a bad completion code, we will
++ // still try to send down a SEL that we create, which will contain
++ // the eSEL recordID (if it was successful)
++ delete [] data;
++ len = sizeof(IPMISEL::selRecord);
++ data = new uint8_t[len];
++
++ // copy in the SEL event record data
++ memcpy(data, i_data->eSel, sizeof(IPMISEL::selRecord));
++ // copy the eSEL recordID (if it was created) into the extra data area
++ data[offsetof(selRecord,event_data2)] = esel_recordID[1];
++ data[offsetof(selRecord,event_data3)] = esel_recordID[0];
++
++ // use local cc so that we don't corrupt the esel from above
++ IPMI::completion_code l_cc = IPMI::CC_UNKBAD;
++ TRACFBIN( g_trac_ipmi, INFO_MRK"add_sel:", data, len);
++ o_err = IPMI::sendrecv(IPMI::add_sel(),l_cc,len,data);
++ if(o_err)
++ {
++ IPMI_TRAC(ERR_MRK "error from add_sel");
+ }
+- if(o_err || (o_cc != IPMI::CC_OK))
++ else if (l_cc != IPMI::CC_OK)
+ {
+- break;
++ IPMI_TRAC(ERR_MRK "failed add_sel, l_cc %02x", l_cc);
+ }
+- }while(0);
++ else
++ {
++ // if CC_OK, then len = 2 and data contains the recordID of the new SEL
++ storeReserveRecord(sel_recordID,data);
++ }
++ }
+
+ delete[] data;
+
+- IPMI_TRAC(EXIT_MRK "send_esel (o_err %.8X, o_cc x%.2x, recID=x%x%x)",
+- o_err ? o_err->plid() : NULL, o_cc, esel_recordID[1], esel_recordID[0]);
++ IPMI_TRAC(EXIT_MRK
++ "send_esel o_err=%.8X, o_cc=x%.2x, sel recID=x%x%x, esel recID=x%x%x",
++ o_err ? o_err->plid() : NULL, o_cc, sel_recordID[1], sel_recordID[0],
++ esel_recordID[1], esel_recordID[0]);
+
+ return;
+ } // send_esel
+--
+1.8.2.2
+
diff --git a/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch b/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch
new file mode 100644
index 0000000..4b666b0
--- /dev/null
+++ b/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch
@@ -0,0 +1,301 @@
+From 98fc2914b15e89c2324c1636af62225c653e45f9 Mon Sep 17 00:00:00 2001
+From: Doug Gilbert <dgilbert@us.ibm.com>
+Date: Tue, 3 Mar 2015 16:00:29 -0600
+Subject: [PATCH] HTMGT add attempt to reset OCC when OCC Activate fails
+
+Change-Id: I964d2b68216c3ddabae73ce3b851bbc468ec96a7
+RTC: 123180
+---
+ src/include/usr/htmgt/htmgt_reasoncodes.H | 1 +
+ src/usr/htmgt/htmgt.C | 123 ++++++++++++++++++------------
+ src/usr/htmgt/htmgt_activate.C | 9 +++
+ src/usr/htmgt/htmgt_occ.C | 32 +++++++-
+ src/usr/htmgt/htmgt_occ.H | 1 +
+ 5 files changed, 116 insertions(+), 50 deletions(-)
+
+diff --git a/src/include/usr/htmgt/htmgt_reasoncodes.H b/src/include/usr/htmgt/htmgt_reasoncodes.H
+index ade192d..6fe269d 100644
+--- a/src/include/usr/htmgt/htmgt_reasoncodes.H
++++ b/src/include/usr/htmgt/htmgt_reasoncodes.H
+@@ -48,6 +48,7 @@ namespace HTMGT
+ HTMGT_MOD_CHECK_OCC_RSP = 0x92,
+ HTMGT_MOD_PARSE_OCC_RSP = 0x94,
+ HTMGT_MOD_HANLDE_OCC_EXCEPTION = 0xE0,
++ HTMGT_MOD_ENABLE_OCC_ACTUATION = 0xE1,
+ };
+
+ enum htmgtReasonCode
+diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C
+index aff2500..a2f556f 100644
+--- a/src/usr/htmgt/htmgt.C
++++ b/src/usr/htmgt/htmgt.C
+@@ -184,42 +184,30 @@ namespace HTMGT
+
+ if (NULL != l_err)
+ {
+- TMGT_ERR("OCCs not all active. System will stay in safe mode");
++ TMGT_ERR("OCCs not all active. Attempting OCC Reset");
+ TMGT_CONSOLE("OCCs are not active (rc=0x%04X). "
+- "System will remain in safe mode",
++ "Attempting OCC Reset",
+ l_err->reasonCode());
+- TMGT_INF("Calling HBOCC::stopAllOCCs");
+- errlHndl_t err2 = HBOCC::stopAllOCCs();
++ TMGT_INF("Calling resetOccs");
++ errlHndl_t err2 = OccManager::resetOccs(NULL);
+ if(err2)
+ {
+- TMGT_ERR("stopAllOCCs() failed with 0x%04X",
++                    TMGT_ERR("OccManager::resetOccs failed with 0x%04X",
+ err2->reasonCode());
+- ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+- }
+-
+- // Update error log to unrecoverable and set SRC
+- // to indicate the system will remain in safe mode
+- /*@
+- * @errortype
+- * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
+- * @moduleid HTMGT_MOD_LOAD_START_STATUS
+- * @userdata1[0:7] load/start completed
+- * @devdesc OCCs did not all reach active state,
+- * system will be in Safe Mode
+- */
+- bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS,
+- HTMGT_RC_OCC_CRIT_FAILURE,
+- i_startCompleted, 0, 0, 1,
+- ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+
+- // Add level 2 support callout
+- l_err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
+- HWAS::SRCI_PRIORITY_MED);
+- // Add HB firmware callout
+- l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+- HWAS::SRCI_PRIORITY_MED);
++ // Set original error log as unrecoverable and commit
++ l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
++ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+
+- ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
++ // Commit occReset error
++ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++ }
++ else
++ {
++ // retry worked - commit original error as informational
++ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
++ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
++ }
+ }
+
+ } // end processOccStartStatus()
+@@ -229,6 +217,19 @@ namespace HTMGT
+ // Notify HTMGT that an OCC has an error to report
+ void processOccError(TARGETING::Target * i_procTarget)
+ {
++ TARGETING::Target* sys = NULL;
++ TARGETING::targetService().getTopLevelTarget(sys);
++ uint8_t safeMode = 0;
++
++ // If the system is in safemode then can't talk to OCCs -
++ // ignore call to processOccError
++ if(sys &&
++ sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
++ safeMode)
++ {
++ return;
++ }
++
+ bool polledOneOcc = false;
+ OccManager::buildOccs();
+
+@@ -347,29 +348,57 @@ namespace HTMGT
+ // Set the OCC state
+ errlHndl_t enableOccActuation(bool i_occActivation)
+ {
+- occStateId targetState = OCC_STATE_ACTIVE;
+- if (false == i_occActivation)
+- {
+- targetState = OCC_STATE_OBSERVATION;
+- }
++ errlHndl_t l_err = NULL;
++ TARGETING::Target* sys = NULL;
++
++ TARGETING::targetService().getTopLevelTarget(sys);
++ uint8_t safeMode = 0;
+
+- // Set state for all OCCs
+- errlHndl_t l_err = OccManager::setOccState(targetState);
+- if (NULL == l_err)
++ // If the system is in safemode then can't talk to OCCs -
++ // ignore call to enableOccActuation
++ if(sys &&
++ sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
++ safeMode)
+ {
+- TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
+- targetState);
++ /*@
++ * @errortype
++ * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
++ * @moduleid HTMGT_MOD_ENABLE_OCC_ACTUATION
++ * @userdata1[0:7] OCC activate [1==true][0==false]
++ * @devdesc Invalid operation when OCCs are in safemode
++ */
++ bldErrLog(l_err,
++ HTMGT_MOD_ENABLE_OCC_ACTUATION,
++ HTMGT_RC_OCC_CRIT_FAILURE,
++ i_occActivation, 0, 0, 1,
++ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ }
+-
+- if (OccManager::occNeedsReset())
++ else
+ {
+- TMGT_ERR("enableOccActuation(): OCCs need to be reset");
+- // Don't pass failed target as OCC should have already
+- // been marked as failed during the poll.
+- errlHndl_t err2 = OccManager::resetOccs(NULL);
+- if(err2)
++ occStateId targetState = OCC_STATE_ACTIVE;
++ if (false == i_occActivation)
+ {
+- ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++ targetState = OCC_STATE_OBSERVATION;
++ }
++
++ // Set state for all OCCs
++ l_err = OccManager::setOccState(targetState);
++ if (NULL == l_err)
++ {
++ TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
++ targetState);
++ }
++
++ if (OccManager::occNeedsReset())
++ {
++ TMGT_ERR("enableOccActuation(): OCCs need to be reset");
++ // Don't pass failed target as OCC should have already
++ // been marked as failed during the poll.
++ errlHndl_t err2 = OccManager::resetOccs(NULL);
++ if(err2)
++ {
++ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++ }
+ }
+ }
+
+diff --git a/src/usr/htmgt/htmgt_activate.C b/src/usr/htmgt/htmgt_activate.C
+index 7f54d6d..4cb46f0 100644
+--- a/src/usr/htmgt/htmgt_activate.C
++++ b/src/usr/htmgt/htmgt_activate.C
+@@ -39,6 +39,7 @@
+
+ #include <ipmi/ipmisensor.H>
+ #include <sys/time.h>
++#include <console/consoleif.H>
+
+ using namespace TARGETING;
+
+@@ -163,6 +164,14 @@ namespace HTMGT
+ l_err = occ->ipmiSensor(i_activate);
+ if( l_err )
+ {
++ TMGT_ERR("setOccActiveSensors failed. (OCC%d state:%d)",
++ occ->getInstance(),
++ i_activate);
++
++ TMGT_CONSOLE("setOccActiveSensors failed. (OCC%d state:%d)",
++ occ->getInstance(),
++ i_activate);
++
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ }
+ }
+diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C
+index 8a539f4..bd95987 100644
+--- a/src/usr/htmgt/htmgt_occ.C
++++ b/src/usr/htmgt/htmgt_occ.C
+@@ -248,7 +248,8 @@ namespace HTMGT
+ OccManager::OccManager()
+ :iv_occMaster(NULL),
+ iv_state(OCC_STATE_UNKNOWN),
+- iv_targetState(OCC_STATE_ACTIVE)
++ iv_targetState(OCC_STATE_ACTIVE),
++ iv_resetCount(0)
+ {
+ }
+
+@@ -590,6 +591,19 @@ namespace HTMGT
+ ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+ }
+
++ if(NULL == i_failedOccTarget)
++ {
++ ++iv_resetCount; // increment system reset count
++
++ TMGT_INF("resetOCCs: Incrementing system OCC reset count to %d",
++ iv_resetCount);
++
++ if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD)
++ {
++ atThreshold = true;
++ }
++ }
++
+ for(occList_t::const_iterator occ = iv_occArray.begin();
+ occ != iv_occArray.end();
+ ++occ)
+@@ -663,7 +677,7 @@ namespace HTMGT
+ */
+ bldErrLog(err,
+ HTMTG_MOD_OCC_RESET,
+- HTMGT_RC_OCC_RESET_THREHOLD,
++ HTMGT_RC_OCC_CRIT_FAILURE,
+ 0, 0, 0, 0,
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ }
+@@ -673,6 +687,13 @@ namespace HTMGT
+ {
+ err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+
++ // Add level 2 support callout
++ err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
++ HWAS::SRCI_PRIORITY_MED);
++ // Add HB firmware callout
++ err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
++ HWAS::SRCI_PRIORITY_MED);
++
+ TARGETING::Target* sys = NULL;
+ TARGETING::targetService().getTopLevelTarget(sys);
+ uint8_t safeMode = 1;
+@@ -683,8 +704,13 @@ namespace HTMGT
+ sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode);
+ }
+
+- TMGT_ERR("_resetOccs: Safe Mode RC: 0x%04X (OCC%d)",
++ TMGT_ERR("_resetOccs: Safe Mode (RC: 0x%04X OCC%d)",
+ cv_safeReturnCode, cv_safeOccInstance);
++
++ TMGT_CONSOLE("OCCs are not active. The system will remain in "
++ "safe mode (RC: 0x%04x for OCC%d)",
++ cv_safeReturnCode,
++ cv_safeOccInstance);
+ }
+
+ return err;
+diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H
+index dec19b8..5ac545a 100644
+--- a/src/usr/htmgt/htmgt_occ.H
++++ b/src/usr/htmgt/htmgt_occ.H
+@@ -507,6 +507,7 @@ namespace HTMGT
+ occList_t iv_occArray;
+ occStateId iv_state;
+ occStateId iv_targetState;
++ uint8_t iv_resetCount;
+
+ /**
+ * @brief SRC that caused system to enter safe mode
+--
+1.8.2.2
+