Merge pull request #78 from ploetzma/master

eSel to Sel and xScom OCC Patch. Update Habanero XML for CPU freq sensor fix
diff --git a/openpower/package/habanero-xml/habanero-xml.mk b/openpower/package/habanero-xml/habanero-xml.mk
index b2e759b..e1abd28 100644
--- a/openpower/package/habanero-xml/habanero-xml.mk
+++ b/openpower/package/habanero-xml/habanero-xml.mk
@@ -4,7 +4,7 @@
 #
 ################################################################################
 
-HABANERO_XML_VERSION ?= f98000504ec8fafb4f8547c0427f0a8056e7b8b7
+HABANERO_XML_VERSION ?= 4c1e936c0329384020b126efe69d2148cfe48960
 HABANERO_XML_SITE ?= $(call github,open-power,habanero-xml,$(HABANERO_XML_VERSION))
 
 HABANERO_XML_LICENSE = Apache-2.0
diff --git a/openpower/package/hostboot/hostboot-0009-Sel-instead-of-eSel.patch b/openpower/package/hostboot/hostboot-0009-Sel-instead-of-eSel.patch
new file mode 100644
index 0000000..6b4c82d
--- /dev/null
+++ b/openpower/package/hostboot/hostboot-0009-Sel-instead-of-eSel.patch
@@ -0,0 +1,201 @@
+From 03029acd024ac886296f8ed0cbc711d9b81b26da Mon Sep 17 00:00:00 2001
+From: Brian Horton <brianh@linux.ibm.com>
+Date: Mon, 2 Mar 2015 12:12:28 -0600
+Subject: [PATCH] change error log to SEL processing
+
+for hostboot runtime, do not send eSEL (AMI bug)
+for hostboot ipl, send down SEL following eSEL
+
+Change-Id: I86ee9766e27548c3f7f72fbdbfd76c8a8be7da73
+RTC: 124971
+---
+ src/include/usr/ipmi/ipmisel.H    | 18 ++++++++++--
+ src/usr/errl/errlmanager_common.C |  4 ++-
+ src/usr/ipmi/ipmisel.C            | 61 ++++++++++++++++++++++++++++++++-------
+ 3 files changed, 70 insertions(+), 13 deletions(-)
+
+diff --git a/src/include/usr/ipmi/ipmisel.H b/src/include/usr/ipmi/ipmisel.H
+index de2dd32..91e3651 100644
+--- a/src/include/usr/ipmi/ipmisel.H
++++ b/src/include/usr/ipmi/ipmisel.H
+@@ -65,11 +65,13 @@ namespace IPMISEL
+      * @param[in] size of eSEL data
+      * @param[in] eid of errorlog for this eSEL (for ack)
+      * @param[in] event_dir_type for this eSEL
++     * @param[in] event_offset for this eSEL
+      * @param[in] sensorType that caused the error/eSEL
+      * @param[in] sensorNumber that caused the error/eSEL
+      */
+     void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
+-                  uint32_t i_eid, uint8_t i_eventDirType,
++                  uint32_t i_eid,
++                  uint8_t i_eventDirType, uint8_t i_eventOffset,
+                   uint8_t i_sensorType, uint8_t i_sensorNumber);
+ 
+     // per IPMI Spec, section 32.1 SEL Event Records
+@@ -85,6 +87,7 @@ namespace IPMISEL
+         format_ipmi_version_2_0 = 0x04,
+     };
+ 
++    // event_type, per section 42.1 of the IPMI spec
+     enum sel_event_dir_type
+     {
+         event_unspecified       = 0x00,
+@@ -93,11 +96,22 @@ namespace IPMISEL
+         event_predictive        = 0x04,
+         event_limit             = 0x05,
+         event_permformance      = 0x06,
++        event_transition        = 0x07,
++        event_OEM               = 0x70,
+     };
+ 
+     enum sel_event_data
+     {
+-        event_data1_ami         = 0xAA,
++        event_data1_ami                         = 0xAA,
++        event_data1_trans_to_ok                 = 0x00,
++        event_data1_trans_to_noncrit_from_ok    = 0x01,
++        event_data1_trans_to_crit_from_less     = 0x02,
++        event_data1_trans_to_non_recv_from_less = 0x03,
++        event_data1_trans_to_non_crit_from_more = 0x04,
++        event_data1_trans_to_crit_from_non_r    = 0x05,
++        event_data1_trans_to_non_recoverable    = 0x06,
++        event_data1_trans_monitor               = 0x07,
++        event_data1_trans_informational         = 0x08,
+     };
+ 
+     enum sel_generator_id
+diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C
+index a64ed3b..6c68818 100644
+--- a/src/usr/errl/errlmanager_common.C
++++ b/src/usr/errl/errlmanager_common.C
+@@ -135,7 +135,9 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
+                 "sendErrLogToBmc: sensor %.2x/%.2x, size %d",
+                 l_sensorType, l_sensorNumber, l_pelSize);
+         IPMISEL::sendESEL(l_pelData, l_pelSize,
+-                            io_err->eid(), IPMISEL::event_unspecified,
++                            io_err->eid(),
++                            IPMISEL::event_transition,
++                            IPMISEL::event_data1_trans_to_non_recoverable,
+                             l_sensorType, l_sensorNumber);
+ 
+         // free the buffer
+diff --git a/src/usr/ipmi/ipmisel.C b/src/usr/ipmi/ipmisel.C
+index c05c60f..49dcee6 100644
+--- a/src/usr/ipmi/ipmisel.C
++++ b/src/usr/ipmi/ipmisel.C
+@@ -82,7 +82,8 @@ enum esel_retry
+ namespace IPMISEL
+ {
+ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
+-              uint32_t i_eid, uint8_t i_eventDirType,
++              uint32_t i_eid,
++              uint8_t i_eventDirType, uint8_t i_eventOffset,
+               uint8_t i_sensorType, uint8_t i_sensorNumber)
+ {
+     IPMI_TRAC(ENTER_MRK "sendESEL()");
+@@ -100,13 +101,13 @@ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
+ 
+     // create the sel record of information
+     selRecord l_sel;
+-    l_sel.record_type = record_type_ami_esel;
++    l_sel.record_type = record_type_system_event;
+     l_sel.generator_id = generator_id_ami;
+     l_sel.evm_format_version = format_ipmi_version_2_0;
+     l_sel.sensor_type = i_sensorType;
+     l_sel.sensor_number = i_sensorNumber;
+     l_sel.event_dir_type = i_eventDirType;
+-    l_sel.event_data1 = event_data1_ami;
++    l_sel.event_data1 = i_eventOffset;
+ 
+     eselInitData *eselData =
+         new eselInitData(&l_sel, i_eselData, i_dataSize);
+@@ -216,13 +217,16 @@ void send_esel(eselInitData * i_data,
+ {
+     IPMI_TRAC(ENTER_MRK "send_esel");
+     uint8_t* data = NULL;
+-    const size_t l_eSELlen = i_data->dataSize;
+ 
+     size_t len = 0;
+-    uint8_t reserveID[2] = {0,0};
+     uint8_t esel_recordID[2] = {0,0};
++    uint8_t sel_recordID[2] = {0,0};
+ 
++#ifndef __HOSTBOOT_RUNTIME
++// TODO RTC: 124972 take this out when runtime supports the eSEL
+     do{
++        const size_t l_eSELlen = i_data->dataSize;
++        uint8_t reserveID[2] = {0,0};
+         // we need to send down the extended sel data (eSEL), which is
+         // longer than the protocol buffer, so we need to do a reservation and
+         // call the AMI partial_add_esel command multiple times
+@@ -258,6 +262,9 @@ void send_esel(eselInitData * i_data,
+         // copy in the SEL event record data
+         memcpy(&data[PARTIAL_ADD_ESEL_REQ], i_data->eSel,
+                 sizeof(selRecord));
++        // update to make this what AMI eSEL wants
++        data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] = record_type_ami_esel;
++        data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,event_data1)] = event_data1_ami;
+ 
+         o_cc = IPMI::CC_UNKBAD;
+         TRACFBIN( g_trac_ipmi, INFO_MRK"1st partial_add_esel:", data, len);
+@@ -338,17 +345,51 @@ void send_esel(eselInitData * i_data,
+             // BMC returns the recordID, it's always the same (unless
+             // there's a major BMC bug...)
+             storeReserveRecord(esel_recordID,data);
++        } // while eSELindex
++    }while(0);
++#endif
++
++    // if eSEL wasn't created due to an error, we don't want to continue
++    if(o_err == NULL)
++    {
++        // if the eSEL wasn't created due to a bad completion code, we will
++        // still try to send down a SEL that we create, which will contain
++        // the eSEL recordID (if it was successful)
++        delete [] data;
++        len = sizeof(IPMISEL::selRecord);
++        data = new uint8_t[len];
++
++        // copy in the SEL event record data
++        memcpy(data, i_data->eSel, sizeof(IPMISEL::selRecord));
++        // copy the eSEL recordID (if it was created) into the extra data area
++        data[offsetof(selRecord,event_data2)] = esel_recordID[1];
++        data[offsetof(selRecord,event_data3)] = esel_recordID[0];
++
++        // use a local cc so we don't overwrite the eSEL's o_cc from above
++        IPMI::completion_code l_cc = IPMI::CC_UNKBAD;
++        TRACFBIN( g_trac_ipmi, INFO_MRK"add_sel:", data, len);
++        o_err = IPMI::sendrecv(IPMI::add_sel(),l_cc,len,data);
++        if(o_err)
++        {
++            IPMI_TRAC(ERR_MRK "error from add_sel");
+         }
+-        if(o_err || (o_cc != IPMI::CC_OK))
++        else if (l_cc != IPMI::CC_OK)
+         {
+-            break;
++            IPMI_TRAC(ERR_MRK "failed add_sel, l_cc %02x", l_cc);
+         }
+-    }while(0);
++        else
++        {
++            // if CC_OK, then len = 2 and data contains the recordID of the new SEL
++            storeReserveRecord(sel_recordID,data);
++        }
++    }
+ 
+     delete[] data;
+ 
+-    IPMI_TRAC(EXIT_MRK "send_esel (o_err %.8X, o_cc x%.2x, recID=x%x%x)",
+-        o_err ? o_err->plid() : NULL, o_cc, esel_recordID[1], esel_recordID[0]);
++    IPMI_TRAC(EXIT_MRK
++        "send_esel o_err=%.8X, o_cc=x%.2x, sel recID=x%x%x, esel recID=x%x%x",
++        o_err ? o_err->plid() : NULL, o_cc, sel_recordID[1], sel_recordID[0],
++        esel_recordID[1], esel_recordID[0]);
+ 
+     return;
+ } // send_esel
+-- 
+1.8.2.2
+
diff --git a/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch b/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch
new file mode 100644
index 0000000..4b666b0
--- /dev/null
+++ b/openpower/package/hostboot/hostboot-0010-Reset-occ-when-fails-to-activate.patch
@@ -0,0 +1,301 @@
+From 98fc2914b15e89c2324c1636af62225c653e45f9 Mon Sep 17 00:00:00 2001
+From: Doug Gilbert <dgilbert@us.ibm.com>
+Date: Tue, 3 Mar 2015 16:00:29 -0600
+Subject: [PATCH] HTMGT add attempt to reset OCC when OCC Activate fails
+
+Change-Id: I964d2b68216c3ddabae73ce3b851bbc468ec96a7
+RTC: 123180
+---
+ src/include/usr/htmgt/htmgt_reasoncodes.H |   1 +
+ src/usr/htmgt/htmgt.C                     | 123 ++++++++++++++++++------------
+ src/usr/htmgt/htmgt_activate.C            |   9 +++
+ src/usr/htmgt/htmgt_occ.C                 |  32 +++++++-
+ src/usr/htmgt/htmgt_occ.H                 |   1 +
+ 5 files changed, 116 insertions(+), 50 deletions(-)
+
+diff --git a/src/include/usr/htmgt/htmgt_reasoncodes.H b/src/include/usr/htmgt/htmgt_reasoncodes.H
+index ade192d..6fe269d 100644
+--- a/src/include/usr/htmgt/htmgt_reasoncodes.H
++++ b/src/include/usr/htmgt/htmgt_reasoncodes.H
+@@ -48,6 +48,7 @@ namespace HTMGT
+         HTMGT_MOD_CHECK_OCC_RSP         = 0x92,
+         HTMGT_MOD_PARSE_OCC_RSP         = 0x94,
+         HTMGT_MOD_HANLDE_OCC_EXCEPTION  = 0xE0,
++        HTMGT_MOD_ENABLE_OCC_ACTUATION  = 0xE1,
+     };
+ 
+     enum htmgtReasonCode
+diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C
+index aff2500..a2f556f 100644
+--- a/src/usr/htmgt/htmgt.C
++++ b/src/usr/htmgt/htmgt.C
+@@ -184,42 +184,30 @@ namespace HTMGT
+ 
+         if (NULL != l_err)
+         {
+-            TMGT_ERR("OCCs not all active.  System will stay in safe mode");
++            TMGT_ERR("OCCs not all active.  Attempting OCC Reset");
+             TMGT_CONSOLE("OCCs are not active (rc=0x%04X). "
+-                         "System will remain in safe mode",
++                         "Attempting OCC Reset",
+                          l_err->reasonCode());
+-            TMGT_INF("Calling HBOCC::stopAllOCCs");
+-            errlHndl_t err2 = HBOCC::stopAllOCCs();
++            TMGT_INF("Calling resetOccs");
++            errlHndl_t err2 = OccManager::resetOccs(NULL);
+             if(err2)
+             {
+-                TMGT_ERR("stopAllOCCs() failed with 0x%04X",
++                TMGT_ERR("OccManager::resetOccs failed with 0x%04X",
+                          err2->reasonCode());
+-                ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+-            }
+-
+-            // Update error log to unrecoverable and set SRC
+-            // to indicate the system will remain in safe mode
+-            /*@
+-             * @errortype
+-             * @reasoncode      HTMGT_RC_OCC_CRIT_FAILURE
+-             * @moduleid        HTMGT_MOD_LOAD_START_STATUS
+-             * @userdata1[0:7]  load/start completed
+-             * @devdesc         OCCs did not all reach active state,
+-             *                  system will be in Safe Mode
+-             */
+-            bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS,
+-                      HTMGT_RC_OCC_CRIT_FAILURE,
+-                      i_startCompleted, 0, 0, 1,
+-                      ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ 
+-            // Add level 2 support callout
+-            l_err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
+-                                       HWAS::SRCI_PRIORITY_MED);
+-            // Add HB firmware callout
+-            l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+-                                       HWAS::SRCI_PRIORITY_MED);
++                // Set original error log as unrecoverable and commit
++                l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
++                ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ 
+-            ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
++                // Commit occReset error
++                ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++            }
++            else
++            {
++                // retry worked - commit original error as informational
++                l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
++                ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
++            }
+         }
+ 
+     } // end processOccStartStatus()
+@@ -229,6 +217,19 @@ namespace HTMGT
+     // Notify HTMGT that an OCC has an error to report
+     void processOccError(TARGETING::Target * i_procTarget)
+     {
++        TARGETING::Target* sys = NULL;
++        TARGETING::targetService().getTopLevelTarget(sys);
++        uint8_t safeMode = 0;
++
++        // If the system is in safemode then can't talk to OCCs -
++        // ignore call to processOccError
++        if(sys &&
++           sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
++           safeMode)
++        {
++            return;
++        }
++
+         bool polledOneOcc = false;
+         OccManager::buildOccs();
+ 
+@@ -347,29 +348,57 @@ namespace HTMGT
+     // Set the OCC state
+     errlHndl_t enableOccActuation(bool i_occActivation)
+     {
+-        occStateId targetState = OCC_STATE_ACTIVE;
+-        if (false == i_occActivation)
+-        {
+-            targetState = OCC_STATE_OBSERVATION;
+-        }
++        errlHndl_t l_err = NULL;
++        TARGETING::Target* sys = NULL;
++
++        TARGETING::targetService().getTopLevelTarget(sys);
++        uint8_t safeMode = 0;
+ 
+-        // Set state for all OCCs
+-        errlHndl_t l_err = OccManager::setOccState(targetState);
+-        if (NULL == l_err)
++        // If the system is in safemode then can't talk to OCCs -
++        // ignore call to enableOccActuation
++        if(sys &&
++           sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
++           safeMode)
+         {
+-            TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
+-                     targetState);
++            /*@
++             * @errortype
++             * @reasoncode      HTMGT_RC_OCC_CRIT_FAILURE
++             * @moduleid        HTMGT_MOD_ENABLE_OCC_ACTUATION
++             * @userdata1[0:7]  OCC activate [1==true][0==false]
++             * @devdesc         Invalid operation when OCCs are in safemode
++             */
++            bldErrLog(l_err,
++                      HTMGT_MOD_ENABLE_OCC_ACTUATION,
++                      HTMGT_RC_OCC_CRIT_FAILURE,
++                      i_occActivation, 0, 0, 1,
++                      ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+         }
+-
+-        if (OccManager::occNeedsReset())
++        else
+         {
+-            TMGT_ERR("enableOccActuation(): OCCs need to be reset");
+-            // Don't pass failed target as OCC should have already
+-            // been marked as failed during the poll.
+-            errlHndl_t err2 = OccManager::resetOccs(NULL);
+-            if(err2)
++            occStateId targetState = OCC_STATE_ACTIVE;
++            if (false == i_occActivation)
+             {
+-                ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++                targetState = OCC_STATE_OBSERVATION;
++            }
++
++            // Set state for all OCCs
++            l_err = OccManager::setOccState(targetState);
++            if (NULL == l_err)
++            {
++                TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
++                         targetState);
++            }
++
++            if (OccManager::occNeedsReset())
++            {
++                TMGT_ERR("enableOccActuation(): OCCs need to be reset");
++                // Don't pass failed target as OCC should have already
++                // been marked as failed during the poll.
++                errlHndl_t err2 = OccManager::resetOccs(NULL);
++                if(err2)
++                {
++                    ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
++                }
+             }
+         }
+ 
+diff --git a/src/usr/htmgt/htmgt_activate.C b/src/usr/htmgt/htmgt_activate.C
+index 7f54d6d..4cb46f0 100644
+--- a/src/usr/htmgt/htmgt_activate.C
++++ b/src/usr/htmgt/htmgt_activate.C
+@@ -39,6 +39,7 @@
+ 
+ #include <ipmi/ipmisensor.H>
+ #include <sys/time.h>
++#include <console/consoleif.H>
+ 
+ using namespace TARGETING;
+ 
+@@ -163,6 +164,14 @@ namespace HTMGT
+             l_err = occ->ipmiSensor(i_activate);
+             if( l_err )
+             {
++                TMGT_ERR("setOccActiveSensors failed. (OCC%d state:%d)",
++                         occ->getInstance(),
++                         i_activate);
++
++                TMGT_CONSOLE("setOccActiveSensors failed. (OCC%d state:%d)",
++                         occ->getInstance(),
++                         i_activate);
++
+                 ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+             }
+         }
+diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C
+index 8a539f4..bd95987 100644
+--- a/src/usr/htmgt/htmgt_occ.C
++++ b/src/usr/htmgt/htmgt_occ.C
+@@ -248,7 +248,8 @@ namespace HTMGT
+     OccManager::OccManager()
+         :iv_occMaster(NULL),
+         iv_state(OCC_STATE_UNKNOWN),
+-        iv_targetState(OCC_STATE_ACTIVE)
++        iv_targetState(OCC_STATE_ACTIVE),
++        iv_resetCount(0)
+     {
+     }
+ 
+@@ -590,6 +591,19 @@ namespace HTMGT
+             ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+         }
+ 
++        if(NULL == i_failedOccTarget)
++        {
++            ++iv_resetCount; // increment system reset count
++
++            TMGT_INF("resetOCCs: Incrementing system OCC reset count to %d",
++                     iv_resetCount);
++
++            if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD)
++            {
++                atThreshold = true;
++            }
++        }
++
+         for(occList_t::const_iterator occ = iv_occArray.begin();
+             occ != iv_occArray.end();
+             ++occ)
+@@ -663,7 +677,7 @@ namespace HTMGT
+              */
+             bldErrLog(err,
+                       HTMTG_MOD_OCC_RESET,
+-                      HTMGT_RC_OCC_RESET_THREHOLD,
++                      HTMGT_RC_OCC_CRIT_FAILURE,
+                       0, 0, 0, 0,
+                       ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+         }
+@@ -673,6 +687,13 @@ namespace HTMGT
+         {
+             err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ 
++            // Add level 2 support callout
++            err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
++                                     HWAS::SRCI_PRIORITY_MED);
++            // Add HB firmware callout
++            err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
++                                     HWAS::SRCI_PRIORITY_MED);
++
+             TARGETING::Target* sys = NULL;
+             TARGETING::targetService().getTopLevelTarget(sys);
+             uint8_t safeMode = 1;
+@@ -683,8 +704,13 @@ namespace HTMGT
+                sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode);
+             }
+ 
+-            TMGT_ERR("_resetOccs: Safe Mode RC: 0x%04X (OCC%d)",
++            TMGT_ERR("_resetOccs: Safe Mode (RC: 0x%04X OCC%d)",
+                      cv_safeReturnCode, cv_safeOccInstance);
++
++            TMGT_CONSOLE("OCCs are not active. The system will remain in "
++                         "safe mode (RC: 0x%04x  for OCC%d)",
++                         cv_safeReturnCode,
++                         cv_safeOccInstance);
+         }
+ 
+         return err;
+diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H
+index dec19b8..5ac545a 100644
+--- a/src/usr/htmgt/htmgt_occ.H
++++ b/src/usr/htmgt/htmgt_occ.H
+@@ -507,6 +507,7 @@ namespace HTMGT
+             occList_t               iv_occArray;
+             occStateId              iv_state;
+             occStateId              iv_targetState;
++            uint8_t                 iv_resetCount;
+ 
+             /**
+              * @brief SRC that caused system to enter safe mode
+-- 
+1.8.2.2
+