Improve PECI error checking and reporting

It's possible for a PECI command to return successfully with an
error in the completion code.  This change adds checks for the
PECI completion code and will print an error on failure.

Tested:
Injected an IERR and confirmed that PECI data is correctly handled.

Change-Id: I86fc0a99ab04dac4d8b38f9f7e0ee1eb6a39397d
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
index f23d25e..b4a8c72 100644
--- a/src/host_error_monitor.cpp
+++ b/src/host_error_monitor.cpp
@@ -231,6 +231,21 @@
                     "OpenBMC.0.1.SsbThermalTrip", NULL);
 }
 
+static inline bool peciError(EPECIStatus peciStatus, uint8_t cc)
+{
+    return (
+        peciStatus != PECI_CC_SUCCESS ||
+        (cc != PECI_DEV_CC_SUCCESS && cc != PECI_DEV_CC_FATAL_MCA_DETECTED));
+}
+
+static void printPECIError(const std::string& reg, const size_t addr,
+                           const EPECIStatus peciStatus, const size_t cc)
+{
+    std::cerr << "Failed to read " << reg << " on CPU address " << addr
+              << ". Error: " << peciStatus << ": cc: 0x" << std::hex << cc
+              << "\n";
+}
+
 static void initializeErrorState();
 static void initializeHostState()
 {
@@ -498,9 +513,10 @@
 static bool checkIERRCPUs()
 {
     bool cpuIERRFound = false;
-    for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
+    for (size_t cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
          cpu++, addr++)
     {
+        EPECIStatus peciStatus = PECI_CC_SUCCESS;
         uint8_t cc = 0;
         CPUModel model{};
         uint8_t stepping = 0;
@@ -517,9 +533,11 @@
                 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
                 // that caused the IERR
                 uint32_t mcaErrSrcLog = 0;
-                if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
-                                     &cc) != PECI_CC_SUCCESS)
+                peciStatus = peci_RdPkgConfig(addr, 0, 5, 4,
+                                              (uint8_t*)&mcaErrSrcLog, &cc);
+                if (peciError(peciStatus, cc))
                 {
+                    printPECIError("MCA_ERR_SRC_LOG", addr, peciStatus, cc);
                     continue;
                 }
                 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
@@ -531,9 +549,10 @@
                     // Next check if it's a CPU/VR mismatch by reading the
                     // IA32_MC4_STATUS MSR (0x411)
                     uint64_t mc4Status = 0;
-                    if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
-                        PECI_CC_SUCCESS)
+                    peciStatus = peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("IA32_MC4_STATUS", addr, peciStatus, cc);
                         continue;
                     }
                     // Check MSEC bits 31:24 for
@@ -552,10 +571,13 @@
                     // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
                     // 80h)
                     uint32_t coreFIVRErrLog = 0;
-                    if (peci_RdPCIConfigLocal(
-                            addr, 1, 30, 2, 0x80, sizeof(uint32_t),
-                            (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
+                    peciStatus = peci_RdPCIConfigLocal(
+                        addr, 1, 30, 2, 0x80, sizeof(uint32_t),
+                        (uint8_t*)&coreFIVRErrLog, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("CORE_FIVR_ERR_LOG", addr, peciStatus,
+                                       cc);
                         continue;
                     }
                     if (coreFIVRErrLog)
@@ -568,11 +590,13 @@
                     // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
                     // 84h)
                     uint32_t uncoreFIVRErrLog = 0;
-                    if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
-                                              sizeof(uint32_t),
-                                              (uint8_t*)&uncoreFIVRErrLog,
-                                              &cc) != PECI_CC_SUCCESS)
+                    peciStatus = peci_RdPCIConfigLocal(
+                        addr, 1, 30, 2, 0x84, sizeof(uint32_t),
+                        (uint8_t*)&uncoreFIVRErrLog, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("UNCORE_FIVR_ERR_LOG", addr, peciStatus,
+                                       cc);
                         continue;
                     }
                     if (uncoreFIVRErrLog)
@@ -602,9 +626,11 @@
                 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
                 // that caused the IERR
                 uint32_t mcaErrSrcLog = 0;
-                if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
-                                     &cc) != PECI_CC_SUCCESS)
+                peciStatus = peci_RdPkgConfig(addr, 0, 5, 4,
+                                              (uint8_t*)&mcaErrSrcLog, &cc);
+                if (peciError(peciStatus, cc))
                 {
+                    printPECIError("MCA_ERR_SRC_LOG", addr, peciStatus, cc);
                     continue;
                 }
                 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
@@ -616,9 +642,10 @@
                     // Next check if it's a CPU/VR mismatch by reading the
                     // IA32_MC4_STATUS MSR (0x411)
                     uint64_t mc4Status = 0;
-                    if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
-                        PECI_CC_SUCCESS)
+                    peciStatus = peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("IA32_MC4_STATUS", addr, peciStatus, cc);
                         continue;
                     }
                     // TODO: Update MSEC/MSCOD_31_24 check
@@ -639,16 +666,22 @@
                     // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
                     uint32_t coreFIVRErrLog0 = 0;
                     uint32_t coreFIVRErrLog1 = 0;
-                    if (peci_RdEndPointConfigPciLocal(
-                            addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
-                            (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
+                    peciStatus = peci_RdEndPointConfigPciLocal(
+                        addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
+                        (uint8_t*)&coreFIVRErrLog0, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("CORE_FIVR_ERR_LOG_0", addr, peciStatus,
+                                       cc);
                         continue;
                     }
-                    if (peci_RdEndPointConfigPciLocal(
-                            addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
-                            (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
+                    peciStatus = peci_RdEndPointConfigPciLocal(
+                        addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
+                        (uint8_t*)&coreFIVRErrLog1, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("CORE_FIVR_ERR_LOG_1", addr, peciStatus,
+                                       cc);
                         continue;
                     }
                     if (coreFIVRErrLog0 || coreFIVRErrLog1)
@@ -661,11 +694,13 @@
                     // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
                     // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
                     uint32_t uncoreFIVRErrLog = 0;
-                    if (peci_RdEndPointConfigPciLocal(
-                            addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
-                            (uint8_t*)&uncoreFIVRErrLog,
-                            &cc) != PECI_CC_SUCCESS)
+                    peciStatus = peci_RdEndPointConfigPciLocal(
+                        addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
+                        (uint8_t*)&uncoreFIVRErrLog, &cc);
+                    if (peciError(peciStatus, cc))
                     {
+                        printPECIError("UNCORE_FIVR_ERR_LOG", addr, peciStatus,
+                                       cc);
                         continue;
                     }
                     if (uncoreFIVRErrLog)
@@ -1112,11 +1147,12 @@
 {
     int errPinSts = (1 << errPin);
     std::bitset<MAX_CPUS> errPinCPUs = 0;
-    for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
+    for (size_t cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
          cpu++, addr++)
     {
         if (peci_Ping(addr) == PECI_CC_SUCCESS)
         {
+            EPECIStatus peciStatus = PECI_CC_SUCCESS;
             uint8_t cc = 0;
             CPUModel model{};
             uint8_t stepping = 0;
@@ -1133,12 +1169,16 @@
                     // Check the ERRPINSTS to see if this is the CPU that caused
                     // the ERRx (B(0) D8 F0 offset 210h)
                     uint32_t errpinsts = 0;
-                    if (peci_RdPCIConfigLocal(
-                            addr, 0, 8, 0, 0x210, sizeof(uint32_t),
-                            (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
+                    peciStatus = peci_RdPCIConfigLocal(
+                        addr, 0, 8, 0, 0x210, sizeof(uint32_t),
+                        (uint8_t*)&errpinsts, &cc);
+                    if (peciError(peciStatus, cc))
                     {
-                        errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
+                        printPECIError("ERRPINSTS", addr, peciStatus, cc);
+                        continue;
                     }
+
+                    errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
                     break;
                 }
                 case icx:
@@ -1147,12 +1187,16 @@
                     // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
                     // accessed on PECI as bus 13)
                     uint32_t errpinsts = 0;
-                    if (peci_RdEndPointConfigPciLocal(
-                            addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
-                            (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
+                    peciStatus = peci_RdEndPointConfigPciLocal(
+                        addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
+                        (uint8_t*)&errpinsts, &cc);
+                    if (peciError(peciStatus, cc))
                     {
-                        errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
+                        printPECIError("ERRPINSTS", addr, peciStatus, cc);
+                        continue;
                     }
+
+                    errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
                     break;
                 }
             }