Move ERR0 and ERR1 monitor to the new architecture

Add an error monitor for CPU_ERR0 and CPU_ERR1.  While the pin is
asserted, the monitor polls it; if the pin stays asserted for the whole
timeout, the event is logged.  If the pin is ever found deasserted,
polling stops and the monitor waits for the next interrupt before it
starts polling again.

Change-Id: I16eb5500f4c08226b7e4e879eb7732caa5262db1
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
diff --git a/include/error_monitors/err_pin_monitor.hpp b/include/error_monitors/err_pin_monitor.hpp
new file mode 100644
index 0000000..ddafb2b
--- /dev/null
+++ b/include/error_monitors/err_pin_monitor.hpp
@@ -0,0 +1,159 @@
+/*
+// Copyright (c) 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+#pragma once
+#include <systemd/sd-journal.h>
+
+#include <error_monitors/base_gpio_poll_monitor.hpp>
+#include <host_error_monitor.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <bitset>
+
+namespace host_error_monitor::err_pin_monitor
+{
+static constexpr bool debug = false; // not referenced in this header
+
+// Polls one CPU ERRx pin through BaseGPIOPollMonitor and, whenever polling
+// starts, records over PECI which CPUs have that pin set in ERRPINSTS.
+class ErrPinMonitor :
+    public host_error_monitor::base_gpio_poll_monitor::BaseGPIOPollMonitor
+{
+    // Index of the monitored pin (x in ERRx); also the ERRPINSTS bit tested
+    size_t errPin;
+    // Bit i is set when the CPU at address MIN_CLIENT_ADDR + i flagged ERRx
+    std::bitset<MAX_CPUS> errPinCPUs;
+    const static host_error_monitor::base_gpio_poll_monitor::AssertValue
+        assertValue =
+            host_error_monitor::base_gpio_poll_monitor::AssertValue::lowAssert;
+    const static constexpr size_t errPinPollingTimeMs = 1000;
+    const static constexpr size_t errPinTimeoutMs = 90000;
+
+    // Log the ERRx timeout once per CPU set in errPinCPUs, or once with no
+    // CPU named if none is set.  NOTE(review): caller not visible; confirm.
+    void logEvent()
+    {
+        if (errPinCPUs.none())
+        {
+            return errPinTimeoutLog();
+        }
+
+        for (size_t cpu = 0; cpu < errPinCPUs.size(); cpu++)
+        {
+            if (errPinCPUs.test(cpu))
+            {
+                errPinTimeoutLog(static_cast<int>(cpu));
+            }
+        }
+    }
+
+    // Send msg to the journal as both the text and the Redfish message arg
+    static void sendTimeoutLog(const std::string& msg)
+    {
+        sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
+                        LOG_INFO, "REDFISH_MESSAGE_ID=%s",
+                        "OpenBMC.0.1.CPUError", "REDFISH_MESSAGE_ARGS=%s",
+                        msg.c_str(), NULL);
+    }
+
+    // Log a timeout that was not attributed to any CPU
+    void errPinTimeoutLog()
+    {
+        sendTimeoutLog("ERR" + std::to_string(errPin) + " Timeout");
+    }
+
+    // Log a timeout for one CPU; cpuNum is zero-based, the message one-based
+    void errPinTimeoutLog(const int cpuNum)
+    {
+        sendTimeoutLog("ERR" + std::to_string(errPin) + " Timeout on CPU " +
+                       std::to_string(cpuNum + 1));
+    }
+
+    // Rebuild errPinCPUs from each CPU's ERRPINSTS, read over PECI.  A CPU is
+    // left cleared if a read fails or its model is not handled below.
+    void checkErrPinCPUs()
+    {
+        errPinCPUs.reset();
+        for (size_t cpu = 0, addr = MIN_CLIENT_ADDR;
+             addr <= MAX_CLIENT_ADDR && cpu < errPinCPUs.size(); cpu++, addr++)
+        {
+            uint8_t cc = 0;
+            CPUModel model{};
+            uint8_t stepping = 0;
+            EPECIStatus status = peci_GetCPUID(addr, &model, &stepping, &cc);
+            if (status != PECI_CC_SUCCESS)
+            {
+                if (status != PECI_CC_CPU_NOT_PRESENT)
+                {
+                    printPECIError("CPUID", addr, status, cc);
+                }
+                continue;
+            }
+
+            // Check the ERRPINSTS to see if this CPU caused the ERRx
+            uint32_t errpinsts = 0;
+            switch (model)
+            {
+                case skx:
+                    // B(0) D8 F0 offset 210h
+                    status = peci_RdPCIConfigLocal(
+                        addr, 0, 8, 0, 0x210, sizeof(errpinsts),
+                        reinterpret_cast<uint8_t*>(&errpinsts), &cc);
+                    break;
+                case icx:
+                    // B(30) D0 F3 offset 274h (Note: Bus 30 is accessed on
+                    // PECI as bus 13)
+                    status = peci_RdEndPointConfigPciLocal(
+                        addr, 0, 13, 0, 3, 0x274, sizeof(errpinsts),
+                        reinterpret_cast<uint8_t*>(&errpinsts), &cc);
+                    break;
+                default:
+                    // Other models are not read; the bit stays cleared
+                    continue;
+            }
+            if (peciError(status, cc))
+            {
+                printPECIError("ERRPINSTS", addr, status, cc);
+                continue;
+            }
+
+            // Unsigned mask so the shift never involves a signed value
+            errPinCPUs[cpu] = (errpinsts & (uint32_t{1} << errPin)) != 0;
+        }
+    }
+
+    // Refresh the per-CPU ERRx attribution, then run the base polling logic
+    void startPolling() override
+    {
+        checkErrPinCPUs();
+        host_error_monitor::base_gpio_poll_monitor::BaseGPIOPollMonitor::
+            startPolling();
+    }
+
+  public:
+    ErrPinMonitor(boost::asio::io_service& io,
+                  std::shared_ptr<sdbusplus::asio::connection> conn,
+                  const std::string& signalName, const size_t errPin) :
+        BaseGPIOPollMonitor(io, conn, signalName, assertValue,
+                            errPinPollingTimeMs, errPinTimeoutMs),
+        errPin(errPin)
+    {
+        if (valid)
+        {
+            startPolling();
+        }
+    }
+};
+} // namespace host_error_monitor::err_pin_monitor
diff --git a/include/host_error_monitor.hpp b/include/host_error_monitor.hpp
index eee1fab..90a946e 100644
--- a/include/host_error_monitor.hpp
+++ b/include/host_error_monitor.hpp
@@ -92,4 +92,19 @@
         "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
 }
 
+// True unless the PECI call succeeded with an accepted completion code
+static inline bool peciError(EPECIStatus peciStatus, uint8_t cc)
+{
+    const bool mca = (cc == PECI_DEV_CC_FATAL_MCA_DETECTED);
+    return peciStatus != PECI_CC_SUCCESS || !(cc == PECI_DEV_CC_SUCCESS || mca);
+}
+
+static inline void printPECIError(const std::string& reg, const size_t addr,
+                                  const EPECIStatus peciStatus, const size_t cc)
+{
+    std::cerr << "Failed to read " << reg << " on CPU address " << addr
+              << ". Error: " << peciStatus << ": cc: 0x" << std::hex << cc
+              << std::dec << "\n"; // undo sticky std::hex for later output
+}
+
 } // namespace host_error_monitor
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
index b502a3d..584e96c 100644
--- a/src/host_error_monitor.cpp
+++ b/src/host_error_monitor.cpp
@@ -53,20 +53,12 @@
 // Timers
 // Timer for CATERR asserted
 static boost::asio::steady_timer caterrAssertTimer(io);
-// Timer for ERR0 asserted
-static boost::asio::steady_timer err0AssertTimer(io);
-// Timer for ERR1 asserted
-static boost::asio::steady_timer err1AssertTimer(io);
 // Timer for ERR2 asserted
 static boost::asio::steady_timer err2AssertTimer(io);
 
 // GPIO Lines and Event Descriptors
 static gpiod::line caterrLine;
 static boost::asio::posix::stream_descriptor caterrEvent(io);
-static gpiod::line err0Line;
-static boost::asio::posix::stream_descriptor err0Event(io);
-static gpiod::line err1Line;
-static boost::asio::posix::stream_descriptor err1Event(io);
 static gpiod::line err2Line;
 static boost::asio::posix::stream_descriptor err2Event(io);
 static gpiod::line cpu1FIVRFaultLine;
@@ -211,21 +203,6 @@
                     "OpenBMC.0.1.SsbThermalTrip", NULL);
 }
 
-static inline bool peciError(EPECIStatus peciStatus, uint8_t cc)
-{
-    return (
-        peciStatus != PECI_CC_SUCCESS ||
-        (cc != PECI_DEV_CC_SUCCESS && cc != PECI_DEV_CC_FATAL_MCA_DETECTED));
-}
-
-static void printPECIError(const std::string& reg, const size_t addr,
-                           const EPECIStatus peciStatus, const size_t cc)
-{
-    std::cerr << "Failed to read " << reg << " on CPU address " << addr
-              << ". Error: " << peciStatus << ": cc: 0x" << std::hex << cc
-              << "\n";
-}
-
 static void initializeErrorState();
 static void init()
 {
@@ -301,8 +278,6 @@
                 // No host events should fire while off, so cancel any pending
                 // timers
                 caterrAssertTimer.cancel();
-                err0AssertTimer.cancel();
-                err1AssertTimer.cancel();
                 err2AssertTimer.cancel();
             }
             else
@@ -1161,78 +1136,6 @@
     });
 }
 
-static void err0AssertHandler()
-{
-    // Handle the standard ERR0 detection and logging
-    const static constexpr int err0 = 0;
-    errXAssertHandler(err0, err0AssertTimer);
-}
-
-static void err0Handler()
-{
-    if (!hostOff)
-    {
-        gpiod::line_event gpioLineEvent = err0Line.event_read();
-
-        bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
-        if (err0)
-        {
-            err0AssertHandler();
-        }
-        else
-        {
-            err0AssertTimer.cancel();
-        }
-    }
-    err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
-                         [](const boost::system::error_code ec) {
-                             if (ec)
-                             {
-                                 std::cerr
-                                     << "err0 handler error: " << ec.message()
-                                     << "\n";
-                                 return;
-                             }
-                             err0Handler();
-                         });
-}
-
-static void err1AssertHandler()
-{
-    // Handle the standard ERR1 detection and logging
-    const static constexpr int err1 = 1;
-    errXAssertHandler(err1, err1AssertTimer);
-}
-
-static void err1Handler()
-{
-    if (!hostOff)
-    {
-        gpiod::line_event gpioLineEvent = err1Line.event_read();
-
-        bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
-        if (err1)
-        {
-            err1AssertHandler();
-        }
-        else
-        {
-            err1AssertTimer.cancel();
-        }
-    }
-    err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
-                         [](const boost::system::error_code ec) {
-                             if (ec)
-                             {
-                                 std::cerr
-                                     << "err1 handler error: " << ec.message()
-                                     << "\n";
-                                 return;
-                             }
-                             err1Handler();
-                         });
-}
-
 static void err2AssertHandler()
 {
     // Handle the standard ERR2 detection and logging
@@ -1318,18 +1221,6 @@
                                                                associations);
     }
 
-    // Handle CPU_ERR0 if it's asserted now
-    if (err0Line.get_value() == 0)
-    {
-        err0AssertHandler();
-    }
-
-    // Handle CPU_ERR1 if it's asserted now
-    if (err1Line.get_value() == 0)
-    {
-        err1AssertHandler();
-    }
-
     // Handle CPU_ERR2 if it's asserted now
     if (err2Line.get_value() == 0)
     {
@@ -1475,22 +1366,6 @@
         return -1;
     }
 
-    // Request CPU_ERR0 GPIO events
-    if (!host_error_monitor::requestGPIOEvents(
-            "CPU_ERR0", host_error_monitor::err0Handler,
-            host_error_monitor::err0Line, host_error_monitor::err0Event))
-    {
-        return -1;
-    }
-
-    // Request CPU_ERR1 GPIO events
-    if (!host_error_monitor::requestGPIOEvents(
-            "CPU_ERR1", host_error_monitor::err1Handler,
-            host_error_monitor::err1Line, host_error_monitor::err1Event))
-    {
-        return -1;
-    }
-
     // Request CPU_ERR2 GPIO events
     if (!host_error_monitor::requestGPIOEvents(
             "CPU_ERR2", host_error_monitor::err2Handler,