Add the host error monitor with CATERR detection

This adds the host error monitor to monitor GPIOs for errors from
the host.  To start, it can detect and do basic handling of CATERR
assertions:

   1. Detect CATERR assertion
   2. Check power is on
   3. Make sure CATERR is held for at least 2 seconds
   4. Trigger crashdump
   5. Check the CATERR reset setting and power-cycle the system
      if enabled

Tested:
Injected an IERR and confirmed that it was detected and handled
correctly.

Change-Id: I444c0d92f66583c88e0f27b9870f92e1339c103e
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..ae9ad39
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,98 @@
+---
+Language:        Cpp
+# BasedOnStyle:  LLVM
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: false
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass:      true
+  AfterControlStatement: true
+  AfterEnum:       true
+  AfterFunction:   true
+  AfterNamespace:  true
+  AfterObjCDeclaration: true
+  AfterStruct:     true
+  AfterUnion:      true
+  BeforeCatch:     true
+  BeforeElse:      true
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: AfterColon
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+PointerAlignment: Left
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeBlocks: Regroup
+IncludeCategories:
+  - Regex:           '^[<"](gtest|gmock)'
+    Priority:        5
+  - Regex:           '^"config.h"'
+    Priority:        -1
+  - Regex:           '^".*\.hpp"'
+    Priority:        1
+  - Regex:           '^<.*\.h>'
+    Priority:        2
+  - Regex:           '^<.*'
+    Priority:        3
+  - Regex:           '.*'
+    Priority:        4
+IndentCaseLabels: true
+IndentWidth:     4
+IndentWrappedFunctionNames: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+ReflowComments:  true
+SortIncludes:    true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        4
+UseTab:          Never
+...
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..ce7dbc1
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,36 @@
+cmake_minimum_required (VERSION 3.6)
+project (host-error-monitor CXX)
+set (CMAKE_CXX_STANDARD 17)
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+
+add_executable (host-error-monitor src/host_error_monitor.cpp)
+
+target_include_directories (host-error-monitor PRIVATE ${CMAKE_SOURCE_DIR})
+
+target_link_libraries (host-error-monitor sdbusplus -lsystemd gpiodcxx)
+
+include_directories (${CMAKE_CURRENT_SOURCE_DIR}/include)
+
+install (TARGETS host-error-monitor
+         RUNTIME DESTINATION bin
+         LIBRARY DESTINATION lib
+         ARCHIVE DESTINATION lib/static)
+
+find_package (Boost 1.66 REQUIRED)
+include_directories (${BOOST_SRC_DIR})
+
+add_definitions (-DBOOST_ERROR_CODE_HEADER_ONLY)
+add_definitions (-DBOOST_SYSTEM_NO_DEPRECATED)
+add_definitions (-DBOOST_ALL_NO_LIB)
+add_definitions (-DBOOST_NO_RTTI)
+add_definitions (-DBOOST_NO_TYPEID)
+add_definitions (-DBOOST_ASIO_DISABLE_THREADS)
+
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-rtti")
+
+set (
+    SERVICE_FILES
+    ${PROJECT_SOURCE_DIR}/service_files/xyz.openbmc_project.HostErrorMonitor.service
+)
+install (FILES ${SERVICE_FILES} DESTINATION /lib/systemd/system/)
diff --git a/cmake-format.json b/cmake-format.json
new file mode 100644
index 0000000..4a701ae
--- /dev/null
+++ b/cmake-format.json
@@ -0,0 +1,12 @@
+{
+  "enum_char": ".",
+  "line_ending": "unix",
+  "bullet_char": "*",
+  "max_subargs_per_line": 99,
+  "command_case": "lower",
+  "tab_size": 4,
+  "line_width": 80,
+  "separate_fn_name_with_space": true,
+  "dangle_parens": true,
+  "separate_ctrl_name_with_space": true
+}
diff --git a/service_files/xyz.openbmc_project.HostErrorMonitor.service b/service_files/xyz.openbmc_project.HostErrorMonitor.service
new file mode 100644
index 0000000..cf789e9
--- /dev/null
+++ b/service_files/xyz.openbmc_project.HostErrorMonitor.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=Host Error Monitor
+
+[Service]
+Restart=always
+ExecStart=/usr/bin/host-error-monitor
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp
new file mode 100644
index 0000000..308c3c9
--- /dev/null
+++ b/src/host_error_monitor.cpp
@@ -0,0 +1,309 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+#include <boost/asio/posix/stream_descriptor.hpp>
+#include <gpiod.hpp>
+#include <iostream>
+#include <sdbusplus/asio/object_server.hpp>
+
+namespace host_error_monitor
+{
+static boost::asio::io_service io;
+static std::shared_ptr<sdbusplus::asio::connection> conn;
+
+static bool hostOff = true;
+
+static constexpr size_t caterrTimeoutMs = 2000;  // CATERR hold time, in ms
+static constexpr size_t crashdumpTimeoutS = 300; // crashdump wait limit, in s
+
+// Timers
+// Timer for CATERR asserted
+static boost::asio::steady_timer caterrAssertTimer(io);
+
+// GPIO Lines and Event Descriptors
+static gpiod::line caterrLine;
+static boost::asio::posix::stream_descriptor caterrEvent(io);
+
+static void initializeHostState()
+{
+    conn->async_method_call(
+        [](boost::system::error_code ec,
+           const std::variant<std::string>& property) {
+            if (ec)
+            {
+                return;
+            }
+            const std::string* state = std::get_if<std::string>(&property);
+            if (state == nullptr)
+            {
+                std::cerr << "Unable to read host state value\n";
+                return;
+            }
+            hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
+        },
+        "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
+        "org.freedesktop.DBus.Properties", "Get",
+        "xyz.openbmc_project.State.Host", "CurrentHostState");
+}
+
+// Returns a match that keeps hostOff in sync with CurrentHostState signals
+static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
+{
+    return std::make_shared<sdbusplus::bus::match::match>(
+        *conn,
+        "type='signal',interface='org.freedesktop.DBus.Properties',"
+        "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
+        "Host'",
+        [](sdbusplus::message::message& msg) {
+            std::string interfaceName;
+            boost::container::flat_map<std::string, std::variant<std::string>>
+                propertiesChanged;
+            std::string state;
+            try
+            {
+                msg.read(interfaceName, propertiesChanged);
+                // Look the property up by name; at() throws when it is absent
+                state = std::get<std::string>(
+                    propertiesChanged.at("CurrentHostState"));
+            }
+            catch (const std::exception&)
+            {
+                std::cerr << "Unable to read host state\n";
+                return;
+            }
+            hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
+            // Nothing should fire while the host is off; cancel pending timers
+            if (hostOff)
+            {
+                caterrAssertTimer.cancel();
+            }
+        });
+}
+
+// Watch GPIO 'name' for edge events; handler is called once, on the first one
+static bool requestGPIOEvents(
+    const std::string& name, const std::function<void()>& handler,
+    gpiod::line& gpioLine,
+    boost::asio::posix::stream_descriptor& gpioEventDescriptor)
+{
+    // Find the GPIO line
+    gpioLine = gpiod::find_line(name);
+    if (!gpioLine)
+    {
+        std::cerr << "Failed to find the " << name << " line\n";
+        return false;
+    }
+    try
+    {
+        gpioLine.request(
+            {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
+    }
+    catch (const std::exception&)
+    {
+        std::cerr << "Failed to request events for " << name << "\n";
+        return false;
+    }
+
+    int gpioLineFd = gpioLine.event_get_fd();
+    if (gpioLineFd < 0)
+    {
+        std::cerr << "Failed to get " << name << " fd\n";
+        return false;
+    }
+
+    gpioEventDescriptor.assign(gpioLineFd);
+    // Copy name into the callback: it runs after this function has returned
+    gpioEventDescriptor.async_wait(
+        boost::asio::posix::stream_descriptor::wait_read,
+        [name, handler](const boost::system::error_code ec) {
+            if (ec)
+            {
+                std::cerr << name << " fd handler error: " << ec.message()
+                          << "\n";
+                return;
+            }
+            handler();
+        });
+    return true;
+}
+
+static void startPowerCycle()
+{
+    conn->async_method_call(
+        [](boost::system::error_code ec) {
+            if (ec)
+            {
+                std::cerr << "failed to set Chassis State\n";
+            }
+        },
+        "xyz.openbmc_project.State.Chassis",
+        "/xyz/openbmc_project/state/chassis0",
+        "org.freedesktop.DBus.Properties", "Set",
+        "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
+        std::variant<std::string>{
+            "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
+}
+
+static void startCrashdumpAndRecovery(bool recoverSystem)
+{
+    std::cout << "Starting crashdump\n";
+    static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
+    static boost::asio::steady_timer crashdumpTimer(io);
+
+    crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
+        *conn,
+        "type='signal',interface='org.freedesktop.DBus.Properties',"
+        "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
+        [recoverSystem](sdbusplus::message::message& msg) {
+            crashdumpTimer.cancel();
+            std::cout << "Crashdump completed\n";
+            if (recoverSystem)
+            {
+                std::cout << "Recovering the system\n";
+                startPowerCycle();
+            }
+            crashdumpCompleteMatch.reset();
+        });
+
+    crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
+    crashdumpTimer.async_wait([](const boost::system::error_code ec) {
+        if (ec)
+        {
+            // operation_aborted is expected if timer is canceled
+            if (ec != boost::asio::error::operation_aborted)
+            {
+                std::cerr << "Crashdump async_wait failed: " << ec.message()
+                          << "\n";
+            }
+            std::cout << "Crashdump timer canceled\n";
+            return;
+        }
+        std::cerr << "Crashdump failed to complete before timeout\n";
+        crashdumpCompleteMatch.reset();
+    });
+
+    conn->async_method_call(
+        [](boost::system::error_code ec) {
+            if (ec)
+            {
+                std::cerr << "failed to start Crashdump\n";
+                crashdumpTimer.cancel();
+                crashdumpCompleteMatch.reset();
+            }
+        },
+        "com.intel.crashdump", "/com/intel/crashdump",
+        "com.intel.crashdump.Stored", "GenerateStoredLog");
+}
+
+// Handles one CATERR GPIO edge event, then re-arms the wait for the next one
+static void caterrHandler()
+{
+    // Always read the event so an ignored edge does not stay queued on the fd
+    gpiod::line_event gpioLineEvent = caterrLine.event_read();
+    if (!hostOff)
+    {
+        bool caterr =
+            gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
+        if (caterr)
+        {
+            std::cout << "CPU CATERR detected, starting timer\n";
+            caterrAssertTimer.expires_after(
+                std::chrono::milliseconds(caterrTimeoutMs));
+            caterrAssertTimer.async_wait(
+                [](const boost::system::error_code ec) {
+                    if (ec)
+                    {
+                        // operation_aborted just means the timer was canceled
+                        if (ec != boost::asio::error::operation_aborted)
+                        {
+                            std::cerr << "caterr timeout async_wait failed: "
+                                      << ec.message() << "\n";
+                        }
+                        std::cout << "CATERR assert timer canceled\n";
+                        return;
+                    }
+                    std::cout << "CATERR assert timer completed\n";
+                    conn->async_method_call(
+                        [](boost::system::error_code ec,
+                           const std::variant<bool>& property) {
+                            if (ec)
+                            {
+                                return;
+                            }
+                            const bool* reset = std::get_if<bool>(&property);
+                            if (reset == nullptr)
+                            {
+                                std::cerr
+                                    << "Unable to read reset on CATERR value\n";
+                                return;
+                            }
+                            startCrashdumpAndRecovery(*reset);
+                        },
+                        "xyz.openbmc_project.Settings",
+                        "/xyz/openbmc_project/control/processor_error_config",
+                        "org.freedesktop.DBus.Properties", "Get",
+                        "xyz.openbmc_project.Control.Processor.ErrConfig",
+                        "ResetOnCATERR");
+                });
+        }
+        else
+        {
+            caterrAssertTimer.cancel();
+        }
+    }
+    caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
+                           [](const boost::system::error_code ec) {
+                               if (ec)
+                               {
+                                   std::cerr << "caterr handler error: "
+                                             << ec.message() << "\n";
+                                   return;
+                               }
+                               caterrHandler();
+                           });
+}
+} // namespace host_error_monitor
+
+int main(int argc, char* argv[])
+{
+    // setup connection to dbus
+    host_error_monitor::conn =
+        std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
+
+    // Host Error Monitor Object
+    host_error_monitor::conn->request_name(
+        "xyz.openbmc_project.HostErrorMonitor");
+    sdbusplus::asio::object_server server =
+        sdbusplus::asio::object_server(host_error_monitor::conn);
+
+    // Start tracking host state
+    std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
+        host_error_monitor::startHostStateMonitor();
+
+    // Initialize the host state
+    host_error_monitor::initializeHostState();
+
+    // Request CPU_CATERR GPIO events
+    if (!host_error_monitor::requestGPIOEvents(
+            "CPU_CATERR", host_error_monitor::caterrHandler,
+            host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
+    {
+        return -1;
+    }
+
+    host_error_monitor::io.run();
+
+    return 0;
+}