watchdog: Collect hostboot dump when watchdog times out

Hostboot dump collection initiated by watchdog_timeout is disabled
by default. When the watchdog times out, only the error message
corresponding to the watchdog timeout is logged. To enable hostboot
dump collection whenever the watchdog times out, the meson option
'hostboot-dump-collection' must be enabled.

Testing - with meson option 'hostboot-dump-collection' enabled:
Ran watchdog_timeout:
case-1: CurrentHostState - off, AutoReboot - false
- Verified PEL object was not created
- Verified hostboot dump was not created
- Verified the Host State changed to Quiesce

case-2: CurrentHostState - off, AutoReboot - true
- Verified PEL object was created
- Verified hostboot dump was not created
- Verified the Host State changed to Running

case-3: CurrentHostState - Running, AutoReboot - false
- Verified PEL object was not created
- Verified hostboot dump was not created
- Verified the Host State changed to Quiesce

case-4: CurrentHostState - Running, AutoReboot - true, default timeout = 300s
- Verified PEL object was created
- Verified hostboot dump was created
- Observed Host state moving to either Running or Quiesce

case-5: CurrentHostState - Running, AutoReboot - true, specified timeout = 5s
- Verified PEL object was created
- Verified hostboot dump was created
- Observed Host state moving to either Running or Quiesce

Docker Unit test: passed

Signed-off-by: Shantappa Teekappanavar <sbteeks@yahoo.com>
Change-Id: Ib92d0c2f282816fb742cf07c1cb876b2cc093c12
diff --git a/meson.build b/meson.build
index 0996878..ae9962f 100644
--- a/meson.build
+++ b/meson.build
@@ -73,10 +73,21 @@
     endforeach
 endforeach
 
+conf_data = configuration_data() # values written to the generated config.h
+if get_option('hostboot-dump-collection').enabled()
+    conf_data.set('HOSTBOOT_DUMP_COLLECTION', true)
+    subdir('watchdog') # defines watchdog_lib
+else
+    conf_data.set('HOSTBOOT_DUMP_COLLECTION', false)
+    watchdog_lib = [] # nothing extra to link when the feature is off
+endif
+
 executable('watchdog_timeout',
     'watchdog_timeout.cpp',
+    configure_file(output: 'config.h', configuration: conf_data),
     generated_sources,
     dependencies: deps,
+    link_with: watchdog_lib,
     include_directories: include_directories('gen'),
     implicit_include_directories: true,
     install: true
diff --git a/meson_options.txt b/meson_options.txt
new file mode 100644
index 0000000..c271593
--- /dev/null
+++ b/meson_options.txt
@@ -0,0 +1,5 @@
+# Feature: collect a hostboot dump when the watchdog times out (off by default)
+option('hostboot-dump-collection',
+       type: 'feature',
+       value: 'disabled',
+       description : 'Enables hostboot dump collection')
diff --git a/watchdog/meson.build b/watchdog/meson.build
new file mode 100644
index 0000000..a75d0e3
--- /dev/null
+++ b/watchdog/meson.build
@@ -0,0 +1,21 @@
+# Source files for the hostboot dump collection support library
+watchdog_src = files(
+    'watchdog_dbus.cpp',
+    'watchdog_logging.cpp',
+    'watchdog_handler.cpp',
+    'watchdog_common.cpp',
+    'watchdog_main.cpp',
+)
+
+# Library dependencies
+watchdog_deps = [
+    sdbusplus
+]
+
+# Static library linked into watchdog_timeout by the top-level meson.build
+watchdog_lib = static_library(
+    'watchdog_lib',
+    watchdog_src,
+    dependencies: watchdog_deps,
+    install: false,
+)
diff --git a/watchdog/watchdog_common.cpp b/watchdog/watchdog_common.cpp
new file mode 100644
index 0000000..f5b52ff
--- /dev/null
+++ b/watchdog/watchdog_common.cpp
@@ -0,0 +1,66 @@
+#include <libpdbg.h>
+
+#include <phosphor-logging/log.hpp>
+#include <sdbusplus/bus.hpp>
+#include <watchdog_common.hpp>
+#include <watchdog_logging.hpp>
+
+#include <map>
+
+namespace watchdog
+{
+namespace dump
+{
+
+using namespace phosphor::logging;
+
+void transitionHost(const std::string& target)
+{
+    // systemd manager endpoint used to start the requested unit
+    constexpr auto service = "org.freedesktop.systemd1";
+    constexpr auto objectPath = "/org/freedesktop/systemd1";
+    constexpr auto managerIntf = "org.freedesktop.systemd1.Manager";
+    constexpr auto startUnit = "StartUnit";
+
+    auto systemBus = sdbusplus::bus::new_system();
+    auto request = systemBus.new_method_call(service, objectPath, managerIntf,
+                                             startUnit);
+    // arguments: the target unit to start, then the mode string "replace"
+    request.append(target, "replace");
+    systemBus.call_noreply(request); // start the unit
+}
+
+bool isAutoRebootEnabled()
+{
+    // D-Bus coordinates of the AutoReboot setting
+    constexpr auto service = "xyz.openbmc_project.Settings";
+    constexpr auto objectPath =
+        "/xyz/openbmc_project/control/host0/auto_reboot";
+    constexpr auto propertiesIntf = "org.freedesktop.DBus.Properties";
+    constexpr auto policyIntf =
+        "xyz.openbmc_project.Control.Boot.RebootPolicy";
+
+    auto systemBus = sdbusplus::bus::new_system();
+    auto request =
+        systemBus.new_method_call(service, objectPath, propertiesIntf, "Get");
+    request.append(policyIntf, "AutoReboot");
+
+    try
+    {
+        auto response = systemBus.call(request);
+        std::variant<bool> value;
+        response.read(value);
+        return std::get<bool>(value);
+    }
+    catch (const sdbusplus::exception::SdBusError& e)
+    {
+        log<level::ERR>("Error in AutoReboot Get", entry("ERROR=%s", e.what()));
+        // fall through to the default below
+    }
+
+    // The property could not be read: report auto reboot as disabled
+    return false;
+}
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_common.hpp b/watchdog/watchdog_common.hpp
new file mode 100644
index 0000000..a2b8ecc
--- /dev/null
+++ b/watchdog/watchdog_common.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+
+namespace watchdog
+{
+namespace dump
+{
+
+/**
+ * @brief Transition the host state
+ *
+ * @param target - the target to transition the host to
+ */
+void transitionHost(const std::string& target);
+
+/** @brief Read state of autoreboot property via dbus */
+bool isAutoRebootEnabled();
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_dbus.cpp b/watchdog/watchdog_dbus.cpp
new file mode 100644
index 0000000..6fe7563
--- /dev/null
+++ b/watchdog/watchdog_dbus.cpp
@@ -0,0 +1,134 @@
+#include <unistd.h>
+
+#include <phosphor-logging/log.hpp>
+#include <watchdog_dbus.hpp>
+#include <watchdog_logging.hpp>
+#include <xyz/openbmc_project/State/Boot/Progress/server.hpp>
+
+#include <string>
+#include <vector>
+
+namespace watchdog
+{
+namespace dump
+{
+
+using namespace phosphor::logging;
+
+/**
+ * @brief Look up the service hosting path/interface and build a method call
+ *
+ * Asks the ObjectMapper (GetObject) which service implements the interface
+ * at the object path, then stores a new method call to that service in
+ * method. When extended is non-empty it is used as the call's interface
+ * in place of interface (the lookup still uses interface).
+ *
+ * @return RC_SUCCESS when method was created, RC_DBUS_ERROR otherwise
+ */
+int dbusMethod(const std::string& path, const std::string& interface,
+               const std::string& function, sdbusplus::message::message& method,
+               const std::string& extended)
+{
+    int rc = RC_DBUS_ERROR; // assume error
+
+    try
+    {
+        constexpr auto serviceFind = "xyz.openbmc_project.ObjectMapper";
+        constexpr auto pathFind = "/xyz/openbmc_project/object_mapper";
+        constexpr auto interfaceFind = "xyz.openbmc_project.ObjectMapper";
+        constexpr auto functionFind = "GetObject";
+
+        auto bus = sdbusplus::bus::new_system(); // using system dbus
+
+        // method to find service from object path and object interface
+        auto newMethod = bus.new_method_call(serviceFind, pathFind,
+                                             interfaceFind, functionFind);
+
+        // find the service for specified object path and interface
+        newMethod.append(path.c_str());
+        newMethod.append(std::vector<std::string>({interface}));
+        auto reply = bus.call(newMethod);
+
+        // dbus call results, keyed by service name
+        std::map<std::string, std::vector<std::string>> responseFindService;
+        reply.read(responseFindService);
+
+        if (!responseFindService.empty())
+        {
+            // Use the first service reported for the path and interface
+            const auto& service = responseFindService.begin()->first;
+
+            // An extended interface, when given, replaces interface for the call
+            const std::string& target = extended.empty() ? interface : extended;
+            method = bus.new_method_call(service.c_str(), path.c_str(),
+                                         target.c_str(), function.c_str());
+
+            rc = RC_SUCCESS;
+        }
+        else
+        {
+            // This trace will be picked up in event log
+            log<level::INFO>("dbusMethod service not found");
+            // Trace at most maxTraceLen characters of each string. Note that
+            // std::string(str, n) would copy from offset n (and throw when
+            // n is past the end) instead of truncating.
+            const std::string traceMsgPath = path.substr(0, maxTraceLen);
+            log<level::INFO>(traceMsgPath.c_str());
+            const std::string traceMsgIface = interface.substr(0, maxTraceLen);
+            log<level::INFO>(traceMsgIface.c_str());
+        }
+    }
+    catch (const sdbusplus::exception::SdBusError& e)
+    {
+        log<level::ERR>("Error in dbusMethod", entry("ERROR=%s", e.what()));
+    }
+
+    return rc;
+}
+
+uint32_t createPel(const std::string& eventType,
+                   std::map<std::string, std::string>& additional,
+                   const std::vector<FFDCTuple>& ffdc)
+{
+    // Platform log id to return; unsigned like the reply field, 0 on failure
+    uint32_t plid = 0;
+
+    // Need to provide pid when using create or create-with-ffdc methods
+    additional.emplace("_PID", std::to_string(getpid()));
+
+    // Sdbus call specifics
+    constexpr auto interface = "org.open_power.Logging.PEL";
+    constexpr auto function = "CreatePELWithFFDCFiles";
+
+    sdbusplus::message::message method;
+
+    if (RC_SUCCESS == dbusMethod(pathLogging, interface, function, method))
+    {
+        try
+        {
+            // append additional dbus call parameters
+            method.append(eventType, levelPelError, additional, ffdc);
+
+            // using system dbus
+            auto bus = sdbusplus::bus::new_system();
+            auto response = bus.call(method);
+
+            // reply will be tuple containing bmc log id, platform log id
+            std::tuple<uint32_t, uint32_t> reply = {0, 0};
+
+            // parse dbus response into reply
+            response.read(reply);
+            plid = std::get<1>(reply); // platform log id is tuple "second"
+        }
+        catch (const sdbusplus::exception::SdBusError& e)
+        {
+            log<level::ERR>("Error in createPel CreatePELWithFFDCFiles",
+                            entry("ERROR=%s", e.what()));
+        }
+    }
+
+    return plid; // platform log id or 0
+}
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_dbus.hpp b/watchdog/watchdog_dbus.hpp
new file mode 100644
index 0000000..5de5691
--- /dev/null
+++ b/watchdog/watchdog_dbus.hpp
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <sdbusplus/bus.hpp>
+#include <xyz/openbmc_project/Logging/Create/server.hpp>
+
+#include <string>
+
+namespace watchdog
+{
+namespace dump
+{
+
+enum ReturnCodes
+{
+    RC_SUCCESS = 0,     // operation completed
+    RC_NOT_HANDLED = 1, // NOTE(review): not used in the code shown here
+    RC_DBUS_ERROR = 2   // dbus lookup or call failed
+};
+
+using FFDCFormat =
+    sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat;
+// Layout of one entry in the ffdc vector accepted by createPel
+using FFDCTuple =
+    std::tuple<FFDCFormat, uint8_t, uint8_t, sdbusplus::message::unix_fd>;
+
+/**
+ * @brief Create a dbus method
+ *
+ * Find the dbus service associated with the dbus object path and create
+ * a dbus method for calling the specified dbus interface and function.
+ *
+ * @param path - dbus object path
+ * @param interface - dbus interface used to find the service
+ * @param function - dbus interface function
+ * @param method - method that is created (written only on success)
+ * @param extended - if not empty, interface to call instead of interface
+ * @return RC_SUCCESS, or RC_DBUS_ERROR if the method was not created
+ *
+ **/
+int dbusMethod(const std::string& path, const std::string& interface,
+               const std::string& function, sdbusplus::message::message& method,
+               const std::string& extended = "");
+
+/**
+ * @brief Create a PEL for the specified event type
+ *
+ * The additional data provided in the map will be placed in a user data
+ * section of the PEL.
+ *
+ * @param  eventType - the event type
+ * @param  additional - map of additional data; a "_PID" entry is added to it
+ * @param  ffdc - vector of ffdc data
+ * @return Platform log id or 0 if error
+ */
+uint32_t createPel(const std::string& eventType,
+                   std::map<std::string, std::string>& additional,
+                   const std::vector<FFDCTuple>& ffdc);
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_handler.cpp b/watchdog/watchdog_handler.cpp
new file mode 100644
index 0000000..7add985
--- /dev/null
+++ b/watchdog/watchdog_handler.cpp
@@ -0,0 +1,144 @@
+#include <phosphor-logging/log.hpp>
+#include <sdbusplus/bus.hpp>
+#include <sdbusplus/bus/match.hpp>
+#include <watchdog_dbus.hpp>
+#include <watchdog_handler.hpp>
+#include <watchdog_logging.hpp>
+
+namespace watchdog
+{
+namespace dump
+{
+
+using namespace phosphor::logging;
+
+/**
+ * @brief Handler invoked for property change signals of a requested dump
+ *
+ * @param msg - properties-changed message delivered by the dbus match
+ * @param path - object path of the dump being watched (traced only)
+ * @param inProgress - cleared once Status is no longer InProgress
+ * @return Always 1
+ */
+uint dumpStatusChanged(sdbusplus::message::message& msg, std::string path,
+                       bool& inProgress)
+{
+    constexpr auto statusInProgress =
+        "xyz.openbmc_project.Common.Progress.OperationStatus.InProgress";
+
+    // unpack the property change message: interface + changed properties
+    std::string changedIntf;
+    std::map<std::string, std::variant<std::string, uint8_t>> changed;
+    msg.read(changedIntf, changed);
+
+    // only the Status property is of interest
+    const auto found = changed.find("Status");
+    if (found == changed.end())
+    {
+        return 1;
+    }
+
+    const auto* state = std::get_if<std::string>(&found->second);
+    if ((state != nullptr) && (*state != statusInProgress))
+    {
+        // dump is done, trace some info and change in progress flag
+        log<level::INFO>(path.c_str());
+        log<level::INFO>(state->c_str());
+        inProgress = false;
+    }
+
+    return 1; // non-negative return code for successful callback
+}
+
+/**
+ * @brief Wait for a dump to leave the in-progress state, or time out
+ *
+ * @param path - the object path of the dump to monitor
+ * @param timeout - timeout interval in seconds (0 gives up after one pass)
+ */
+void monitorDump(const std::string& path, const uint32_t timeout)
+{
+    bool inProgress = true; // callback will update this
+
+    // setup the signal match rules and callback; the match object is a
+    // local, destroyed when this function returns
+    std::string matchInterface = "xyz.openbmc_project.Common.Progress";
+    auto bus = sdbusplus::bus::new_system();
+
+    const auto rule = sdbusplus::bus::match::rules::propertiesChanged(
+        path.c_str(), matchInterface.c_str());
+    sdbusplus::bus::match_t match(bus, rule, [&](auto& msg) {
+        return dumpStatusChanged(msg, path, inProgress);
+    });
+
+    // wait until the callback clears inProgress or the timeout interval
+    // is used up; each pass calls bus.wait with a one second limit
+    log<level::INFO>("hbdump requested");
+    bool timedOut = false;
+    uint32_t secondsCount = 0;
+    while (inProgress && !timedOut)
+    {
+        bus.wait(std::chrono::seconds(1));
+        bus.process_discard();
+
+        // A dump that finished on this pass is not reported as timed out.
+        // ">=" (not "==") so that a timeout of 0 still ends the wait.
+        if (inProgress && (++secondsCount >= timeout))
+        {
+            timedOut = true;
+        }
+    }
+
+    if (timedOut)
+    {
+        log<level::ERR>("hbdump dump progress status did not change to "
+                        "complete within the timeout interval, exiting...");
+    }
+    else
+    {
+        log<level::INFO>("hbdump completed");
+    }
+}
+
+void requestDump(const uint32_t logId, const uint32_t timeout)
+{
+    constexpr auto dumpPath = "/org/openpower/dump";
+    constexpr auto createIntf = "xyz.openbmc_project.Dump.Create";
+    constexpr auto createFn = "CreateDump";
+
+    // Build the CreateDump call for whichever service hosts the dump path
+    sdbusplus::message::message request;
+    if (0 != dbusMethod(dumpPath, createIntf, createFn, request))
+    {
+        return; // the method call could not be created
+    }
+
+    try
+    {
+        // Parameters for CreateDump: hostboot dump tied to the event log id
+        using ParamValue = std::variant<std::string, uint64_t>;
+        std::map<std::string, ParamValue> params;
+        params["com.ibm.Dump.Create.CreateParameters.DumpType"] =
+            "com.ibm.Dump.Create.DumpType.Hostboot";
+        params["com.ibm.Dump.Create.CreateParameters.ErrorLogId"] =
+            static_cast<uint64_t>(logId);
+        request.append(params);
+
+        // Issue the call on the system bus; the reply is the dump object path
+        auto systemBus = sdbusplus::bus::new_system();
+        auto response = systemBus.call(request);
+
+        sdbusplus::message::object_path dumpObject;
+        response.read(dumpObject);
+
+        // Wait here while the dump makes progress
+        monitorDump(dumpObject, timeout);
+    }
+    catch (const sdbusplus::exception::SdBusError& e)
+    {
+        log<level::ERR>("Error in requestDump", entry("ERROR=%s", e.what()));
+    }
+}
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_handler.hpp b/watchdog/watchdog_handler.hpp
new file mode 100644
index 0000000..57bbdf8
--- /dev/null
+++ b/watchdog/watchdog_handler.hpp
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <stdint.h>
+
+/**
+ * @brief Hostboot dump collector handler
+ *
+ * Handle collection due to host going down
+ */
+
+namespace watchdog
+{
+namespace dump
+{
+
+/**
+ * @brief Request a dump from the dump manager
+ *
+ * Request a hostboot dump from the dump manager and wait, monitoring the
+ * dump progress, until it is no longer in progress or the timeout is hit.
+ *
+ * @param logId - the id of the event log associated with this dump request
+ * @param timeout - timeout interval in seconds
+ *
+ */
+void requestDump(const uint32_t logId, const uint32_t timeout);
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_logging.cpp b/watchdog/watchdog_logging.cpp
new file mode 100644
index 0000000..7d5bdd9
--- /dev/null
+++ b/watchdog/watchdog_logging.cpp
@@ -0,0 +1,44 @@
+#include <unistd.h>
+
+#include <watchdog_dbus.hpp>
+#include <watchdog_handler.hpp>
+#include <watchdog_logging.hpp>
+
+namespace watchdog
+{
+namespace dump
+{
+
+/**
+ * @brief Create the watchdog timeout PEL and request a hostboot dump for it
+ *
+ * @param additional - Additional PEL data
+ * @param timeout - timeout interval in seconds
+ */
+void event(std::map<std::string, std::string>& additional,
+           const uint32_t timeout)
+{
+    // NOTE(review): watchdog_timeout.cpp reports "WatchdogTimedOut" when
+    // dump collection is off; confirm this differing name is intended.
+    const std::string watchdogTimeoutEvent =
+        "org.open_power.Host.Boot.Error.WatchdogTimeout";
+
+    // CreatePELWithFFDCFiles requires a vector of FFDCTuple; none to attach.
+    const std::vector<FFDCTuple> noFfdc;
+
+    // Log the PEL, then request the dump for its platform log id
+    requestDump(createPel(watchdogTimeoutEvent, additional, noFfdc), timeout);
+}
+
+void eventWatchdogTimeout(const uint32_t timeout)
+{
+    // No watchdog-specific data is attached to the PEL at present.
+    // The map stays empty and is only kept as the place to add
+    // such data later; createPel may still add entries of its own.
+    std::map<std::string, std::string> pelData{};
+
+    event(pelData, timeout);
+}
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_logging.hpp b/watchdog/watchdog_logging.hpp
new file mode 100644
index 0000000..10d3283
--- /dev/null
+++ b/watchdog/watchdog_logging.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <cstddef> // for size_t
+#include <map>
+#include <string>
+#include <vector>
+
+namespace watchdog
+{
+namespace dump
+{
+
+constexpr int maxTraceLen = 64; // trace length limit, in characters
+
+constexpr auto pathLogging = "/xyz/openbmc_project/logging"; // logging object
+constexpr auto levelPelError = "xyz.openbmc_project.Logging.Entry.Level.Error";
+
+/**
+ * @brief Create the watchdog timeout PEL and request a hostboot dump for it
+ *
+ * @param timeout - timeout interval in seconds for the dump to complete
+ */
+void eventWatchdogTimeout(const uint32_t timeout);
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_main.cpp b/watchdog/watchdog_main.cpp
new file mode 100644
index 0000000..47aa41e
--- /dev/null
+++ b/watchdog/watchdog_main.cpp
@@ -0,0 +1,29 @@
+#include <watchdog_common.hpp>
+#include <watchdog_logging.hpp>
+
+namespace watchdog
+{
+namespace dump
+{
+
+void triggerHostbootDump(const uint32_t timeout)
+{
+    constexpr auto diagnosticModeTarget = "obmc-host-diagnostic-mode@0.target";
+    constexpr auto quiesceTarget = "obmc-host-quiesce@0.target";
+
+    // Step 1: enter diagnostic mode
+    transitionHost(diagnosticModeTarget);
+
+    // Step 2: log the event and collect the dump only if auto reboot is on
+    const bool collectDump = isAutoRebootEnabled();
+    if (collectDump)
+    {
+        eventWatchdogTimeout(timeout);
+    }
+
+    // Step 3: leave the system in the quiesce state
+    transitionHost(quiesceTarget);
+}
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog/watchdog_main.hpp b/watchdog/watchdog_main.hpp
new file mode 100644
index 0000000..1b1e5a6
--- /dev/null
+++ b/watchdog/watchdog_main.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <stdint.h>
+
+/**
+ * @brief Main function to initiate Hostboot dump
+ *
+ */
+
+namespace watchdog
+{
+namespace dump
+{
+
+/**
+ * @brief Diagnostic mode, dump if auto reboot is enabled, then quiesce
+ *
+ * @param timeout - timeout interval in seconds, used if a dump is requested
+ */
+void triggerHostbootDump(const uint32_t timeout);
+
+} // namespace dump
+} // namespace watchdog
diff --git a/watchdog_timeout.cpp b/watchdog_timeout.cpp
index 10361f2..ca22ba3 100644
--- a/watchdog_timeout.cpp
+++ b/watchdog_timeout.cpp
@@ -1,14 +1,38 @@
+#include <config.h>
+
+#include <CLI/CLI.hpp>
+
+#ifdef HOSTBOOT_DUMP_COLLECTION
+#include <watchdog/watchdog_main.hpp>
+#else
 #include "org/open_power/Host/Boot/error.hpp"
 #include "phosphor-logging/elog-errors.hpp"
 
 #include <phosphor-logging/elog.hpp>
+#endif
 
-int main(int /*argc*/, char** /*argv*/)
+int main(int argc, char* argv[])
 {
+    CLI::App app{"Hostboot dump collector for watchdog timeout"};
+
+#ifdef HOSTBOOT_DUMP_COLLECTION
+    uint32_t timeoutInterval = 1500; // default when -t is absent, in seconds
+    app.add_option("-t,--timeout", timeoutInterval,
+                   "Set timeout interval for watchdog timeout in seconds");
+#endif
+
+    CLI11_PARSE(app, argc, argv);
+
+#ifdef HOSTBOOT_DUMP_COLLECTION
+    using namespace watchdog::dump;
+    // TODO: trigger SBE dump if in SBE window otherwise hostboot dump
+    triggerHostbootDump(timeoutInterval);
+#else
     using namespace phosphor::logging;
     using error =
         sdbusplus::org::open_power::Host::Boot::Error::WatchdogTimedOut;
     report<error>();
+#endif
 
     return 0;
 }