Monitor UCD90160 for faults at runtime
Add the RuntimeMonitor class that will monitor the
UCD90160 faults in 2 ways:
1) Watch for the PowerLost signal, meaning system
PGOOD was lost. When it occurs, analyze the
chip for errors and then issue a proper shutdown
so a faulted device doesn't keep getting power.
2) Poll on an interval for nonfatal errors that need
to be logged but don't cause a PGOOD loss.
The main executable can now launch either the PGOODMonitor
or the RuntimeMonitor based on command-line arguments.
Change-Id: If2856f173d5d6288d8333538334b4b4cb4a60097
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
diff --git a/power-sequencer/Makefile.am b/power-sequencer/Makefile.am
index 485ae3e..1f199ce 100644
--- a/power-sequencer/Makefile.am
+++ b/power-sequencer/Makefile.am
@@ -9,6 +9,7 @@
gpio.cpp \
main.cpp \
pgood_monitor.cpp \
+ runtime_monitor.cpp \
ucd90160.cpp \
ucd90160_defs.cpp
diff --git a/power-sequencer/argument.cpp b/power-sequencer/argument.cpp
index d668de1..e76adce 100644
--- a/power-sequencer/argument.cpp
+++ b/power-sequencer/argument.cpp
@@ -28,8 +28,12 @@
std::cerr << "Usage: " << argv[0] << " [options]\n";
std::cerr << "Options:\n";
std::cerr << " --help Print this menu\n";
- std::cerr << " --action=<action> Action: pgood-monitor\n";
- std::cerr << " --interval=<interval> Time to allow PGOOD to come up\n";
+ std::cerr << " --action=<action> Action: pgood-monitor "
+ "or runtime-monitor\n";
+ std::cerr << " --interval=<interval> Interval in seconds:\n";
+ std::cerr << " PGOOD monitor: time allowed for PGOOD to come up\n";
+ std::cerr << " Runtime monitor: polling interval.\n";
+
std::cerr << std::flush;
}
diff --git a/power-sequencer/main.cpp b/power-sequencer/main.cpp
index 074660d..f324d3e 100644
--- a/power-sequencer/main.cpp
+++ b/power-sequencer/main.cpp
@@ -18,6 +18,7 @@
#include <phosphor-logging/log.hpp>
#include "argument.hpp"
#include "pgood_monitor.hpp"
+#include "runtime_monitor.hpp"
#include "ucd90160.hpp"
using namespace witherspoon::power;
@@ -28,7 +29,7 @@
ArgumentParser args{argc, argv};
auto action = args["action"];
- if (action != "pgood-monitor")
+ if ((action != "pgood-monitor") && (action != "runtime-monitor"))
{
std::cerr << "Invalid action\n";
args.usage(argv);
@@ -59,7 +60,22 @@
auto device = std::make_unique<UCD90160>(0);
- PGOODMonitor monitor{std::move(device), bus, event, interval};
+ std::unique_ptr<DeviceMonitor> monitor;
- return monitor.run();
+ if (action == "pgood-monitor")
+ {
+ //If PGOOD doesn't turn on within a certain
+ //time, analyze the device for errors
+ monitor = std::make_unique<PGOODMonitor>(
+ std::move(device), bus, event, interval);
+ }
+ else //runtime-monitor
+ {
+ //Continuously monitor this device both by polling
+ //and on 'power lost' signals.
+ monitor = std::make_unique<RuntimeMonitor>(
+ std::move(device), bus, event, interval);
+ }
+
+ return monitor->run();
}
diff --git a/power-sequencer/runtime_monitor.cpp b/power-sequencer/runtime_monitor.cpp
new file mode 100644
index 0000000..3873ac0
--- /dev/null
+++ b/power-sequencer/runtime_monitor.cpp
@@ -0,0 +1,58 @@
+/**
+ * Copyright © 2017 IBM Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <phosphor-logging/log.hpp>
+#include "runtime_monitor.hpp"
+#include "utility.hpp"
+
+namespace witherspoon
+{
+namespace power
+{
+
+using namespace phosphor::logging;
+
+int RuntimeMonitor::run()
+{
+    //Clear the device's faults first, then let the base class run
+    device->clearFaults();
+    return DeviceMonitor::run();
+}
+
+//PowerLost signal handler: analyze the device, then power off.
+void RuntimeMonitor::onPowerLost(sdbusplus::message::message& msg)
+{
+    log<level::INFO>("PGOOD failure detected. Checking for faults.");
+
+    try
+    {
+        timer.stop();
+        device->onFailure();
+
+        //Note: This application only runs when the system has
+        //power, so it will be killed by systemd sometime shortly
+        //after this power off is issued.
+        util::powerOff(bus);
+    }
+    catch (const std::exception& e)
+    {
+        //No need to crash, just log it. NOTE(review): a throw from
+        //timer.stop() or onFailure() also skips the power off above;
+        //confirm that is acceptable.
+        log<level::ERR>(e.what());
+    }
+}
+
+}
+}
diff --git a/power-sequencer/runtime_monitor.hpp b/power-sequencer/runtime_monitor.hpp
new file mode 100644
index 0000000..aae17ed
--- /dev/null
+++ b/power-sequencer/runtime_monitor.hpp
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <sdbusplus/bus.hpp>
+#include <sdbusplus/server.hpp>
+#include "device.hpp"
+#include "event.hpp"
+#include "device_monitor.hpp"
+#include "timer.hpp"
+
+namespace witherspoon
+{
+namespace power
+{
+
+/**
+ * @class RuntimeMonitor
+ *
+ * Monitors the power sequencer for faults at runtime
+ *
+ * Triggers the power sequencer fault check 2 different ways:
+ *
+ * 1) Listens for the PowerLost signal that indicates master
+ * PGOOD was dropped due to a fatal fault. After the analysis,
+ * a power off will be issued so the sequencer will stop
+ * driving power to a faulted component.
+ *
+ * 2) Polls for faults, as some won't always drop PGOOD.
+ *
+ * The application this runs in will only run while PGOOD is
+ * expected to be asserted, so any loss of PGOOD is considered
+ * an error.
+ */
+class RuntimeMonitor : public DeviceMonitor
+{
+    public:
+
+        RuntimeMonitor() = delete;
+        ~RuntimeMonitor() = default;
+        RuntimeMonitor(const RuntimeMonitor&) = delete;
+        RuntimeMonitor& operator=(const RuntimeMonitor&) = delete;
+        RuntimeMonitor(RuntimeMonitor&&) = delete;
+        RuntimeMonitor& operator=(RuntimeMonitor&&) = delete;
+
+        /**
+         * Constructor - also registers the PowerLost signal callback
+         *
+         * @param[in] d - the device to monitor
+         * @param[in] b - D-Bus bus object; must outlive this object as
+         *                only a reference to it is stored
+         * @param[in] e - event object
+         * @param[in] i - poll interval
+         */
+        RuntimeMonitor(std::unique_ptr<witherspoon::power::Device>&& d,
+                sdbusplus::bus::bus& b,
+                witherspoon::power::event::Event& e,
+                std::chrono::seconds& i) :
+            DeviceMonitor(std::move(d), e, i),
+            bus(b),
+            match(bus,
+                  getMatchString(),
+                  [this](sdbusplus::message::message& m) { onPowerLost(m); })
+        {
+        }
+
+        /**
+         * Clears faults and then runs DeviceMonitor::run to
+         * call Device::analyze() on an ongoing interval.
+         *
+         * @return the return value from sd_event_loop()
+         */
+        int run() override;
+
+    private:
+
+        /**
+         * The PowerLost signal handler.
+         *
+         * After doing an analysis, will issue a power off
+         * as some device has a power fault and needs to be
+         * properly shut down.
+         *
+         * @param[in] msg - D-Bus message for callback
+         */
+        void onPowerLost(sdbusplus::message::message& msg);
+
+        /**
+         * Returns the match string for the PowerLost signal.
+         *
+         * Static because the constructor's initializer list calls it
+         * before the match member has been initialized.
+         */
+        static std::string getMatchString()
+        {
+            using namespace sdbusplus::bus::match::rules;
+
+            return type::signal() +
+                   path("/org/openbmc/control/power0") +
+                   interface("org.openbmc.control.Power") +
+                   member("PowerLost");
+        }
+
+        /**
+         * The D-Bus object
+         */
+        sdbusplus::bus::bus& bus;
+
+        /**
+         * Match object for PowerLost signals
+         */
+        sdbusplus::bus::match_t match;
+};
+
+}
+}