host-check: discover host state within state manager
Currently the phosphor-host-state-manager is started before the logic
has run to detect if the host is already running. This results in
phosphor-host-state-manager temporarily reporting that the host is not
running, even if it actually is. This can cause confusion for clients
monitoring this property.
The solution is to move the logic which discovers if the host is running
into phosphor-host-state-manager. Having the logic to do this in a
separate application was a nice separation of concerns, but when the two
are co-requisites it is best to just combine them.
This change results in the phosphor-host-state-manager service starting
later in the boot to BMC Ready, but testing has shown no impact on the
overall time to reach BMC Ready or on other services.
As a part of this change, the phosphor-reset-sensor-states service was
moved out of the obmc-host-reset target to ensure it and the
phosphor-host-state-manager service have the correct dependency between
them. The phosphor-host-state-manager service now ensures it runs after
the pldm and ipmi services if they are being started (they are
utilized to check if the host is running).
Testing:
- Chassis On, Host On
Jul 30 14:40:37 rainxxx phosphor-host-state-manager[696]: Check if host is running
Jul 30 14:40:39 rainxxx phosphor-host-state-manager[696]: Host is running!
Jul 30 14:40:39 rainxxx phosphor-host-state-manager[696]: Initial Host State will be Running
- Chassis Off, Host Off
Jul 30 14:55:17 rainxxx phosphor-host-state-manager[710]: Check if host is running
Jul 30 14:55:17 rainxxx phosphor-host-state-manager[710]: Chassis power not on, exit
Jul 30 14:55:17 rainxxx phosphor-host-state-manager[710]: Initial Host State will be Off
- Chassis On, Host Off
Jul 30 14:57:11 rainxxx phosphor-host-state-manager[1193]: Check if host is running
Jul 30 14:57:18 rainxxx phosphor-host-state-manager[1193]: Host is not running!
Jul 30 14:57:18 rainxxx phosphor-host-state-manager[1193]: Initial Host State will be Off
Signed-off-by: Andrew Geissler <geissonator@yahoo.com>
Change-Id: I938902f526fba2f857be4b21b01149f8250b972d
diff --git a/host_check.cpp b/host_check.cpp
new file mode 100644
index 0000000..23715a1
--- /dev/null
+++ b/host_check.cpp
@@ -0,0 +1,196 @@
+#include "config.h"
+
+#include "host_check.hpp"
+
+#include <unistd.h>
+
+#include <boost/range/adaptor/reversed.hpp>
+#include <phosphor-logging/log.hpp>
+#include <sdbusplus/bus.hpp>
+#include <sdbusplus/exception.hpp>
+#include <xyz/openbmc_project/Condition/HostFirmware/server.hpp>
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <thread>
+#include <vector>
+
+using namespace std::literals;
+using namespace phosphor::logging;
+using namespace sdbusplus::xyz::openbmc_project::Condition::server;
+using sdbusplus::exception::SdBusError;
+
+// D-Bus service, path, interface and property names used by the calls below
+constexpr auto MAPPER_BUSNAME = "xyz.openbmc_project.ObjectMapper";
+constexpr auto MAPPER_PATH = "/xyz/openbmc_project/object_mapper";
+constexpr auto MAPPER_INTERFACE = "xyz.openbmc_project.ObjectMapper";
+constexpr auto CONDITION_HOST_INTERFACE =
+ "xyz.openbmc_project.Condition.HostFirmware";
+constexpr auto CONDITION_HOST_PROPERTY = "CurrentFirmwareCondition";
+constexpr auto PROPERTY_INTERFACE = "org.freedesktop.DBus.Properties";
+
+constexpr auto CHASSIS_STATE_SVC = "xyz.openbmc_project.State.Chassis";
+constexpr auto CHASSIS_STATE_PATH = "/xyz/openbmc_project/state/chassis0";
+constexpr auto CHASSIS_STATE_INTF = "xyz.openbmc_project.State.Chassis";
+constexpr auto CHASSIS_STATE_POWER_PROP = "CurrentPowerState";
+
+// Return true if any D-Bus object implementing the HostFirmware condition
+// interface reports "Running"; logs and rethrows SdBusError on call failure
+bool checkFirmwareConditionRunning(sdbusplus::bus::bus& bus)
+{
+ // Ask the mapper for every object implementing the HostFirmware interface
+ auto mapper = bus.new_method_call(MAPPER_BUSNAME, MAPPER_PATH,
+ MAPPER_INTERFACE, "GetSubTree");
+
+ mapper.append("/", 0, std::vector<std::string>({CONDITION_HOST_INTERFACE}));
+
+ std::map<std::string, std::map<std::string, std::vector<std::string>>>
+ mapperResponse;
+
+ try
+ {
+ auto mapperResponseMsg = bus.call(mapper);
+ mapperResponseMsg.read(mapperResponse);
+ }
+ catch (const SdBusError& e)
+ {
+ log<level::ERR>(
+ "Error in mapper GetSubTree call for HostFirmware condition",
+ entry("ERROR=%s", e.what()));
+ throw; // propagate to the caller after logging
+ }
+
+ if (mapperResponse.empty())
+ {
+ log<level::INFO>(
+ "Mapper response for HostFirmware conditions is empty!");
+ return false; // mapper returned no implementations
+ }
+
+ // Read CurrentFirmwareCondition from every object the mapper returned.
+ // Currently there are two implementations of this interface, one by IPMI
+ // and one by PLDM. The IPMI interface does a realtime check with the host
+ // when it is called, so if the host is not running we have to wait for
+ // its timeout (currently set to 3 seconds). The PLDM interface reads a
+ // cached state, and the PLDM service does not put itself on D-Bus until
+ // it has checked with the host. It is therefore most efficient to query
+ // PLDM first, which is done by iterating the mapper results in reverse
+ // (PLDM will be last if available).
+ for (const auto& [path, services] :
+ boost::adaptors::reverse(mapperResponse))
+ {
+ for (const auto& serviceIter : services)
+ {
+ const std::string& service = serviceIter.first;
+
+ try
+ {
+ auto method = bus.new_method_call(service.c_str(), path.c_str(),
+ PROPERTY_INTERFACE, "Get");
+ method.append(CONDITION_HOST_INTERFACE,
+ CONDITION_HOST_PROPERTY);
+
+ auto response = bus.call(method);
+
+ std::variant<std::string> currentFwCond;
+ response.read(currentFwCond);
+
+ if (std::get<std::string>(currentFwCond) ==
+ "xyz.openbmc_project.Condition.HostFirmware."
+ "FirmwareCondition."
+ "Running")
+ {
+ return true;
+ }
+ }
+ catch (const SdBusError& e)
+ {
+ log<level::ERR>("Error reading HostFirmware condition",
+ entry("ERROR=%s", e.what()),
+ entry("SERVICE=%s", service.c_str()),
+ entry("PATH=%s", path.c_str()));
+ throw; // a failed read aborts the whole check
+ }
+ }
+ }
+ return false; // no implementation reported Running
+}
+
+// Return true when chassis0's CurrentPowerState is On; rethrows SdBusError
+bool isChassiPowerOn(sdbusplus::bus::bus& bus)
+{
+ try
+ {
+ auto method = bus.new_method_call(CHASSIS_STATE_SVC, CHASSIS_STATE_PATH,
+ PROPERTY_INTERFACE, "Get");
+ method.append(CHASSIS_STATE_INTF, CHASSIS_STATE_POWER_PROP);
+
+ auto response = bus.call(method);
+
+ std::variant<std::string> currentPowerState;
+ response.read(currentPowerState);
+
+ if (std::get<std::string>(currentPowerState) ==
+ "xyz.openbmc_project.State.Chassis.PowerState.On")
+ {
+ return true;
+ }
+ }
+ catch (const SdBusError& e)
+ {
+ log<level::ERR>("Error reading Chassis Power State",
+ entry("ERROR=%s", e.what()),
+ entry("SERVICE=%s", CHASSIS_STATE_SVC),
+ entry("PATH=%s", CHASSIS_STATE_PATH));
+
+ throw; // propagate to the caller after logging
+ }
+ return false; // any power state other than On
+}
+
+bool isHostRunning()
+{
+ log<level::INFO>("Check if host is running");
+
+ auto bus = sdbusplus::bus::new_default();
+
+ // No need to check the host if chassis power is not on
+ if (!isChassiPowerOn(bus))
+ {
+ log<level::INFO>("Chassis power not on, exit");
+ return false;
+ }
+
+ // This application's systemd service is set up to only run after all other
+ // applications that could possibly implement the needed interface have
+ // been started. However, the use of mapper to find those interfaces means
+ // we have a condition where the interface may be on D-Bus but not stored
+ // within mapper yet. Keep it simple and just build one retry into the
+ // check if it's found the host is not up. This path is only reached if
+ // chassis power is on when the BMC comes up, so this won't impact most
+ // normal cases where the BMC is rebooted with chassis power off. In
+ // cases where chassis power is on, the host is likely running so we want
+ // to be sure we check all interfaces.
+ for (int i = 0; i < 2; i++)
+ {
+ // Wait before each attempt so mapper can introspect new objects on bus
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ if (checkFirmwareConditionRunning(bus))
+ {
+ log<level::INFO>("Host is running!");
+ // Format HOST_RUNNING_FILE for host instance 0 and create that empty
+ // file to indicate to services that the host is running
+ auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
+ size++; // room for the terminating null
+ std::unique_ptr<char[]> buf(new char[size]);
+ std::snprintf(buf.get(), size, HOST_RUNNING_FILE, 0);
+ std::ofstream outfile(buf.get());
+ outfile.close();
+ return true;
+ }
+ }
+ log<level::INFO>("Host is not running!");
+ return false;
+}