blob: 69bf42261806689a01e4aee81aace185c6c26bcd [file] [log] [blame]
Andrew Geissler65c01012021-06-15 14:03:34 -05001extern "C"
2{
3#include "libpdbg.h"
4}
5
Jayanth Othayoth25e39c82021-07-12 01:00:17 -05006#include "extensions/phal/common_utils.hpp"
Jayanth Othayoth6552de02021-07-12 00:55:57 -05007#include "extensions/phal/create_pel.hpp"
Jayanth Othayothc3d6b872021-07-28 05:07:25 -05008#include "extensions/phal/pdbg_utils.hpp"
Andrew Geissler65c01012021-06-15 14:03:34 -05009#include "p10_cfam.hpp"
Andrew Geissler65c01012021-06-15 14:03:34 -050010#include "registration.hpp"
11
12#include <phosphor-logging/log.hpp>
Andrew Geissler211f8a92023-02-03 12:45:29 -070013#include <sdbusplus/bus.hpp>
Andrew Geissler65c01012021-06-15 14:03:34 -050014
15#include <cstdio>
16#include <fstream>
17#include <memory>
18
19namespace openpower
20{
21namespace phal
22{
23
24using namespace openpower::cfam::p10;
25using namespace phosphor::logging;
26
Andrew Geissler211f8a92023-02-03 12:45:29 -070027/** Best effort function to create a BMC dump */
28void createBmcDump()
29{
30 auto bus = sdbusplus::bus::new_default();
31
32 auto method = bus.new_method_call(
33 "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc",
34 "xyz.openbmc_project.Dump.Create", "CreateDump");
35 method.append(
36 std::vector<
37 std::pair<std::string, std::variant<std::string, uint64_t>>>());
38 try
39 {
40 bus.call_noreply(method);
41 }
42 catch (const sdbusplus::exception_t& e)
43 {
44 log<level::ERR>("Exception raised creating BMC dump",
45 entry("EXCEPTION=%s", e.what()));
46 // just continue, failing to collect a dump should not cause further
47 // issues in this path
48 }
49}
50
Andrew Geissler65c01012021-06-15 14:03:34 -050051/**
52 * This is the backup plan to ensuring the host is not running before the
53 * BMC issues a power off to the system. Prior to this procedure being called,
54 * the BMC has tried all other communication mechanisms to talk with the host
55 * and they have failed. The design is that the host firmware will write the
56 * value 0xA5000001 to Mailbox scratch register 12 when they are up and running
57 * to a point where communication to the BMC is no longer required to function.
58 * On a power off or shutdown this register is cleared by the host and BMC
59 * firmware. If the BMC sees the 0xA5000001 pattern in the scratch register
60 * then it assumes the host is running and will leave power on to the system.
61 */
62void checkHostRunning()
63{
64 struct pdbg_target* procTarget;
65
66 try
67 {
68 phal_init();
69 }
Patrick Williams1a9a5a62021-10-06 13:05:06 -050070 catch (const std::exception& ex)
Andrew Geissler65c01012021-06-15 14:03:34 -050071 {
72 // This should "never" happen so just throw the exception and let
73 // our systemd error handling process this
74 log<level::ERR>("Exception raised during init PHAL",
75 entry("EXCEPTION=%s", ex.what()));
76 throw std::runtime_error("PHAL initialization failed");
77 }
78
79 pdbg_for_each_class_target("proc", procTarget)
80 {
81 // Only check the primary proc
82 if (!isPrimaryProc(procTarget))
83 {
84 continue;
85 }
86
87 uint32_t val = 0;
88 constexpr uint32_t HOST_RUNNING_INDICATION = 0xA5000001;
89 auto rc = getCFAM(procTarget, P10_SCRATCH_REG_12, val);
90 if ((rc == 0) && (val != HOST_RUNNING_INDICATION))
91 {
92 log<level::INFO>("CFAM read indicates host is not running",
93 entry("CFAM=0x%X", val));
94 return;
95 }
96
97 if (rc != 0)
98 {
99 // On error, we have to assume host is up so just fall through
100 // to code below
101 log<level::ERR>("CFAM read error, assume host is running");
102 }
103 else if (val == HOST_RUNNING_INDICATION)
104 {
105 // This is not good. Normal communication path to host did not work
106 // but CFAM indicates host is running.
107 log<level::ERR>("CFAM read indicates host is running");
108 }
109
Andrew Geissler61febf02021-06-22 17:19:32 -0500110 // Create an error so user knows system is in a bad state
Jayanth Othayothac95c562021-07-16 05:56:04 -0500111 openpower::pel::createPEL("org.open_power.PHAL.Error.HostRunning");
Andrew Geissler65c01012021-06-15 14:03:34 -0500112
113 // Create file for host instance and create in filesystem to
114 // indicate to services that host is running.
115 // This file is cleared by the phosphor-state-manager once the host
116 // start target completes.
117 constexpr auto HOST_RUNNING_FILE = "/run/openbmc/host@%d-on";
118 auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
119 size++; // null
120 std::unique_ptr<char[]> buf(new char[size]);
121 std::snprintf(buf.get(), size, HOST_RUNNING_FILE, 0);
122 std::ofstream outfile(buf.get());
123 outfile.close();
Andrew Geissler211f8a92023-02-03 12:45:29 -0700124
125 // Try to create BMC dump for further debug
126 createBmcDump();
127
Andrew Geissler65c01012021-06-15 14:03:34 -0500128 return;
129 }
130
131 // We should "never" make it here. If we did it implies no primary processor
132 // was found. Once again, rely on systemd recovery if this happens
133 log<level::ERR>("No primary processor found in checkHostRunning");
134 throw std::runtime_error("No primary processor found in checkHostRunning");
135}
136
Andrew Geissleraa599152021-06-24 10:10:43 -0500137/**
138 * The BMC is to make a best effort to clear the CFAM register used by PHYP
139 * to indicate it is running when the host is stopped. This procedure will do
140 * that.
141 */
142void clearHostRunning()
143{
144 struct pdbg_target* procTarget;
145 log<level::INFO>("Entering clearHostRunning");
146
147 try
148 {
149 phal_init();
150 }
Patrick Williams1a9a5a62021-10-06 13:05:06 -0500151 catch (const std::exception& ex)
Andrew Geissleraa599152021-06-24 10:10:43 -0500152 {
153 // This should "never" happen so just throw the exception and let
154 // our systemd error handling process this
155 log<level::ERR>("Exception raised during init PHAL",
156 entry("EXCEPTION=%s", ex.what()));
157 throw std::runtime_error("PHAL initialization failed");
158 }
159
160 pdbg_for_each_class_target("proc", procTarget)
161 {
162 // Only check the primary proc
163 if (!isPrimaryProc(procTarget))
164 {
165 continue;
166 }
167
168 constexpr uint32_t HOST_NOT_RUNNING_INDICATION = 0;
169 auto rc = putCFAM(procTarget, P10_SCRATCH_REG_12,
170 HOST_NOT_RUNNING_INDICATION);
171 if (rc != 0)
172 {
173 log<level::ERR>("CFAM write to clear host running status failed");
174 }
175
176 // It's best effort, so just return either way
177 return;
178 }
179 log<level::ERR>("No primary processor found in clearHostRunning");
180}
181
// Register the two procedures defined above, each under the string name given
// as the first macro argument.
// NOTE(review): REGISTER_PROCEDURE is presumably provided by registration.hpp
// and makes the function invocable by that name - confirm against that header.
Andrew Geissler65c01012021-06-15 14:03:34 -0500182REGISTER_PROCEDURE("checkHostRunning", checkHostRunning)
Andrew Geissleraa599152021-06-24 10:10:43 -0500183REGISTER_PROCEDURE("clearHostRunning", clearHostRunning)
Andrew Geissler65c01012021-06-15 14:03:34 -0500184
185} // namespace phal
186} // namespace openpower