/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16#include <boost/asio/posix/stream_descriptor.hpp>
17#include <gpiod.hpp>
18#include <iostream>
19#include <sdbusplus/asio/object_server.hpp>
20
21namespace host_error_monitor
22{
23static boost::asio::io_service io;
24static std::shared_ptr<sdbusplus::asio::connection> conn;
25
26static bool hostOff = true;
27
28const static constexpr size_t caterrTimeoutMs = 2000;
29const static constexpr size_t crashdumpTimeoutS = 300;
30
31// Timers
32// Timer for CATERR asserted
33static boost::asio::steady_timer caterrAssertTimer(io);
34
35// GPIO Lines and Event Descriptors
36static gpiod::line caterrLine;
37static boost::asio::posix::stream_descriptor caterrEvent(io);
38
39static void initializeHostState()
40{
41 conn->async_method_call(
42 [](boost::system::error_code ec,
43 const std::variant<std::string>& property) {
44 if (ec)
45 {
46 return;
47 }
48 const std::string* state = std::get_if<std::string>(&property);
49 if (state == nullptr)
50 {
51 std::cerr << "Unable to read host state value\n";
52 return;
53 }
54 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
55 },
56 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
57 "org.freedesktop.DBus.Properties", "Get",
58 "xyz.openbmc_project.State.Host", "CurrentHostState");
59}
60
61static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
62{
63 return std::make_shared<sdbusplus::bus::match::match>(
64 *conn,
65 "type='signal',interface='org.freedesktop.DBus.Properties',"
66 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
67 "Host'",
68 [](sdbusplus::message::message& msg) {
69 std::string interfaceName;
70 boost::container::flat_map<std::string, std::variant<std::string>>
71 propertiesChanged;
72 std::string state;
73 try
74 {
75 msg.read(interfaceName, propertiesChanged);
76 state =
77 std::get<std::string>(propertiesChanged.begin()->second);
78 }
79 catch (std::exception& e)
80 {
81 std::cerr << "Unable to read host state\n";
82 return;
83 }
84 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
85
86 // No host events should fire while off, so cancel any pending
87 // timers
88 if (hostOff)
89 {
90 caterrAssertTimer.cancel();
91 }
92 });
93}
94
95static bool requestGPIOEvents(
96 const std::string& name, const std::function<void()>& handler,
97 gpiod::line& gpioLine,
98 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
99{
100 // Find the GPIO line
101 gpioLine = gpiod::find_line(name);
102 if (!gpioLine)
103 {
104 std::cerr << "Failed to find the " << name << " line\n";
105 return false;
106 }
107
108 try
109 {
110 gpioLine.request(
111 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
112 }
113 catch (std::exception&)
114 {
115 std::cerr << "Failed to request events for " << name << "\n";
116 return false;
117 }
118
119 int gpioLineFd = gpioLine.event_get_fd();
120 if (gpioLineFd < 0)
121 {
122 std::cerr << "Failed to get " << name << " fd\n";
123 return false;
124 }
125
126 gpioEventDescriptor.assign(gpioLineFd);
127
128 gpioEventDescriptor.async_wait(
129 boost::asio::posix::stream_descriptor::wait_read,
130 [&name, handler](const boost::system::error_code ec) {
131 if (ec)
132 {
133 std::cerr << name << " fd handler error: " << ec.message()
134 << "\n";
135 return;
136 }
137 handler();
138 });
139 return true;
140}
141
142static void startPowerCycle()
143{
144 conn->async_method_call(
145 [](boost::system::error_code ec) {
146 if (ec)
147 {
148 std::cerr << "failed to set Chassis State\n";
149 }
150 },
151 "xyz.openbmc_project.State.Chassis",
152 "/xyz/openbmc_project/state/chassis0",
153 "org.freedesktop.DBus.Properties", "Set",
154 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
155 std::variant<std::string>{
156 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
157}
158
159static void startCrashdumpAndRecovery(bool recoverSystem)
160{
161 std::cout << "Starting crashdump\n";
162 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
163 static boost::asio::steady_timer crashdumpTimer(io);
164
165 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
166 *conn,
167 "type='signal',interface='org.freedesktop.DBus.Properties',"
168 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
169 [recoverSystem](sdbusplus::message::message& msg) {
170 crashdumpTimer.cancel();
171 std::cout << "Crashdump completed\n";
172 if (recoverSystem)
173 {
174 std::cout << "Recovering the system\n";
175 startPowerCycle();
176 }
177 crashdumpCompleteMatch.reset();
178 });
179
180 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
181 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
182 if (ec)
183 {
184 // operation_aborted is expected if timer is canceled
185 if (ec != boost::asio::error::operation_aborted)
186 {
187 std::cerr << "Crashdump async_wait failed: " << ec.message()
188 << "\n";
189 }
190 std::cout << "Crashdump timer canceled\n";
191 return;
192 }
193 std::cerr << "Crashdump failed to complete before timeout\n";
194 crashdumpCompleteMatch.reset();
195 });
196
197 conn->async_method_call(
198 [](boost::system::error_code ec) {
199 if (ec)
200 {
201 std::cerr << "failed to start Crashdump\n";
202 crashdumpTimer.cancel();
203 crashdumpCompleteMatch.reset();
204 }
205 },
206 "com.intel.crashdump", "/com/intel/crashdump",
207 "com.intel.crashdump.Stored", "GenerateStoredLog");
208}
209
210static void caterrHandler()
211{
212 if (!hostOff)
213 {
214 gpiod::line_event gpioLineEvent = caterrLine.event_read();
215
216 bool caterr =
217 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
218 if (caterr)
219 {
220 std::cout << "CPU CATERR detected, starting timer\n";
221 caterrAssertTimer.expires_after(
222 std::chrono::milliseconds(caterrTimeoutMs));
223 caterrAssertTimer.async_wait(
224 [](const boost::system::error_code ec) {
225 if (ec)
226 {
227 // operation_aborted is expected if timer is canceled
228 // before completion.
229 if (ec != boost::asio::error::operation_aborted)
230 {
231 std::cerr << "caterr timeout async_wait failed: "
232 << ec.message() << "\n";
233 }
234 std::cout << "CATERR assert timer canceled\n";
235 return;
236 }
237 std::cout << "CATERR asset timer completed\n";
238 conn->async_method_call(
239 [](boost::system::error_code ec,
240 const std::variant<bool>& property) {
241 if (ec)
242 {
243 return;
244 }
245 const bool* reset = std::get_if<bool>(&property);
246 if (reset == nullptr)
247 {
248 std::cerr
249 << "Unable to read reset on CATERR value\n";
250 return;
251 }
252 startCrashdumpAndRecovery(*reset);
253 },
254 "xyz.openbmc_project.Settings",
255 "/xyz/openbmc_project/control/processor_error_config",
256 "org.freedesktop.DBus.Properties", "Get",
257 "xyz.openbmc_project.Control.Processor.ErrConfig",
258 "ResetOnCATERR");
259 });
260 }
261 else
262 {
263 caterrAssertTimer.cancel();
264 }
265 }
266 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
267 [](const boost::system::error_code ec) {
268 if (ec)
269 {
270 std::cerr << "caterr handler error: "
271 << ec.message() << "\n";
272 return;
273 }
274 caterrHandler();
275 });
276}
277} // namespace host_error_monitor
278
279int main(int argc, char* argv[])
280{
281 // setup connection to dbus
282 host_error_monitor::conn =
283 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
284
285 // Host Error Monitor Object
286 host_error_monitor::conn->request_name(
287 "xyz.openbmc_project.HostErrorMonitor");
288 sdbusplus::asio::object_server server =
289 sdbusplus::asio::object_server(host_error_monitor::conn);
290
291 // Start tracking host state
292 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
293 host_error_monitor::startHostStateMonitor();
294
295 // Initialize the host state
296 host_error_monitor::initializeHostState();
297
298 // Request CPU_CATERR GPIO events
299 if (!host_error_monitor::requestGPIOEvents(
300 "CPU_CATERR", host_error_monitor::caterrHandler,
301 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
302 {
303 return -1;
304 }
305
306 host_error_monitor::io.run();
307
308 return 0;
309}