blob: 389e0d12fb77c531d2d4c3c052b1055db47434af [file] [log] [blame]
Jason M. Bills1490b142019-07-01 15:48:43 -07001/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
Jason M. Bills6a2cb692019-08-06 11:03:49 -070016#include <peci.h>
Chen Yugange6c0f1c2019-08-02 20:36:42 +080017#include <systemd/sd-journal.h>
18
Jason M. Bills1490b142019-07-01 15:48:43 -070019#include <boost/asio/posix/stream_descriptor.hpp>
20#include <gpiod.hpp>
Jason M. Bills1490b142019-07-01 15:48:43 -070021#include <sdbusplus/asio/object_server.hpp>
Jason M. Bills48e5dff2020-06-10 13:47:47 -070022
23#include <bitset>
24#include <iostream>
Jason M. Billsd1a19f62019-08-06 11:52:58 -070025#include <variant>
Jason M. Bills1490b142019-07-01 15:48:43 -070026
27namespace host_error_monitor
28{
29static boost::asio::io_service io;
30static std::shared_ptr<sdbusplus::asio::connection> conn;
Jason M. Billsc4b91f22019-11-26 17:04:50 -080031static std::shared_ptr<sdbusplus::asio::dbus_interface> hostErrorTimeoutIface;
Jason M. Bills1490b142019-07-01 15:48:43 -070032
33static bool hostOff = true;
34
Jason M. Billsc4b91f22019-11-26 17:04:50 -080035static size_t caterrTimeoutMs = 2000;
36const static constexpr size_t caterrTimeoutMsMax = 600000; // 10 minutes maximum
Jason M. Billscbf78532019-08-16 15:32:11 -070037const static constexpr size_t errTimeoutMs = 90000;
Jason M. Bills89922f82019-08-06 11:10:02 -070038const static constexpr size_t smiTimeoutMs = 90000;
Jason M. Bills1490b142019-07-01 15:48:43 -070039const static constexpr size_t crashdumpTimeoutS = 300;
40
41// Timers
42// Timer for CATERR asserted
43static boost::asio::steady_timer caterrAssertTimer(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070044// Timer for ERR0 asserted
45static boost::asio::steady_timer err0AssertTimer(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070046// Timer for ERR1 asserted
47static boost::asio::steady_timer err1AssertTimer(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070048// Timer for ERR2 asserted
49static boost::asio::steady_timer err2AssertTimer(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070050// Timer for SMI asserted
51static boost::asio::steady_timer smiAssertTimer(io);
Jason M. Bills1490b142019-07-01 15:48:43 -070052
53// GPIO Lines and Event Descriptors
54static gpiod::line caterrLine;
55static boost::asio::posix::stream_descriptor caterrEvent(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070056static gpiod::line err0Line;
57static boost::asio::posix::stream_descriptor err0Event(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070058static gpiod::line err1Line;
59static boost::asio::posix::stream_descriptor err1Event(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070060static gpiod::line err2Line;
61static boost::asio::posix::stream_descriptor err2Event(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070062static gpiod::line smiLine;
63static boost::asio::posix::stream_descriptor smiEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070064static gpiod::line cpu1FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070065static gpiod::line cpu1ThermtripLine;
66static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070067static gpiod::line cpu2FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070068static gpiod::line cpu2ThermtripLine;
69static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io);
Jason M. Bills250fa632019-08-28 15:58:25 -070070static gpiod::line cpu1VRHotLine;
71static boost::asio::posix::stream_descriptor cpu1VRHotEvent(io);
72static gpiod::line cpu2VRHotLine;
Jason M. Bills9647ba72019-08-29 14:19:19 -070073static boost::asio::posix::stream_descriptor cpu1MemABCDVRHotEvent(io);
74static gpiod::line cpu1MemEFGHVRHotLine;
75static boost::asio::posix::stream_descriptor cpu1MemEFGHVRHotEvent(io);
76static gpiod::line cpu2MemABCDVRHotLine;
Jason M. Bills250fa632019-08-28 15:58:25 -070077static boost::asio::posix::stream_descriptor cpu2VRHotEvent(io);
Jason M. Bills9647ba72019-08-29 14:19:19 -070078static gpiod::line cpu1MemABCDVRHotLine;
79static boost::asio::posix::stream_descriptor cpu2MemABCDVRHotEvent(io);
80static gpiod::line cpu2MemEFGHVRHotLine;
81static boost::asio::posix::stream_descriptor cpu2MemEFGHVRHotEvent(io);
Chen Yugange6c0f1c2019-08-02 20:36:42 +080082//----------------------------------
83// PCH_BMC_THERMTRIP function related definition
84//----------------------------------
Chen Yugange6c0f1c2019-08-02 20:36:42 +080085static gpiod::line pchThermtripLine;
86static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +000087//----------------------------------
88// CPU_MEM_THERM_EVENT function related definition
89//----------------------------------
90static gpiod::line cpu1MemtripLine;
91static boost::asio::posix::stream_descriptor cpu1MemtripEvent(io);
92static gpiod::line cpu2MemtripLine;
93static boost::asio::posix::stream_descriptor cpu2MemtripEvent(io);
jayaprakash Mutyala53099c42020-03-15 00:16:26 +000094//---------------------------------
95// CPU_MISMATCH function related definition
96//---------------------------------
97static gpiod::line cpu1MismatchLine;
98static gpiod::line cpu2MismatchLine;
Jason M. Bills1490b142019-07-01 15:48:43 -070099
Yong Li061eb032020-02-26 15:06:18 +0800100// beep function for CPU error
101const static constexpr uint8_t beepCPUErr2 = 5;
102
103static void beep(const uint8_t& beepPriority)
104{
105 conn->async_method_call(
106 [](boost::system::error_code ec) {
107 if (ec)
108 {
109 std::cerr << "beep returned error with "
110 "async_method_call (ec = "
111 << ec << ")\n";
112 return;
113 }
114 },
115 "xyz.openbmc_project.BeepCode", "/xyz/openbmc_project/BeepCode",
116 "xyz.openbmc_project.BeepCode", "Beep", uint8_t(beepPriority));
117}
118
Jason M. Billsa3397932019-08-06 11:07:21 -0700119static void cpuIERRLog()
120{
121 sd_journal_send("MESSAGE=HostError: IERR", "PRIORITY=%i", LOG_INFO,
122 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
123 "REDFISH_MESSAGE_ARGS=%s", "IERR", NULL);
124}
125
126static void cpuIERRLog(const int cpuNum)
127{
128 std::string msg = "IERR on CPU " + std::to_string(cpuNum + 1);
129
130 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
131 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
132 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
133}
134
135static void cpuIERRLog(const int cpuNum, const std::string& type)
136{
137 std::string msg = type + " IERR on CPU " + std::to_string(cpuNum + 1);
138
139 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
140 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
141 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
142}
143
Jason M. Billscbf78532019-08-16 15:32:11 -0700144static void cpuERRXLog(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700145{
Jason M. Billscbf78532019-08-16 15:32:11 -0700146 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
147
148 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
149 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
150 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700151}
152
Jason M. Billscbf78532019-08-16 15:32:11 -0700153static void cpuERRXLog(const int errPin, const int cpuNum)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700154{
Jason M. Billscbf78532019-08-16 15:32:11 -0700155 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
156 std::to_string(cpuNum + 1);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700157
158 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
159 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
160 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
161}
162
Jason M. Bills89922f82019-08-06 11:10:02 -0700163static void smiTimeoutLog()
164{
165 sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
166 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
167 "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
168}
169
Jason M. Bills45e87e02019-09-09 14:45:38 -0700170static void cpuBootFIVRFaultLog(const int cpuNum)
171{
172 std::string msg = "Boot FIVR Fault on CPU " + std::to_string(cpuNum);
173
174 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
175 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
176 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
177}
178
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700179static void cpuThermTripLog(const int cpuNum)
180{
181 std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip";
182
183 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
184 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
185 "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d",
186 cpuNum, NULL);
187}
188
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000189static void memThermTripLog(const int cpuNum)
190{
191 std::string cpuNumber = "CPU " + std::to_string(cpuNum);
192 std::string msg = cpuNumber + " Memory Thermal trip.";
193
194 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
195 LOG_ERR, "REDFISH_MESSAGE_ID=%s",
196 "OpenBMC.0.1.MemoryThermTrip", "REDFISH_MESSAGE_ARGS=%s",
197 cpuNumber.c_str(), NULL);
198}
199
jayaprakash Mutyala53099c42020-03-15 00:16:26 +0000200static void cpuMismatchLog(const int cpuNum)
201{
202 std::string msg = "CPU " + std::to_string(cpuNum) + " mismatch";
203
204 sd_journal_send("MESSAGE= %s", msg.c_str(), "PRIORITY=%i", LOG_ERR,
205 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUMismatch",
206 "REDFISH_MESSAGE_ARGS=%d", cpuNum, NULL);
207}
208
Jason M. Bills250fa632019-08-28 15:58:25 -0700209static void cpuVRHotLog(const std::string& vr)
210{
211 std::string msg = vr + " Voltage Regulator Overheated.";
212
213 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
214 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
215 "OpenBMC.0.1.VoltageRegulatorOverheated",
216 "REDFISH_MESSAGE_ARGS=%s", vr.c_str(), NULL);
217}
218
Jason M. Bills08866542019-08-16 12:04:19 -0700219static void ssbThermTripLog()
220{
221 sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
222 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
223 "OpenBMC.0.1.SsbThermalTrip", NULL);
224}
225
Jason M. Billsa15c2522019-08-16 10:01:44 -0700226static void initializeErrorState();
Jason M. Bills1490b142019-07-01 15:48:43 -0700227static void initializeHostState()
228{
229 conn->async_method_call(
230 [](boost::system::error_code ec,
231 const std::variant<std::string>& property) {
232 if (ec)
233 {
234 return;
235 }
236 const std::string* state = std::get_if<std::string>(&property);
237 if (state == nullptr)
238 {
239 std::cerr << "Unable to read host state value\n";
240 return;
241 }
242 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
Jason M. Billsa15c2522019-08-16 10:01:44 -0700243 // If the system is on, initialize the error state
244 if (!hostOff)
245 {
246 initializeErrorState();
247 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700248 },
249 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
250 "org.freedesktop.DBus.Properties", "Get",
251 "xyz.openbmc_project.State.Host", "CurrentHostState");
252}
253
254static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
255{
256 return std::make_shared<sdbusplus::bus::match::match>(
257 *conn,
258 "type='signal',interface='org.freedesktop.DBus.Properties',"
259 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
260 "Host'",
261 [](sdbusplus::message::message& msg) {
262 std::string interfaceName;
263 boost::container::flat_map<std::string, std::variant<std::string>>
264 propertiesChanged;
265 std::string state;
266 try
267 {
268 msg.read(interfaceName, propertiesChanged);
269 state =
270 std::get<std::string>(propertiesChanged.begin()->second);
271 }
272 catch (std::exception& e)
273 {
274 std::cerr << "Unable to read host state\n";
275 return;
276 }
277 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
278
Jason M. Bills1490b142019-07-01 15:48:43 -0700279 if (hostOff)
280 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700281 // No host events should fire while off, so cancel any pending
282 // timers
Jason M. Bills1490b142019-07-01 15:48:43 -0700283 caterrAssertTimer.cancel();
Jason M. Bills8c584392019-08-19 11:05:51 -0700284 err0AssertTimer.cancel();
Jason M. Bills75af3962019-08-19 11:07:17 -0700285 err1AssertTimer.cancel();
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700286 err2AssertTimer.cancel();
Jason M. Bills89922f82019-08-06 11:10:02 -0700287 smiAssertTimer.cancel();
Jason M. Bills1490b142019-07-01 15:48:43 -0700288 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700289 else
290 {
291 // Handle any initial errors when the host turns on
292 initializeErrorState();
293 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700294 });
295}
296
297static bool requestGPIOEvents(
298 const std::string& name, const std::function<void()>& handler,
299 gpiod::line& gpioLine,
300 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
301{
302 // Find the GPIO line
303 gpioLine = gpiod::find_line(name);
304 if (!gpioLine)
305 {
306 std::cerr << "Failed to find the " << name << " line\n";
307 return false;
308 }
309
310 try
311 {
312 gpioLine.request(
313 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
314 }
315 catch (std::exception&)
316 {
317 std::cerr << "Failed to request events for " << name << "\n";
318 return false;
319 }
320
321 int gpioLineFd = gpioLine.event_get_fd();
322 if (gpioLineFd < 0)
323 {
324 std::cerr << "Failed to get " << name << " fd\n";
325 return false;
326 }
327
328 gpioEventDescriptor.assign(gpioLineFd);
329
330 gpioEventDescriptor.async_wait(
331 boost::asio::posix::stream_descriptor::wait_read,
332 [&name, handler](const boost::system::error_code ec) {
333 if (ec)
334 {
335 std::cerr << name << " fd handler error: " << ec.message()
336 << "\n";
337 return;
338 }
339 handler();
340 });
341 return true;
342}
343
Jason M. Bills45e87e02019-09-09 14:45:38 -0700344static bool requestGPIOInput(const std::string& name, gpiod::line& gpioLine)
345{
346 // Find the GPIO line
347 gpioLine = gpiod::find_line(name);
348 if (!gpioLine)
349 {
350 std::cerr << "Failed to find the " << name << " line.\n";
351 return false;
352 }
353
354 // Request GPIO input
355 try
356 {
357 gpioLine.request({__FUNCTION__, gpiod::line_request::DIRECTION_INPUT});
358 }
359 catch (std::exception&)
360 {
361 std::cerr << "Failed to request " << name << " input\n";
362 return false;
363 }
364
365 return true;
366}
367
Jason M. Bills1490b142019-07-01 15:48:43 -0700368static void startPowerCycle()
369{
370 conn->async_method_call(
371 [](boost::system::error_code ec) {
372 if (ec)
373 {
374 std::cerr << "failed to set Chassis State\n";
375 }
376 },
377 "xyz.openbmc_project.State.Chassis",
378 "/xyz/openbmc_project/state/chassis0",
379 "org.freedesktop.DBus.Properties", "Set",
380 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
381 std::variant<std::string>{
382 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
383}
384
Jason M. Billsb61766b2019-11-26 17:02:44 -0800385static void startCrashdumpAndRecovery(bool recoverSystem,
386 const std::string& triggerType)
Jason M. Bills1490b142019-07-01 15:48:43 -0700387{
388 std::cout << "Starting crashdump\n";
389 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
390 static boost::asio::steady_timer crashdumpTimer(io);
391
392 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
393 *conn,
394 "type='signal',interface='org.freedesktop.DBus.Properties',"
395 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
396 [recoverSystem](sdbusplus::message::message& msg) {
397 crashdumpTimer.cancel();
398 std::cout << "Crashdump completed\n";
399 if (recoverSystem)
400 {
401 std::cout << "Recovering the system\n";
402 startPowerCycle();
403 }
404 crashdumpCompleteMatch.reset();
405 });
406
407 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
408 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
409 if (ec)
410 {
411 // operation_aborted is expected if timer is canceled
412 if (ec != boost::asio::error::operation_aborted)
413 {
414 std::cerr << "Crashdump async_wait failed: " << ec.message()
415 << "\n";
416 }
417 std::cout << "Crashdump timer canceled\n";
418 return;
419 }
420 std::cerr << "Crashdump failed to complete before timeout\n";
421 crashdumpCompleteMatch.reset();
422 });
423
424 conn->async_method_call(
425 [](boost::system::error_code ec) {
426 if (ec)
427 {
428 std::cerr << "failed to start Crashdump\n";
429 crashdumpTimer.cancel();
430 crashdumpCompleteMatch.reset();
431 }
432 },
433 "com.intel.crashdump", "/com/intel/crashdump",
Jason M. Billsb61766b2019-11-26 17:02:44 -0800434 "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
Jason M. Bills1490b142019-07-01 15:48:43 -0700435}
436
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700437static void incrementCPUErrorCount(int cpuNum)
438{
439 std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
440
441 // Get the current count
442 conn->async_method_call(
443 [propertyName](boost::system::error_code ec,
444 const std::variant<uint8_t>& property) {
445 if (ec)
446 {
447 std::cerr << "Failed to read " << propertyName << ": "
448 << ec.message() << "\n";
449 return;
450 }
451 const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
452 if (errorCountVariant == nullptr)
453 {
454 std::cerr << propertyName << " invalid\n";
455 return;
456 }
457 uint8_t errorCount = *errorCountVariant;
458 if (errorCount == std::numeric_limits<uint8_t>::max())
459 {
460 std::cerr << "Maximum error count reached\n";
461 return;
462 }
463 // Increment the count
464 errorCount++;
465 conn->async_method_call(
466 [propertyName](boost::system::error_code ec) {
467 if (ec)
468 {
469 std::cerr << "Failed to set " << propertyName << ": "
470 << ec.message() << "\n";
471 }
472 },
473 "xyz.openbmc_project.Settings",
474 "/xyz/openbmc_project/control/processor_error_config",
475 "org.freedesktop.DBus.Properties", "Set",
476 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
477 std::variant<uint8_t>{errorCount});
478 },
479 "xyz.openbmc_project.Settings",
480 "/xyz/openbmc_project/control/processor_error_config",
481 "org.freedesktop.DBus.Properties", "Get",
482 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
483}
484
Jason M. Billsa3397932019-08-06 11:07:21 -0700485static bool checkIERRCPUs()
486{
487 bool cpuIERRFound = false;
488 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
489 cpu++, addr++)
490 {
491 uint8_t cc = 0;
492 CPUModel model{};
493 uint8_t stepping = 0;
494 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
495 {
496 std::cerr << "Cannot get CPUID!\n";
497 continue;
498 }
499
500 switch (model)
501 {
502 case skx:
503 {
504 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
505 // that caused the IERR
506 uint32_t mcaErrSrcLog = 0;
507 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
508 &cc) != PECI_CC_SUCCESS)
509 {
510 continue;
511 }
512 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
513 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
514 {
515 // TODO: Light the CPU fault LED?
516 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700517 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700518 // Next check if it's a CPU/VR mismatch by reading the
519 // IA32_MC4_STATUS MSR (0x411)
520 uint64_t mc4Status = 0;
521 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
522 PECI_CC_SUCCESS)
523 {
524 continue;
525 }
526 // Check MSEC bits 31:24 for
527 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
528 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
529 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
530 if ((mc4Status & (0x40 << 24)) ||
531 (mc4Status & (0x42 << 24)) ||
532 (mc4Status & (0x43 << 24)))
533 {
534 cpuIERRLog(cpu, "CPU/VR Mismatch");
535 continue;
536 }
537
538 // Next check if it's a Core FIVR fault by looking for a
539 // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
540 // 80h)
541 uint32_t coreFIVRErrLog = 0;
542 if (peci_RdPCIConfigLocal(
543 addr, 1, 30, 2, 0x80, sizeof(uint32_t),
544 (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
545 {
546 continue;
547 }
548 if (coreFIVRErrLog)
549 {
550 cpuIERRLog(cpu, "Core FIVR Fault");
551 continue;
552 }
553
554 // Next check if it's an Uncore FIVR fault by looking for a
555 // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
556 // 84h)
557 uint32_t uncoreFIVRErrLog = 0;
558 if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
559 sizeof(uint32_t),
560 (uint8_t*)&uncoreFIVRErrLog,
561 &cc) != PECI_CC_SUCCESS)
562 {
563 continue;
564 }
565 if (uncoreFIVRErrLog)
566 {
567 cpuIERRLog(cpu, "Uncore FIVR Fault");
568 continue;
569 }
570
571 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
572 // both zero, but MSEC bits 31:24 have either
573 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
574 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
575 // uncore FIVR fault
576 if (!coreFIVRErrLog && !uncoreFIVRErrLog &&
577 ((mc4Status & (0x51 << 24)) ||
578 (mc4Status & (0x52 << 24))))
579 {
580 cpuIERRLog(cpu, "Uncore FIVR Fault");
581 continue;
582 }
583 cpuIERRLog(cpu);
584 }
585 break;
586 }
587 case icx:
588 {
589 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
590 // that caused the IERR
591 uint32_t mcaErrSrcLog = 0;
592 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
593 &cc) != PECI_CC_SUCCESS)
594 {
595 continue;
596 }
597 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
598 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
599 {
600 // TODO: Light the CPU fault LED?
601 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700602 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700603 // Next check if it's a CPU/VR mismatch by reading the
604 // IA32_MC4_STATUS MSR (0x411)
605 uint64_t mc4Status = 0;
606 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
607 PECI_CC_SUCCESS)
608 {
609 continue;
610 }
611 // TODO: Update MSEC/MSCOD_31_24 check
612 // Check MSEC bits 31:24 for
613 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
614 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
615 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
616 if ((mc4Status & (0x40 << 24)) ||
617 (mc4Status & (0x42 << 24)) ||
618 (mc4Status & (0x43 << 24)))
619 {
620 cpuIERRLog(cpu, "CPU/VR Mismatch");
621 continue;
622 }
623
624 // Next check if it's a Core FIVR fault by looking for a
625 // non-zero value of CORE_FIVR_ERR_LOG (B(31) D30 F2 offsets
626 // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
627 uint32_t coreFIVRErrLog0 = 0;
628 uint32_t coreFIVRErrLog1 = 0;
629 if (peci_RdEndPointConfigPciLocal(
630 addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
631 (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
632 {
633 continue;
634 }
635 if (peci_RdEndPointConfigPciLocal(
636 addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
637 (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
638 {
639 continue;
640 }
641 if (coreFIVRErrLog0 || coreFIVRErrLog1)
642 {
643 cpuIERRLog(cpu, "Core FIVR Fault");
644 continue;
645 }
646
647 // Next check if it's an Uncore FIVR fault by looking for a
648 // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
649 // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
650 uint32_t uncoreFIVRErrLog = 0;
651 if (peci_RdEndPointConfigPciLocal(
652 addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
653 (uint8_t*)&uncoreFIVRErrLog,
654 &cc) != PECI_CC_SUCCESS)
655 {
656 continue;
657 }
658 if (uncoreFIVRErrLog)
659 {
660 cpuIERRLog(cpu, "Uncore FIVR Fault");
661 continue;
662 }
663
664 // TODO: Update MSEC/MSCOD_31_24 check
665 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
666 // both zero, but MSEC bits 31:24 have either
667 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
668 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
669 // uncore FIVR fault
670 if (!coreFIVRErrLog0 && !coreFIVRErrLog1 &&
671 !uncoreFIVRErrLog &&
672 ((mc4Status & (0x51 << 24)) ||
673 (mc4Status & (0x52 << 24))))
674 {
675 cpuIERRLog(cpu, "Uncore FIVR Fault");
676 continue;
677 }
678 cpuIERRLog(cpu);
679 }
680 break;
681 }
682 }
683 }
684 return cpuIERRFound;
685}
686
Jason M. Billsa15c2522019-08-16 10:01:44 -0700687static void caterrAssertHandler()
688{
Jason M. Billsa15c2522019-08-16 10:01:44 -0700689 caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
690 caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
691 if (ec)
692 {
693 // operation_aborted is expected if timer is canceled
694 // before completion.
695 if (ec != boost::asio::error::operation_aborted)
696 {
697 std::cerr << "caterr timeout async_wait failed: "
698 << ec.message() << "\n";
699 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700700 return;
701 }
Jason M. Billsa3397932019-08-06 11:07:21 -0700702 std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs)
703 << " ms\n";
704 if (!checkIERRCPUs())
705 {
706 cpuIERRLog();
707 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700708 conn->async_method_call(
709 [](boost::system::error_code ec,
710 const std::variant<bool>& property) {
711 if (ec)
712 {
713 return;
714 }
715 const bool* reset = std::get_if<bool>(&property);
716 if (reset == nullptr)
717 {
718 std::cerr << "Unable to read reset on CATERR value\n";
719 return;
720 }
Jason M. Billsb61766b2019-11-26 17:02:44 -0800721 startCrashdumpAndRecovery(*reset, "IERR");
Jason M. Billsa15c2522019-08-16 10:01:44 -0700722 },
723 "xyz.openbmc_project.Settings",
724 "/xyz/openbmc_project/control/processor_error_config",
725 "org.freedesktop.DBus.Properties", "Get",
726 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
727 });
728}
729
Jason M. Bills1490b142019-07-01 15:48:43 -0700730static void caterrHandler()
731{
732 if (!hostOff)
733 {
734 gpiod::line_event gpioLineEvent = caterrLine.event_read();
735
736 bool caterr =
737 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
738 if (caterr)
739 {
Jason M. Billsa15c2522019-08-16 10:01:44 -0700740 caterrAssertHandler();
Jason M. Bills1490b142019-07-01 15:48:43 -0700741 }
742 else
743 {
744 caterrAssertTimer.cancel();
745 }
746 }
747 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
748 [](const boost::system::error_code ec) {
749 if (ec)
750 {
751 std::cerr << "caterr handler error: "
752 << ec.message() << "\n";
753 return;
754 }
755 caterrHandler();
756 });
757}
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700758
Jason M. Billse94f5e12019-09-13 11:11:34 -0700759static void cpu1ThermtripAssertHandler()
760{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700761 if (cpu1FIVRFaultLine.get_value() == 0)
762 {
763 cpuBootFIVRFaultLog(1);
764 }
765 else
766 {
767 cpuThermTripLog(1);
768 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700769}
770
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700771static void cpu1ThermtripHandler()
772{
Jason M. Bills84951142020-04-17 15:57:11 -0700773 gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700774
Jason M. Bills84951142020-04-17 15:57:11 -0700775 bool cpu1Thermtrip =
776 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
777 if (cpu1Thermtrip)
778 {
779 cpu1ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700780 }
Jason M. Bills84951142020-04-17 15:57:11 -0700781
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700782 cpu1ThermtripEvent.async_wait(
783 boost::asio::posix::stream_descriptor::wait_read,
784 [](const boost::system::error_code ec) {
785 if (ec)
786 {
787 std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
788 << "\n";
789 return;
790 }
791 cpu1ThermtripHandler();
792 });
793}
794
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000795static void cpu1MemtripHandler()
796{
797 if (!hostOff)
798 {
799 gpiod::line_event gpioLineEvent = cpu1MemtripLine.event_read();
800
801 bool cpu1Memtrip =
802 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
803 if (cpu1Memtrip)
804 {
805 memThermTripLog(1);
806 }
807 }
808 cpu1MemtripEvent.async_wait(
809 boost::asio::posix::stream_descriptor::wait_read,
810 [](const boost::system::error_code ec) {
811 if (ec)
812 {
813 std::cerr << "CPU 1 Memory Thermaltrip handler error: "
814 << ec.message() << "\n";
815 return;
816 }
817 cpu1MemtripHandler();
818 });
819}
820
Jason M. Billse94f5e12019-09-13 11:11:34 -0700821static void cpu2ThermtripAssertHandler()
822{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700823 if (cpu2FIVRFaultLine.get_value() == 0)
824 {
825 cpuBootFIVRFaultLog(2);
826 }
827 else
828 {
829 cpuThermTripLog(2);
830 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700831}
832
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700833static void cpu2ThermtripHandler()
834{
Jason M. Bills84951142020-04-17 15:57:11 -0700835 gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700836
Jason M. Bills84951142020-04-17 15:57:11 -0700837 bool cpu2Thermtrip =
838 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
839 if (cpu2Thermtrip)
840 {
841 cpu2ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700842 }
Jason M. Bills84951142020-04-17 15:57:11 -0700843
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700844 cpu2ThermtripEvent.async_wait(
845 boost::asio::posix::stream_descriptor::wait_read,
846 [](const boost::system::error_code ec) {
847 if (ec)
848 {
849 std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
850 << "\n";
851 return;
852 }
853 cpu2ThermtripHandler();
854 });
855}
856
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000857static void cpu2MemtripHandler()
858{
859 if (!hostOff)
860 {
861 gpiod::line_event gpioLineEvent = cpu2MemtripLine.event_read();
862
863 bool cpu2Memtrip =
864 gpioLineEvent.event_type == gpiod::line_event::RISING_EDGE;
865 if (cpu2Memtrip)
866 {
867 memThermTripLog(2);
868 }
869 }
870 cpu2MemtripEvent.async_wait(
871 boost::asio::posix::stream_descriptor::wait_read,
872 [](const boost::system::error_code ec) {
873 if (ec)
874 {
875 std::cerr << "CPU 2 Memory Thermaltrip handler error: "
876 << ec.message() << "\n";
877 return;
878 }
879 cpu2MemtripHandler();
880 });
881}
882
Jason M. Billse94f5e12019-09-13 11:11:34 -0700883static void cpu1VRHotAssertHandler()
884{
885 cpuVRHotLog("CPU 1");
886}
887
Jason M. Bills250fa632019-08-28 15:58:25 -0700888static void cpu1VRHotHandler()
889{
Jason M. Bills84951142020-04-17 15:57:11 -0700890 gpiod::line_event gpioLineEvent = cpu1VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700891
Jason M. Bills84951142020-04-17 15:57:11 -0700892 bool cpu1VRHot =
893 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
894 if (cpu1VRHot)
895 {
896 cpu1VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700897 }
Jason M. Bills84951142020-04-17 15:57:11 -0700898
Jason M. Bills250fa632019-08-28 15:58:25 -0700899 cpu1VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
900 [](const boost::system::error_code ec) {
901 if (ec)
902 {
903 std::cerr << "CPU 1 VRHot handler error: "
904 << ec.message() << "\n";
905 return;
906 }
907 cpu1VRHotHandler();
908 });
909}
910
Jason M. Billse94f5e12019-09-13 11:11:34 -0700911static void cpu1MemABCDVRHotAssertHandler()
912{
913 cpuVRHotLog("CPU 1 Memory ABCD");
914}
915
Jason M. Bills9647ba72019-08-29 14:19:19 -0700916static void cpu1MemABCDVRHotHandler()
917{
Jason M. Bills84951142020-04-17 15:57:11 -0700918 gpiod::line_event gpioLineEvent = cpu1MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700919
Jason M. Bills84951142020-04-17 15:57:11 -0700920 bool cpu1MemABCDVRHot =
921 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
922 if (cpu1MemABCDVRHot)
923 {
924 cpu1MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700925 }
Jason M. Bills84951142020-04-17 15:57:11 -0700926
Jason M. Bills9647ba72019-08-29 14:19:19 -0700927 cpu1MemABCDVRHotEvent.async_wait(
928 boost::asio::posix::stream_descriptor::wait_read,
929 [](const boost::system::error_code ec) {
930 if (ec)
931 {
932 std::cerr << "CPU 1 Memory ABCD VRHot handler error: "
933 << ec.message() << "\n";
934 return;
935 }
936 cpu1MemABCDVRHotHandler();
937 });
938}
939
Jason M. Billse94f5e12019-09-13 11:11:34 -0700940static void cpu1MemEFGHVRHotAssertHandler()
941{
942 cpuVRHotLog("CPU 1 Memory EFGH");
943}
944
Jason M. Bills9647ba72019-08-29 14:19:19 -0700945static void cpu1MemEFGHVRHotHandler()
946{
Jason M. Bills84951142020-04-17 15:57:11 -0700947 gpiod::line_event gpioLineEvent = cpu1MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700948
Jason M. Bills84951142020-04-17 15:57:11 -0700949 bool cpu1MemEFGHVRHot =
950 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
951 if (cpu1MemEFGHVRHot)
952 {
953 cpu1MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700954 }
Jason M. Bills84951142020-04-17 15:57:11 -0700955
Jason M. Bills9647ba72019-08-29 14:19:19 -0700956 cpu1MemEFGHVRHotEvent.async_wait(
957 boost::asio::posix::stream_descriptor::wait_read,
958 [](const boost::system::error_code ec) {
959 if (ec)
960 {
961 std::cerr << "CPU 1 Memory EFGH VRHot handler error: "
962 << ec.message() << "\n";
963 return;
964 }
965 cpu1MemEFGHVRHotHandler();
966 });
967}
968
Jason M. Billse94f5e12019-09-13 11:11:34 -0700969static void cpu2VRHotAssertHandler()
970{
971 cpuVRHotLog("CPU 2");
972}
973
Jason M. Bills250fa632019-08-28 15:58:25 -0700974static void cpu2VRHotHandler()
975{
Jason M. Bills84951142020-04-17 15:57:11 -0700976 gpiod::line_event gpioLineEvent = cpu2VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700977
Jason M. Bills84951142020-04-17 15:57:11 -0700978 bool cpu2VRHot =
979 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
980 if (cpu2VRHot)
981 {
982 cpu2VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700983 }
Jason M. Bills84951142020-04-17 15:57:11 -0700984
Jason M. Bills250fa632019-08-28 15:58:25 -0700985 cpu2VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
986 [](const boost::system::error_code ec) {
987 if (ec)
988 {
989 std::cerr << "CPU 2 VRHot handler error: "
990 << ec.message() << "\n";
991 return;
992 }
993 cpu2VRHotHandler();
994 });
995}
996
Jason M. Billse94f5e12019-09-13 11:11:34 -0700997static void cpu2MemABCDVRHotAssertHandler()
998{
999 cpuVRHotLog("CPU 2 Memory ABCD");
1000}
1001
Jason M. Bills9647ba72019-08-29 14:19:19 -07001002static void cpu2MemABCDVRHotHandler()
1003{
Jason M. Bills84951142020-04-17 15:57:11 -07001004 gpiod::line_event gpioLineEvent = cpu2MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001005
Jason M. Bills84951142020-04-17 15:57:11 -07001006 bool cpu2MemABCDVRHot =
1007 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1008 if (cpu2MemABCDVRHot)
1009 {
1010 cpu2MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001011 }
Jason M. Bills84951142020-04-17 15:57:11 -07001012
Jason M. Bills9647ba72019-08-29 14:19:19 -07001013 cpu2MemABCDVRHotEvent.async_wait(
1014 boost::asio::posix::stream_descriptor::wait_read,
1015 [](const boost::system::error_code ec) {
1016 if (ec)
1017 {
1018 std::cerr << "CPU 2 Memory ABCD VRHot handler error: "
1019 << ec.message() << "\n";
1020 return;
1021 }
1022 cpu2MemABCDVRHotHandler();
1023 });
1024}
1025
Jason M. Billse94f5e12019-09-13 11:11:34 -07001026static void cpu2MemEFGHVRHotAssertHandler()
1027{
1028 cpuVRHotLog("CPU 2 Memory EFGH");
1029}
1030
Jason M. Bills9647ba72019-08-29 14:19:19 -07001031static void cpu2MemEFGHVRHotHandler()
1032{
Jason M. Bills84951142020-04-17 15:57:11 -07001033 gpiod::line_event gpioLineEvent = cpu2MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001034
Jason M. Bills84951142020-04-17 15:57:11 -07001035 bool cpu2MemEFGHVRHot =
1036 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1037 if (cpu2MemEFGHVRHot)
1038 {
1039 cpu2MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001040 }
Jason M. Bills84951142020-04-17 15:57:11 -07001041
Jason M. Bills9647ba72019-08-29 14:19:19 -07001042 cpu2MemEFGHVRHotEvent.async_wait(
1043 boost::asio::posix::stream_descriptor::wait_read,
1044 [](const boost::system::error_code ec) {
1045 if (ec)
1046 {
1047 std::cerr << "CPU 2 Memory EFGH VRHot handler error: "
1048 << ec.message() << "\n";
1049 return;
1050 }
1051 cpu2MemEFGHVRHotHandler();
1052 });
1053}
1054
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001055static void pchThermtripHandler()
1056{
Jason M. Bills84951142020-04-17 15:57:11 -07001057 gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001058
Jason M. Bills84951142020-04-17 15:57:11 -07001059 bool pchThermtrip =
1060 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1061 if (pchThermtrip)
1062 {
1063 ssbThermTripLog();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001064 }
Jason M. Bills84951142020-04-17 15:57:11 -07001065
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001066 pchThermtripEvent.async_wait(
1067 boost::asio::posix::stream_descriptor::wait_read,
1068 [](const boost::system::error_code ec) {
1069 if (ec)
1070 {
1071 std::cerr << "PCH Thermal trip handler error: " << ec.message()
1072 << "\n";
1073 return;
1074 }
1075 pchThermtripHandler();
1076 });
1077}
1078
Jason M. Billscbf78532019-08-16 15:32:11 -07001079static std::bitset<MAX_CPUS> checkERRPinCPUs(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001080{
Jason M. Billscbf78532019-08-16 15:32:11 -07001081 int errPinSts = (1 << errPin);
1082 std::bitset<MAX_CPUS> errPinCPUs = 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001083 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
1084 cpu++, addr++)
1085 {
1086 if (peci_Ping(addr) == PECI_CC_SUCCESS)
1087 {
1088 uint8_t cc = 0;
1089 CPUModel model{};
1090 uint8_t stepping = 0;
1091 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
1092 {
1093 std::cerr << "Cannot get CPUID!\n";
1094 continue;
1095 }
1096
1097 switch (model)
1098 {
1099 case skx:
1100 {
1101 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001102 // the ERRx (B(0) D8 F0 offset 210h)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001103 uint32_t errpinsts = 0;
1104 if (peci_RdPCIConfigLocal(
1105 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
1106 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1107 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001108 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001109 }
1110 break;
1111 }
1112 case icx:
1113 {
1114 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001115 // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001116 // accessed on PECI as bus 13)
1117 uint32_t errpinsts = 0;
1118 if (peci_RdEndPointConfigPciLocal(
1119 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
1120 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1121 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001122 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001123 }
1124 break;
1125 }
1126 }
1127 }
1128 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001129 return errPinCPUs;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001130}
1131
Jason M. Billscbf78532019-08-16 15:32:11 -07001132static void errXAssertHandler(const int errPin,
1133 boost::asio::steady_timer& errXAssertTimer)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001134{
Jason M. Billscbf78532019-08-16 15:32:11 -07001135 // ERRx status is not guaranteed through the timeout, so save which
1136 // CPUs have it asserted
1137 std::bitset<MAX_CPUS> errPinCPUs = checkERRPinCPUs(errPin);
1138 errXAssertTimer.expires_after(std::chrono::milliseconds(errTimeoutMs));
1139 errXAssertTimer.async_wait([errPin, errPinCPUs](
1140 const boost::system::error_code ec) {
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001141 if (ec)
1142 {
1143 // operation_aborted is expected if timer is canceled before
1144 // completion.
1145 if (ec != boost::asio::error::operation_aborted)
1146 {
1147 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1148 << "\n";
1149 }
1150 return;
1151 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001152 std::cerr << "ERR" << std::to_string(errPin) << " asserted for "
1153 << std::to_string(errTimeoutMs) << " ms\n";
1154 if (errPinCPUs.count())
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001155 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001156 for (int i = 0; i < errPinCPUs.size(); i++)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001157 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001158 if (errPinCPUs[i])
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001159 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001160 cpuERRXLog(errPin, i);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001161 }
1162 }
1163 }
1164 else
1165 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001166 cpuERRXLog(errPin);
1167 }
1168 });
1169}
1170
Jason M. Bills8c584392019-08-19 11:05:51 -07001171static void err0AssertHandler()
1172{
1173 // Handle the standard ERR0 detection and logging
1174 const static constexpr int err0 = 0;
1175 errXAssertHandler(err0, err0AssertTimer);
1176}
1177
1178static void err0Handler()
1179{
1180 if (!hostOff)
1181 {
1182 gpiod::line_event gpioLineEvent = err0Line.event_read();
1183
1184 bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1185 if (err0)
1186 {
1187 err0AssertHandler();
1188 }
1189 else
1190 {
1191 err0AssertTimer.cancel();
1192 }
1193 }
1194 err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1195 [](const boost::system::error_code ec) {
1196 if (ec)
1197 {
1198 std::cerr
1199 << "err0 handler error: " << ec.message()
1200 << "\n";
1201 return;
1202 }
1203 err0Handler();
1204 });
1205}
1206
Jason M. Bills75af3962019-08-19 11:07:17 -07001207static void err1AssertHandler()
1208{
1209 // Handle the standard ERR1 detection and logging
1210 const static constexpr int err1 = 1;
1211 errXAssertHandler(err1, err1AssertTimer);
1212}
1213
1214static void err1Handler()
1215{
1216 if (!hostOff)
1217 {
1218 gpiod::line_event gpioLineEvent = err1Line.event_read();
1219
1220 bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1221 if (err1)
1222 {
1223 err1AssertHandler();
1224 }
1225 else
1226 {
1227 err1AssertTimer.cancel();
1228 }
1229 }
1230 err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1231 [](const boost::system::error_code ec) {
1232 if (ec)
1233 {
1234 std::cerr
1235 << "err1 handler error: " << ec.message()
1236 << "\n";
1237 return;
1238 }
1239 err1Handler();
1240 });
1241}
1242
Jason M. Billscbf78532019-08-16 15:32:11 -07001243static void err2AssertHandler()
1244{
1245 // Handle the standard ERR2 detection and logging
1246 const static constexpr int err2 = 2;
1247 errXAssertHandler(err2, err2AssertTimer);
1248 // Also handle reset for ERR2
1249 err2AssertTimer.async_wait([](const boost::system::error_code ec) {
1250 if (ec)
1251 {
1252 // operation_aborted is expected if timer is canceled before
1253 // completion.
1254 if (ec != boost::asio::error::operation_aborted)
1255 {
1256 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1257 << "\n";
1258 }
1259 return;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001260 }
1261 conn->async_method_call(
1262 [](boost::system::error_code ec,
1263 const std::variant<bool>& property) {
1264 if (ec)
1265 {
1266 return;
1267 }
1268 const bool* reset = std::get_if<bool>(&property);
1269 if (reset == nullptr)
1270 {
1271 std::cerr << "Unable to read reset on ERR2 value\n";
1272 return;
1273 }
Jason M. Billsb61766b2019-11-26 17:02:44 -08001274 startCrashdumpAndRecovery(*reset, "ERR2 Timeout");
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001275 },
1276 "xyz.openbmc_project.Settings",
1277 "/xyz/openbmc_project/control/processor_error_config",
1278 "org.freedesktop.DBus.Properties", "Get",
1279 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnERR2");
Yong Li061eb032020-02-26 15:06:18 +08001280
1281 beep(beepCPUErr2);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001282 });
1283}
1284
1285static void err2Handler()
1286{
1287 if (!hostOff)
1288 {
1289 gpiod::line_event gpioLineEvent = err2Line.event_read();
1290
1291 bool err2 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1292 if (err2)
1293 {
1294 err2AssertHandler();
1295 }
1296 else
1297 {
1298 err2AssertTimer.cancel();
1299 }
1300 }
1301 err2Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1302 [](const boost::system::error_code ec) {
1303 if (ec)
1304 {
1305 std::cerr
1306 << "err2 handler error: " << ec.message()
1307 << "\n";
1308 return;
1309 }
1310 err2Handler();
1311 });
1312}
1313
Jason M. Bills89922f82019-08-06 11:10:02 -07001314static void smiAssertHandler()
1315{
1316 smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
1317 smiAssertTimer.async_wait([](const boost::system::error_code ec) {
1318 if (ec)
1319 {
1320 // operation_aborted is expected if timer is canceled before
1321 // completion.
1322 if (ec != boost::asio::error::operation_aborted)
1323 {
1324 std::cerr << "smi timeout async_wait failed: " << ec.message()
1325 << "\n";
1326 }
1327 return;
1328 }
1329 std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs)
1330 << " ms\n";
1331 smiTimeoutLog();
1332 conn->async_method_call(
1333 [](boost::system::error_code ec,
1334 const std::variant<bool>& property) {
1335 if (ec)
1336 {
1337 return;
1338 }
1339 const bool* reset = std::get_if<bool>(&property);
1340 if (reset == nullptr)
1341 {
1342 std::cerr << "Unable to read reset on SMI value\n";
1343 return;
1344 }
Jason M. Bills94785442020-01-07 15:22:09 -08001345#ifdef HOST_ERROR_CRASHDUMP_ON_SMI_TIMEOUT
Jason M. Billsb61766b2019-11-26 17:02:44 -08001346 startCrashdumpAndRecovery(*reset, "SMI Timeout");
Jason M. Bills94785442020-01-07 15:22:09 -08001347#else
1348 if (*reset)
1349 {
1350 std::cout << "Recovering the system\n";
1351 startPowerCycle();
1352 }
1353#endif
Jason M. Bills89922f82019-08-06 11:10:02 -07001354 },
1355 "xyz.openbmc_project.Settings",
1356 "/xyz/openbmc_project/control/bmc_reset_disables",
1357 "org.freedesktop.DBus.Properties", "Get",
1358 "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
1359 });
1360}
1361
1362static void smiHandler()
1363{
1364 if (!hostOff)
1365 {
1366 gpiod::line_event gpioLineEvent = smiLine.event_read();
1367
1368 bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1369 if (smi)
1370 {
1371 smiAssertHandler();
1372 }
1373 else
1374 {
1375 smiAssertTimer.cancel();
1376 }
1377 }
1378 smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1379 [](const boost::system::error_code ec) {
1380 if (ec)
1381 {
1382 std::cerr
1383 << "smi handler error: " << ec.message()
1384 << "\n";
1385 return;
1386 }
1387 smiHandler();
1388 });
1389}
1390
Jason M. Billsa15c2522019-08-16 10:01:44 -07001391static void initializeErrorState()
1392{
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001393 // Handle CPU1_MISMATCH if it's asserted now
1394 if (cpu1MismatchLine.get_value() == 1)
1395 {
1396 cpuMismatchLog(1);
1397 }
1398
1399 // Handle CPU2_MISMATCH if it's asserted now
1400 if (cpu2MismatchLine.get_value() == 1)
1401 {
1402 cpuMismatchLog(2);
1403 }
1404
Jason M. Billsa15c2522019-08-16 10:01:44 -07001405 // Handle CPU_CATERR if it's asserted now
1406 if (caterrLine.get_value() == 0)
1407 {
1408 caterrAssertHandler();
1409 }
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001410
Jason M. Bills8c584392019-08-19 11:05:51 -07001411 // Handle CPU_ERR0 if it's asserted now
1412 if (err0Line.get_value() == 0)
1413 {
1414 err0AssertHandler();
1415 }
1416
Jason M. Bills75af3962019-08-19 11:07:17 -07001417 // Handle CPU_ERR1 if it's asserted now
1418 if (err1Line.get_value() == 0)
1419 {
1420 err1AssertHandler();
1421 }
1422
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001423 // Handle CPU_ERR2 if it's asserted now
1424 if (err2Line.get_value() == 0)
1425 {
1426 err2AssertHandler();
1427 }
Jason M. Bills89922f82019-08-06 11:10:02 -07001428
1429 // Handle SMI if it's asserted now
1430 if (smiLine.get_value() == 0)
1431 {
1432 smiAssertHandler();
1433 }
Jason M. Bills08866542019-08-16 12:04:19 -07001434
Jason M. Billse94f5e12019-09-13 11:11:34 -07001435 // Handle CPU1_THERMTRIP if it's asserted now
1436 if (cpu1ThermtripLine.get_value() == 0)
1437 {
1438 cpu1ThermtripAssertHandler();
1439 }
1440
1441 // Handle CPU2_THERMTRIP if it's asserted now
1442 if (cpu2ThermtripLine.get_value() == 0)
1443 {
1444 cpu2ThermtripAssertHandler();
1445 }
1446
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001447 // Handle CPU1_MEM_THERM_EVENT (CPU1 DIMM Thermal trip) if it's asserted now
1448 if (cpu1MemtripLine.get_value() == 0)
1449 {
1450 memThermTripLog(1);
1451 }
1452
1453 // Handle CPU2_MEM_THERM_EVENT (CPU2 DIMM Thermal trip) if it's asserted now
1454 if (cpu2MemtripLine.get_value() == 0)
1455 {
1456 memThermTripLog(2);
1457 }
1458
Jason M. Billse94f5e12019-09-13 11:11:34 -07001459 // Handle CPU1_VRHOT if it's asserted now
1460 if (cpu1VRHotLine.get_value() == 0)
1461 {
1462 cpu1VRHotAssertHandler();
1463 }
1464
1465 // Handle CPU1_MEM_ABCD_VRHOT if it's asserted now
1466 if (cpu1MemABCDVRHotLine.get_value() == 0)
1467 {
1468 cpu1MemABCDVRHotAssertHandler();
1469 }
1470
1471 // Handle CPU1_MEM_EFGH_VRHOT if it's asserted now
1472 if (cpu1MemEFGHVRHotLine.get_value() == 0)
1473 {
1474 cpu1MemEFGHVRHotAssertHandler();
1475 }
1476
1477 // Handle CPU2_VRHOT if it's asserted now
1478 if (cpu2VRHotLine.get_value() == 0)
1479 {
1480 cpu2VRHotAssertHandler();
1481 }
1482
1483 // Handle CPU2_MEM_ABCD_VRHOT if it's asserted now
1484 if (cpu2MemABCDVRHotLine.get_value() == 0)
1485 {
1486 cpu2MemABCDVRHotAssertHandler();
1487 }
1488
1489 // Handle CPU2_MEM_EFGH_VRHOT if it's asserted now
1490 if (cpu2MemEFGHVRHotLine.get_value() == 0)
1491 {
1492 cpu2MemEFGHVRHotAssertHandler();
1493 }
1494
Jason M. Bills08866542019-08-16 12:04:19 -07001495 // Handle PCH_BMC_THERMTRIP if it's asserted now
1496 if (pchThermtripLine.get_value() == 0)
1497 {
1498 ssbThermTripLog();
1499 }
Jason M. Billsa15c2522019-08-16 10:01:44 -07001500}
Jason M. Bills1490b142019-07-01 15:48:43 -07001501} // namespace host_error_monitor
1502
1503int main(int argc, char* argv[])
1504{
1505 // setup connection to dbus
1506 host_error_monitor::conn =
1507 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
1508
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001509 // Host Error Monitor Service
Jason M. Bills1490b142019-07-01 15:48:43 -07001510 host_error_monitor::conn->request_name(
1511 "xyz.openbmc_project.HostErrorMonitor");
1512 sdbusplus::asio::object_server server =
1513 sdbusplus::asio::object_server(host_error_monitor::conn);
1514
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001515 // Restart Cause Interface
1516 host_error_monitor::hostErrorTimeoutIface =
1517 server.add_interface("/xyz/openbmc_project/host_error_monitor",
1518 "xyz.openbmc_project.HostErrorMonitor.Timeout");
1519
1520 host_error_monitor::hostErrorTimeoutIface->register_property(
1521 "IERRTimeoutMs", host_error_monitor::caterrTimeoutMs,
1522 [](const std::size_t& requested, std::size_t& resp) {
1523 if (requested > host_error_monitor::caterrTimeoutMsMax)
1524 {
1525 std::cerr << "IERRTimeoutMs update to " << requested
1526 << "ms rejected. Cannot be greater than "
1527 << host_error_monitor::caterrTimeoutMsMax << "ms.\n";
1528 return 0;
1529 }
1530 std::cerr << "IERRTimeoutMs updated to " << requested << "ms\n";
1531 host_error_monitor::caterrTimeoutMs = requested;
1532 resp = requested;
1533 return 1;
1534 },
1535 [](std::size_t& resp) { return host_error_monitor::caterrTimeoutMs; });
1536 host_error_monitor::hostErrorTimeoutIface->initialize();
1537
Jason M. Bills1490b142019-07-01 15:48:43 -07001538 // Start tracking host state
1539 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
1540 host_error_monitor::startHostStateMonitor();
1541
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001542 // Request CPU1_MISMATCH GPIO events
1543 if (!host_error_monitor::requestGPIOInput(
1544 "CPU1_MISMATCH", host_error_monitor::cpu1MismatchLine))
1545 {
1546 return -1;
1547 }
1548
1549 // Request CPU2_MISMATCH GPIO events
1550 if (!host_error_monitor::requestGPIOInput(
1551 "CPU2_MISMATCH", host_error_monitor::cpu2MismatchLine))
1552 {
1553 return -1;
1554 }
1555
Jason M. Bills1490b142019-07-01 15:48:43 -07001556 // Initialize the host state
1557 host_error_monitor::initializeHostState();
1558
1559 // Request CPU_CATERR GPIO events
1560 if (!host_error_monitor::requestGPIOEvents(
1561 "CPU_CATERR", host_error_monitor::caterrHandler,
1562 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
1563 {
1564 return -1;
1565 }
1566
Jason M. Bills8c584392019-08-19 11:05:51 -07001567 // Request CPU_ERR0 GPIO events
1568 if (!host_error_monitor::requestGPIOEvents(
1569 "CPU_ERR0", host_error_monitor::err0Handler,
1570 host_error_monitor::err0Line, host_error_monitor::err0Event))
1571 {
1572 return -1;
1573 }
1574
Jason M. Bills75af3962019-08-19 11:07:17 -07001575 // Request CPU_ERR1 GPIO events
1576 if (!host_error_monitor::requestGPIOEvents(
1577 "CPU_ERR1", host_error_monitor::err1Handler,
1578 host_error_monitor::err1Line, host_error_monitor::err1Event))
1579 {
1580 return -1;
1581 }
1582
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001583 // Request CPU_ERR2 GPIO events
1584 if (!host_error_monitor::requestGPIOEvents(
1585 "CPU_ERR2", host_error_monitor::err2Handler,
1586 host_error_monitor::err2Line, host_error_monitor::err2Event))
1587 {
1588 return -1;
1589 }
1590
Jason M. Bills89922f82019-08-06 11:10:02 -07001591 // Request SMI GPIO events
1592 if (!host_error_monitor::requestGPIOEvents(
1593 "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
1594 host_error_monitor::smiEvent))
1595 {
1596 return -1;
1597 }
1598
Jason M. Bills45e87e02019-09-09 14:45:38 -07001599 // Request CPU1_FIVR_FAULT GPIO input
1600 if (!host_error_monitor::requestGPIOInput(
1601 "CPU1_FIVR_FAULT", host_error_monitor::cpu1FIVRFaultLine))
1602 {
1603 return -1;
1604 }
1605
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001606 // Request CPU1_THERMTRIP GPIO events
1607 if (!host_error_monitor::requestGPIOEvents(
1608 "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
1609 host_error_monitor::cpu1ThermtripLine,
1610 host_error_monitor::cpu1ThermtripEvent))
1611 {
1612 return -1;
1613 }
1614
Jason M. Bills45e87e02019-09-09 14:45:38 -07001615 // Request CPU2_FIVR_FAULT GPIO input
1616 if (!host_error_monitor::requestGPIOInput(
1617 "CPU2_FIVR_FAULT", host_error_monitor::cpu2FIVRFaultLine))
1618 {
1619 return -1;
1620 }
1621
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001622 // Request CPU2_THERMTRIP GPIO events
1623 if (!host_error_monitor::requestGPIOEvents(
1624 "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
1625 host_error_monitor::cpu2ThermtripLine,
1626 host_error_monitor::cpu2ThermtripEvent))
1627 {
1628 return -1;
1629 }
1630
Jason M. Bills250fa632019-08-28 15:58:25 -07001631 // Request CPU1_VRHOT GPIO events
1632 if (!host_error_monitor::requestGPIOEvents(
1633 "CPU1_VRHOT", host_error_monitor::cpu1VRHotHandler,
1634 host_error_monitor::cpu1VRHotLine,
1635 host_error_monitor::cpu1VRHotEvent))
1636 {
1637 return -1;
1638 }
1639
Jason M. Bills9647ba72019-08-29 14:19:19 -07001640 // Request CPU1_MEM_ABCD_VRHOT GPIO events
1641 if (!host_error_monitor::requestGPIOEvents(
1642 "CPU1_MEM_ABCD_VRHOT", host_error_monitor::cpu1MemABCDVRHotHandler,
1643 host_error_monitor::cpu1MemABCDVRHotLine,
1644 host_error_monitor::cpu1MemABCDVRHotEvent))
1645 {
1646 return -1;
1647 }
1648
1649 // Request CPU1_MEM_EFGH_VRHOT GPIO events
1650 if (!host_error_monitor::requestGPIOEvents(
1651 "CPU1_MEM_EFGH_VRHOT", host_error_monitor::cpu1MemEFGHVRHotHandler,
1652 host_error_monitor::cpu1MemEFGHVRHotLine,
1653 host_error_monitor::cpu1MemEFGHVRHotEvent))
1654 {
1655 return -1;
1656 }
1657
Jason M. Bills250fa632019-08-28 15:58:25 -07001658 // Request CPU2_VRHOT GPIO events
1659 if (!host_error_monitor::requestGPIOEvents(
1660 "CPU2_VRHOT", host_error_monitor::cpu2VRHotHandler,
1661 host_error_monitor::cpu2VRHotLine,
1662 host_error_monitor::cpu2VRHotEvent))
1663 {
1664 return -1;
1665 }
1666
Jason M. Bills9647ba72019-08-29 14:19:19 -07001667 // Request CPU2_MEM_ABCD_VRHOT GPIO events
1668 if (!host_error_monitor::requestGPIOEvents(
1669 "CPU2_MEM_ABCD_VRHOT", host_error_monitor::cpu2MemABCDVRHotHandler,
1670 host_error_monitor::cpu2MemABCDVRHotLine,
1671 host_error_monitor::cpu2MemABCDVRHotEvent))
1672 {
1673 return -1;
1674 }
1675
1676 // Request CPU2_MEM_EFGH_VRHOT GPIO events
1677 if (!host_error_monitor::requestGPIOEvents(
1678 "CPU2_MEM_EFGH_VRHOT", host_error_monitor::cpu2MemEFGHVRHotHandler,
1679 host_error_monitor::cpu2MemEFGHVRHotLine,
1680 host_error_monitor::cpu2MemEFGHVRHotEvent))
1681 {
1682 return -1;
1683 }
1684
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001685 // Request PCH_BMC_THERMTRIP GPIO events
1686 if (!host_error_monitor::requestGPIOEvents(
1687 "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,
1688 host_error_monitor::pchThermtripLine,
1689 host_error_monitor::pchThermtripEvent))
1690 {
1691 return -1;
1692 }
1693
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001694 // Request CPU1_MEM_THERM_EVENT GPIO events
1695 if (!host_error_monitor::requestGPIOEvents(
1696 "CPU1_MEM_THERM_EVENT", host_error_monitor::cpu1MemtripHandler,
1697 host_error_monitor::cpu1MemtripLine,
1698 host_error_monitor::cpu1MemtripEvent))
1699 {
1700 return -1;
1701 }
1702
1703 // Request CPU2_MEM_THERM_EVENT GPIO events
1704 if (!host_error_monitor::requestGPIOEvents(
1705 "CPU2_MEM_THERM_EVENT", host_error_monitor::cpu2MemtripHandler,
1706 host_error_monitor::cpu2MemtripLine,
1707 host_error_monitor::cpu2MemtripEvent))
1708 {
1709 return -1;
1710 }
1711
Jason M. Bills1490b142019-07-01 15:48:43 -07001712 host_error_monitor::io.run();
1713
1714 return 0;
1715}