/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
Jason M. Bills6a2cb692019-08-06 11:03:49 -070016#include <peci.h>
Chen Yugange6c0f1c2019-08-02 20:36:42 +080017#include <systemd/sd-journal.h>
18
Jason M. Bills6a2cb692019-08-06 11:03:49 -070019#include <bitset>
Jason M. Bills1490b142019-07-01 15:48:43 -070020#include <boost/asio/posix/stream_descriptor.hpp>
21#include <gpiod.hpp>
22#include <iostream>
23#include <sdbusplus/asio/object_server.hpp>
Jason M. Billsd1a19f62019-08-06 11:52:58 -070024#include <variant>
Jason M. Bills1490b142019-07-01 15:48:43 -070025
26namespace host_error_monitor
27{
28static boost::asio::io_service io;
29static std::shared_ptr<sdbusplus::asio::connection> conn;
30
31static bool hostOff = true;
32
// How long CATERR must stay asserted before it is handled (milliseconds)
static constexpr size_t caterrTimeoutMs{2000};
// How long an ERRx pin must stay asserted before it is handled (milliseconds)
static constexpr size_t errTimeoutMs{90000};
// How long SMI must stay asserted before it is handled (milliseconds)
static constexpr size_t smiTimeoutMs{90000};
// How long to wait for a crashdump to complete (seconds)
static constexpr size_t crashdumpTimeoutS{300};
37
38// Timers
39// Timer for CATERR asserted
40static boost::asio::steady_timer caterrAssertTimer(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070041// Timer for ERR0 asserted
42static boost::asio::steady_timer err0AssertTimer(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070043// Timer for ERR1 asserted
44static boost::asio::steady_timer err1AssertTimer(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070045// Timer for ERR2 asserted
46static boost::asio::steady_timer err2AssertTimer(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070047// Timer for SMI asserted
48static boost::asio::steady_timer smiAssertTimer(io);
Jason M. Bills1490b142019-07-01 15:48:43 -070049
50// GPIO Lines and Event Descriptors
51static gpiod::line caterrLine;
52static boost::asio::posix::stream_descriptor caterrEvent(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070053static gpiod::line err0Line;
54static boost::asio::posix::stream_descriptor err0Event(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070055static gpiod::line err1Line;
56static boost::asio::posix::stream_descriptor err1Event(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070057static gpiod::line err2Line;
58static boost::asio::posix::stream_descriptor err2Event(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070059static gpiod::line smiLine;
60static boost::asio::posix::stream_descriptor smiEvent(io);
Jason M. Bills78c5eed2019-08-28 14:00:40 -070061static gpiod::line cpu1ThermtripLine;
62static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io);
63static gpiod::line cpu2ThermtripLine;
64static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io);
Jason M. Bills250fa632019-08-28 15:58:25 -070065static gpiod::line cpu1VRHotLine;
66static boost::asio::posix::stream_descriptor cpu1VRHotEvent(io);
67static gpiod::line cpu2VRHotLine;
68static boost::asio::posix::stream_descriptor cpu2VRHotEvent(io);
Chen Yugange6c0f1c2019-08-02 20:36:42 +080069//----------------------------------
70// PCH_BMC_THERMTRIP function related definition
71//----------------------------------
Chen Yugange6c0f1c2019-08-02 20:36:42 +080072static gpiod::line pchThermtripLine;
73static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
Jason M. Bills1490b142019-07-01 15:48:43 -070074
Jason M. Billsa3397932019-08-06 11:07:21 -070075static void cpuIERRLog()
76{
77 sd_journal_send("MESSAGE=HostError: IERR", "PRIORITY=%i", LOG_INFO,
78 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
79 "REDFISH_MESSAGE_ARGS=%s", "IERR", NULL);
80}
81
82static void cpuIERRLog(const int cpuNum)
83{
84 std::string msg = "IERR on CPU " + std::to_string(cpuNum + 1);
85
86 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
87 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
88 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
89}
90
91static void cpuIERRLog(const int cpuNum, const std::string& type)
92{
93 std::string msg = type + " IERR on CPU " + std::to_string(cpuNum + 1);
94
95 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
96 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
97 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
98}
99
Jason M. Billscbf78532019-08-16 15:32:11 -0700100static void cpuERRXLog(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700101{
Jason M. Billscbf78532019-08-16 15:32:11 -0700102 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
103
104 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
105 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
106 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700107}
108
Jason M. Billscbf78532019-08-16 15:32:11 -0700109static void cpuERRXLog(const int errPin, const int cpuNum)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700110{
Jason M. Billscbf78532019-08-16 15:32:11 -0700111 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
112 std::to_string(cpuNum + 1);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700113
114 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
115 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
116 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
117}
118
Jason M. Bills89922f82019-08-06 11:10:02 -0700119static void smiTimeoutLog()
120{
121 sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
122 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
123 "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
124}
125
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700126static void cpuThermTripLog(const int cpuNum)
127{
128 std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip";
129
130 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
131 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
132 "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d",
133 cpuNum, NULL);
134}
135
Jason M. Bills250fa632019-08-28 15:58:25 -0700136static void cpuVRHotLog(const std::string& vr)
137{
138 std::string msg = vr + " Voltage Regulator Overheated.";
139
140 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
141 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
142 "OpenBMC.0.1.VoltageRegulatorOverheated",
143 "REDFISH_MESSAGE_ARGS=%s", vr.c_str(), NULL);
144}
145
Jason M. Bills08866542019-08-16 12:04:19 -0700146static void ssbThermTripLog()
147{
148 sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
149 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
150 "OpenBMC.0.1.SsbThermalTrip", NULL);
151}
152
Jason M. Billsa15c2522019-08-16 10:01:44 -0700153static void initializeErrorState();
Jason M. Bills1490b142019-07-01 15:48:43 -0700154static void initializeHostState()
155{
156 conn->async_method_call(
157 [](boost::system::error_code ec,
158 const std::variant<std::string>& property) {
159 if (ec)
160 {
161 return;
162 }
163 const std::string* state = std::get_if<std::string>(&property);
164 if (state == nullptr)
165 {
166 std::cerr << "Unable to read host state value\n";
167 return;
168 }
169 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
Jason M. Billsa15c2522019-08-16 10:01:44 -0700170 // If the system is on, initialize the error state
171 if (!hostOff)
172 {
173 initializeErrorState();
174 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700175 },
176 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
177 "org.freedesktop.DBus.Properties", "Get",
178 "xyz.openbmc_project.State.Host", "CurrentHostState");
179}
180
181static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
182{
183 return std::make_shared<sdbusplus::bus::match::match>(
184 *conn,
185 "type='signal',interface='org.freedesktop.DBus.Properties',"
186 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
187 "Host'",
188 [](sdbusplus::message::message& msg) {
189 std::string interfaceName;
190 boost::container::flat_map<std::string, std::variant<std::string>>
191 propertiesChanged;
192 std::string state;
193 try
194 {
195 msg.read(interfaceName, propertiesChanged);
196 state =
197 std::get<std::string>(propertiesChanged.begin()->second);
198 }
199 catch (std::exception& e)
200 {
201 std::cerr << "Unable to read host state\n";
202 return;
203 }
204 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
205
206 // No host events should fire while off, so cancel any pending
207 // timers
208 if (hostOff)
209 {
210 caterrAssertTimer.cancel();
Jason M. Bills8c584392019-08-19 11:05:51 -0700211 err0AssertTimer.cancel();
Jason M. Bills75af3962019-08-19 11:07:17 -0700212 err1AssertTimer.cancel();
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700213 err2AssertTimer.cancel();
Jason M. Bills89922f82019-08-06 11:10:02 -0700214 smiAssertTimer.cancel();
Jason M. Bills1490b142019-07-01 15:48:43 -0700215 }
216 });
217}
218
219static bool requestGPIOEvents(
220 const std::string& name, const std::function<void()>& handler,
221 gpiod::line& gpioLine,
222 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
223{
224 // Find the GPIO line
225 gpioLine = gpiod::find_line(name);
226 if (!gpioLine)
227 {
228 std::cerr << "Failed to find the " << name << " line\n";
229 return false;
230 }
231
232 try
233 {
234 gpioLine.request(
235 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
236 }
237 catch (std::exception&)
238 {
239 std::cerr << "Failed to request events for " << name << "\n";
240 return false;
241 }
242
243 int gpioLineFd = gpioLine.event_get_fd();
244 if (gpioLineFd < 0)
245 {
246 std::cerr << "Failed to get " << name << " fd\n";
247 return false;
248 }
249
250 gpioEventDescriptor.assign(gpioLineFd);
251
252 gpioEventDescriptor.async_wait(
253 boost::asio::posix::stream_descriptor::wait_read,
254 [&name, handler](const boost::system::error_code ec) {
255 if (ec)
256 {
257 std::cerr << name << " fd handler error: " << ec.message()
258 << "\n";
259 return;
260 }
261 handler();
262 });
263 return true;
264}
265
266static void startPowerCycle()
267{
268 conn->async_method_call(
269 [](boost::system::error_code ec) {
270 if (ec)
271 {
272 std::cerr << "failed to set Chassis State\n";
273 }
274 },
275 "xyz.openbmc_project.State.Chassis",
276 "/xyz/openbmc_project/state/chassis0",
277 "org.freedesktop.DBus.Properties", "Set",
278 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
279 std::variant<std::string>{
280 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
281}
282
283static void startCrashdumpAndRecovery(bool recoverSystem)
284{
285 std::cout << "Starting crashdump\n";
286 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
287 static boost::asio::steady_timer crashdumpTimer(io);
288
289 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
290 *conn,
291 "type='signal',interface='org.freedesktop.DBus.Properties',"
292 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
293 [recoverSystem](sdbusplus::message::message& msg) {
294 crashdumpTimer.cancel();
295 std::cout << "Crashdump completed\n";
296 if (recoverSystem)
297 {
298 std::cout << "Recovering the system\n";
299 startPowerCycle();
300 }
301 crashdumpCompleteMatch.reset();
302 });
303
304 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
305 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
306 if (ec)
307 {
308 // operation_aborted is expected if timer is canceled
309 if (ec != boost::asio::error::operation_aborted)
310 {
311 std::cerr << "Crashdump async_wait failed: " << ec.message()
312 << "\n";
313 }
314 std::cout << "Crashdump timer canceled\n";
315 return;
316 }
317 std::cerr << "Crashdump failed to complete before timeout\n";
318 crashdumpCompleteMatch.reset();
319 });
320
321 conn->async_method_call(
322 [](boost::system::error_code ec) {
323 if (ec)
324 {
325 std::cerr << "failed to start Crashdump\n";
326 crashdumpTimer.cancel();
327 crashdumpCompleteMatch.reset();
328 }
329 },
330 "com.intel.crashdump", "/com/intel/crashdump",
331 "com.intel.crashdump.Stored", "GenerateStoredLog");
332}
333
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700334static void incrementCPUErrorCount(int cpuNum)
335{
336 std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
337
338 // Get the current count
339 conn->async_method_call(
340 [propertyName](boost::system::error_code ec,
341 const std::variant<uint8_t>& property) {
342 if (ec)
343 {
344 std::cerr << "Failed to read " << propertyName << ": "
345 << ec.message() << "\n";
346 return;
347 }
348 const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
349 if (errorCountVariant == nullptr)
350 {
351 std::cerr << propertyName << " invalid\n";
352 return;
353 }
354 uint8_t errorCount = *errorCountVariant;
355 if (errorCount == std::numeric_limits<uint8_t>::max())
356 {
357 std::cerr << "Maximum error count reached\n";
358 return;
359 }
360 // Increment the count
361 errorCount++;
362 conn->async_method_call(
363 [propertyName](boost::system::error_code ec) {
364 if (ec)
365 {
366 std::cerr << "Failed to set " << propertyName << ": "
367 << ec.message() << "\n";
368 }
369 },
370 "xyz.openbmc_project.Settings",
371 "/xyz/openbmc_project/control/processor_error_config",
372 "org.freedesktop.DBus.Properties", "Set",
373 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
374 std::variant<uint8_t>{errorCount});
375 },
376 "xyz.openbmc_project.Settings",
377 "/xyz/openbmc_project/control/processor_error_config",
378 "org.freedesktop.DBus.Properties", "Get",
379 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
380}
381
Jason M. Billsa3397932019-08-06 11:07:21 -0700382static bool checkIERRCPUs()
383{
384 bool cpuIERRFound = false;
385 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
386 cpu++, addr++)
387 {
388 uint8_t cc = 0;
389 CPUModel model{};
390 uint8_t stepping = 0;
391 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
392 {
393 std::cerr << "Cannot get CPUID!\n";
394 continue;
395 }
396
397 switch (model)
398 {
399 case skx:
400 {
401 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
402 // that caused the IERR
403 uint32_t mcaErrSrcLog = 0;
404 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
405 &cc) != PECI_CC_SUCCESS)
406 {
407 continue;
408 }
409 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
410 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
411 {
412 // TODO: Light the CPU fault LED?
413 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700414 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700415 // Next check if it's a CPU/VR mismatch by reading the
416 // IA32_MC4_STATUS MSR (0x411)
417 uint64_t mc4Status = 0;
418 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
419 PECI_CC_SUCCESS)
420 {
421 continue;
422 }
423 // Check MSEC bits 31:24 for
424 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
425 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
426 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
427 if ((mc4Status & (0x40 << 24)) ||
428 (mc4Status & (0x42 << 24)) ||
429 (mc4Status & (0x43 << 24)))
430 {
431 cpuIERRLog(cpu, "CPU/VR Mismatch");
432 continue;
433 }
434
435 // Next check if it's a Core FIVR fault by looking for a
436 // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
437 // 80h)
438 uint32_t coreFIVRErrLog = 0;
439 if (peci_RdPCIConfigLocal(
440 addr, 1, 30, 2, 0x80, sizeof(uint32_t),
441 (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
442 {
443 continue;
444 }
445 if (coreFIVRErrLog)
446 {
447 cpuIERRLog(cpu, "Core FIVR Fault");
448 continue;
449 }
450
451 // Next check if it's an Uncore FIVR fault by looking for a
452 // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
453 // 84h)
454 uint32_t uncoreFIVRErrLog = 0;
455 if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
456 sizeof(uint32_t),
457 (uint8_t*)&uncoreFIVRErrLog,
458 &cc) != PECI_CC_SUCCESS)
459 {
460 continue;
461 }
462 if (uncoreFIVRErrLog)
463 {
464 cpuIERRLog(cpu, "Uncore FIVR Fault");
465 continue;
466 }
467
468 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
469 // both zero, but MSEC bits 31:24 have either
470 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
471 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
472 // uncore FIVR fault
473 if (!coreFIVRErrLog && !uncoreFIVRErrLog &&
474 ((mc4Status & (0x51 << 24)) ||
475 (mc4Status & (0x52 << 24))))
476 {
477 cpuIERRLog(cpu, "Uncore FIVR Fault");
478 continue;
479 }
480 cpuIERRLog(cpu);
481 }
482 break;
483 }
484 case icx:
485 {
486 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
487 // that caused the IERR
488 uint32_t mcaErrSrcLog = 0;
489 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
490 &cc) != PECI_CC_SUCCESS)
491 {
492 continue;
493 }
494 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
495 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
496 {
497 // TODO: Light the CPU fault LED?
498 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700499 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700500 // Next check if it's a CPU/VR mismatch by reading the
501 // IA32_MC4_STATUS MSR (0x411)
502 uint64_t mc4Status = 0;
503 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
504 PECI_CC_SUCCESS)
505 {
506 continue;
507 }
508 // TODO: Update MSEC/MSCOD_31_24 check
509 // Check MSEC bits 31:24 for
510 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
511 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
512 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
513 if ((mc4Status & (0x40 << 24)) ||
514 (mc4Status & (0x42 << 24)) ||
515 (mc4Status & (0x43 << 24)))
516 {
517 cpuIERRLog(cpu, "CPU/VR Mismatch");
518 continue;
519 }
520
521 // Next check if it's a Core FIVR fault by looking for a
522 // non-zero value of CORE_FIVR_ERR_LOG (B(31) D30 F2 offsets
523 // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
524 uint32_t coreFIVRErrLog0 = 0;
525 uint32_t coreFIVRErrLog1 = 0;
526 if (peci_RdEndPointConfigPciLocal(
527 addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
528 (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
529 {
530 continue;
531 }
532 if (peci_RdEndPointConfigPciLocal(
533 addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
534 (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
535 {
536 continue;
537 }
538 if (coreFIVRErrLog0 || coreFIVRErrLog1)
539 {
540 cpuIERRLog(cpu, "Core FIVR Fault");
541 continue;
542 }
543
544 // Next check if it's an Uncore FIVR fault by looking for a
545 // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
546 // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
547 uint32_t uncoreFIVRErrLog = 0;
548 if (peci_RdEndPointConfigPciLocal(
549 addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
550 (uint8_t*)&uncoreFIVRErrLog,
551 &cc) != PECI_CC_SUCCESS)
552 {
553 continue;
554 }
555 if (uncoreFIVRErrLog)
556 {
557 cpuIERRLog(cpu, "Uncore FIVR Fault");
558 continue;
559 }
560
561 // TODO: Update MSEC/MSCOD_31_24 check
562 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
563 // both zero, but MSEC bits 31:24 have either
564 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
565 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
566 // uncore FIVR fault
567 if (!coreFIVRErrLog0 && !coreFIVRErrLog1 &&
568 !uncoreFIVRErrLog &&
569 ((mc4Status & (0x51 << 24)) ||
570 (mc4Status & (0x52 << 24))))
571 {
572 cpuIERRLog(cpu, "Uncore FIVR Fault");
573 continue;
574 }
575 cpuIERRLog(cpu);
576 }
577 break;
578 }
579 }
580 }
581 return cpuIERRFound;
582}
583
Jason M. Billsa15c2522019-08-16 10:01:44 -0700584static void caterrAssertHandler()
585{
Jason M. Billsa15c2522019-08-16 10:01:44 -0700586 caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
587 caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
588 if (ec)
589 {
590 // operation_aborted is expected if timer is canceled
591 // before completion.
592 if (ec != boost::asio::error::operation_aborted)
593 {
594 std::cerr << "caterr timeout async_wait failed: "
595 << ec.message() << "\n";
596 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700597 return;
598 }
Jason M. Billsa3397932019-08-06 11:07:21 -0700599 std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs)
600 << " ms\n";
601 if (!checkIERRCPUs())
602 {
603 cpuIERRLog();
604 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700605 conn->async_method_call(
606 [](boost::system::error_code ec,
607 const std::variant<bool>& property) {
608 if (ec)
609 {
610 return;
611 }
612 const bool* reset = std::get_if<bool>(&property);
613 if (reset == nullptr)
614 {
615 std::cerr << "Unable to read reset on CATERR value\n";
616 return;
617 }
618 startCrashdumpAndRecovery(*reset);
619 },
620 "xyz.openbmc_project.Settings",
621 "/xyz/openbmc_project/control/processor_error_config",
622 "org.freedesktop.DBus.Properties", "Get",
623 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
624 });
625}
626
Jason M. Bills1490b142019-07-01 15:48:43 -0700627static void caterrHandler()
628{
629 if (!hostOff)
630 {
631 gpiod::line_event gpioLineEvent = caterrLine.event_read();
632
633 bool caterr =
634 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
635 if (caterr)
636 {
Jason M. Billsa15c2522019-08-16 10:01:44 -0700637 caterrAssertHandler();
Jason M. Bills1490b142019-07-01 15:48:43 -0700638 }
639 else
640 {
641 caterrAssertTimer.cancel();
642 }
643 }
644 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
645 [](const boost::system::error_code ec) {
646 if (ec)
647 {
648 std::cerr << "caterr handler error: "
649 << ec.message() << "\n";
650 return;
651 }
652 caterrHandler();
653 });
654}
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700655
656static void cpu1ThermtripHandler()
657{
658 if (!hostOff)
659 {
660 gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
661
662 bool cpu1Thermtrip =
663 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
664 if (cpu1Thermtrip)
665 {
666 cpuThermTripLog(1);
667 }
668 }
669 cpu1ThermtripEvent.async_wait(
670 boost::asio::posix::stream_descriptor::wait_read,
671 [](const boost::system::error_code ec) {
672 if (ec)
673 {
674 std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
675 << "\n";
676 return;
677 }
678 cpu1ThermtripHandler();
679 });
680}
681
682static void cpu2ThermtripHandler()
683{
684 if (!hostOff)
685 {
686 gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
687
688 bool cpu2Thermtrip =
689 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
690 if (cpu2Thermtrip)
691 {
692 cpuThermTripLog(2);
693 }
694 }
695 cpu2ThermtripEvent.async_wait(
696 boost::asio::posix::stream_descriptor::wait_read,
697 [](const boost::system::error_code ec) {
698 if (ec)
699 {
700 std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
701 << "\n";
702 return;
703 }
704 cpu2ThermtripHandler();
705 });
706}
707
Jason M. Bills250fa632019-08-28 15:58:25 -0700708static void cpu1VRHotHandler()
709{
710 if (!hostOff)
711 {
712 gpiod::line_event gpioLineEvent = cpu1VRHotLine.event_read();
713
714 bool cpu1VRHot =
715 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
716 if (cpu1VRHot)
717 {
718 cpuVRHotLog("CPU 1");
719 }
720 }
721 cpu1VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
722 [](const boost::system::error_code ec) {
723 if (ec)
724 {
725 std::cerr << "CPU 1 VRHot handler error: "
726 << ec.message() << "\n";
727 return;
728 }
729 cpu1VRHotHandler();
730 });
731}
732
733static void cpu2VRHotHandler()
734{
735 if (!hostOff)
736 {
737 gpiod::line_event gpioLineEvent = cpu2VRHotLine.event_read();
738
739 bool cpu2VRHot =
740 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
741 if (cpu2VRHot)
742 {
743 cpuVRHotLog("CPU 2");
744 }
745 }
746 cpu2VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
747 [](const boost::system::error_code ec) {
748 if (ec)
749 {
750 std::cerr << "CPU 2 VRHot handler error: "
751 << ec.message() << "\n";
752 return;
753 }
754 cpu2VRHotHandler();
755 });
756}
757
Chen Yugange6c0f1c2019-08-02 20:36:42 +0800758static void pchThermtripHandler()
759{
760 if (!hostOff)
761 {
762 gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
763
764 bool pchThermtrip =
765 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
766 if (pchThermtrip)
767 {
Jason M. Bills08866542019-08-16 12:04:19 -0700768 ssbThermTripLog();
Chen Yugange6c0f1c2019-08-02 20:36:42 +0800769 }
770 }
771 pchThermtripEvent.async_wait(
772 boost::asio::posix::stream_descriptor::wait_read,
773 [](const boost::system::error_code ec) {
774 if (ec)
775 {
776 std::cerr << "PCH Thermal trip handler error: " << ec.message()
777 << "\n";
778 return;
779 }
780 pchThermtripHandler();
781 });
782}
783
Jason M. Billscbf78532019-08-16 15:32:11 -0700784static std::bitset<MAX_CPUS> checkERRPinCPUs(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700785{
Jason M. Billscbf78532019-08-16 15:32:11 -0700786 int errPinSts = (1 << errPin);
787 std::bitset<MAX_CPUS> errPinCPUs = 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700788 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
789 cpu++, addr++)
790 {
791 if (peci_Ping(addr) == PECI_CC_SUCCESS)
792 {
793 uint8_t cc = 0;
794 CPUModel model{};
795 uint8_t stepping = 0;
796 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
797 {
798 std::cerr << "Cannot get CPUID!\n";
799 continue;
800 }
801
802 switch (model)
803 {
804 case skx:
805 {
806 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -0700807 // the ERRx (B(0) D8 F0 offset 210h)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700808 uint32_t errpinsts = 0;
809 if (peci_RdPCIConfigLocal(
810 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
811 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
812 {
Jason M. Billscbf78532019-08-16 15:32:11 -0700813 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700814 }
815 break;
816 }
817 case icx:
818 {
819 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -0700820 // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700821 // accessed on PECI as bus 13)
822 uint32_t errpinsts = 0;
823 if (peci_RdEndPointConfigPciLocal(
824 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
825 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
826 {
Jason M. Billscbf78532019-08-16 15:32:11 -0700827 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700828 }
829 break;
830 }
831 }
832 }
833 }
Jason M. Billscbf78532019-08-16 15:32:11 -0700834 return errPinCPUs;
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700835}
836
Jason M. Billscbf78532019-08-16 15:32:11 -0700837static void errXAssertHandler(const int errPin,
838 boost::asio::steady_timer& errXAssertTimer)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700839{
Jason M. Billscbf78532019-08-16 15:32:11 -0700840 // ERRx status is not guaranteed through the timeout, so save which
841 // CPUs have it asserted
842 std::bitset<MAX_CPUS> errPinCPUs = checkERRPinCPUs(errPin);
843 errXAssertTimer.expires_after(std::chrono::milliseconds(errTimeoutMs));
844 errXAssertTimer.async_wait([errPin, errPinCPUs](
845 const boost::system::error_code ec) {
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700846 if (ec)
847 {
848 // operation_aborted is expected if timer is canceled before
849 // completion.
850 if (ec != boost::asio::error::operation_aborted)
851 {
852 std::cerr << "err2 timeout async_wait failed: " << ec.message()
853 << "\n";
854 }
855 return;
856 }
Jason M. Billscbf78532019-08-16 15:32:11 -0700857 std::cerr << "ERR" << std::to_string(errPin) << " asserted for "
858 << std::to_string(errTimeoutMs) << " ms\n";
859 if (errPinCPUs.count())
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700860 {
Jason M. Billscbf78532019-08-16 15:32:11 -0700861 for (int i = 0; i < errPinCPUs.size(); i++)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700862 {
Jason M. Billscbf78532019-08-16 15:32:11 -0700863 if (errPinCPUs[i])
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700864 {
Jason M. Billscbf78532019-08-16 15:32:11 -0700865 cpuERRXLog(errPin, i);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700866 }
867 }
868 }
869 else
870 {
Jason M. Billscbf78532019-08-16 15:32:11 -0700871 cpuERRXLog(errPin);
872 }
873 });
874}
875
Jason M. Bills8c584392019-08-19 11:05:51 -0700876static void err0AssertHandler()
877{
878 // Handle the standard ERR0 detection and logging
879 const static constexpr int err0 = 0;
880 errXAssertHandler(err0, err0AssertTimer);
881}
882
883static void err0Handler()
884{
885 if (!hostOff)
886 {
887 gpiod::line_event gpioLineEvent = err0Line.event_read();
888
889 bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
890 if (err0)
891 {
892 err0AssertHandler();
893 }
894 else
895 {
896 err0AssertTimer.cancel();
897 }
898 }
899 err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
900 [](const boost::system::error_code ec) {
901 if (ec)
902 {
903 std::cerr
904 << "err0 handler error: " << ec.message()
905 << "\n";
906 return;
907 }
908 err0Handler();
909 });
910}
911
Jason M. Bills75af3962019-08-19 11:07:17 -0700912static void err1AssertHandler()
913{
914 // Handle the standard ERR1 detection and logging
915 const static constexpr int err1 = 1;
916 errXAssertHandler(err1, err1AssertTimer);
917}
918
919static void err1Handler()
920{
921 if (!hostOff)
922 {
923 gpiod::line_event gpioLineEvent = err1Line.event_read();
924
925 bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
926 if (err1)
927 {
928 err1AssertHandler();
929 }
930 else
931 {
932 err1AssertTimer.cancel();
933 }
934 }
935 err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
936 [](const boost::system::error_code ec) {
937 if (ec)
938 {
939 std::cerr
940 << "err1 handler error: " << ec.message()
941 << "\n";
942 return;
943 }
944 err1Handler();
945 });
946}
947
Jason M. Billscbf78532019-08-16 15:32:11 -0700948static void err2AssertHandler()
949{
950 // Handle the standard ERR2 detection and logging
951 const static constexpr int err2 = 2;
952 errXAssertHandler(err2, err2AssertTimer);
953 // Also handle reset for ERR2
954 err2AssertTimer.async_wait([](const boost::system::error_code ec) {
955 if (ec)
956 {
957 // operation_aborted is expected if timer is canceled before
958 // completion.
959 if (ec != boost::asio::error::operation_aborted)
960 {
961 std::cerr << "err2 timeout async_wait failed: " << ec.message()
962 << "\n";
963 }
964 return;
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700965 }
966 conn->async_method_call(
967 [](boost::system::error_code ec,
968 const std::variant<bool>& property) {
969 if (ec)
970 {
971 return;
972 }
973 const bool* reset = std::get_if<bool>(&property);
974 if (reset == nullptr)
975 {
976 std::cerr << "Unable to read reset on ERR2 value\n";
977 return;
978 }
979 startCrashdumpAndRecovery(*reset);
980 },
981 "xyz.openbmc_project.Settings",
982 "/xyz/openbmc_project/control/processor_error_config",
983 "org.freedesktop.DBus.Properties", "Get",
984 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnERR2");
985 });
986}
987
988static void err2Handler()
989{
990 if (!hostOff)
991 {
992 gpiod::line_event gpioLineEvent = err2Line.event_read();
993
994 bool err2 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
995 if (err2)
996 {
997 err2AssertHandler();
998 }
999 else
1000 {
1001 err2AssertTimer.cancel();
1002 }
1003 }
1004 err2Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1005 [](const boost::system::error_code ec) {
1006 if (ec)
1007 {
1008 std::cerr
1009 << "err2 handler error: " << ec.message()
1010 << "\n";
1011 return;
1012 }
1013 err2Handler();
1014 });
1015}
1016
Jason M. Bills89922f82019-08-06 11:10:02 -07001017static void smiAssertHandler()
1018{
1019 smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
1020 smiAssertTimer.async_wait([](const boost::system::error_code ec) {
1021 if (ec)
1022 {
1023 // operation_aborted is expected if timer is canceled before
1024 // completion.
1025 if (ec != boost::asio::error::operation_aborted)
1026 {
1027 std::cerr << "smi timeout async_wait failed: " << ec.message()
1028 << "\n";
1029 }
1030 return;
1031 }
1032 std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs)
1033 << " ms\n";
1034 smiTimeoutLog();
1035 conn->async_method_call(
1036 [](boost::system::error_code ec,
1037 const std::variant<bool>& property) {
1038 if (ec)
1039 {
1040 return;
1041 }
1042 const bool* reset = std::get_if<bool>(&property);
1043 if (reset == nullptr)
1044 {
1045 std::cerr << "Unable to read reset on SMI value\n";
1046 return;
1047 }
1048 startCrashdumpAndRecovery(*reset);
1049 },
1050 "xyz.openbmc_project.Settings",
1051 "/xyz/openbmc_project/control/bmc_reset_disables",
1052 "org.freedesktop.DBus.Properties", "Get",
1053 "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
1054 });
1055}
1056
1057static void smiHandler()
1058{
1059 if (!hostOff)
1060 {
1061 gpiod::line_event gpioLineEvent = smiLine.event_read();
1062
1063 bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1064 if (smi)
1065 {
1066 smiAssertHandler();
1067 }
1068 else
1069 {
1070 smiAssertTimer.cancel();
1071 }
1072 }
1073 smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1074 [](const boost::system::error_code ec) {
1075 if (ec)
1076 {
1077 std::cerr
1078 << "smi handler error: " << ec.message()
1079 << "\n";
1080 return;
1081 }
1082 smiHandler();
1083 });
1084}
1085
Jason M. Billsa15c2522019-08-16 10:01:44 -07001086static void initializeErrorState()
1087{
1088 // Handle CPU_CATERR if it's asserted now
1089 if (caterrLine.get_value() == 0)
1090 {
1091 caterrAssertHandler();
1092 }
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001093
Jason M. Bills8c584392019-08-19 11:05:51 -07001094 // Handle CPU_ERR0 if it's asserted now
1095 if (err0Line.get_value() == 0)
1096 {
1097 err0AssertHandler();
1098 }
1099
Jason M. Bills75af3962019-08-19 11:07:17 -07001100 // Handle CPU_ERR1 if it's asserted now
1101 if (err1Line.get_value() == 0)
1102 {
1103 err1AssertHandler();
1104 }
1105
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001106 // Handle CPU_ERR2 if it's asserted now
1107 if (err2Line.get_value() == 0)
1108 {
1109 err2AssertHandler();
1110 }
Jason M. Bills89922f82019-08-06 11:10:02 -07001111
1112 // Handle SMI if it's asserted now
1113 if (smiLine.get_value() == 0)
1114 {
1115 smiAssertHandler();
1116 }
Jason M. Bills08866542019-08-16 12:04:19 -07001117
1118 // Handle PCH_BMC_THERMTRIP if it's asserted now
1119 if (pchThermtripLine.get_value() == 0)
1120 {
1121 ssbThermTripLog();
1122 }
Jason M. Billsa15c2522019-08-16 10:01:44 -07001123}
Jason M. Bills1490b142019-07-01 15:48:43 -07001124} // namespace host_error_monitor
1125
1126int main(int argc, char* argv[])
1127{
1128 // setup connection to dbus
1129 host_error_monitor::conn =
1130 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
1131
1132 // Host Error Monitor Object
1133 host_error_monitor::conn->request_name(
1134 "xyz.openbmc_project.HostErrorMonitor");
1135 sdbusplus::asio::object_server server =
1136 sdbusplus::asio::object_server(host_error_monitor::conn);
1137
1138 // Start tracking host state
1139 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
1140 host_error_monitor::startHostStateMonitor();
1141
1142 // Initialize the host state
1143 host_error_monitor::initializeHostState();
1144
1145 // Request CPU_CATERR GPIO events
1146 if (!host_error_monitor::requestGPIOEvents(
1147 "CPU_CATERR", host_error_monitor::caterrHandler,
1148 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
1149 {
1150 return -1;
1151 }
1152
Jason M. Bills8c584392019-08-19 11:05:51 -07001153 // Request CPU_ERR0 GPIO events
1154 if (!host_error_monitor::requestGPIOEvents(
1155 "CPU_ERR0", host_error_monitor::err0Handler,
1156 host_error_monitor::err0Line, host_error_monitor::err0Event))
1157 {
1158 return -1;
1159 }
1160
Jason M. Bills75af3962019-08-19 11:07:17 -07001161 // Request CPU_ERR1 GPIO events
1162 if (!host_error_monitor::requestGPIOEvents(
1163 "CPU_ERR1", host_error_monitor::err1Handler,
1164 host_error_monitor::err1Line, host_error_monitor::err1Event))
1165 {
1166 return -1;
1167 }
1168
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001169 // Request CPU_ERR2 GPIO events
1170 if (!host_error_monitor::requestGPIOEvents(
1171 "CPU_ERR2", host_error_monitor::err2Handler,
1172 host_error_monitor::err2Line, host_error_monitor::err2Event))
1173 {
1174 return -1;
1175 }
1176
Jason M. Bills89922f82019-08-06 11:10:02 -07001177 // Request SMI GPIO events
1178 if (!host_error_monitor::requestGPIOEvents(
1179 "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
1180 host_error_monitor::smiEvent))
1181 {
1182 return -1;
1183 }
1184
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001185 // Request CPU1_THERMTRIP GPIO events
1186 if (!host_error_monitor::requestGPIOEvents(
1187 "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
1188 host_error_monitor::cpu1ThermtripLine,
1189 host_error_monitor::cpu1ThermtripEvent))
1190 {
1191 return -1;
1192 }
1193
1194 // Request CPU2_THERMTRIP GPIO events
1195 if (!host_error_monitor::requestGPIOEvents(
1196 "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
1197 host_error_monitor::cpu2ThermtripLine,
1198 host_error_monitor::cpu2ThermtripEvent))
1199 {
1200 return -1;
1201 }
1202
Jason M. Bills250fa632019-08-28 15:58:25 -07001203 // Request CPU1_VRHOT GPIO events
1204 if (!host_error_monitor::requestGPIOEvents(
1205 "CPU1_VRHOT", host_error_monitor::cpu1VRHotHandler,
1206 host_error_monitor::cpu1VRHotLine,
1207 host_error_monitor::cpu1VRHotEvent))
1208 {
1209 return -1;
1210 }
1211
1212 // Request CPU2_VRHOT GPIO events
1213 if (!host_error_monitor::requestGPIOEvents(
1214 "CPU2_VRHOT", host_error_monitor::cpu2VRHotHandler,
1215 host_error_monitor::cpu2VRHotLine,
1216 host_error_monitor::cpu2VRHotEvent))
1217 {
1218 return -1;
1219 }
1220
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001221 // Request PCH_BMC_THERMTRIP GPIO events
1222 if (!host_error_monitor::requestGPIOEvents(
1223 "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,
1224 host_error_monitor::pchThermtripLine,
1225 host_error_monitor::pchThermtripEvent))
1226 {
1227 return -1;
1228 }
1229
Jason M. Bills1490b142019-07-01 15:48:43 -07001230 host_error_monitor::io.run();
1231
1232 return 0;
1233}