blob: 806347f06e799f4bbc91eef3b2f8790d0d71238d [file] [log] [blame]
Jason M. Bills1490b142019-07-01 15:48:43 -07001/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
Jason M. Bills6a2cb692019-08-06 11:03:49 -070016#include <peci.h>
Chen Yugange6c0f1c2019-08-02 20:36:42 +080017#include <systemd/sd-journal.h>
18
Jason M. Bills6a2cb692019-08-06 11:03:49 -070019#include <bitset>
Jason M. Bills1490b142019-07-01 15:48:43 -070020#include <boost/asio/posix/stream_descriptor.hpp>
21#include <gpiod.hpp>
22#include <iostream>
23#include <sdbusplus/asio/object_server.hpp>
Jason M. Billsd1a19f62019-08-06 11:52:58 -070024#include <variant>
Jason M. Bills1490b142019-07-01 15:48:43 -070025
26namespace host_error_monitor
27{
28static boost::asio::io_service io;
29static std::shared_ptr<sdbusplus::asio::connection> conn;
Jason M. Billsc4b91f22019-11-26 17:04:50 -080030static std::shared_ptr<sdbusplus::asio::dbus_interface> hostErrorTimeoutIface;
Jason M. Bills1490b142019-07-01 15:48:43 -070031
32static bool hostOff = true;
33
Jason M. Billsc4b91f22019-11-26 17:04:50 -080034static size_t caterrTimeoutMs = 2000;
35const static constexpr size_t caterrTimeoutMsMax = 600000; // 10 minutes maximum
Jason M. Billscbf78532019-08-16 15:32:11 -070036const static constexpr size_t errTimeoutMs = 90000;
Jason M. Bills89922f82019-08-06 11:10:02 -070037const static constexpr size_t smiTimeoutMs = 90000;
Jason M. Bills1490b142019-07-01 15:48:43 -070038const static constexpr size_t crashdumpTimeoutS = 300;
39
40// Timers
41// Timer for CATERR asserted
42static boost::asio::steady_timer caterrAssertTimer(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070043// Timer for ERR0 asserted
44static boost::asio::steady_timer err0AssertTimer(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070045// Timer for ERR1 asserted
46static boost::asio::steady_timer err1AssertTimer(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070047// Timer for ERR2 asserted
48static boost::asio::steady_timer err2AssertTimer(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070049// Timer for SMI asserted
50static boost::asio::steady_timer smiAssertTimer(io);
Jason M. Bills1490b142019-07-01 15:48:43 -070051
52// GPIO Lines and Event Descriptors
53static gpiod::line caterrLine;
54static boost::asio::posix::stream_descriptor caterrEvent(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070055static gpiod::line err0Line;
56static boost::asio::posix::stream_descriptor err0Event(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070057static gpiod::line err1Line;
58static boost::asio::posix::stream_descriptor err1Event(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070059static gpiod::line err2Line;
60static boost::asio::posix::stream_descriptor err2Event(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070061static gpiod::line smiLine;
62static boost::asio::posix::stream_descriptor smiEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070063static gpiod::line cpu1FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070064static gpiod::line cpu1ThermtripLine;
65static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070066static gpiod::line cpu2FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070067static gpiod::line cpu2ThermtripLine;
68static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io);
Jason M. Bills250fa632019-08-28 15:58:25 -070069static gpiod::line cpu1VRHotLine;
70static boost::asio::posix::stream_descriptor cpu1VRHotEvent(io);
71static gpiod::line cpu2VRHotLine;
Jason M. Bills9647ba72019-08-29 14:19:19 -070072static boost::asio::posix::stream_descriptor cpu1MemABCDVRHotEvent(io);
73static gpiod::line cpu1MemEFGHVRHotLine;
74static boost::asio::posix::stream_descriptor cpu1MemEFGHVRHotEvent(io);
75static gpiod::line cpu2MemABCDVRHotLine;
Jason M. Bills250fa632019-08-28 15:58:25 -070076static boost::asio::posix::stream_descriptor cpu2VRHotEvent(io);
Jason M. Bills9647ba72019-08-29 14:19:19 -070077static gpiod::line cpu1MemABCDVRHotLine;
78static boost::asio::posix::stream_descriptor cpu2MemABCDVRHotEvent(io);
79static gpiod::line cpu2MemEFGHVRHotLine;
80static boost::asio::posix::stream_descriptor cpu2MemEFGHVRHotEvent(io);
Chen Yugange6c0f1c2019-08-02 20:36:42 +080081//----------------------------------
82// PCH_BMC_THERMTRIP function related definition
83//----------------------------------
Chen Yugange6c0f1c2019-08-02 20:36:42 +080084static gpiod::line pchThermtripLine;
85static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +000086//----------------------------------
87// CPU_MEM_THERM_EVENT function related definition
88//----------------------------------
89static gpiod::line cpu1MemtripLine;
90static boost::asio::posix::stream_descriptor cpu1MemtripEvent(io);
91static gpiod::line cpu2MemtripLine;
92static boost::asio::posix::stream_descriptor cpu2MemtripEvent(io);
jayaprakash Mutyala53099c42020-03-15 00:16:26 +000093//---------------------------------
94// CPU_MISMATCH function related definition
95//---------------------------------
96static gpiod::line cpu1MismatchLine;
97static gpiod::line cpu2MismatchLine;
Jason M. Bills1490b142019-07-01 15:48:43 -070098
Yong Li061eb032020-02-26 15:06:18 +080099// beep function for CPU error
100const static constexpr uint8_t beepCPUErr2 = 5;
101
102static void beep(const uint8_t& beepPriority)
103{
104 conn->async_method_call(
105 [](boost::system::error_code ec) {
106 if (ec)
107 {
108 std::cerr << "beep returned error with "
109 "async_method_call (ec = "
110 << ec << ")\n";
111 return;
112 }
113 },
114 "xyz.openbmc_project.BeepCode", "/xyz/openbmc_project/BeepCode",
115 "xyz.openbmc_project.BeepCode", "Beep", uint8_t(beepPriority));
116}
117
Jason M. Billsa3397932019-08-06 11:07:21 -0700118static void cpuIERRLog()
119{
120 sd_journal_send("MESSAGE=HostError: IERR", "PRIORITY=%i", LOG_INFO,
121 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
122 "REDFISH_MESSAGE_ARGS=%s", "IERR", NULL);
123}
124
125static void cpuIERRLog(const int cpuNum)
126{
127 std::string msg = "IERR on CPU " + std::to_string(cpuNum + 1);
128
129 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
130 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
131 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
132}
133
134static void cpuIERRLog(const int cpuNum, const std::string& type)
135{
136 std::string msg = type + " IERR on CPU " + std::to_string(cpuNum + 1);
137
138 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
139 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
140 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
141}
142
Jason M. Billscbf78532019-08-16 15:32:11 -0700143static void cpuERRXLog(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700144{
Jason M. Billscbf78532019-08-16 15:32:11 -0700145 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
146
147 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
148 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
149 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700150}
151
Jason M. Billscbf78532019-08-16 15:32:11 -0700152static void cpuERRXLog(const int errPin, const int cpuNum)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700153{
Jason M. Billscbf78532019-08-16 15:32:11 -0700154 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
155 std::to_string(cpuNum + 1);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700156
157 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
158 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
159 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
160}
161
Jason M. Bills89922f82019-08-06 11:10:02 -0700162static void smiTimeoutLog()
163{
164 sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
165 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
166 "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
167}
168
Jason M. Bills45e87e02019-09-09 14:45:38 -0700169static void cpuBootFIVRFaultLog(const int cpuNum)
170{
171 std::string msg = "Boot FIVR Fault on CPU " + std::to_string(cpuNum);
172
173 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
174 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
175 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
176}
177
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700178static void cpuThermTripLog(const int cpuNum)
179{
180 std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip";
181
182 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
183 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
184 "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d",
185 cpuNum, NULL);
186}
187
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000188static void memThermTripLog(const int cpuNum)
189{
190 std::string cpuNumber = "CPU " + std::to_string(cpuNum);
191 std::string msg = cpuNumber + " Memory Thermal trip.";
192
193 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
194 LOG_ERR, "REDFISH_MESSAGE_ID=%s",
195 "OpenBMC.0.1.MemoryThermTrip", "REDFISH_MESSAGE_ARGS=%s",
196 cpuNumber.c_str(), NULL);
197}
198
jayaprakash Mutyala53099c42020-03-15 00:16:26 +0000199static void cpuMismatchLog(const int cpuNum)
200{
201 std::string msg = "CPU " + std::to_string(cpuNum) + " mismatch";
202
203 sd_journal_send("MESSAGE= %s", msg.c_str(), "PRIORITY=%i", LOG_ERR,
204 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUMismatch",
205 "REDFISH_MESSAGE_ARGS=%d", cpuNum, NULL);
206}
207
Jason M. Bills250fa632019-08-28 15:58:25 -0700208static void cpuVRHotLog(const std::string& vr)
209{
210 std::string msg = vr + " Voltage Regulator Overheated.";
211
212 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
213 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
214 "OpenBMC.0.1.VoltageRegulatorOverheated",
215 "REDFISH_MESSAGE_ARGS=%s", vr.c_str(), NULL);
216}
217
Jason M. Bills08866542019-08-16 12:04:19 -0700218static void ssbThermTripLog()
219{
220 sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
221 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
222 "OpenBMC.0.1.SsbThermalTrip", NULL);
223}
224
Jason M. Billsa15c2522019-08-16 10:01:44 -0700225static void initializeErrorState();
Jason M. Bills1490b142019-07-01 15:48:43 -0700226static void initializeHostState()
227{
228 conn->async_method_call(
229 [](boost::system::error_code ec,
230 const std::variant<std::string>& property) {
231 if (ec)
232 {
233 return;
234 }
235 const std::string* state = std::get_if<std::string>(&property);
236 if (state == nullptr)
237 {
238 std::cerr << "Unable to read host state value\n";
239 return;
240 }
241 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
Jason M. Billsa15c2522019-08-16 10:01:44 -0700242 // If the system is on, initialize the error state
243 if (!hostOff)
244 {
245 initializeErrorState();
246 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700247 },
248 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
249 "org.freedesktop.DBus.Properties", "Get",
250 "xyz.openbmc_project.State.Host", "CurrentHostState");
251}
252
253static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
254{
255 return std::make_shared<sdbusplus::bus::match::match>(
256 *conn,
257 "type='signal',interface='org.freedesktop.DBus.Properties',"
258 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
259 "Host'",
260 [](sdbusplus::message::message& msg) {
261 std::string interfaceName;
262 boost::container::flat_map<std::string, std::variant<std::string>>
263 propertiesChanged;
264 std::string state;
265 try
266 {
267 msg.read(interfaceName, propertiesChanged);
268 state =
269 std::get<std::string>(propertiesChanged.begin()->second);
270 }
271 catch (std::exception& e)
272 {
273 std::cerr << "Unable to read host state\n";
274 return;
275 }
276 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
277
Jason M. Bills1490b142019-07-01 15:48:43 -0700278 if (hostOff)
279 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700280 // No host events should fire while off, so cancel any pending
281 // timers
Jason M. Bills1490b142019-07-01 15:48:43 -0700282 caterrAssertTimer.cancel();
Jason M. Bills8c584392019-08-19 11:05:51 -0700283 err0AssertTimer.cancel();
Jason M. Bills75af3962019-08-19 11:07:17 -0700284 err1AssertTimer.cancel();
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700285 err2AssertTimer.cancel();
Jason M. Bills89922f82019-08-06 11:10:02 -0700286 smiAssertTimer.cancel();
Jason M. Bills1490b142019-07-01 15:48:43 -0700287 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700288 else
289 {
290 // Handle any initial errors when the host turns on
291 initializeErrorState();
292 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700293 });
294}
295
296static bool requestGPIOEvents(
297 const std::string& name, const std::function<void()>& handler,
298 gpiod::line& gpioLine,
299 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
300{
301 // Find the GPIO line
302 gpioLine = gpiod::find_line(name);
303 if (!gpioLine)
304 {
305 std::cerr << "Failed to find the " << name << " line\n";
306 return false;
307 }
308
309 try
310 {
311 gpioLine.request(
312 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
313 }
314 catch (std::exception&)
315 {
316 std::cerr << "Failed to request events for " << name << "\n";
317 return false;
318 }
319
320 int gpioLineFd = gpioLine.event_get_fd();
321 if (gpioLineFd < 0)
322 {
323 std::cerr << "Failed to get " << name << " fd\n";
324 return false;
325 }
326
327 gpioEventDescriptor.assign(gpioLineFd);
328
329 gpioEventDescriptor.async_wait(
330 boost::asio::posix::stream_descriptor::wait_read,
331 [&name, handler](const boost::system::error_code ec) {
332 if (ec)
333 {
334 std::cerr << name << " fd handler error: " << ec.message()
335 << "\n";
336 return;
337 }
338 handler();
339 });
340 return true;
341}
342
Jason M. Bills45e87e02019-09-09 14:45:38 -0700343static bool requestGPIOInput(const std::string& name, gpiod::line& gpioLine)
344{
345 // Find the GPIO line
346 gpioLine = gpiod::find_line(name);
347 if (!gpioLine)
348 {
349 std::cerr << "Failed to find the " << name << " line.\n";
350 return false;
351 }
352
353 // Request GPIO input
354 try
355 {
356 gpioLine.request({__FUNCTION__, gpiod::line_request::DIRECTION_INPUT});
357 }
358 catch (std::exception&)
359 {
360 std::cerr << "Failed to request " << name << " input\n";
361 return false;
362 }
363
364 return true;
365}
366
Jason M. Bills1490b142019-07-01 15:48:43 -0700367static void startPowerCycle()
368{
369 conn->async_method_call(
370 [](boost::system::error_code ec) {
371 if (ec)
372 {
373 std::cerr << "failed to set Chassis State\n";
374 }
375 },
376 "xyz.openbmc_project.State.Chassis",
377 "/xyz/openbmc_project/state/chassis0",
378 "org.freedesktop.DBus.Properties", "Set",
379 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
380 std::variant<std::string>{
381 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
382}
383
Jason M. Billsb61766b2019-11-26 17:02:44 -0800384static void startCrashdumpAndRecovery(bool recoverSystem,
385 const std::string& triggerType)
Jason M. Bills1490b142019-07-01 15:48:43 -0700386{
387 std::cout << "Starting crashdump\n";
388 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
389 static boost::asio::steady_timer crashdumpTimer(io);
390
391 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
392 *conn,
393 "type='signal',interface='org.freedesktop.DBus.Properties',"
394 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
395 [recoverSystem](sdbusplus::message::message& msg) {
396 crashdumpTimer.cancel();
397 std::cout << "Crashdump completed\n";
398 if (recoverSystem)
399 {
400 std::cout << "Recovering the system\n";
401 startPowerCycle();
402 }
403 crashdumpCompleteMatch.reset();
404 });
405
406 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
407 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
408 if (ec)
409 {
410 // operation_aborted is expected if timer is canceled
411 if (ec != boost::asio::error::operation_aborted)
412 {
413 std::cerr << "Crashdump async_wait failed: " << ec.message()
414 << "\n";
415 }
416 std::cout << "Crashdump timer canceled\n";
417 return;
418 }
419 std::cerr << "Crashdump failed to complete before timeout\n";
420 crashdumpCompleteMatch.reset();
421 });
422
423 conn->async_method_call(
424 [](boost::system::error_code ec) {
425 if (ec)
426 {
427 std::cerr << "failed to start Crashdump\n";
428 crashdumpTimer.cancel();
429 crashdumpCompleteMatch.reset();
430 }
431 },
432 "com.intel.crashdump", "/com/intel/crashdump",
Jason M. Billsb61766b2019-11-26 17:02:44 -0800433 "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
Jason M. Bills1490b142019-07-01 15:48:43 -0700434}
435
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700436static void incrementCPUErrorCount(int cpuNum)
437{
438 std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
439
440 // Get the current count
441 conn->async_method_call(
442 [propertyName](boost::system::error_code ec,
443 const std::variant<uint8_t>& property) {
444 if (ec)
445 {
446 std::cerr << "Failed to read " << propertyName << ": "
447 << ec.message() << "\n";
448 return;
449 }
450 const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
451 if (errorCountVariant == nullptr)
452 {
453 std::cerr << propertyName << " invalid\n";
454 return;
455 }
456 uint8_t errorCount = *errorCountVariant;
457 if (errorCount == std::numeric_limits<uint8_t>::max())
458 {
459 std::cerr << "Maximum error count reached\n";
460 return;
461 }
462 // Increment the count
463 errorCount++;
464 conn->async_method_call(
465 [propertyName](boost::system::error_code ec) {
466 if (ec)
467 {
468 std::cerr << "Failed to set " << propertyName << ": "
469 << ec.message() << "\n";
470 }
471 },
472 "xyz.openbmc_project.Settings",
473 "/xyz/openbmc_project/control/processor_error_config",
474 "org.freedesktop.DBus.Properties", "Set",
475 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
476 std::variant<uint8_t>{errorCount});
477 },
478 "xyz.openbmc_project.Settings",
479 "/xyz/openbmc_project/control/processor_error_config",
480 "org.freedesktop.DBus.Properties", "Get",
481 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
482}
483
Jason M. Billsa3397932019-08-06 11:07:21 -0700484static bool checkIERRCPUs()
485{
486 bool cpuIERRFound = false;
487 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
488 cpu++, addr++)
489 {
490 uint8_t cc = 0;
491 CPUModel model{};
492 uint8_t stepping = 0;
493 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
494 {
495 std::cerr << "Cannot get CPUID!\n";
496 continue;
497 }
498
499 switch (model)
500 {
501 case skx:
502 {
503 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
504 // that caused the IERR
505 uint32_t mcaErrSrcLog = 0;
506 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
507 &cc) != PECI_CC_SUCCESS)
508 {
509 continue;
510 }
511 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
512 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
513 {
514 // TODO: Light the CPU fault LED?
515 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700516 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700517 // Next check if it's a CPU/VR mismatch by reading the
518 // IA32_MC4_STATUS MSR (0x411)
519 uint64_t mc4Status = 0;
520 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
521 PECI_CC_SUCCESS)
522 {
523 continue;
524 }
525 // Check MSEC bits 31:24 for
526 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
527 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
528 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
529 if ((mc4Status & (0x40 << 24)) ||
530 (mc4Status & (0x42 << 24)) ||
531 (mc4Status & (0x43 << 24)))
532 {
533 cpuIERRLog(cpu, "CPU/VR Mismatch");
534 continue;
535 }
536
537 // Next check if it's a Core FIVR fault by looking for a
538 // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
539 // 80h)
540 uint32_t coreFIVRErrLog = 0;
541 if (peci_RdPCIConfigLocal(
542 addr, 1, 30, 2, 0x80, sizeof(uint32_t),
543 (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
544 {
545 continue;
546 }
547 if (coreFIVRErrLog)
548 {
549 cpuIERRLog(cpu, "Core FIVR Fault");
550 continue;
551 }
552
553 // Next check if it's an Uncore FIVR fault by looking for a
554 // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
555 // 84h)
556 uint32_t uncoreFIVRErrLog = 0;
557 if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
558 sizeof(uint32_t),
559 (uint8_t*)&uncoreFIVRErrLog,
560 &cc) != PECI_CC_SUCCESS)
561 {
562 continue;
563 }
564 if (uncoreFIVRErrLog)
565 {
566 cpuIERRLog(cpu, "Uncore FIVR Fault");
567 continue;
568 }
569
570 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
571 // both zero, but MSEC bits 31:24 have either
572 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
573 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
574 // uncore FIVR fault
575 if (!coreFIVRErrLog && !uncoreFIVRErrLog &&
576 ((mc4Status & (0x51 << 24)) ||
577 (mc4Status & (0x52 << 24))))
578 {
579 cpuIERRLog(cpu, "Uncore FIVR Fault");
580 continue;
581 }
582 cpuIERRLog(cpu);
583 }
584 break;
585 }
586 case icx:
587 {
588 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
589 // that caused the IERR
590 uint32_t mcaErrSrcLog = 0;
591 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
592 &cc) != PECI_CC_SUCCESS)
593 {
594 continue;
595 }
596 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
597 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
598 {
599 // TODO: Light the CPU fault LED?
600 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700601 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700602 // Next check if it's a CPU/VR mismatch by reading the
603 // IA32_MC4_STATUS MSR (0x411)
604 uint64_t mc4Status = 0;
605 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
606 PECI_CC_SUCCESS)
607 {
608 continue;
609 }
610 // TODO: Update MSEC/MSCOD_31_24 check
611 // Check MSEC bits 31:24 for
612 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
613 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
614 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
615 if ((mc4Status & (0x40 << 24)) ||
616 (mc4Status & (0x42 << 24)) ||
617 (mc4Status & (0x43 << 24)))
618 {
619 cpuIERRLog(cpu, "CPU/VR Mismatch");
620 continue;
621 }
622
623 // Next check if it's a Core FIVR fault by looking for a
624 // non-zero value of CORE_FIVR_ERR_LOG (B(31) D30 F2 offsets
625 // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
626 uint32_t coreFIVRErrLog0 = 0;
627 uint32_t coreFIVRErrLog1 = 0;
628 if (peci_RdEndPointConfigPciLocal(
629 addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
630 (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
631 {
632 continue;
633 }
634 if (peci_RdEndPointConfigPciLocal(
635 addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
636 (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
637 {
638 continue;
639 }
640 if (coreFIVRErrLog0 || coreFIVRErrLog1)
641 {
642 cpuIERRLog(cpu, "Core FIVR Fault");
643 continue;
644 }
645
646 // Next check if it's an Uncore FIVR fault by looking for a
647 // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
648 // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
649 uint32_t uncoreFIVRErrLog = 0;
650 if (peci_RdEndPointConfigPciLocal(
651 addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
652 (uint8_t*)&uncoreFIVRErrLog,
653 &cc) != PECI_CC_SUCCESS)
654 {
655 continue;
656 }
657 if (uncoreFIVRErrLog)
658 {
659 cpuIERRLog(cpu, "Uncore FIVR Fault");
660 continue;
661 }
662
663 // TODO: Update MSEC/MSCOD_31_24 check
664 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
665 // both zero, but MSEC bits 31:24 have either
666 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
667 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
668 // uncore FIVR fault
669 if (!coreFIVRErrLog0 && !coreFIVRErrLog1 &&
670 !uncoreFIVRErrLog &&
671 ((mc4Status & (0x51 << 24)) ||
672 (mc4Status & (0x52 << 24))))
673 {
674 cpuIERRLog(cpu, "Uncore FIVR Fault");
675 continue;
676 }
677 cpuIERRLog(cpu);
678 }
679 break;
680 }
681 }
682 }
683 return cpuIERRFound;
684}
685
Jason M. Billsa15c2522019-08-16 10:01:44 -0700686static void caterrAssertHandler()
687{
Jason M. Billsa15c2522019-08-16 10:01:44 -0700688 caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
689 caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
690 if (ec)
691 {
692 // operation_aborted is expected if timer is canceled
693 // before completion.
694 if (ec != boost::asio::error::operation_aborted)
695 {
696 std::cerr << "caterr timeout async_wait failed: "
697 << ec.message() << "\n";
698 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700699 return;
700 }
Jason M. Billsa3397932019-08-06 11:07:21 -0700701 std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs)
702 << " ms\n";
703 if (!checkIERRCPUs())
704 {
705 cpuIERRLog();
706 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700707 conn->async_method_call(
708 [](boost::system::error_code ec,
709 const std::variant<bool>& property) {
710 if (ec)
711 {
712 return;
713 }
714 const bool* reset = std::get_if<bool>(&property);
715 if (reset == nullptr)
716 {
717 std::cerr << "Unable to read reset on CATERR value\n";
718 return;
719 }
Jason M. Billsb61766b2019-11-26 17:02:44 -0800720 startCrashdumpAndRecovery(*reset, "IERR");
Jason M. Billsa15c2522019-08-16 10:01:44 -0700721 },
722 "xyz.openbmc_project.Settings",
723 "/xyz/openbmc_project/control/processor_error_config",
724 "org.freedesktop.DBus.Properties", "Get",
725 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
726 });
727}
728
Jason M. Bills1490b142019-07-01 15:48:43 -0700729static void caterrHandler()
730{
731 if (!hostOff)
732 {
733 gpiod::line_event gpioLineEvent = caterrLine.event_read();
734
735 bool caterr =
736 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
737 if (caterr)
738 {
Jason M. Billsa15c2522019-08-16 10:01:44 -0700739 caterrAssertHandler();
Jason M. Bills1490b142019-07-01 15:48:43 -0700740 }
741 else
742 {
743 caterrAssertTimer.cancel();
744 }
745 }
746 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
747 [](const boost::system::error_code ec) {
748 if (ec)
749 {
750 std::cerr << "caterr handler error: "
751 << ec.message() << "\n";
752 return;
753 }
754 caterrHandler();
755 });
756}
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700757
Jason M. Billse94f5e12019-09-13 11:11:34 -0700758static void cpu1ThermtripAssertHandler()
759{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700760 if (cpu1FIVRFaultLine.get_value() == 0)
761 {
762 cpuBootFIVRFaultLog(1);
763 }
764 else
765 {
766 cpuThermTripLog(1);
767 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700768}
769
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700770static void cpu1ThermtripHandler()
771{
772 if (!hostOff)
773 {
774 gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
775
776 bool cpu1Thermtrip =
777 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
778 if (cpu1Thermtrip)
779 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700780 cpu1ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700781 }
782 }
783 cpu1ThermtripEvent.async_wait(
784 boost::asio::posix::stream_descriptor::wait_read,
785 [](const boost::system::error_code ec) {
786 if (ec)
787 {
788 std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
789 << "\n";
790 return;
791 }
792 cpu1ThermtripHandler();
793 });
794}
795
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000796static void cpu1MemtripHandler()
797{
798 if (!hostOff)
799 {
800 gpiod::line_event gpioLineEvent = cpu1MemtripLine.event_read();
801
802 bool cpu1Memtrip =
803 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
804 if (cpu1Memtrip)
805 {
806 memThermTripLog(1);
807 }
808 }
809 cpu1MemtripEvent.async_wait(
810 boost::asio::posix::stream_descriptor::wait_read,
811 [](const boost::system::error_code ec) {
812 if (ec)
813 {
814 std::cerr << "CPU 1 Memory Thermaltrip handler error: "
815 << ec.message() << "\n";
816 return;
817 }
818 cpu1MemtripHandler();
819 });
820}
821
Jason M. Billse94f5e12019-09-13 11:11:34 -0700822static void cpu2ThermtripAssertHandler()
823{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700824 if (cpu2FIVRFaultLine.get_value() == 0)
825 {
826 cpuBootFIVRFaultLog(2);
827 }
828 else
829 {
830 cpuThermTripLog(2);
831 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700832}
833
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700834static void cpu2ThermtripHandler()
835{
836 if (!hostOff)
837 {
838 gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
839
840 bool cpu2Thermtrip =
841 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
842 if (cpu2Thermtrip)
843 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700844 cpu2ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700845 }
846 }
847 cpu2ThermtripEvent.async_wait(
848 boost::asio::posix::stream_descriptor::wait_read,
849 [](const boost::system::error_code ec) {
850 if (ec)
851 {
852 std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
853 << "\n";
854 return;
855 }
856 cpu2ThermtripHandler();
857 });
858}
859
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000860static void cpu2MemtripHandler()
861{
862 if (!hostOff)
863 {
864 gpiod::line_event gpioLineEvent = cpu2MemtripLine.event_read();
865
866 bool cpu2Memtrip =
867 gpioLineEvent.event_type == gpiod::line_event::RISING_EDGE;
868 if (cpu2Memtrip)
869 {
870 memThermTripLog(2);
871 }
872 }
873 cpu2MemtripEvent.async_wait(
874 boost::asio::posix::stream_descriptor::wait_read,
875 [](const boost::system::error_code ec) {
876 if (ec)
877 {
878 std::cerr << "CPU 2 Memory Thermaltrip handler error: "
879 << ec.message() << "\n";
880 return;
881 }
882 cpu2MemtripHandler();
883 });
884}
885
Jason M. Billse94f5e12019-09-13 11:11:34 -0700886static void cpu1VRHotAssertHandler()
887{
888 cpuVRHotLog("CPU 1");
889}
890
Jason M. Bills250fa632019-08-28 15:58:25 -0700891static void cpu1VRHotHandler()
892{
893 if (!hostOff)
894 {
895 gpiod::line_event gpioLineEvent = cpu1VRHotLine.event_read();
896
897 bool cpu1VRHot =
898 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
899 if (cpu1VRHot)
900 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700901 cpu1VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700902 }
903 }
904 cpu1VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
905 [](const boost::system::error_code ec) {
906 if (ec)
907 {
908 std::cerr << "CPU 1 VRHot handler error: "
909 << ec.message() << "\n";
910 return;
911 }
912 cpu1VRHotHandler();
913 });
914}
915
Jason M. Billse94f5e12019-09-13 11:11:34 -0700916static void cpu1MemABCDVRHotAssertHandler()
917{
918 cpuVRHotLog("CPU 1 Memory ABCD");
919}
920
Jason M. Bills9647ba72019-08-29 14:19:19 -0700921static void cpu1MemABCDVRHotHandler()
922{
923 if (!hostOff)
924 {
925 gpiod::line_event gpioLineEvent = cpu1MemABCDVRHotLine.event_read();
926
927 bool cpu1MemABCDVRHot =
928 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
929 if (cpu1MemABCDVRHot)
930 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700931 cpu1MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700932 }
933 }
934 cpu1MemABCDVRHotEvent.async_wait(
935 boost::asio::posix::stream_descriptor::wait_read,
936 [](const boost::system::error_code ec) {
937 if (ec)
938 {
939 std::cerr << "CPU 1 Memory ABCD VRHot handler error: "
940 << ec.message() << "\n";
941 return;
942 }
943 cpu1MemABCDVRHotHandler();
944 });
945}
946
Jason M. Billse94f5e12019-09-13 11:11:34 -0700947static void cpu1MemEFGHVRHotAssertHandler()
948{
949 cpuVRHotLog("CPU 1 Memory EFGH");
950}
951
Jason M. Bills9647ba72019-08-29 14:19:19 -0700952static void cpu1MemEFGHVRHotHandler()
953{
954 if (!hostOff)
955 {
956 gpiod::line_event gpioLineEvent = cpu1MemEFGHVRHotLine.event_read();
957
958 bool cpu1MemEFGHVRHot =
959 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
960 if (cpu1MemEFGHVRHot)
961 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700962 cpu1MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700963 }
964 }
965 cpu1MemEFGHVRHotEvent.async_wait(
966 boost::asio::posix::stream_descriptor::wait_read,
967 [](const boost::system::error_code ec) {
968 if (ec)
969 {
970 std::cerr << "CPU 1 Memory EFGH VRHot handler error: "
971 << ec.message() << "\n";
972 return;
973 }
974 cpu1MemEFGHVRHotHandler();
975 });
976}
977
Jason M. Billse94f5e12019-09-13 11:11:34 -0700978static void cpu2VRHotAssertHandler()
979{
980 cpuVRHotLog("CPU 2");
981}
982
Jason M. Bills250fa632019-08-28 15:58:25 -0700983static void cpu2VRHotHandler()
984{
985 if (!hostOff)
986 {
987 gpiod::line_event gpioLineEvent = cpu2VRHotLine.event_read();
988
989 bool cpu2VRHot =
990 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
991 if (cpu2VRHot)
992 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700993 cpu2VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700994 }
995 }
996 cpu2VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
997 [](const boost::system::error_code ec) {
998 if (ec)
999 {
1000 std::cerr << "CPU 2 VRHot handler error: "
1001 << ec.message() << "\n";
1002 return;
1003 }
1004 cpu2VRHotHandler();
1005 });
1006}
1007
Jason M. Billse94f5e12019-09-13 11:11:34 -07001008static void cpu2MemABCDVRHotAssertHandler()
1009{
1010 cpuVRHotLog("CPU 2 Memory ABCD");
1011}
1012
Jason M. Bills9647ba72019-08-29 14:19:19 -07001013static void cpu2MemABCDVRHotHandler()
1014{
1015 if (!hostOff)
1016 {
1017 gpiod::line_event gpioLineEvent = cpu2MemABCDVRHotLine.event_read();
1018
1019 bool cpu2MemABCDVRHot =
1020 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1021 if (cpu2MemABCDVRHot)
1022 {
Jason M. Billse94f5e12019-09-13 11:11:34 -07001023 cpu2MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001024 }
1025 }
1026 cpu2MemABCDVRHotEvent.async_wait(
1027 boost::asio::posix::stream_descriptor::wait_read,
1028 [](const boost::system::error_code ec) {
1029 if (ec)
1030 {
1031 std::cerr << "CPU 2 Memory ABCD VRHot handler error: "
1032 << ec.message() << "\n";
1033 return;
1034 }
1035 cpu2MemABCDVRHotHandler();
1036 });
1037}
1038
Jason M. Billse94f5e12019-09-13 11:11:34 -07001039static void cpu2MemEFGHVRHotAssertHandler()
1040{
1041 cpuVRHotLog("CPU 2 Memory EFGH");
1042}
1043
Jason M. Bills9647ba72019-08-29 14:19:19 -07001044static void cpu2MemEFGHVRHotHandler()
1045{
1046 if (!hostOff)
1047 {
1048 gpiod::line_event gpioLineEvent = cpu2MemEFGHVRHotLine.event_read();
1049
1050 bool cpu2MemEFGHVRHot =
1051 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1052 if (cpu2MemEFGHVRHot)
1053 {
Jason M. Billse94f5e12019-09-13 11:11:34 -07001054 cpu2MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001055 }
1056 }
1057 cpu2MemEFGHVRHotEvent.async_wait(
1058 boost::asio::posix::stream_descriptor::wait_read,
1059 [](const boost::system::error_code ec) {
1060 if (ec)
1061 {
1062 std::cerr << "CPU 2 Memory EFGH VRHot handler error: "
1063 << ec.message() << "\n";
1064 return;
1065 }
1066 cpu2MemEFGHVRHotHandler();
1067 });
1068}
1069
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001070static void pchThermtripHandler()
1071{
1072 if (!hostOff)
1073 {
1074 gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
1075
1076 bool pchThermtrip =
1077 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1078 if (pchThermtrip)
1079 {
Jason M. Bills08866542019-08-16 12:04:19 -07001080 ssbThermTripLog();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001081 }
1082 }
1083 pchThermtripEvent.async_wait(
1084 boost::asio::posix::stream_descriptor::wait_read,
1085 [](const boost::system::error_code ec) {
1086 if (ec)
1087 {
1088 std::cerr << "PCH Thermal trip handler error: " << ec.message()
1089 << "\n";
1090 return;
1091 }
1092 pchThermtripHandler();
1093 });
1094}
1095
Jason M. Billscbf78532019-08-16 15:32:11 -07001096static std::bitset<MAX_CPUS> checkERRPinCPUs(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001097{
Jason M. Billscbf78532019-08-16 15:32:11 -07001098 int errPinSts = (1 << errPin);
1099 std::bitset<MAX_CPUS> errPinCPUs = 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001100 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
1101 cpu++, addr++)
1102 {
1103 if (peci_Ping(addr) == PECI_CC_SUCCESS)
1104 {
1105 uint8_t cc = 0;
1106 CPUModel model{};
1107 uint8_t stepping = 0;
1108 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
1109 {
1110 std::cerr << "Cannot get CPUID!\n";
1111 continue;
1112 }
1113
1114 switch (model)
1115 {
1116 case skx:
1117 {
1118 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001119 // the ERRx (B(0) D8 F0 offset 210h)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001120 uint32_t errpinsts = 0;
1121 if (peci_RdPCIConfigLocal(
1122 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
1123 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1124 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001125 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001126 }
1127 break;
1128 }
1129 case icx:
1130 {
1131 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001132 // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001133 // accessed on PECI as bus 13)
1134 uint32_t errpinsts = 0;
1135 if (peci_RdEndPointConfigPciLocal(
1136 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
1137 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1138 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001139 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001140 }
1141 break;
1142 }
1143 }
1144 }
1145 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001146 return errPinCPUs;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001147}
1148
Jason M. Billscbf78532019-08-16 15:32:11 -07001149static void errXAssertHandler(const int errPin,
1150 boost::asio::steady_timer& errXAssertTimer)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001151{
Jason M. Billscbf78532019-08-16 15:32:11 -07001152 // ERRx status is not guaranteed through the timeout, so save which
1153 // CPUs have it asserted
1154 std::bitset<MAX_CPUS> errPinCPUs = checkERRPinCPUs(errPin);
1155 errXAssertTimer.expires_after(std::chrono::milliseconds(errTimeoutMs));
1156 errXAssertTimer.async_wait([errPin, errPinCPUs](
1157 const boost::system::error_code ec) {
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001158 if (ec)
1159 {
1160 // operation_aborted is expected if timer is canceled before
1161 // completion.
1162 if (ec != boost::asio::error::operation_aborted)
1163 {
1164 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1165 << "\n";
1166 }
1167 return;
1168 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001169 std::cerr << "ERR" << std::to_string(errPin) << " asserted for "
1170 << std::to_string(errTimeoutMs) << " ms\n";
1171 if (errPinCPUs.count())
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001172 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001173 for (int i = 0; i < errPinCPUs.size(); i++)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001174 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001175 if (errPinCPUs[i])
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001176 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001177 cpuERRXLog(errPin, i);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001178 }
1179 }
1180 }
1181 else
1182 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001183 cpuERRXLog(errPin);
1184 }
1185 });
1186}
1187
Jason M. Bills8c584392019-08-19 11:05:51 -07001188static void err0AssertHandler()
1189{
1190 // Handle the standard ERR0 detection and logging
1191 const static constexpr int err0 = 0;
1192 errXAssertHandler(err0, err0AssertTimer);
1193}
1194
1195static void err0Handler()
1196{
1197 if (!hostOff)
1198 {
1199 gpiod::line_event gpioLineEvent = err0Line.event_read();
1200
1201 bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1202 if (err0)
1203 {
1204 err0AssertHandler();
1205 }
1206 else
1207 {
1208 err0AssertTimer.cancel();
1209 }
1210 }
1211 err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1212 [](const boost::system::error_code ec) {
1213 if (ec)
1214 {
1215 std::cerr
1216 << "err0 handler error: " << ec.message()
1217 << "\n";
1218 return;
1219 }
1220 err0Handler();
1221 });
1222}
1223
Jason M. Bills75af3962019-08-19 11:07:17 -07001224static void err1AssertHandler()
1225{
1226 // Handle the standard ERR1 detection and logging
1227 const static constexpr int err1 = 1;
1228 errXAssertHandler(err1, err1AssertTimer);
1229}
1230
1231static void err1Handler()
1232{
1233 if (!hostOff)
1234 {
1235 gpiod::line_event gpioLineEvent = err1Line.event_read();
1236
1237 bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1238 if (err1)
1239 {
1240 err1AssertHandler();
1241 }
1242 else
1243 {
1244 err1AssertTimer.cancel();
1245 }
1246 }
1247 err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1248 [](const boost::system::error_code ec) {
1249 if (ec)
1250 {
1251 std::cerr
1252 << "err1 handler error: " << ec.message()
1253 << "\n";
1254 return;
1255 }
1256 err1Handler();
1257 });
1258}
1259
Jason M. Billscbf78532019-08-16 15:32:11 -07001260static void err2AssertHandler()
1261{
1262 // Handle the standard ERR2 detection and logging
1263 const static constexpr int err2 = 2;
1264 errXAssertHandler(err2, err2AssertTimer);
1265 // Also handle reset for ERR2
1266 err2AssertTimer.async_wait([](const boost::system::error_code ec) {
1267 if (ec)
1268 {
1269 // operation_aborted is expected if timer is canceled before
1270 // completion.
1271 if (ec != boost::asio::error::operation_aborted)
1272 {
1273 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1274 << "\n";
1275 }
1276 return;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001277 }
1278 conn->async_method_call(
1279 [](boost::system::error_code ec,
1280 const std::variant<bool>& property) {
1281 if (ec)
1282 {
1283 return;
1284 }
1285 const bool* reset = std::get_if<bool>(&property);
1286 if (reset == nullptr)
1287 {
1288 std::cerr << "Unable to read reset on ERR2 value\n";
1289 return;
1290 }
Jason M. Billsb61766b2019-11-26 17:02:44 -08001291 startCrashdumpAndRecovery(*reset, "ERR2 Timeout");
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001292 },
1293 "xyz.openbmc_project.Settings",
1294 "/xyz/openbmc_project/control/processor_error_config",
1295 "org.freedesktop.DBus.Properties", "Get",
1296 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnERR2");
Yong Li061eb032020-02-26 15:06:18 +08001297
1298 beep(beepCPUErr2);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001299 });
1300}
1301
1302static void err2Handler()
1303{
1304 if (!hostOff)
1305 {
1306 gpiod::line_event gpioLineEvent = err2Line.event_read();
1307
1308 bool err2 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1309 if (err2)
1310 {
1311 err2AssertHandler();
1312 }
1313 else
1314 {
1315 err2AssertTimer.cancel();
1316 }
1317 }
1318 err2Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1319 [](const boost::system::error_code ec) {
1320 if (ec)
1321 {
1322 std::cerr
1323 << "err2 handler error: " << ec.message()
1324 << "\n";
1325 return;
1326 }
1327 err2Handler();
1328 });
1329}
1330
Jason M. Bills89922f82019-08-06 11:10:02 -07001331static void smiAssertHandler()
1332{
1333 smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
1334 smiAssertTimer.async_wait([](const boost::system::error_code ec) {
1335 if (ec)
1336 {
1337 // operation_aborted is expected if timer is canceled before
1338 // completion.
1339 if (ec != boost::asio::error::operation_aborted)
1340 {
1341 std::cerr << "smi timeout async_wait failed: " << ec.message()
1342 << "\n";
1343 }
1344 return;
1345 }
1346 std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs)
1347 << " ms\n";
1348 smiTimeoutLog();
1349 conn->async_method_call(
1350 [](boost::system::error_code ec,
1351 const std::variant<bool>& property) {
1352 if (ec)
1353 {
1354 return;
1355 }
1356 const bool* reset = std::get_if<bool>(&property);
1357 if (reset == nullptr)
1358 {
1359 std::cerr << "Unable to read reset on SMI value\n";
1360 return;
1361 }
Jason M. Bills94785442020-01-07 15:22:09 -08001362#ifdef HOST_ERROR_CRASHDUMP_ON_SMI_TIMEOUT
Jason M. Billsb61766b2019-11-26 17:02:44 -08001363 startCrashdumpAndRecovery(*reset, "SMI Timeout");
Jason M. Bills94785442020-01-07 15:22:09 -08001364#else
1365 if (*reset)
1366 {
1367 std::cout << "Recovering the system\n";
1368 startPowerCycle();
1369 }
1370#endif
Jason M. Bills89922f82019-08-06 11:10:02 -07001371 },
1372 "xyz.openbmc_project.Settings",
1373 "/xyz/openbmc_project/control/bmc_reset_disables",
1374 "org.freedesktop.DBus.Properties", "Get",
1375 "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
1376 });
1377}
1378
1379static void smiHandler()
1380{
1381 if (!hostOff)
1382 {
1383 gpiod::line_event gpioLineEvent = smiLine.event_read();
1384
1385 bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1386 if (smi)
1387 {
1388 smiAssertHandler();
1389 }
1390 else
1391 {
1392 smiAssertTimer.cancel();
1393 }
1394 }
1395 smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1396 [](const boost::system::error_code ec) {
1397 if (ec)
1398 {
1399 std::cerr
1400 << "smi handler error: " << ec.message()
1401 << "\n";
1402 return;
1403 }
1404 smiHandler();
1405 });
1406}
1407
Jason M. Billsa15c2522019-08-16 10:01:44 -07001408static void initializeErrorState()
1409{
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001410 // Handle CPU1_MISMATCH if it's asserted now
1411 if (cpu1MismatchLine.get_value() == 1)
1412 {
1413 cpuMismatchLog(1);
1414 }
1415
1416 // Handle CPU2_MISMATCH if it's asserted now
1417 if (cpu2MismatchLine.get_value() == 1)
1418 {
1419 cpuMismatchLog(2);
1420 }
1421
Jason M. Billsa15c2522019-08-16 10:01:44 -07001422 // Handle CPU_CATERR if it's asserted now
1423 if (caterrLine.get_value() == 0)
1424 {
1425 caterrAssertHandler();
1426 }
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001427
Jason M. Bills8c584392019-08-19 11:05:51 -07001428 // Handle CPU_ERR0 if it's asserted now
1429 if (err0Line.get_value() == 0)
1430 {
1431 err0AssertHandler();
1432 }
1433
Jason M. Bills75af3962019-08-19 11:07:17 -07001434 // Handle CPU_ERR1 if it's asserted now
1435 if (err1Line.get_value() == 0)
1436 {
1437 err1AssertHandler();
1438 }
1439
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001440 // Handle CPU_ERR2 if it's asserted now
1441 if (err2Line.get_value() == 0)
1442 {
1443 err2AssertHandler();
1444 }
Jason M. Bills89922f82019-08-06 11:10:02 -07001445
1446 // Handle SMI if it's asserted now
1447 if (smiLine.get_value() == 0)
1448 {
1449 smiAssertHandler();
1450 }
Jason M. Bills08866542019-08-16 12:04:19 -07001451
Jason M. Billse94f5e12019-09-13 11:11:34 -07001452 // Handle CPU1_THERMTRIP if it's asserted now
1453 if (cpu1ThermtripLine.get_value() == 0)
1454 {
1455 cpu1ThermtripAssertHandler();
1456 }
1457
1458 // Handle CPU2_THERMTRIP if it's asserted now
1459 if (cpu2ThermtripLine.get_value() == 0)
1460 {
1461 cpu2ThermtripAssertHandler();
1462 }
1463
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001464 // Handle CPU1_MEM_THERM_EVENT (CPU1 DIMM Thermal trip) if it's asserted now
1465 if (cpu1MemtripLine.get_value() == 0)
1466 {
1467 memThermTripLog(1);
1468 }
1469
1470 // Handle CPU2_MEM_THERM_EVENT (CPU2 DIMM Thermal trip) if it's asserted now
1471 if (cpu2MemtripLine.get_value() == 0)
1472 {
1473 memThermTripLog(2);
1474 }
1475
Jason M. Billse94f5e12019-09-13 11:11:34 -07001476 // Handle CPU1_VRHOT if it's asserted now
1477 if (cpu1VRHotLine.get_value() == 0)
1478 {
1479 cpu1VRHotAssertHandler();
1480 }
1481
1482 // Handle CPU1_MEM_ABCD_VRHOT if it's asserted now
1483 if (cpu1MemABCDVRHotLine.get_value() == 0)
1484 {
1485 cpu1MemABCDVRHotAssertHandler();
1486 }
1487
1488 // Handle CPU1_MEM_EFGH_VRHOT if it's asserted now
1489 if (cpu1MemEFGHVRHotLine.get_value() == 0)
1490 {
1491 cpu1MemEFGHVRHotAssertHandler();
1492 }
1493
1494 // Handle CPU2_VRHOT if it's asserted now
1495 if (cpu2VRHotLine.get_value() == 0)
1496 {
1497 cpu2VRHotAssertHandler();
1498 }
1499
1500 // Handle CPU2_MEM_ABCD_VRHOT if it's asserted now
1501 if (cpu2MemABCDVRHotLine.get_value() == 0)
1502 {
1503 cpu2MemABCDVRHotAssertHandler();
1504 }
1505
1506 // Handle CPU2_MEM_EFGH_VRHOT if it's asserted now
1507 if (cpu2MemEFGHVRHotLine.get_value() == 0)
1508 {
1509 cpu2MemEFGHVRHotAssertHandler();
1510 }
1511
Jason M. Bills08866542019-08-16 12:04:19 -07001512 // Handle PCH_BMC_THERMTRIP if it's asserted now
1513 if (pchThermtripLine.get_value() == 0)
1514 {
1515 ssbThermTripLog();
1516 }
Jason M. Billsa15c2522019-08-16 10:01:44 -07001517}
Jason M. Bills1490b142019-07-01 15:48:43 -07001518} // namespace host_error_monitor
1519
1520int main(int argc, char* argv[])
1521{
1522 // setup connection to dbus
1523 host_error_monitor::conn =
1524 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
1525
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001526 // Host Error Monitor Service
Jason M. Bills1490b142019-07-01 15:48:43 -07001527 host_error_monitor::conn->request_name(
1528 "xyz.openbmc_project.HostErrorMonitor");
1529 sdbusplus::asio::object_server server =
1530 sdbusplus::asio::object_server(host_error_monitor::conn);
1531
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001532 // Restart Cause Interface
1533 host_error_monitor::hostErrorTimeoutIface =
1534 server.add_interface("/xyz/openbmc_project/host_error_monitor",
1535 "xyz.openbmc_project.HostErrorMonitor.Timeout");
1536
1537 host_error_monitor::hostErrorTimeoutIface->register_property(
1538 "IERRTimeoutMs", host_error_monitor::caterrTimeoutMs,
1539 [](const std::size_t& requested, std::size_t& resp) {
1540 if (requested > host_error_monitor::caterrTimeoutMsMax)
1541 {
1542 std::cerr << "IERRTimeoutMs update to " << requested
1543 << "ms rejected. Cannot be greater than "
1544 << host_error_monitor::caterrTimeoutMsMax << "ms.\n";
1545 return 0;
1546 }
1547 std::cerr << "IERRTimeoutMs updated to " << requested << "ms\n";
1548 host_error_monitor::caterrTimeoutMs = requested;
1549 resp = requested;
1550 return 1;
1551 },
1552 [](std::size_t& resp) { return host_error_monitor::caterrTimeoutMs; });
1553 host_error_monitor::hostErrorTimeoutIface->initialize();
1554
Jason M. Bills1490b142019-07-01 15:48:43 -07001555 // Start tracking host state
1556 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
1557 host_error_monitor::startHostStateMonitor();
1558
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001559 // Request CPU1_MISMATCH GPIO events
1560 if (!host_error_monitor::requestGPIOInput(
1561 "CPU1_MISMATCH", host_error_monitor::cpu1MismatchLine))
1562 {
1563 return -1;
1564 }
1565
1566 // Request CPU2_MISMATCH GPIO events
1567 if (!host_error_monitor::requestGPIOInput(
1568 "CPU2_MISMATCH", host_error_monitor::cpu2MismatchLine))
1569 {
1570 return -1;
1571 }
1572
Jason M. Bills1490b142019-07-01 15:48:43 -07001573 // Initialize the host state
1574 host_error_monitor::initializeHostState();
1575
1576 // Request CPU_CATERR GPIO events
1577 if (!host_error_monitor::requestGPIOEvents(
1578 "CPU_CATERR", host_error_monitor::caterrHandler,
1579 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
1580 {
1581 return -1;
1582 }
1583
Jason M. Bills8c584392019-08-19 11:05:51 -07001584 // Request CPU_ERR0 GPIO events
1585 if (!host_error_monitor::requestGPIOEvents(
1586 "CPU_ERR0", host_error_monitor::err0Handler,
1587 host_error_monitor::err0Line, host_error_monitor::err0Event))
1588 {
1589 return -1;
1590 }
1591
Jason M. Bills75af3962019-08-19 11:07:17 -07001592 // Request CPU_ERR1 GPIO events
1593 if (!host_error_monitor::requestGPIOEvents(
1594 "CPU_ERR1", host_error_monitor::err1Handler,
1595 host_error_monitor::err1Line, host_error_monitor::err1Event))
1596 {
1597 return -1;
1598 }
1599
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001600 // Request CPU_ERR2 GPIO events
1601 if (!host_error_monitor::requestGPIOEvents(
1602 "CPU_ERR2", host_error_monitor::err2Handler,
1603 host_error_monitor::err2Line, host_error_monitor::err2Event))
1604 {
1605 return -1;
1606 }
1607
Jason M. Bills89922f82019-08-06 11:10:02 -07001608 // Request SMI GPIO events
1609 if (!host_error_monitor::requestGPIOEvents(
1610 "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
1611 host_error_monitor::smiEvent))
1612 {
1613 return -1;
1614 }
1615
Jason M. Bills45e87e02019-09-09 14:45:38 -07001616 // Request CPU1_FIVR_FAULT GPIO input
1617 if (!host_error_monitor::requestGPIOInput(
1618 "CPU1_FIVR_FAULT", host_error_monitor::cpu1FIVRFaultLine))
1619 {
1620 return -1;
1621 }
1622
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001623 // Request CPU1_THERMTRIP GPIO events
1624 if (!host_error_monitor::requestGPIOEvents(
1625 "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
1626 host_error_monitor::cpu1ThermtripLine,
1627 host_error_monitor::cpu1ThermtripEvent))
1628 {
1629 return -1;
1630 }
1631
Jason M. Bills45e87e02019-09-09 14:45:38 -07001632 // Request CPU2_FIVR_FAULT GPIO input
1633 if (!host_error_monitor::requestGPIOInput(
1634 "CPU2_FIVR_FAULT", host_error_monitor::cpu2FIVRFaultLine))
1635 {
1636 return -1;
1637 }
1638
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001639 // Request CPU2_THERMTRIP GPIO events
1640 if (!host_error_monitor::requestGPIOEvents(
1641 "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
1642 host_error_monitor::cpu2ThermtripLine,
1643 host_error_monitor::cpu2ThermtripEvent))
1644 {
1645 return -1;
1646 }
1647
Jason M. Bills250fa632019-08-28 15:58:25 -07001648 // Request CPU1_VRHOT GPIO events
1649 if (!host_error_monitor::requestGPIOEvents(
1650 "CPU1_VRHOT", host_error_monitor::cpu1VRHotHandler,
1651 host_error_monitor::cpu1VRHotLine,
1652 host_error_monitor::cpu1VRHotEvent))
1653 {
1654 return -1;
1655 }
1656
Jason M. Bills9647ba72019-08-29 14:19:19 -07001657 // Request CPU1_MEM_ABCD_VRHOT GPIO events
1658 if (!host_error_monitor::requestGPIOEvents(
1659 "CPU1_MEM_ABCD_VRHOT", host_error_monitor::cpu1MemABCDVRHotHandler,
1660 host_error_monitor::cpu1MemABCDVRHotLine,
1661 host_error_monitor::cpu1MemABCDVRHotEvent))
1662 {
1663 return -1;
1664 }
1665
1666 // Request CPU1_MEM_EFGH_VRHOT GPIO events
1667 if (!host_error_monitor::requestGPIOEvents(
1668 "CPU1_MEM_EFGH_VRHOT", host_error_monitor::cpu1MemEFGHVRHotHandler,
1669 host_error_monitor::cpu1MemEFGHVRHotLine,
1670 host_error_monitor::cpu1MemEFGHVRHotEvent))
1671 {
1672 return -1;
1673 }
1674
Jason M. Bills250fa632019-08-28 15:58:25 -07001675 // Request CPU2_VRHOT GPIO events
1676 if (!host_error_monitor::requestGPIOEvents(
1677 "CPU2_VRHOT", host_error_monitor::cpu2VRHotHandler,
1678 host_error_monitor::cpu2VRHotLine,
1679 host_error_monitor::cpu2VRHotEvent))
1680 {
1681 return -1;
1682 }
1683
Jason M. Bills9647ba72019-08-29 14:19:19 -07001684 // Request CPU2_MEM_ABCD_VRHOT GPIO events
1685 if (!host_error_monitor::requestGPIOEvents(
1686 "CPU2_MEM_ABCD_VRHOT", host_error_monitor::cpu2MemABCDVRHotHandler,
1687 host_error_monitor::cpu2MemABCDVRHotLine,
1688 host_error_monitor::cpu2MemABCDVRHotEvent))
1689 {
1690 return -1;
1691 }
1692
1693 // Request CPU2_MEM_EFGH_VRHOT GPIO events
1694 if (!host_error_monitor::requestGPIOEvents(
1695 "CPU2_MEM_EFGH_VRHOT", host_error_monitor::cpu2MemEFGHVRHotHandler,
1696 host_error_monitor::cpu2MemEFGHVRHotLine,
1697 host_error_monitor::cpu2MemEFGHVRHotEvent))
1698 {
1699 return -1;
1700 }
1701
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001702 // Request PCH_BMC_THERMTRIP GPIO events
1703 if (!host_error_monitor::requestGPIOEvents(
1704 "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,
1705 host_error_monitor::pchThermtripLine,
1706 host_error_monitor::pchThermtripEvent))
1707 {
1708 return -1;
1709 }
1710
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001711 // Request CPU1_MEM_THERM_EVENT GPIO events
1712 if (!host_error_monitor::requestGPIOEvents(
1713 "CPU1_MEM_THERM_EVENT", host_error_monitor::cpu1MemtripHandler,
1714 host_error_monitor::cpu1MemtripLine,
1715 host_error_monitor::cpu1MemtripEvent))
1716 {
1717 return -1;
1718 }
1719
1720 // Request CPU2_MEM_THERM_EVENT GPIO events
1721 if (!host_error_monitor::requestGPIOEvents(
1722 "CPU2_MEM_THERM_EVENT", host_error_monitor::cpu2MemtripHandler,
1723 host_error_monitor::cpu2MemtripLine,
1724 host_error_monitor::cpu2MemtripEvent))
1725 {
1726 return -1;
1727 }
1728
Jason M. Bills1490b142019-07-01 15:48:43 -07001729 host_error_monitor::io.run();
1730
1731 return 0;
1732}