blob: b5cde54b524bc015f3e451bdea81a7aaecb3a35a [file] [log] [blame]
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
Jason M. Bills6a2cb692019-08-06 11:03:49 -070016#include <peci.h>
Chen Yugange6c0f1c2019-08-02 20:36:42 +080017#include <systemd/sd-journal.h>
18
Jason M. Bills6a2cb692019-08-06 11:03:49 -070019#include <bitset>
Jason M. Bills1490b142019-07-01 15:48:43 -070020#include <boost/asio/posix/stream_descriptor.hpp>
21#include <gpiod.hpp>
22#include <iostream>
23#include <sdbusplus/asio/object_server.hpp>
Jason M. Billsd1a19f62019-08-06 11:52:58 -070024#include <variant>
Jason M. Bills1490b142019-07-01 15:48:43 -070025
26namespace host_error_monitor
27{
28static boost::asio::io_service io;
29static std::shared_ptr<sdbusplus::asio::connection> conn;
Jason M. Billsc4b91f22019-11-26 17:04:50 -080030static std::shared_ptr<sdbusplus::asio::dbus_interface> hostErrorTimeoutIface;
Jason M. Bills1490b142019-07-01 15:48:43 -070031
32static bool hostOff = true;
33
Jason M. Billsc4b91f22019-11-26 17:04:50 -080034static size_t caterrTimeoutMs = 2000;
35const static constexpr size_t caterrTimeoutMsMax = 600000; // 10 minutes maximum
Jason M. Billscbf78532019-08-16 15:32:11 -070036const static constexpr size_t errTimeoutMs = 90000;
Jason M. Bills89922f82019-08-06 11:10:02 -070037const static constexpr size_t smiTimeoutMs = 90000;
Jason M. Bills1490b142019-07-01 15:48:43 -070038const static constexpr size_t crashdumpTimeoutS = 300;
39
40// Timers
41// Timer for CATERR asserted
42static boost::asio::steady_timer caterrAssertTimer(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070043// Timer for ERR0 asserted
44static boost::asio::steady_timer err0AssertTimer(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070045// Timer for ERR1 asserted
46static boost::asio::steady_timer err1AssertTimer(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070047// Timer for ERR2 asserted
48static boost::asio::steady_timer err2AssertTimer(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070049// Timer for SMI asserted
50static boost::asio::steady_timer smiAssertTimer(io);
Jason M. Bills1490b142019-07-01 15:48:43 -070051
52// GPIO Lines and Event Descriptors
53static gpiod::line caterrLine;
54static boost::asio::posix::stream_descriptor caterrEvent(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070055static gpiod::line err0Line;
56static boost::asio::posix::stream_descriptor err0Event(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070057static gpiod::line err1Line;
58static boost::asio::posix::stream_descriptor err1Event(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070059static gpiod::line err2Line;
60static boost::asio::posix::stream_descriptor err2Event(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070061static gpiod::line smiLine;
62static boost::asio::posix::stream_descriptor smiEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070063static gpiod::line cpu1FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070064static gpiod::line cpu1ThermtripLine;
65static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070066static gpiod::line cpu2FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070067static gpiod::line cpu2ThermtripLine;
68static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io);
Jason M. Bills250fa632019-08-28 15:58:25 -070069static gpiod::line cpu1VRHotLine;
70static boost::asio::posix::stream_descriptor cpu1VRHotEvent(io);
71static gpiod::line cpu2VRHotLine;
Jason M. Bills9647ba72019-08-29 14:19:19 -070072static boost::asio::posix::stream_descriptor cpu1MemABCDVRHotEvent(io);
73static gpiod::line cpu1MemEFGHVRHotLine;
74static boost::asio::posix::stream_descriptor cpu1MemEFGHVRHotEvent(io);
75static gpiod::line cpu2MemABCDVRHotLine;
Jason M. Bills250fa632019-08-28 15:58:25 -070076static boost::asio::posix::stream_descriptor cpu2VRHotEvent(io);
Jason M. Bills9647ba72019-08-29 14:19:19 -070077static gpiod::line cpu1MemABCDVRHotLine;
78static boost::asio::posix::stream_descriptor cpu2MemABCDVRHotEvent(io);
79static gpiod::line cpu2MemEFGHVRHotLine;
80static boost::asio::posix::stream_descriptor cpu2MemEFGHVRHotEvent(io);
Chen Yugange6c0f1c2019-08-02 20:36:42 +080081//----------------------------------
82// PCH_BMC_THERMTRIP function related definition
83//----------------------------------
Chen Yugange6c0f1c2019-08-02 20:36:42 +080084static gpiod::line pchThermtripLine;
85static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +000086//----------------------------------
87// CPU_MEM_THERM_EVENT function related definition
88//----------------------------------
89static gpiod::line cpu1MemtripLine;
90static boost::asio::posix::stream_descriptor cpu1MemtripEvent(io);
91static gpiod::line cpu2MemtripLine;
92static boost::asio::posix::stream_descriptor cpu2MemtripEvent(io);
jayaprakash Mutyala53099c42020-03-15 00:16:26 +000093//---------------------------------
94// CPU_MISMATCH function related definition
95//---------------------------------
96static gpiod::line cpu1MismatchLine;
97static gpiod::line cpu2MismatchLine;
Jason M. Bills1490b142019-07-01 15:48:43 -070098
Yong Li061eb032020-02-26 15:06:18 +080099// beep function for CPU error
100const static constexpr uint8_t beepCPUErr2 = 5;
101
102static void beep(const uint8_t& beepPriority)
103{
104 conn->async_method_call(
105 [](boost::system::error_code ec) {
106 if (ec)
107 {
108 std::cerr << "beep returned error with "
109 "async_method_call (ec = "
110 << ec << ")\n";
111 return;
112 }
113 },
114 "xyz.openbmc_project.BeepCode", "/xyz/openbmc_project/BeepCode",
115 "xyz.openbmc_project.BeepCode", "Beep", uint8_t(beepPriority));
116}
117
Jason M. Billsa3397932019-08-06 11:07:21 -0700118static void cpuIERRLog()
119{
120 sd_journal_send("MESSAGE=HostError: IERR", "PRIORITY=%i", LOG_INFO,
121 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
122 "REDFISH_MESSAGE_ARGS=%s", "IERR", NULL);
123}
124
125static void cpuIERRLog(const int cpuNum)
126{
127 std::string msg = "IERR on CPU " + std::to_string(cpuNum + 1);
128
129 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
130 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
131 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
132}
133
134static void cpuIERRLog(const int cpuNum, const std::string& type)
135{
136 std::string msg = type + " IERR on CPU " + std::to_string(cpuNum + 1);
137
138 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
139 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
140 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
141}
142
Jason M. Billscbf78532019-08-16 15:32:11 -0700143static void cpuERRXLog(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700144{
Jason M. Billscbf78532019-08-16 15:32:11 -0700145 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
146
147 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
148 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
149 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700150}
151
Jason M. Billscbf78532019-08-16 15:32:11 -0700152static void cpuERRXLog(const int errPin, const int cpuNum)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700153{
Jason M. Billscbf78532019-08-16 15:32:11 -0700154 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
155 std::to_string(cpuNum + 1);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700156
157 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
158 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
159 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
160}
161
Jason M. Bills89922f82019-08-06 11:10:02 -0700162static void smiTimeoutLog()
163{
164 sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
165 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
166 "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
167}
168
Jason M. Bills45e87e02019-09-09 14:45:38 -0700169static void cpuBootFIVRFaultLog(const int cpuNum)
170{
171 std::string msg = "Boot FIVR Fault on CPU " + std::to_string(cpuNum);
172
173 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
174 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
175 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
176}
177
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700178static void cpuThermTripLog(const int cpuNum)
179{
180 std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip";
181
182 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
183 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
184 "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d",
185 cpuNum, NULL);
186}
187
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000188static void memThermTripLog(const int cpuNum)
189{
190 std::string cpuNumber = "CPU " + std::to_string(cpuNum);
191 std::string msg = cpuNumber + " Memory Thermal trip.";
192
193 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
194 LOG_ERR, "REDFISH_MESSAGE_ID=%s",
195 "OpenBMC.0.1.MemoryThermTrip", "REDFISH_MESSAGE_ARGS=%s",
196 cpuNumber.c_str(), NULL);
197}
198
jayaprakash Mutyala53099c42020-03-15 00:16:26 +0000199static void cpuMismatchLog(const int cpuNum)
200{
201 std::string msg = "CPU " + std::to_string(cpuNum) + " mismatch";
202
203 sd_journal_send("MESSAGE= %s", msg.c_str(), "PRIORITY=%i", LOG_ERR,
204 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUMismatch",
205 "REDFISH_MESSAGE_ARGS=%d", cpuNum, NULL);
206}
207
Jason M. Bills250fa632019-08-28 15:58:25 -0700208static void cpuVRHotLog(const std::string& vr)
209{
210 std::string msg = vr + " Voltage Regulator Overheated.";
211
212 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
213 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
214 "OpenBMC.0.1.VoltageRegulatorOverheated",
215 "REDFISH_MESSAGE_ARGS=%s", vr.c_str(), NULL);
216}
217
Jason M. Bills08866542019-08-16 12:04:19 -0700218static void ssbThermTripLog()
219{
220 sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
221 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
222 "OpenBMC.0.1.SsbThermalTrip", NULL);
223}
224
Jason M. Billsa15c2522019-08-16 10:01:44 -0700225static void initializeErrorState();
Jason M. Bills1490b142019-07-01 15:48:43 -0700226static void initializeHostState()
227{
228 conn->async_method_call(
229 [](boost::system::error_code ec,
230 const std::variant<std::string>& property) {
231 if (ec)
232 {
233 return;
234 }
235 const std::string* state = std::get_if<std::string>(&property);
236 if (state == nullptr)
237 {
238 std::cerr << "Unable to read host state value\n";
239 return;
240 }
241 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
Jason M. Billsa15c2522019-08-16 10:01:44 -0700242 // If the system is on, initialize the error state
243 if (!hostOff)
244 {
245 initializeErrorState();
246 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700247 },
248 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
249 "org.freedesktop.DBus.Properties", "Get",
250 "xyz.openbmc_project.State.Host", "CurrentHostState");
251}
252
253static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
254{
255 return std::make_shared<sdbusplus::bus::match::match>(
256 *conn,
257 "type='signal',interface='org.freedesktop.DBus.Properties',"
258 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
259 "Host'",
260 [](sdbusplus::message::message& msg) {
261 std::string interfaceName;
262 boost::container::flat_map<std::string, std::variant<std::string>>
263 propertiesChanged;
264 std::string state;
265 try
266 {
267 msg.read(interfaceName, propertiesChanged);
268 state =
269 std::get<std::string>(propertiesChanged.begin()->second);
270 }
271 catch (std::exception& e)
272 {
273 std::cerr << "Unable to read host state\n";
274 return;
275 }
276 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
277
Jason M. Bills1490b142019-07-01 15:48:43 -0700278 if (hostOff)
279 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700280 // No host events should fire while off, so cancel any pending
281 // timers
Jason M. Bills1490b142019-07-01 15:48:43 -0700282 caterrAssertTimer.cancel();
Jason M. Bills8c584392019-08-19 11:05:51 -0700283 err0AssertTimer.cancel();
Jason M. Bills75af3962019-08-19 11:07:17 -0700284 err1AssertTimer.cancel();
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700285 err2AssertTimer.cancel();
Jason M. Bills89922f82019-08-06 11:10:02 -0700286 smiAssertTimer.cancel();
Jason M. Bills1490b142019-07-01 15:48:43 -0700287 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700288 else
289 {
290 // Handle any initial errors when the host turns on
291 initializeErrorState();
292 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700293 });
294}
295
296static bool requestGPIOEvents(
297 const std::string& name, const std::function<void()>& handler,
298 gpiod::line& gpioLine,
299 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
300{
301 // Find the GPIO line
302 gpioLine = gpiod::find_line(name);
303 if (!gpioLine)
304 {
305 std::cerr << "Failed to find the " << name << " line\n";
306 return false;
307 }
308
309 try
310 {
311 gpioLine.request(
312 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
313 }
314 catch (std::exception&)
315 {
316 std::cerr << "Failed to request events for " << name << "\n";
317 return false;
318 }
319
320 int gpioLineFd = gpioLine.event_get_fd();
321 if (gpioLineFd < 0)
322 {
323 std::cerr << "Failed to get " << name << " fd\n";
324 return false;
325 }
326
327 gpioEventDescriptor.assign(gpioLineFd);
328
329 gpioEventDescriptor.async_wait(
330 boost::asio::posix::stream_descriptor::wait_read,
331 [&name, handler](const boost::system::error_code ec) {
332 if (ec)
333 {
334 std::cerr << name << " fd handler error: " << ec.message()
335 << "\n";
336 return;
337 }
338 handler();
339 });
340 return true;
341}
342
Jason M. Bills45e87e02019-09-09 14:45:38 -0700343static bool requestGPIOInput(const std::string& name, gpiod::line& gpioLine)
344{
345 // Find the GPIO line
346 gpioLine = gpiod::find_line(name);
347 if (!gpioLine)
348 {
349 std::cerr << "Failed to find the " << name << " line.\n";
350 return false;
351 }
352
353 // Request GPIO input
354 try
355 {
356 gpioLine.request({__FUNCTION__, gpiod::line_request::DIRECTION_INPUT});
357 }
358 catch (std::exception&)
359 {
360 std::cerr << "Failed to request " << name << " input\n";
361 return false;
362 }
363
364 return true;
365}
366
Jason M. Bills1490b142019-07-01 15:48:43 -0700367static void startPowerCycle()
368{
369 conn->async_method_call(
370 [](boost::system::error_code ec) {
371 if (ec)
372 {
373 std::cerr << "failed to set Chassis State\n";
374 }
375 },
376 "xyz.openbmc_project.State.Chassis",
377 "/xyz/openbmc_project/state/chassis0",
378 "org.freedesktop.DBus.Properties", "Set",
379 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
380 std::variant<std::string>{
381 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
382}
383
Jason M. Billsb61766b2019-11-26 17:02:44 -0800384static void startCrashdumpAndRecovery(bool recoverSystem,
385 const std::string& triggerType)
Jason M. Bills1490b142019-07-01 15:48:43 -0700386{
387 std::cout << "Starting crashdump\n";
388 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
389 static boost::asio::steady_timer crashdumpTimer(io);
390
391 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
392 *conn,
393 "type='signal',interface='org.freedesktop.DBus.Properties',"
394 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
395 [recoverSystem](sdbusplus::message::message& msg) {
396 crashdumpTimer.cancel();
397 std::cout << "Crashdump completed\n";
398 if (recoverSystem)
399 {
400 std::cout << "Recovering the system\n";
401 startPowerCycle();
402 }
403 crashdumpCompleteMatch.reset();
404 });
405
406 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
407 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
408 if (ec)
409 {
410 // operation_aborted is expected if timer is canceled
411 if (ec != boost::asio::error::operation_aborted)
412 {
413 std::cerr << "Crashdump async_wait failed: " << ec.message()
414 << "\n";
415 }
416 std::cout << "Crashdump timer canceled\n";
417 return;
418 }
419 std::cerr << "Crashdump failed to complete before timeout\n";
420 crashdumpCompleteMatch.reset();
421 });
422
423 conn->async_method_call(
424 [](boost::system::error_code ec) {
425 if (ec)
426 {
427 std::cerr << "failed to start Crashdump\n";
428 crashdumpTimer.cancel();
429 crashdumpCompleteMatch.reset();
430 }
431 },
432 "com.intel.crashdump", "/com/intel/crashdump",
Jason M. Billsb61766b2019-11-26 17:02:44 -0800433 "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
Jason M. Bills1490b142019-07-01 15:48:43 -0700434}
435
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700436static void incrementCPUErrorCount(int cpuNum)
437{
438 std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
439
440 // Get the current count
441 conn->async_method_call(
442 [propertyName](boost::system::error_code ec,
443 const std::variant<uint8_t>& property) {
444 if (ec)
445 {
446 std::cerr << "Failed to read " << propertyName << ": "
447 << ec.message() << "\n";
448 return;
449 }
450 const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
451 if (errorCountVariant == nullptr)
452 {
453 std::cerr << propertyName << " invalid\n";
454 return;
455 }
456 uint8_t errorCount = *errorCountVariant;
457 if (errorCount == std::numeric_limits<uint8_t>::max())
458 {
459 std::cerr << "Maximum error count reached\n";
460 return;
461 }
462 // Increment the count
463 errorCount++;
464 conn->async_method_call(
465 [propertyName](boost::system::error_code ec) {
466 if (ec)
467 {
468 std::cerr << "Failed to set " << propertyName << ": "
469 << ec.message() << "\n";
470 }
471 },
472 "xyz.openbmc_project.Settings",
473 "/xyz/openbmc_project/control/processor_error_config",
474 "org.freedesktop.DBus.Properties", "Set",
475 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
476 std::variant<uint8_t>{errorCount});
477 },
478 "xyz.openbmc_project.Settings",
479 "/xyz/openbmc_project/control/processor_error_config",
480 "org.freedesktop.DBus.Properties", "Get",
481 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
482}
483
Jason M. Billsa3397932019-08-06 11:07:21 -0700484static bool checkIERRCPUs()
485{
486 bool cpuIERRFound = false;
487 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
488 cpu++, addr++)
489 {
490 uint8_t cc = 0;
491 CPUModel model{};
492 uint8_t stepping = 0;
493 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
494 {
495 std::cerr << "Cannot get CPUID!\n";
496 continue;
497 }
498
499 switch (model)
500 {
501 case skx:
502 {
503 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
504 // that caused the IERR
505 uint32_t mcaErrSrcLog = 0;
506 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
507 &cc) != PECI_CC_SUCCESS)
508 {
509 continue;
510 }
511 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
512 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
513 {
514 // TODO: Light the CPU fault LED?
515 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700516 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700517 // Next check if it's a CPU/VR mismatch by reading the
518 // IA32_MC4_STATUS MSR (0x411)
519 uint64_t mc4Status = 0;
520 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
521 PECI_CC_SUCCESS)
522 {
523 continue;
524 }
525 // Check MSEC bits 31:24 for
526 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
527 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
528 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
529 if ((mc4Status & (0x40 << 24)) ||
530 (mc4Status & (0x42 << 24)) ||
531 (mc4Status & (0x43 << 24)))
532 {
533 cpuIERRLog(cpu, "CPU/VR Mismatch");
534 continue;
535 }
536
537 // Next check if it's a Core FIVR fault by looking for a
538 // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
539 // 80h)
540 uint32_t coreFIVRErrLog = 0;
541 if (peci_RdPCIConfigLocal(
542 addr, 1, 30, 2, 0x80, sizeof(uint32_t),
543 (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
544 {
545 continue;
546 }
547 if (coreFIVRErrLog)
548 {
549 cpuIERRLog(cpu, "Core FIVR Fault");
550 continue;
551 }
552
553 // Next check if it's an Uncore FIVR fault by looking for a
554 // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
555 // 84h)
556 uint32_t uncoreFIVRErrLog = 0;
557 if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
558 sizeof(uint32_t),
559 (uint8_t*)&uncoreFIVRErrLog,
560 &cc) != PECI_CC_SUCCESS)
561 {
562 continue;
563 }
564 if (uncoreFIVRErrLog)
565 {
566 cpuIERRLog(cpu, "Uncore FIVR Fault");
567 continue;
568 }
569
570 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
571 // both zero, but MSEC bits 31:24 have either
572 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
573 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
574 // uncore FIVR fault
575 if (!coreFIVRErrLog && !uncoreFIVRErrLog &&
576 ((mc4Status & (0x51 << 24)) ||
577 (mc4Status & (0x52 << 24))))
578 {
579 cpuIERRLog(cpu, "Uncore FIVR Fault");
580 continue;
581 }
582 cpuIERRLog(cpu);
583 }
584 break;
585 }
586 case icx:
587 {
588 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
589 // that caused the IERR
590 uint32_t mcaErrSrcLog = 0;
591 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
592 &cc) != PECI_CC_SUCCESS)
593 {
594 continue;
595 }
596 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
597 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
598 {
599 // TODO: Light the CPU fault LED?
600 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700601 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700602 // Next check if it's a CPU/VR mismatch by reading the
603 // IA32_MC4_STATUS MSR (0x411)
604 uint64_t mc4Status = 0;
605 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
606 PECI_CC_SUCCESS)
607 {
608 continue;
609 }
610 // TODO: Update MSEC/MSCOD_31_24 check
611 // Check MSEC bits 31:24 for
612 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
613 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
614 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
615 if ((mc4Status & (0x40 << 24)) ||
616 (mc4Status & (0x42 << 24)) ||
617 (mc4Status & (0x43 << 24)))
618 {
619 cpuIERRLog(cpu, "CPU/VR Mismatch");
620 continue;
621 }
622
623 // Next check if it's a Core FIVR fault by looking for a
624 // non-zero value of CORE_FIVR_ERR_LOG (B(31) D30 F2 offsets
625 // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
626 uint32_t coreFIVRErrLog0 = 0;
627 uint32_t coreFIVRErrLog1 = 0;
628 if (peci_RdEndPointConfigPciLocal(
629 addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
630 (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
631 {
632 continue;
633 }
634 if (peci_RdEndPointConfigPciLocal(
635 addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
636 (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
637 {
638 continue;
639 }
640 if (coreFIVRErrLog0 || coreFIVRErrLog1)
641 {
642 cpuIERRLog(cpu, "Core FIVR Fault");
643 continue;
644 }
645
646 // Next check if it's an Uncore FIVR fault by looking for a
647 // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
648 // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
649 uint32_t uncoreFIVRErrLog = 0;
650 if (peci_RdEndPointConfigPciLocal(
651 addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
652 (uint8_t*)&uncoreFIVRErrLog,
653 &cc) != PECI_CC_SUCCESS)
654 {
655 continue;
656 }
657 if (uncoreFIVRErrLog)
658 {
659 cpuIERRLog(cpu, "Uncore FIVR Fault");
660 continue;
661 }
662
663 // TODO: Update MSEC/MSCOD_31_24 check
664 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
665 // both zero, but MSEC bits 31:24 have either
666 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
667 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
668 // uncore FIVR fault
669 if (!coreFIVRErrLog0 && !coreFIVRErrLog1 &&
670 !uncoreFIVRErrLog &&
671 ((mc4Status & (0x51 << 24)) ||
672 (mc4Status & (0x52 << 24))))
673 {
674 cpuIERRLog(cpu, "Uncore FIVR Fault");
675 continue;
676 }
677 cpuIERRLog(cpu);
678 }
679 break;
680 }
681 }
682 }
683 return cpuIERRFound;
684}
685
Jason M. Billsa15c2522019-08-16 10:01:44 -0700686static void caterrAssertHandler()
687{
Jason M. Billsa15c2522019-08-16 10:01:44 -0700688 caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
689 caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
690 if (ec)
691 {
692 // operation_aborted is expected if timer is canceled
693 // before completion.
694 if (ec != boost::asio::error::operation_aborted)
695 {
696 std::cerr << "caterr timeout async_wait failed: "
697 << ec.message() << "\n";
698 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700699 return;
700 }
Jason M. Billsa3397932019-08-06 11:07:21 -0700701 std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs)
702 << " ms\n";
703 if (!checkIERRCPUs())
704 {
705 cpuIERRLog();
706 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700707 conn->async_method_call(
708 [](boost::system::error_code ec,
709 const std::variant<bool>& property) {
710 if (ec)
711 {
712 return;
713 }
714 const bool* reset = std::get_if<bool>(&property);
715 if (reset == nullptr)
716 {
717 std::cerr << "Unable to read reset on CATERR value\n";
718 return;
719 }
Jason M. Billsb61766b2019-11-26 17:02:44 -0800720 startCrashdumpAndRecovery(*reset, "IERR");
Jason M. Billsa15c2522019-08-16 10:01:44 -0700721 },
722 "xyz.openbmc_project.Settings",
723 "/xyz/openbmc_project/control/processor_error_config",
724 "org.freedesktop.DBus.Properties", "Get",
725 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
726 });
727}
728
Jason M. Bills1490b142019-07-01 15:48:43 -0700729static void caterrHandler()
730{
731 if (!hostOff)
732 {
733 gpiod::line_event gpioLineEvent = caterrLine.event_read();
734
735 bool caterr =
736 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
737 if (caterr)
738 {
Jason M. Billsa15c2522019-08-16 10:01:44 -0700739 caterrAssertHandler();
Jason M. Bills1490b142019-07-01 15:48:43 -0700740 }
741 else
742 {
743 caterrAssertTimer.cancel();
744 }
745 }
746 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
747 [](const boost::system::error_code ec) {
748 if (ec)
749 {
750 std::cerr << "caterr handler error: "
751 << ec.message() << "\n";
752 return;
753 }
754 caterrHandler();
755 });
756}
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700757
Jason M. Billse94f5e12019-09-13 11:11:34 -0700758static void cpu1ThermtripAssertHandler()
759{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700760 if (cpu1FIVRFaultLine.get_value() == 0)
761 {
762 cpuBootFIVRFaultLog(1);
763 }
764 else
765 {
766 cpuThermTripLog(1);
767 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700768}
769
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700770static void cpu1ThermtripHandler()
771{
Jason M. Bills84951142020-04-17 15:57:11 -0700772 gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700773
Jason M. Bills84951142020-04-17 15:57:11 -0700774 bool cpu1Thermtrip =
775 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
776 if (cpu1Thermtrip)
777 {
778 cpu1ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700779 }
Jason M. Bills84951142020-04-17 15:57:11 -0700780
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700781 cpu1ThermtripEvent.async_wait(
782 boost::asio::posix::stream_descriptor::wait_read,
783 [](const boost::system::error_code ec) {
784 if (ec)
785 {
786 std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
787 << "\n";
788 return;
789 }
790 cpu1ThermtripHandler();
791 });
792}
793
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000794static void cpu1MemtripHandler()
795{
796 if (!hostOff)
797 {
798 gpiod::line_event gpioLineEvent = cpu1MemtripLine.event_read();
799
800 bool cpu1Memtrip =
801 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
802 if (cpu1Memtrip)
803 {
804 memThermTripLog(1);
805 }
806 }
807 cpu1MemtripEvent.async_wait(
808 boost::asio::posix::stream_descriptor::wait_read,
809 [](const boost::system::error_code ec) {
810 if (ec)
811 {
812 std::cerr << "CPU 1 Memory Thermaltrip handler error: "
813 << ec.message() << "\n";
814 return;
815 }
816 cpu1MemtripHandler();
817 });
818}
819
Jason M. Billse94f5e12019-09-13 11:11:34 -0700820static void cpu2ThermtripAssertHandler()
821{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700822 if (cpu2FIVRFaultLine.get_value() == 0)
823 {
824 cpuBootFIVRFaultLog(2);
825 }
826 else
827 {
828 cpuThermTripLog(2);
829 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700830}
831
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700832static void cpu2ThermtripHandler()
833{
Jason M. Bills84951142020-04-17 15:57:11 -0700834 gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700835
Jason M. Bills84951142020-04-17 15:57:11 -0700836 bool cpu2Thermtrip =
837 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
838 if (cpu2Thermtrip)
839 {
840 cpu2ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700841 }
Jason M. Bills84951142020-04-17 15:57:11 -0700842
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700843 cpu2ThermtripEvent.async_wait(
844 boost::asio::posix::stream_descriptor::wait_read,
845 [](const boost::system::error_code ec) {
846 if (ec)
847 {
848 std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
849 << "\n";
850 return;
851 }
852 cpu2ThermtripHandler();
853 });
854}
855
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000856static void cpu2MemtripHandler()
857{
858 if (!hostOff)
859 {
860 gpiod::line_event gpioLineEvent = cpu2MemtripLine.event_read();
861
862 bool cpu2Memtrip =
863 gpioLineEvent.event_type == gpiod::line_event::RISING_EDGE;
864 if (cpu2Memtrip)
865 {
866 memThermTripLog(2);
867 }
868 }
869 cpu2MemtripEvent.async_wait(
870 boost::asio::posix::stream_descriptor::wait_read,
871 [](const boost::system::error_code ec) {
872 if (ec)
873 {
874 std::cerr << "CPU 2 Memory Thermaltrip handler error: "
875 << ec.message() << "\n";
876 return;
877 }
878 cpu2MemtripHandler();
879 });
880}
881
Jason M. Billse94f5e12019-09-13 11:11:34 -0700882static void cpu1VRHotAssertHandler()
883{
884 cpuVRHotLog("CPU 1");
885}
886
Jason M. Bills250fa632019-08-28 15:58:25 -0700887static void cpu1VRHotHandler()
888{
Jason M. Bills84951142020-04-17 15:57:11 -0700889 gpiod::line_event gpioLineEvent = cpu1VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700890
Jason M. Bills84951142020-04-17 15:57:11 -0700891 bool cpu1VRHot =
892 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
893 if (cpu1VRHot)
894 {
895 cpu1VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700896 }
Jason M. Bills84951142020-04-17 15:57:11 -0700897
Jason M. Bills250fa632019-08-28 15:58:25 -0700898 cpu1VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
899 [](const boost::system::error_code ec) {
900 if (ec)
901 {
902 std::cerr << "CPU 1 VRHot handler error: "
903 << ec.message() << "\n";
904 return;
905 }
906 cpu1VRHotHandler();
907 });
908}
909
Jason M. Billse94f5e12019-09-13 11:11:34 -0700910static void cpu1MemABCDVRHotAssertHandler()
911{
912 cpuVRHotLog("CPU 1 Memory ABCD");
913}
914
Jason M. Bills9647ba72019-08-29 14:19:19 -0700915static void cpu1MemABCDVRHotHandler()
916{
Jason M. Bills84951142020-04-17 15:57:11 -0700917 gpiod::line_event gpioLineEvent = cpu1MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700918
Jason M. Bills84951142020-04-17 15:57:11 -0700919 bool cpu1MemABCDVRHot =
920 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
921 if (cpu1MemABCDVRHot)
922 {
923 cpu1MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700924 }
Jason M. Bills84951142020-04-17 15:57:11 -0700925
Jason M. Bills9647ba72019-08-29 14:19:19 -0700926 cpu1MemABCDVRHotEvent.async_wait(
927 boost::asio::posix::stream_descriptor::wait_read,
928 [](const boost::system::error_code ec) {
929 if (ec)
930 {
931 std::cerr << "CPU 1 Memory ABCD VRHot handler error: "
932 << ec.message() << "\n";
933 return;
934 }
935 cpu1MemABCDVRHotHandler();
936 });
937}
938
Jason M. Billse94f5e12019-09-13 11:11:34 -0700939static void cpu1MemEFGHVRHotAssertHandler()
940{
941 cpuVRHotLog("CPU 1 Memory EFGH");
942}
943
Jason M. Bills9647ba72019-08-29 14:19:19 -0700944static void cpu1MemEFGHVRHotHandler()
945{
Jason M. Bills84951142020-04-17 15:57:11 -0700946 gpiod::line_event gpioLineEvent = cpu1MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700947
Jason M. Bills84951142020-04-17 15:57:11 -0700948 bool cpu1MemEFGHVRHot =
949 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
950 if (cpu1MemEFGHVRHot)
951 {
952 cpu1MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700953 }
Jason M. Bills84951142020-04-17 15:57:11 -0700954
Jason M. Bills9647ba72019-08-29 14:19:19 -0700955 cpu1MemEFGHVRHotEvent.async_wait(
956 boost::asio::posix::stream_descriptor::wait_read,
957 [](const boost::system::error_code ec) {
958 if (ec)
959 {
960 std::cerr << "CPU 1 Memory EFGH VRHot handler error: "
961 << ec.message() << "\n";
962 return;
963 }
964 cpu1MemEFGHVRHotHandler();
965 });
966}
967
Jason M. Billse94f5e12019-09-13 11:11:34 -0700968static void cpu2VRHotAssertHandler()
969{
970 cpuVRHotLog("CPU 2");
971}
972
Jason M. Bills250fa632019-08-28 15:58:25 -0700973static void cpu2VRHotHandler()
974{
Jason M. Bills84951142020-04-17 15:57:11 -0700975 gpiod::line_event gpioLineEvent = cpu2VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700976
Jason M. Bills84951142020-04-17 15:57:11 -0700977 bool cpu2VRHot =
978 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
979 if (cpu2VRHot)
980 {
981 cpu2VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700982 }
Jason M. Bills84951142020-04-17 15:57:11 -0700983
Jason M. Bills250fa632019-08-28 15:58:25 -0700984 cpu2VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
985 [](const boost::system::error_code ec) {
986 if (ec)
987 {
988 std::cerr << "CPU 2 VRHot handler error: "
989 << ec.message() << "\n";
990 return;
991 }
992 cpu2VRHotHandler();
993 });
994}
995
Jason M. Billse94f5e12019-09-13 11:11:34 -0700996static void cpu2MemABCDVRHotAssertHandler()
997{
998 cpuVRHotLog("CPU 2 Memory ABCD");
999}
1000
Jason M. Bills9647ba72019-08-29 14:19:19 -07001001static void cpu2MemABCDVRHotHandler()
1002{
Jason M. Bills84951142020-04-17 15:57:11 -07001003 gpiod::line_event gpioLineEvent = cpu2MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001004
Jason M. Bills84951142020-04-17 15:57:11 -07001005 bool cpu2MemABCDVRHot =
1006 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1007 if (cpu2MemABCDVRHot)
1008 {
1009 cpu2MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001010 }
Jason M. Bills84951142020-04-17 15:57:11 -07001011
Jason M. Bills9647ba72019-08-29 14:19:19 -07001012 cpu2MemABCDVRHotEvent.async_wait(
1013 boost::asio::posix::stream_descriptor::wait_read,
1014 [](const boost::system::error_code ec) {
1015 if (ec)
1016 {
1017 std::cerr << "CPU 2 Memory ABCD VRHot handler error: "
1018 << ec.message() << "\n";
1019 return;
1020 }
1021 cpu2MemABCDVRHotHandler();
1022 });
1023}
1024
Jason M. Billse94f5e12019-09-13 11:11:34 -07001025static void cpu2MemEFGHVRHotAssertHandler()
1026{
1027 cpuVRHotLog("CPU 2 Memory EFGH");
1028}
1029
Jason M. Bills9647ba72019-08-29 14:19:19 -07001030static void cpu2MemEFGHVRHotHandler()
1031{
Jason M. Bills84951142020-04-17 15:57:11 -07001032 gpiod::line_event gpioLineEvent = cpu2MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001033
Jason M. Bills84951142020-04-17 15:57:11 -07001034 bool cpu2MemEFGHVRHot =
1035 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1036 if (cpu2MemEFGHVRHot)
1037 {
1038 cpu2MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001039 }
Jason M. Bills84951142020-04-17 15:57:11 -07001040
Jason M. Bills9647ba72019-08-29 14:19:19 -07001041 cpu2MemEFGHVRHotEvent.async_wait(
1042 boost::asio::posix::stream_descriptor::wait_read,
1043 [](const boost::system::error_code ec) {
1044 if (ec)
1045 {
1046 std::cerr << "CPU 2 Memory EFGH VRHot handler error: "
1047 << ec.message() << "\n";
1048 return;
1049 }
1050 cpu2MemEFGHVRHotHandler();
1051 });
1052}
1053
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001054static void pchThermtripHandler()
1055{
Jason M. Bills84951142020-04-17 15:57:11 -07001056 gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001057
Jason M. Bills84951142020-04-17 15:57:11 -07001058 bool pchThermtrip =
1059 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1060 if (pchThermtrip)
1061 {
1062 ssbThermTripLog();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001063 }
Jason M. Bills84951142020-04-17 15:57:11 -07001064
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001065 pchThermtripEvent.async_wait(
1066 boost::asio::posix::stream_descriptor::wait_read,
1067 [](const boost::system::error_code ec) {
1068 if (ec)
1069 {
1070 std::cerr << "PCH Thermal trip handler error: " << ec.message()
1071 << "\n";
1072 return;
1073 }
1074 pchThermtripHandler();
1075 });
1076}
1077
Jason M. Billscbf78532019-08-16 15:32:11 -07001078static std::bitset<MAX_CPUS> checkERRPinCPUs(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001079{
Jason M. Billscbf78532019-08-16 15:32:11 -07001080 int errPinSts = (1 << errPin);
1081 std::bitset<MAX_CPUS> errPinCPUs = 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001082 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
1083 cpu++, addr++)
1084 {
1085 if (peci_Ping(addr) == PECI_CC_SUCCESS)
1086 {
1087 uint8_t cc = 0;
1088 CPUModel model{};
1089 uint8_t stepping = 0;
1090 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
1091 {
1092 std::cerr << "Cannot get CPUID!\n";
1093 continue;
1094 }
1095
1096 switch (model)
1097 {
1098 case skx:
1099 {
1100 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001101 // the ERRx (B(0) D8 F0 offset 210h)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001102 uint32_t errpinsts = 0;
1103 if (peci_RdPCIConfigLocal(
1104 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
1105 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1106 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001107 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001108 }
1109 break;
1110 }
1111 case icx:
1112 {
1113 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001114 // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001115 // accessed on PECI as bus 13)
1116 uint32_t errpinsts = 0;
1117 if (peci_RdEndPointConfigPciLocal(
1118 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
1119 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1120 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001121 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001122 }
1123 break;
1124 }
1125 }
1126 }
1127 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001128 return errPinCPUs;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001129}
1130
Jason M. Billscbf78532019-08-16 15:32:11 -07001131static void errXAssertHandler(const int errPin,
1132 boost::asio::steady_timer& errXAssertTimer)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001133{
Jason M. Billscbf78532019-08-16 15:32:11 -07001134 // ERRx status is not guaranteed through the timeout, so save which
1135 // CPUs have it asserted
1136 std::bitset<MAX_CPUS> errPinCPUs = checkERRPinCPUs(errPin);
1137 errXAssertTimer.expires_after(std::chrono::milliseconds(errTimeoutMs));
1138 errXAssertTimer.async_wait([errPin, errPinCPUs](
1139 const boost::system::error_code ec) {
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001140 if (ec)
1141 {
1142 // operation_aborted is expected if timer is canceled before
1143 // completion.
1144 if (ec != boost::asio::error::operation_aborted)
1145 {
1146 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1147 << "\n";
1148 }
1149 return;
1150 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001151 std::cerr << "ERR" << std::to_string(errPin) << " asserted for "
1152 << std::to_string(errTimeoutMs) << " ms\n";
1153 if (errPinCPUs.count())
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001154 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001155 for (int i = 0; i < errPinCPUs.size(); i++)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001156 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001157 if (errPinCPUs[i])
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001158 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001159 cpuERRXLog(errPin, i);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001160 }
1161 }
1162 }
1163 else
1164 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001165 cpuERRXLog(errPin);
1166 }
1167 });
1168}
1169
Jason M. Bills8c584392019-08-19 11:05:51 -07001170static void err0AssertHandler()
1171{
1172 // Handle the standard ERR0 detection and logging
1173 const static constexpr int err0 = 0;
1174 errXAssertHandler(err0, err0AssertTimer);
1175}
1176
1177static void err0Handler()
1178{
1179 if (!hostOff)
1180 {
1181 gpiod::line_event gpioLineEvent = err0Line.event_read();
1182
1183 bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1184 if (err0)
1185 {
1186 err0AssertHandler();
1187 }
1188 else
1189 {
1190 err0AssertTimer.cancel();
1191 }
1192 }
1193 err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1194 [](const boost::system::error_code ec) {
1195 if (ec)
1196 {
1197 std::cerr
1198 << "err0 handler error: " << ec.message()
1199 << "\n";
1200 return;
1201 }
1202 err0Handler();
1203 });
1204}
1205
Jason M. Bills75af3962019-08-19 11:07:17 -07001206static void err1AssertHandler()
1207{
1208 // Handle the standard ERR1 detection and logging
1209 const static constexpr int err1 = 1;
1210 errXAssertHandler(err1, err1AssertTimer);
1211}
1212
1213static void err1Handler()
1214{
1215 if (!hostOff)
1216 {
1217 gpiod::line_event gpioLineEvent = err1Line.event_read();
1218
1219 bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1220 if (err1)
1221 {
1222 err1AssertHandler();
1223 }
1224 else
1225 {
1226 err1AssertTimer.cancel();
1227 }
1228 }
1229 err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1230 [](const boost::system::error_code ec) {
1231 if (ec)
1232 {
1233 std::cerr
1234 << "err1 handler error: " << ec.message()
1235 << "\n";
1236 return;
1237 }
1238 err1Handler();
1239 });
1240}
1241
Jason M. Billscbf78532019-08-16 15:32:11 -07001242static void err2AssertHandler()
1243{
1244 // Handle the standard ERR2 detection and logging
1245 const static constexpr int err2 = 2;
1246 errXAssertHandler(err2, err2AssertTimer);
1247 // Also handle reset for ERR2
1248 err2AssertTimer.async_wait([](const boost::system::error_code ec) {
1249 if (ec)
1250 {
1251 // operation_aborted is expected if timer is canceled before
1252 // completion.
1253 if (ec != boost::asio::error::operation_aborted)
1254 {
1255 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1256 << "\n";
1257 }
1258 return;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001259 }
1260 conn->async_method_call(
1261 [](boost::system::error_code ec,
1262 const std::variant<bool>& property) {
1263 if (ec)
1264 {
1265 return;
1266 }
1267 const bool* reset = std::get_if<bool>(&property);
1268 if (reset == nullptr)
1269 {
1270 std::cerr << "Unable to read reset on ERR2 value\n";
1271 return;
1272 }
Jason M. Billsb61766b2019-11-26 17:02:44 -08001273 startCrashdumpAndRecovery(*reset, "ERR2 Timeout");
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001274 },
1275 "xyz.openbmc_project.Settings",
1276 "/xyz/openbmc_project/control/processor_error_config",
1277 "org.freedesktop.DBus.Properties", "Get",
1278 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnERR2");
Yong Li061eb032020-02-26 15:06:18 +08001279
1280 beep(beepCPUErr2);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001281 });
1282}
1283
1284static void err2Handler()
1285{
1286 if (!hostOff)
1287 {
1288 gpiod::line_event gpioLineEvent = err2Line.event_read();
1289
1290 bool err2 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1291 if (err2)
1292 {
1293 err2AssertHandler();
1294 }
1295 else
1296 {
1297 err2AssertTimer.cancel();
1298 }
1299 }
1300 err2Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1301 [](const boost::system::error_code ec) {
1302 if (ec)
1303 {
1304 std::cerr
1305 << "err2 handler error: " << ec.message()
1306 << "\n";
1307 return;
1308 }
1309 err2Handler();
1310 });
1311}
1312
Jason M. Bills89922f82019-08-06 11:10:02 -07001313static void smiAssertHandler()
1314{
1315 smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
1316 smiAssertTimer.async_wait([](const boost::system::error_code ec) {
1317 if (ec)
1318 {
1319 // operation_aborted is expected if timer is canceled before
1320 // completion.
1321 if (ec != boost::asio::error::operation_aborted)
1322 {
1323 std::cerr << "smi timeout async_wait failed: " << ec.message()
1324 << "\n";
1325 }
1326 return;
1327 }
1328 std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs)
1329 << " ms\n";
1330 smiTimeoutLog();
1331 conn->async_method_call(
1332 [](boost::system::error_code ec,
1333 const std::variant<bool>& property) {
1334 if (ec)
1335 {
1336 return;
1337 }
1338 const bool* reset = std::get_if<bool>(&property);
1339 if (reset == nullptr)
1340 {
1341 std::cerr << "Unable to read reset on SMI value\n";
1342 return;
1343 }
Jason M. Bills94785442020-01-07 15:22:09 -08001344#ifdef HOST_ERROR_CRASHDUMP_ON_SMI_TIMEOUT
Jason M. Billsb61766b2019-11-26 17:02:44 -08001345 startCrashdumpAndRecovery(*reset, "SMI Timeout");
Jason M. Bills94785442020-01-07 15:22:09 -08001346#else
1347 if (*reset)
1348 {
1349 std::cout << "Recovering the system\n";
1350 startPowerCycle();
1351 }
1352#endif
Jason M. Bills89922f82019-08-06 11:10:02 -07001353 },
1354 "xyz.openbmc_project.Settings",
1355 "/xyz/openbmc_project/control/bmc_reset_disables",
1356 "org.freedesktop.DBus.Properties", "Get",
1357 "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
1358 });
1359}
1360
1361static void smiHandler()
1362{
1363 if (!hostOff)
1364 {
1365 gpiod::line_event gpioLineEvent = smiLine.event_read();
1366
1367 bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1368 if (smi)
1369 {
1370 smiAssertHandler();
1371 }
1372 else
1373 {
1374 smiAssertTimer.cancel();
1375 }
1376 }
1377 smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1378 [](const boost::system::error_code ec) {
1379 if (ec)
1380 {
1381 std::cerr
1382 << "smi handler error: " << ec.message()
1383 << "\n";
1384 return;
1385 }
1386 smiHandler();
1387 });
1388}
1389
Jason M. Billsa15c2522019-08-16 10:01:44 -07001390static void initializeErrorState()
1391{
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001392 // Handle CPU1_MISMATCH if it's asserted now
1393 if (cpu1MismatchLine.get_value() == 1)
1394 {
1395 cpuMismatchLog(1);
1396 }
1397
1398 // Handle CPU2_MISMATCH if it's asserted now
1399 if (cpu2MismatchLine.get_value() == 1)
1400 {
1401 cpuMismatchLog(2);
1402 }
1403
Jason M. Billsa15c2522019-08-16 10:01:44 -07001404 // Handle CPU_CATERR if it's asserted now
1405 if (caterrLine.get_value() == 0)
1406 {
1407 caterrAssertHandler();
1408 }
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001409
Jason M. Bills8c584392019-08-19 11:05:51 -07001410 // Handle CPU_ERR0 if it's asserted now
1411 if (err0Line.get_value() == 0)
1412 {
1413 err0AssertHandler();
1414 }
1415
Jason M. Bills75af3962019-08-19 11:07:17 -07001416 // Handle CPU_ERR1 if it's asserted now
1417 if (err1Line.get_value() == 0)
1418 {
1419 err1AssertHandler();
1420 }
1421
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001422 // Handle CPU_ERR2 if it's asserted now
1423 if (err2Line.get_value() == 0)
1424 {
1425 err2AssertHandler();
1426 }
Jason M. Bills89922f82019-08-06 11:10:02 -07001427
1428 // Handle SMI if it's asserted now
1429 if (smiLine.get_value() == 0)
1430 {
1431 smiAssertHandler();
1432 }
Jason M. Bills08866542019-08-16 12:04:19 -07001433
Jason M. Billse94f5e12019-09-13 11:11:34 -07001434 // Handle CPU1_THERMTRIP if it's asserted now
1435 if (cpu1ThermtripLine.get_value() == 0)
1436 {
1437 cpu1ThermtripAssertHandler();
1438 }
1439
1440 // Handle CPU2_THERMTRIP if it's asserted now
1441 if (cpu2ThermtripLine.get_value() == 0)
1442 {
1443 cpu2ThermtripAssertHandler();
1444 }
1445
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001446 // Handle CPU1_MEM_THERM_EVENT (CPU1 DIMM Thermal trip) if it's asserted now
1447 if (cpu1MemtripLine.get_value() == 0)
1448 {
1449 memThermTripLog(1);
1450 }
1451
1452 // Handle CPU2_MEM_THERM_EVENT (CPU2 DIMM Thermal trip) if it's asserted now
1453 if (cpu2MemtripLine.get_value() == 0)
1454 {
1455 memThermTripLog(2);
1456 }
1457
Jason M. Billse94f5e12019-09-13 11:11:34 -07001458 // Handle CPU1_VRHOT if it's asserted now
1459 if (cpu1VRHotLine.get_value() == 0)
1460 {
1461 cpu1VRHotAssertHandler();
1462 }
1463
1464 // Handle CPU1_MEM_ABCD_VRHOT if it's asserted now
1465 if (cpu1MemABCDVRHotLine.get_value() == 0)
1466 {
1467 cpu1MemABCDVRHotAssertHandler();
1468 }
1469
1470 // Handle CPU1_MEM_EFGH_VRHOT if it's asserted now
1471 if (cpu1MemEFGHVRHotLine.get_value() == 0)
1472 {
1473 cpu1MemEFGHVRHotAssertHandler();
1474 }
1475
1476 // Handle CPU2_VRHOT if it's asserted now
1477 if (cpu2VRHotLine.get_value() == 0)
1478 {
1479 cpu2VRHotAssertHandler();
1480 }
1481
1482 // Handle CPU2_MEM_ABCD_VRHOT if it's asserted now
1483 if (cpu2MemABCDVRHotLine.get_value() == 0)
1484 {
1485 cpu2MemABCDVRHotAssertHandler();
1486 }
1487
1488 // Handle CPU2_MEM_EFGH_VRHOT if it's asserted now
1489 if (cpu2MemEFGHVRHotLine.get_value() == 0)
1490 {
1491 cpu2MemEFGHVRHotAssertHandler();
1492 }
1493
Jason M. Bills08866542019-08-16 12:04:19 -07001494 // Handle PCH_BMC_THERMTRIP if it's asserted now
1495 if (pchThermtripLine.get_value() == 0)
1496 {
1497 ssbThermTripLog();
1498 }
Jason M. Billsa15c2522019-08-16 10:01:44 -07001499}
Jason M. Bills1490b142019-07-01 15:48:43 -07001500} // namespace host_error_monitor
1501
1502int main(int argc, char* argv[])
1503{
1504 // setup connection to dbus
1505 host_error_monitor::conn =
1506 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
1507
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001508 // Host Error Monitor Service
Jason M. Bills1490b142019-07-01 15:48:43 -07001509 host_error_monitor::conn->request_name(
1510 "xyz.openbmc_project.HostErrorMonitor");
1511 sdbusplus::asio::object_server server =
1512 sdbusplus::asio::object_server(host_error_monitor::conn);
1513
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001514 // Restart Cause Interface
1515 host_error_monitor::hostErrorTimeoutIface =
1516 server.add_interface("/xyz/openbmc_project/host_error_monitor",
1517 "xyz.openbmc_project.HostErrorMonitor.Timeout");
1518
1519 host_error_monitor::hostErrorTimeoutIface->register_property(
1520 "IERRTimeoutMs", host_error_monitor::caterrTimeoutMs,
1521 [](const std::size_t& requested, std::size_t& resp) {
1522 if (requested > host_error_monitor::caterrTimeoutMsMax)
1523 {
1524 std::cerr << "IERRTimeoutMs update to " << requested
1525 << "ms rejected. Cannot be greater than "
1526 << host_error_monitor::caterrTimeoutMsMax << "ms.\n";
1527 return 0;
1528 }
1529 std::cerr << "IERRTimeoutMs updated to " << requested << "ms\n";
1530 host_error_monitor::caterrTimeoutMs = requested;
1531 resp = requested;
1532 return 1;
1533 },
1534 [](std::size_t& resp) { return host_error_monitor::caterrTimeoutMs; });
1535 host_error_monitor::hostErrorTimeoutIface->initialize();
1536
Jason M. Bills1490b142019-07-01 15:48:43 -07001537 // Start tracking host state
1538 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
1539 host_error_monitor::startHostStateMonitor();
1540
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001541 // Request CPU1_MISMATCH GPIO events
1542 if (!host_error_monitor::requestGPIOInput(
1543 "CPU1_MISMATCH", host_error_monitor::cpu1MismatchLine))
1544 {
1545 return -1;
1546 }
1547
1548 // Request CPU2_MISMATCH GPIO events
1549 if (!host_error_monitor::requestGPIOInput(
1550 "CPU2_MISMATCH", host_error_monitor::cpu2MismatchLine))
1551 {
1552 return -1;
1553 }
1554
Jason M. Bills1490b142019-07-01 15:48:43 -07001555 // Initialize the host state
1556 host_error_monitor::initializeHostState();
1557
1558 // Request CPU_CATERR GPIO events
1559 if (!host_error_monitor::requestGPIOEvents(
1560 "CPU_CATERR", host_error_monitor::caterrHandler,
1561 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
1562 {
1563 return -1;
1564 }
1565
Jason M. Bills8c584392019-08-19 11:05:51 -07001566 // Request CPU_ERR0 GPIO events
1567 if (!host_error_monitor::requestGPIOEvents(
1568 "CPU_ERR0", host_error_monitor::err0Handler,
1569 host_error_monitor::err0Line, host_error_monitor::err0Event))
1570 {
1571 return -1;
1572 }
1573
Jason M. Bills75af3962019-08-19 11:07:17 -07001574 // Request CPU_ERR1 GPIO events
1575 if (!host_error_monitor::requestGPIOEvents(
1576 "CPU_ERR1", host_error_monitor::err1Handler,
1577 host_error_monitor::err1Line, host_error_monitor::err1Event))
1578 {
1579 return -1;
1580 }
1581
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001582 // Request CPU_ERR2 GPIO events
1583 if (!host_error_monitor::requestGPIOEvents(
1584 "CPU_ERR2", host_error_monitor::err2Handler,
1585 host_error_monitor::err2Line, host_error_monitor::err2Event))
1586 {
1587 return -1;
1588 }
1589
Jason M. Bills89922f82019-08-06 11:10:02 -07001590 // Request SMI GPIO events
1591 if (!host_error_monitor::requestGPIOEvents(
1592 "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
1593 host_error_monitor::smiEvent))
1594 {
1595 return -1;
1596 }
1597
Jason M. Bills45e87e02019-09-09 14:45:38 -07001598 // Request CPU1_FIVR_FAULT GPIO input
1599 if (!host_error_monitor::requestGPIOInput(
1600 "CPU1_FIVR_FAULT", host_error_monitor::cpu1FIVRFaultLine))
1601 {
1602 return -1;
1603 }
1604
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001605 // Request CPU1_THERMTRIP GPIO events
1606 if (!host_error_monitor::requestGPIOEvents(
1607 "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
1608 host_error_monitor::cpu1ThermtripLine,
1609 host_error_monitor::cpu1ThermtripEvent))
1610 {
1611 return -1;
1612 }
1613
Jason M. Bills45e87e02019-09-09 14:45:38 -07001614 // Request CPU2_FIVR_FAULT GPIO input
1615 if (!host_error_monitor::requestGPIOInput(
1616 "CPU2_FIVR_FAULT", host_error_monitor::cpu2FIVRFaultLine))
1617 {
1618 return -1;
1619 }
1620
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001621 // Request CPU2_THERMTRIP GPIO events
1622 if (!host_error_monitor::requestGPIOEvents(
1623 "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
1624 host_error_monitor::cpu2ThermtripLine,
1625 host_error_monitor::cpu2ThermtripEvent))
1626 {
1627 return -1;
1628 }
1629
Jason M. Bills250fa632019-08-28 15:58:25 -07001630 // Request CPU1_VRHOT GPIO events
1631 if (!host_error_monitor::requestGPIOEvents(
1632 "CPU1_VRHOT", host_error_monitor::cpu1VRHotHandler,
1633 host_error_monitor::cpu1VRHotLine,
1634 host_error_monitor::cpu1VRHotEvent))
1635 {
1636 return -1;
1637 }
1638
Jason M. Bills9647ba72019-08-29 14:19:19 -07001639 // Request CPU1_MEM_ABCD_VRHOT GPIO events
1640 if (!host_error_monitor::requestGPIOEvents(
1641 "CPU1_MEM_ABCD_VRHOT", host_error_monitor::cpu1MemABCDVRHotHandler,
1642 host_error_monitor::cpu1MemABCDVRHotLine,
1643 host_error_monitor::cpu1MemABCDVRHotEvent))
1644 {
1645 return -1;
1646 }
1647
1648 // Request CPU1_MEM_EFGH_VRHOT GPIO events
1649 if (!host_error_monitor::requestGPIOEvents(
1650 "CPU1_MEM_EFGH_VRHOT", host_error_monitor::cpu1MemEFGHVRHotHandler,
1651 host_error_monitor::cpu1MemEFGHVRHotLine,
1652 host_error_monitor::cpu1MemEFGHVRHotEvent))
1653 {
1654 return -1;
1655 }
1656
Jason M. Bills250fa632019-08-28 15:58:25 -07001657 // Request CPU2_VRHOT GPIO events
1658 if (!host_error_monitor::requestGPIOEvents(
1659 "CPU2_VRHOT", host_error_monitor::cpu2VRHotHandler,
1660 host_error_monitor::cpu2VRHotLine,
1661 host_error_monitor::cpu2VRHotEvent))
1662 {
1663 return -1;
1664 }
1665
Jason M. Bills9647ba72019-08-29 14:19:19 -07001666 // Request CPU2_MEM_ABCD_VRHOT GPIO events
1667 if (!host_error_monitor::requestGPIOEvents(
1668 "CPU2_MEM_ABCD_VRHOT", host_error_monitor::cpu2MemABCDVRHotHandler,
1669 host_error_monitor::cpu2MemABCDVRHotLine,
1670 host_error_monitor::cpu2MemABCDVRHotEvent))
1671 {
1672 return -1;
1673 }
1674
1675 // Request CPU2_MEM_EFGH_VRHOT GPIO events
1676 if (!host_error_monitor::requestGPIOEvents(
1677 "CPU2_MEM_EFGH_VRHOT", host_error_monitor::cpu2MemEFGHVRHotHandler,
1678 host_error_monitor::cpu2MemEFGHVRHotLine,
1679 host_error_monitor::cpu2MemEFGHVRHotEvent))
1680 {
1681 return -1;
1682 }
1683
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001684 // Request PCH_BMC_THERMTRIP GPIO events
1685 if (!host_error_monitor::requestGPIOEvents(
1686 "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,
1687 host_error_monitor::pchThermtripLine,
1688 host_error_monitor::pchThermtripEvent))
1689 {
1690 return -1;
1691 }
1692
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001693 // Request CPU1_MEM_THERM_EVENT GPIO events
1694 if (!host_error_monitor::requestGPIOEvents(
1695 "CPU1_MEM_THERM_EVENT", host_error_monitor::cpu1MemtripHandler,
1696 host_error_monitor::cpu1MemtripLine,
1697 host_error_monitor::cpu1MemtripEvent))
1698 {
1699 return -1;
1700 }
1701
1702 // Request CPU2_MEM_THERM_EVENT GPIO events
1703 if (!host_error_monitor::requestGPIOEvents(
1704 "CPU2_MEM_THERM_EVENT", host_error_monitor::cpu2MemtripHandler,
1705 host_error_monitor::cpu2MemtripLine,
1706 host_error_monitor::cpu2MemtripEvent))
1707 {
1708 return -1;
1709 }
1710
Jason M. Bills1490b142019-07-01 15:48:43 -07001711 host_error_monitor::io.run();
1712
1713 return 0;
1714}