blob: 88ed84a09deab92c41ed7379d36b9f12e35df168 [file] [log] [blame]
Jason M. Bills1490b142019-07-01 15:48:43 -07001/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
Jason M. Bills6a2cb692019-08-06 11:03:49 -070016#include <peci.h>
Chen Yugange6c0f1c2019-08-02 20:36:42 +080017#include <systemd/sd-journal.h>
18
Jason M. Bills1490b142019-07-01 15:48:43 -070019#include <boost/asio/posix/stream_descriptor.hpp>
20#include <gpiod.hpp>
Jason M. Bills1490b142019-07-01 15:48:43 -070021#include <sdbusplus/asio/object_server.hpp>
Jason M. Bills48e5dff2020-06-10 13:47:47 -070022
23#include <bitset>
24#include <iostream>
Jason M. Billsd1a19f62019-08-06 11:52:58 -070025#include <variant>
Jason M. Bills1490b142019-07-01 15:48:43 -070026
27namespace host_error_monitor
28{
29static boost::asio::io_service io;
30static std::shared_ptr<sdbusplus::asio::connection> conn;
Jason M. Billsc4b91f22019-11-26 17:04:50 -080031static std::shared_ptr<sdbusplus::asio::dbus_interface> hostErrorTimeoutIface;
Jason M. Bills1490b142019-07-01 15:48:43 -070032
33static bool hostOff = true;
34
Jason M. Billsc4b91f22019-11-26 17:04:50 -080035static size_t caterrTimeoutMs = 2000;
36const static constexpr size_t caterrTimeoutMsMax = 600000; // 10 minutes maximum
Jason M. Billscbf78532019-08-16 15:32:11 -070037const static constexpr size_t errTimeoutMs = 90000;
Jason M. Bills89922f82019-08-06 11:10:02 -070038const static constexpr size_t smiTimeoutMs = 90000;
Jason M. Bills1490b142019-07-01 15:48:43 -070039const static constexpr size_t crashdumpTimeoutS = 300;
40
41// Timers
42// Timer for CATERR asserted
43static boost::asio::steady_timer caterrAssertTimer(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070044// Timer for ERR0 asserted
45static boost::asio::steady_timer err0AssertTimer(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070046// Timer for ERR1 asserted
47static boost::asio::steady_timer err1AssertTimer(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070048// Timer for ERR2 asserted
49static boost::asio::steady_timer err2AssertTimer(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070050// Timer for SMI asserted
51static boost::asio::steady_timer smiAssertTimer(io);
Jason M. Bills1490b142019-07-01 15:48:43 -070052
53// GPIO Lines and Event Descriptors
54static gpiod::line caterrLine;
55static boost::asio::posix::stream_descriptor caterrEvent(io);
Jason M. Bills8c584392019-08-19 11:05:51 -070056static gpiod::line err0Line;
57static boost::asio::posix::stream_descriptor err0Event(io);
Jason M. Bills75af3962019-08-19 11:07:17 -070058static gpiod::line err1Line;
59static boost::asio::posix::stream_descriptor err1Event(io);
Jason M. Bills6a2cb692019-08-06 11:03:49 -070060static gpiod::line err2Line;
61static boost::asio::posix::stream_descriptor err2Event(io);
Jason M. Bills89922f82019-08-06 11:10:02 -070062static gpiod::line smiLine;
63static boost::asio::posix::stream_descriptor smiEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070064static gpiod::line cpu1FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070065static gpiod::line cpu1ThermtripLine;
66static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io);
Jason M. Bills45e87e02019-09-09 14:45:38 -070067static gpiod::line cpu2FIVRFaultLine;
Jason M. Bills78c5eed2019-08-28 14:00:40 -070068static gpiod::line cpu2ThermtripLine;
69static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io);
Jason M. Bills250fa632019-08-28 15:58:25 -070070static gpiod::line cpu1VRHotLine;
71static boost::asio::posix::stream_descriptor cpu1VRHotEvent(io);
72static gpiod::line cpu2VRHotLine;
Jason M. Bills9647ba72019-08-29 14:19:19 -070073static boost::asio::posix::stream_descriptor cpu1MemABCDVRHotEvent(io);
74static gpiod::line cpu1MemEFGHVRHotLine;
75static boost::asio::posix::stream_descriptor cpu1MemEFGHVRHotEvent(io);
76static gpiod::line cpu2MemABCDVRHotLine;
Jason M. Bills250fa632019-08-28 15:58:25 -070077static boost::asio::posix::stream_descriptor cpu2VRHotEvent(io);
Jason M. Bills9647ba72019-08-29 14:19:19 -070078static gpiod::line cpu1MemABCDVRHotLine;
79static boost::asio::posix::stream_descriptor cpu2MemABCDVRHotEvent(io);
80static gpiod::line cpu2MemEFGHVRHotLine;
81static boost::asio::posix::stream_descriptor cpu2MemEFGHVRHotEvent(io);
Chen Yugange6c0f1c2019-08-02 20:36:42 +080082//----------------------------------
83// PCH_BMC_THERMTRIP function related definition
84//----------------------------------
Chen Yugange6c0f1c2019-08-02 20:36:42 +080085static gpiod::line pchThermtripLine;
86static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +000087//----------------------------------
88// CPU_MEM_THERM_EVENT function related definition
89//----------------------------------
90static gpiod::line cpu1MemtripLine;
91static boost::asio::posix::stream_descriptor cpu1MemtripEvent(io);
92static gpiod::line cpu2MemtripLine;
93static boost::asio::posix::stream_descriptor cpu2MemtripEvent(io);
jayaprakash Mutyala53099c42020-03-15 00:16:26 +000094//---------------------------------
95// CPU_MISMATCH function related definition
96//---------------------------------
97static gpiod::line cpu1MismatchLine;
98static gpiod::line cpu2MismatchLine;
Jason M. Bills1490b142019-07-01 15:48:43 -070099
// Beep priorities passed to beep() for CPU errors
static constexpr uint8_t beepCPUIERR = 4;
static constexpr uint8_t beepCPUErr2 = 5;
103
104static void beep(const uint8_t& beepPriority)
105{
106 conn->async_method_call(
107 [](boost::system::error_code ec) {
108 if (ec)
109 {
110 std::cerr << "beep returned error with "
111 "async_method_call (ec = "
112 << ec << ")\n";
113 return;
114 }
115 },
116 "xyz.openbmc_project.BeepCode", "/xyz/openbmc_project/BeepCode",
117 "xyz.openbmc_project.BeepCode", "Beep", uint8_t(beepPriority));
118}
119
Jason M. Billsa3397932019-08-06 11:07:21 -0700120static void cpuIERRLog()
121{
122 sd_journal_send("MESSAGE=HostError: IERR", "PRIORITY=%i", LOG_INFO,
123 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
124 "REDFISH_MESSAGE_ARGS=%s", "IERR", NULL);
125}
126
127static void cpuIERRLog(const int cpuNum)
128{
129 std::string msg = "IERR on CPU " + std::to_string(cpuNum + 1);
130
131 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
132 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
133 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
134}
135
136static void cpuIERRLog(const int cpuNum, const std::string& type)
137{
138 std::string msg = type + " IERR on CPU " + std::to_string(cpuNum + 1);
139
140 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
141 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
142 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
143}
144
Jason M. Billscbf78532019-08-16 15:32:11 -0700145static void cpuERRXLog(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700146{
Jason M. Billscbf78532019-08-16 15:32:11 -0700147 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
148
149 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
150 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
151 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700152}
153
Jason M. Billscbf78532019-08-16 15:32:11 -0700154static void cpuERRXLog(const int errPin, const int cpuNum)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700155{
Jason M. Billscbf78532019-08-16 15:32:11 -0700156 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
157 std::to_string(cpuNum + 1);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700158
159 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
160 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
161 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
162}
163
Jason M. Bills89922f82019-08-06 11:10:02 -0700164static void smiTimeoutLog()
165{
166 sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
167 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
168 "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
169}
170
Jason M. Bills45e87e02019-09-09 14:45:38 -0700171static void cpuBootFIVRFaultLog(const int cpuNum)
172{
173 std::string msg = "Boot FIVR Fault on CPU " + std::to_string(cpuNum);
174
175 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
176 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
177 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
178}
179
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700180static void cpuThermTripLog(const int cpuNum)
181{
182 std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip";
183
184 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
185 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
186 "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d",
187 cpuNum, NULL);
188}
189
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000190static void memThermTripLog(const int cpuNum)
191{
192 std::string cpuNumber = "CPU " + std::to_string(cpuNum);
193 std::string msg = cpuNumber + " Memory Thermal trip.";
194
195 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
196 LOG_ERR, "REDFISH_MESSAGE_ID=%s",
197 "OpenBMC.0.1.MemoryThermTrip", "REDFISH_MESSAGE_ARGS=%s",
198 cpuNumber.c_str(), NULL);
199}
200
jayaprakash Mutyala53099c42020-03-15 00:16:26 +0000201static void cpuMismatchLog(const int cpuNum)
202{
203 std::string msg = "CPU " + std::to_string(cpuNum) + " mismatch";
204
205 sd_journal_send("MESSAGE= %s", msg.c_str(), "PRIORITY=%i", LOG_ERR,
206 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUMismatch",
207 "REDFISH_MESSAGE_ARGS=%d", cpuNum, NULL);
208}
209
Jason M. Bills250fa632019-08-28 15:58:25 -0700210static void cpuVRHotLog(const std::string& vr)
211{
212 std::string msg = vr + " Voltage Regulator Overheated.";
213
214 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
215 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
216 "OpenBMC.0.1.VoltageRegulatorOverheated",
217 "REDFISH_MESSAGE_ARGS=%s", vr.c_str(), NULL);
218}
219
Jason M. Bills08866542019-08-16 12:04:19 -0700220static void ssbThermTripLog()
221{
222 sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
223 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
224 "OpenBMC.0.1.SsbThermalTrip", NULL);
225}
226
Jason M. Billsa15c2522019-08-16 10:01:44 -0700227static void initializeErrorState();
Jason M. Bills1490b142019-07-01 15:48:43 -0700228static void initializeHostState()
229{
230 conn->async_method_call(
231 [](boost::system::error_code ec,
232 const std::variant<std::string>& property) {
233 if (ec)
234 {
235 return;
236 }
237 const std::string* state = std::get_if<std::string>(&property);
238 if (state == nullptr)
239 {
240 std::cerr << "Unable to read host state value\n";
241 return;
242 }
243 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
Jason M. Billsa15c2522019-08-16 10:01:44 -0700244 // If the system is on, initialize the error state
245 if (!hostOff)
246 {
247 initializeErrorState();
248 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700249 },
250 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
251 "org.freedesktop.DBus.Properties", "Get",
252 "xyz.openbmc_project.State.Host", "CurrentHostState");
253}
254
255static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
256{
257 return std::make_shared<sdbusplus::bus::match::match>(
258 *conn,
259 "type='signal',interface='org.freedesktop.DBus.Properties',"
260 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
261 "Host'",
262 [](sdbusplus::message::message& msg) {
263 std::string interfaceName;
264 boost::container::flat_map<std::string, std::variant<std::string>>
265 propertiesChanged;
266 std::string state;
267 try
268 {
269 msg.read(interfaceName, propertiesChanged);
270 state =
271 std::get<std::string>(propertiesChanged.begin()->second);
272 }
273 catch (std::exception& e)
274 {
275 std::cerr << "Unable to read host state\n";
276 return;
277 }
278 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
279
Jason M. Bills1490b142019-07-01 15:48:43 -0700280 if (hostOff)
281 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700282 // No host events should fire while off, so cancel any pending
283 // timers
Jason M. Bills1490b142019-07-01 15:48:43 -0700284 caterrAssertTimer.cancel();
Jason M. Bills8c584392019-08-19 11:05:51 -0700285 err0AssertTimer.cancel();
Jason M. Bills75af3962019-08-19 11:07:17 -0700286 err1AssertTimer.cancel();
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700287 err2AssertTimer.cancel();
Jason M. Bills89922f82019-08-06 11:10:02 -0700288 smiAssertTimer.cancel();
Jason M. Bills1490b142019-07-01 15:48:43 -0700289 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700290 else
291 {
292 // Handle any initial errors when the host turns on
293 initializeErrorState();
294 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700295 });
296}
297
298static bool requestGPIOEvents(
299 const std::string& name, const std::function<void()>& handler,
300 gpiod::line& gpioLine,
301 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
302{
303 // Find the GPIO line
304 gpioLine = gpiod::find_line(name);
305 if (!gpioLine)
306 {
307 std::cerr << "Failed to find the " << name << " line\n";
308 return false;
309 }
310
311 try
312 {
313 gpioLine.request(
314 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
315 }
316 catch (std::exception&)
317 {
318 std::cerr << "Failed to request events for " << name << "\n";
319 return false;
320 }
321
322 int gpioLineFd = gpioLine.event_get_fd();
323 if (gpioLineFd < 0)
324 {
325 std::cerr << "Failed to get " << name << " fd\n";
326 return false;
327 }
328
329 gpioEventDescriptor.assign(gpioLineFd);
330
331 gpioEventDescriptor.async_wait(
332 boost::asio::posix::stream_descriptor::wait_read,
333 [&name, handler](const boost::system::error_code ec) {
334 if (ec)
335 {
336 std::cerr << name << " fd handler error: " << ec.message()
337 << "\n";
338 return;
339 }
340 handler();
341 });
342 return true;
343}
344
Jason M. Bills45e87e02019-09-09 14:45:38 -0700345static bool requestGPIOInput(const std::string& name, gpiod::line& gpioLine)
346{
347 // Find the GPIO line
348 gpioLine = gpiod::find_line(name);
349 if (!gpioLine)
350 {
351 std::cerr << "Failed to find the " << name << " line.\n";
352 return false;
353 }
354
355 // Request GPIO input
356 try
357 {
358 gpioLine.request({__FUNCTION__, gpiod::line_request::DIRECTION_INPUT});
359 }
360 catch (std::exception&)
361 {
362 std::cerr << "Failed to request " << name << " input\n";
363 return false;
364 }
365
366 return true;
367}
368
Jason M. Bills1490b142019-07-01 15:48:43 -0700369static void startPowerCycle()
370{
371 conn->async_method_call(
372 [](boost::system::error_code ec) {
373 if (ec)
374 {
375 std::cerr << "failed to set Chassis State\n";
376 }
377 },
378 "xyz.openbmc_project.State.Chassis",
379 "/xyz/openbmc_project/state/chassis0",
380 "org.freedesktop.DBus.Properties", "Set",
381 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
382 std::variant<std::string>{
383 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
384}
385
Jason M. Billsb61766b2019-11-26 17:02:44 -0800386static void startCrashdumpAndRecovery(bool recoverSystem,
387 const std::string& triggerType)
Jason M. Bills1490b142019-07-01 15:48:43 -0700388{
389 std::cout << "Starting crashdump\n";
390 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
391 static boost::asio::steady_timer crashdumpTimer(io);
392
393 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
394 *conn,
395 "type='signal',interface='org.freedesktop.DBus.Properties',"
396 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
397 [recoverSystem](sdbusplus::message::message& msg) {
398 crashdumpTimer.cancel();
399 std::cout << "Crashdump completed\n";
400 if (recoverSystem)
401 {
402 std::cout << "Recovering the system\n";
403 startPowerCycle();
404 }
405 crashdumpCompleteMatch.reset();
406 });
407
408 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
409 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
410 if (ec)
411 {
412 // operation_aborted is expected if timer is canceled
413 if (ec != boost::asio::error::operation_aborted)
414 {
415 std::cerr << "Crashdump async_wait failed: " << ec.message()
416 << "\n";
417 }
418 std::cout << "Crashdump timer canceled\n";
419 return;
420 }
421 std::cerr << "Crashdump failed to complete before timeout\n";
422 crashdumpCompleteMatch.reset();
423 });
424
425 conn->async_method_call(
426 [](boost::system::error_code ec) {
427 if (ec)
428 {
429 std::cerr << "failed to start Crashdump\n";
430 crashdumpTimer.cancel();
431 crashdumpCompleteMatch.reset();
432 }
433 },
434 "com.intel.crashdump", "/com/intel/crashdump",
Jason M. Billsb61766b2019-11-26 17:02:44 -0800435 "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
Jason M. Bills1490b142019-07-01 15:48:43 -0700436}
437
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700438static void incrementCPUErrorCount(int cpuNum)
439{
440 std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
441
442 // Get the current count
443 conn->async_method_call(
444 [propertyName](boost::system::error_code ec,
445 const std::variant<uint8_t>& property) {
446 if (ec)
447 {
448 std::cerr << "Failed to read " << propertyName << ": "
449 << ec.message() << "\n";
450 return;
451 }
452 const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
453 if (errorCountVariant == nullptr)
454 {
455 std::cerr << propertyName << " invalid\n";
456 return;
457 }
458 uint8_t errorCount = *errorCountVariant;
459 if (errorCount == std::numeric_limits<uint8_t>::max())
460 {
461 std::cerr << "Maximum error count reached\n";
462 return;
463 }
464 // Increment the count
465 errorCount++;
466 conn->async_method_call(
467 [propertyName](boost::system::error_code ec) {
468 if (ec)
469 {
470 std::cerr << "Failed to set " << propertyName << ": "
471 << ec.message() << "\n";
472 }
473 },
474 "xyz.openbmc_project.Settings",
475 "/xyz/openbmc_project/control/processor_error_config",
476 "org.freedesktop.DBus.Properties", "Set",
477 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
478 std::variant<uint8_t>{errorCount});
479 },
480 "xyz.openbmc_project.Settings",
481 "/xyz/openbmc_project/control/processor_error_config",
482 "org.freedesktop.DBus.Properties", "Get",
483 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
484}
485
Jason M. Billsa3397932019-08-06 11:07:21 -0700486static bool checkIERRCPUs()
487{
488 bool cpuIERRFound = false;
489 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
490 cpu++, addr++)
491 {
492 uint8_t cc = 0;
493 CPUModel model{};
494 uint8_t stepping = 0;
495 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
496 {
497 std::cerr << "Cannot get CPUID!\n";
498 continue;
499 }
500
501 switch (model)
502 {
503 case skx:
504 {
505 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
506 // that caused the IERR
507 uint32_t mcaErrSrcLog = 0;
508 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
509 &cc) != PECI_CC_SUCCESS)
510 {
511 continue;
512 }
513 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
514 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
515 {
516 // TODO: Light the CPU fault LED?
517 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700518 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700519 // Next check if it's a CPU/VR mismatch by reading the
520 // IA32_MC4_STATUS MSR (0x411)
521 uint64_t mc4Status = 0;
522 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
523 PECI_CC_SUCCESS)
524 {
525 continue;
526 }
527 // Check MSEC bits 31:24 for
528 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
529 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
530 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
531 if ((mc4Status & (0x40 << 24)) ||
532 (mc4Status & (0x42 << 24)) ||
533 (mc4Status & (0x43 << 24)))
534 {
535 cpuIERRLog(cpu, "CPU/VR Mismatch");
536 continue;
537 }
538
539 // Next check if it's a Core FIVR fault by looking for a
540 // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
541 // 80h)
542 uint32_t coreFIVRErrLog = 0;
543 if (peci_RdPCIConfigLocal(
544 addr, 1, 30, 2, 0x80, sizeof(uint32_t),
545 (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
546 {
547 continue;
548 }
549 if (coreFIVRErrLog)
550 {
551 cpuIERRLog(cpu, "Core FIVR Fault");
552 continue;
553 }
554
555 // Next check if it's an Uncore FIVR fault by looking for a
556 // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
557 // 84h)
558 uint32_t uncoreFIVRErrLog = 0;
559 if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
560 sizeof(uint32_t),
561 (uint8_t*)&uncoreFIVRErrLog,
562 &cc) != PECI_CC_SUCCESS)
563 {
564 continue;
565 }
566 if (uncoreFIVRErrLog)
567 {
568 cpuIERRLog(cpu, "Uncore FIVR Fault");
569 continue;
570 }
571
572 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
573 // both zero, but MSEC bits 31:24 have either
574 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
575 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
576 // uncore FIVR fault
577 if (!coreFIVRErrLog && !uncoreFIVRErrLog &&
578 ((mc4Status & (0x51 << 24)) ||
579 (mc4Status & (0x52 << 24))))
580 {
581 cpuIERRLog(cpu, "Uncore FIVR Fault");
582 continue;
583 }
584 cpuIERRLog(cpu);
585 }
586 break;
587 }
588 case icx:
589 {
590 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
591 // that caused the IERR
592 uint32_t mcaErrSrcLog = 0;
593 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
594 &cc) != PECI_CC_SUCCESS)
595 {
596 continue;
597 }
598 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
599 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
600 {
601 // TODO: Light the CPU fault LED?
602 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700603 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700604 // Next check if it's a CPU/VR mismatch by reading the
605 // IA32_MC4_STATUS MSR (0x411)
606 uint64_t mc4Status = 0;
607 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
608 PECI_CC_SUCCESS)
609 {
610 continue;
611 }
612 // TODO: Update MSEC/MSCOD_31_24 check
613 // Check MSEC bits 31:24 for
614 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
615 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
616 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
617 if ((mc4Status & (0x40 << 24)) ||
618 (mc4Status & (0x42 << 24)) ||
619 (mc4Status & (0x43 << 24)))
620 {
621 cpuIERRLog(cpu, "CPU/VR Mismatch");
622 continue;
623 }
624
625 // Next check if it's a Core FIVR fault by looking for a
626 // non-zero value of CORE_FIVR_ERR_LOG (B(31) D30 F2 offsets
627 // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
628 uint32_t coreFIVRErrLog0 = 0;
629 uint32_t coreFIVRErrLog1 = 0;
630 if (peci_RdEndPointConfigPciLocal(
631 addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
632 (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
633 {
634 continue;
635 }
636 if (peci_RdEndPointConfigPciLocal(
637 addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
638 (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
639 {
640 continue;
641 }
642 if (coreFIVRErrLog0 || coreFIVRErrLog1)
643 {
644 cpuIERRLog(cpu, "Core FIVR Fault");
645 continue;
646 }
647
648 // Next check if it's an Uncore FIVR fault by looking for a
649 // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
650 // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
651 uint32_t uncoreFIVRErrLog = 0;
652 if (peci_RdEndPointConfigPciLocal(
653 addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
654 (uint8_t*)&uncoreFIVRErrLog,
655 &cc) != PECI_CC_SUCCESS)
656 {
657 continue;
658 }
659 if (uncoreFIVRErrLog)
660 {
661 cpuIERRLog(cpu, "Uncore FIVR Fault");
662 continue;
663 }
664
665 // TODO: Update MSEC/MSCOD_31_24 check
666 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
667 // both zero, but MSEC bits 31:24 have either
668 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
669 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
670 // uncore FIVR fault
671 if (!coreFIVRErrLog0 && !coreFIVRErrLog1 &&
672 !uncoreFIVRErrLog &&
673 ((mc4Status & (0x51 << 24)) ||
674 (mc4Status & (0x52 << 24))))
675 {
676 cpuIERRLog(cpu, "Uncore FIVR Fault");
677 continue;
678 }
679 cpuIERRLog(cpu);
680 }
681 break;
682 }
683 }
684 }
685 return cpuIERRFound;
686}
687
Jason M. Billsa15c2522019-08-16 10:01:44 -0700688static void caterrAssertHandler()
689{
Jason M. Billsa15c2522019-08-16 10:01:44 -0700690 caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
691 caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
692 if (ec)
693 {
694 // operation_aborted is expected if timer is canceled
695 // before completion.
696 if (ec != boost::asio::error::operation_aborted)
697 {
698 std::cerr << "caterr timeout async_wait failed: "
699 << ec.message() << "\n";
700 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700701 return;
702 }
Jason M. Billsa3397932019-08-06 11:07:21 -0700703 std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs)
704 << " ms\n";
Yong Li8c798c72020-04-22 15:29:07 +0800705 beep(beepCPUIERR);
Jason M. Billsa3397932019-08-06 11:07:21 -0700706 if (!checkIERRCPUs())
707 {
708 cpuIERRLog();
709 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700710 conn->async_method_call(
711 [](boost::system::error_code ec,
712 const std::variant<bool>& property) {
713 if (ec)
714 {
715 return;
716 }
717 const bool* reset = std::get_if<bool>(&property);
718 if (reset == nullptr)
719 {
720 std::cerr << "Unable to read reset on CATERR value\n";
721 return;
722 }
Jason M. Billsb61766b2019-11-26 17:02:44 -0800723 startCrashdumpAndRecovery(*reset, "IERR");
Jason M. Billsa15c2522019-08-16 10:01:44 -0700724 },
725 "xyz.openbmc_project.Settings",
726 "/xyz/openbmc_project/control/processor_error_config",
727 "org.freedesktop.DBus.Properties", "Get",
728 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
729 });
730}
731
Jason M. Bills1490b142019-07-01 15:48:43 -0700732static void caterrHandler()
733{
734 if (!hostOff)
735 {
736 gpiod::line_event gpioLineEvent = caterrLine.event_read();
737
738 bool caterr =
739 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
740 if (caterr)
741 {
Jason M. Billsa15c2522019-08-16 10:01:44 -0700742 caterrAssertHandler();
Jason M. Bills1490b142019-07-01 15:48:43 -0700743 }
744 else
745 {
746 caterrAssertTimer.cancel();
747 }
748 }
749 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
750 [](const boost::system::error_code ec) {
751 if (ec)
752 {
753 std::cerr << "caterr handler error: "
754 << ec.message() << "\n";
755 return;
756 }
757 caterrHandler();
758 });
759}
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700760
Jason M. Billse94f5e12019-09-13 11:11:34 -0700761static void cpu1ThermtripAssertHandler()
762{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700763 if (cpu1FIVRFaultLine.get_value() == 0)
764 {
765 cpuBootFIVRFaultLog(1);
766 }
767 else
768 {
769 cpuThermTripLog(1);
770 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700771}
772
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700773static void cpu1ThermtripHandler()
774{
Jason M. Bills84951142020-04-17 15:57:11 -0700775 gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700776
Jason M. Bills84951142020-04-17 15:57:11 -0700777 bool cpu1Thermtrip =
778 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
779 if (cpu1Thermtrip)
780 {
781 cpu1ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700782 }
Jason M. Bills84951142020-04-17 15:57:11 -0700783
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700784 cpu1ThermtripEvent.async_wait(
785 boost::asio::posix::stream_descriptor::wait_read,
786 [](const boost::system::error_code ec) {
787 if (ec)
788 {
789 std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
790 << "\n";
791 return;
792 }
793 cpu1ThermtripHandler();
794 });
795}
796
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000797static void cpu1MemtripHandler()
798{
799 if (!hostOff)
800 {
801 gpiod::line_event gpioLineEvent = cpu1MemtripLine.event_read();
802
803 bool cpu1Memtrip =
804 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
805 if (cpu1Memtrip)
806 {
807 memThermTripLog(1);
808 }
809 }
810 cpu1MemtripEvent.async_wait(
811 boost::asio::posix::stream_descriptor::wait_read,
812 [](const boost::system::error_code ec) {
813 if (ec)
814 {
815 std::cerr << "CPU 1 Memory Thermaltrip handler error: "
816 << ec.message() << "\n";
817 return;
818 }
819 cpu1MemtripHandler();
820 });
821}
822
Jason M. Billse94f5e12019-09-13 11:11:34 -0700823static void cpu2ThermtripAssertHandler()
824{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700825 if (cpu2FIVRFaultLine.get_value() == 0)
826 {
827 cpuBootFIVRFaultLog(2);
828 }
829 else
830 {
831 cpuThermTripLog(2);
832 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700833}
834
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700835static void cpu2ThermtripHandler()
836{
Jason M. Bills84951142020-04-17 15:57:11 -0700837 gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700838
Jason M. Bills84951142020-04-17 15:57:11 -0700839 bool cpu2Thermtrip =
840 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
841 if (cpu2Thermtrip)
842 {
843 cpu2ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700844 }
Jason M. Bills84951142020-04-17 15:57:11 -0700845
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700846 cpu2ThermtripEvent.async_wait(
847 boost::asio::posix::stream_descriptor::wait_read,
848 [](const boost::system::error_code ec) {
849 if (ec)
850 {
851 std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
852 << "\n";
853 return;
854 }
855 cpu2ThermtripHandler();
856 });
857}
858
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000859static void cpu2MemtripHandler()
860{
861 if (!hostOff)
862 {
863 gpiod::line_event gpioLineEvent = cpu2MemtripLine.event_read();
864
865 bool cpu2Memtrip =
866 gpioLineEvent.event_type == gpiod::line_event::RISING_EDGE;
867 if (cpu2Memtrip)
868 {
869 memThermTripLog(2);
870 }
871 }
872 cpu2MemtripEvent.async_wait(
873 boost::asio::posix::stream_descriptor::wait_read,
874 [](const boost::system::error_code ec) {
875 if (ec)
876 {
877 std::cerr << "CPU 2 Memory Thermaltrip handler error: "
878 << ec.message() << "\n";
879 return;
880 }
881 cpu2MemtripHandler();
882 });
883}
884
Jason M. Billse94f5e12019-09-13 11:11:34 -0700885static void cpu1VRHotAssertHandler()
886{
887 cpuVRHotLog("CPU 1");
888}
889
Jason M. Bills250fa632019-08-28 15:58:25 -0700890static void cpu1VRHotHandler()
891{
Jason M. Bills84951142020-04-17 15:57:11 -0700892 gpiod::line_event gpioLineEvent = cpu1VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700893
Jason M. Bills84951142020-04-17 15:57:11 -0700894 bool cpu1VRHot =
895 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
896 if (cpu1VRHot)
897 {
898 cpu1VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700899 }
Jason M. Bills84951142020-04-17 15:57:11 -0700900
Jason M. Bills250fa632019-08-28 15:58:25 -0700901 cpu1VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
902 [](const boost::system::error_code ec) {
903 if (ec)
904 {
905 std::cerr << "CPU 1 VRHot handler error: "
906 << ec.message() << "\n";
907 return;
908 }
909 cpu1VRHotHandler();
910 });
911}
912
Jason M. Billse94f5e12019-09-13 11:11:34 -0700913static void cpu1MemABCDVRHotAssertHandler()
914{
915 cpuVRHotLog("CPU 1 Memory ABCD");
916}
917
Jason M. Bills9647ba72019-08-29 14:19:19 -0700918static void cpu1MemABCDVRHotHandler()
919{
Jason M. Bills84951142020-04-17 15:57:11 -0700920 gpiod::line_event gpioLineEvent = cpu1MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700921
Jason M. Bills84951142020-04-17 15:57:11 -0700922 bool cpu1MemABCDVRHot =
923 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
924 if (cpu1MemABCDVRHot)
925 {
926 cpu1MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700927 }
Jason M. Bills84951142020-04-17 15:57:11 -0700928
Jason M. Bills9647ba72019-08-29 14:19:19 -0700929 cpu1MemABCDVRHotEvent.async_wait(
930 boost::asio::posix::stream_descriptor::wait_read,
931 [](const boost::system::error_code ec) {
932 if (ec)
933 {
934 std::cerr << "CPU 1 Memory ABCD VRHot handler error: "
935 << ec.message() << "\n";
936 return;
937 }
938 cpu1MemABCDVRHotHandler();
939 });
940}
941
Jason M. Billse94f5e12019-09-13 11:11:34 -0700942static void cpu1MemEFGHVRHotAssertHandler()
943{
944 cpuVRHotLog("CPU 1 Memory EFGH");
945}
946
Jason M. Bills9647ba72019-08-29 14:19:19 -0700947static void cpu1MemEFGHVRHotHandler()
948{
Jason M. Bills84951142020-04-17 15:57:11 -0700949 gpiod::line_event gpioLineEvent = cpu1MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700950
Jason M. Bills84951142020-04-17 15:57:11 -0700951 bool cpu1MemEFGHVRHot =
952 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
953 if (cpu1MemEFGHVRHot)
954 {
955 cpu1MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700956 }
Jason M. Bills84951142020-04-17 15:57:11 -0700957
Jason M. Bills9647ba72019-08-29 14:19:19 -0700958 cpu1MemEFGHVRHotEvent.async_wait(
959 boost::asio::posix::stream_descriptor::wait_read,
960 [](const boost::system::error_code ec) {
961 if (ec)
962 {
963 std::cerr << "CPU 1 Memory EFGH VRHot handler error: "
964 << ec.message() << "\n";
965 return;
966 }
967 cpu1MemEFGHVRHotHandler();
968 });
969}
970
Jason M. Billse94f5e12019-09-13 11:11:34 -0700971static void cpu2VRHotAssertHandler()
972{
973 cpuVRHotLog("CPU 2");
974}
975
Jason M. Bills250fa632019-08-28 15:58:25 -0700976static void cpu2VRHotHandler()
977{
Jason M. Bills84951142020-04-17 15:57:11 -0700978 gpiod::line_event gpioLineEvent = cpu2VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700979
Jason M. Bills84951142020-04-17 15:57:11 -0700980 bool cpu2VRHot =
981 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
982 if (cpu2VRHot)
983 {
984 cpu2VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700985 }
Jason M. Bills84951142020-04-17 15:57:11 -0700986
Jason M. Bills250fa632019-08-28 15:58:25 -0700987 cpu2VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
988 [](const boost::system::error_code ec) {
989 if (ec)
990 {
991 std::cerr << "CPU 2 VRHot handler error: "
992 << ec.message() << "\n";
993 return;
994 }
995 cpu2VRHotHandler();
996 });
997}
998
Jason M. Billse94f5e12019-09-13 11:11:34 -0700999static void cpu2MemABCDVRHotAssertHandler()
1000{
1001 cpuVRHotLog("CPU 2 Memory ABCD");
1002}
1003
Jason M. Bills9647ba72019-08-29 14:19:19 -07001004static void cpu2MemABCDVRHotHandler()
1005{
Jason M. Bills84951142020-04-17 15:57:11 -07001006 gpiod::line_event gpioLineEvent = cpu2MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001007
Jason M. Bills84951142020-04-17 15:57:11 -07001008 bool cpu2MemABCDVRHot =
1009 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1010 if (cpu2MemABCDVRHot)
1011 {
1012 cpu2MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001013 }
Jason M. Bills84951142020-04-17 15:57:11 -07001014
Jason M. Bills9647ba72019-08-29 14:19:19 -07001015 cpu2MemABCDVRHotEvent.async_wait(
1016 boost::asio::posix::stream_descriptor::wait_read,
1017 [](const boost::system::error_code ec) {
1018 if (ec)
1019 {
1020 std::cerr << "CPU 2 Memory ABCD VRHot handler error: "
1021 << ec.message() << "\n";
1022 return;
1023 }
1024 cpu2MemABCDVRHotHandler();
1025 });
1026}
1027
Jason M. Billse94f5e12019-09-13 11:11:34 -07001028static void cpu2MemEFGHVRHotAssertHandler()
1029{
1030 cpuVRHotLog("CPU 2 Memory EFGH");
1031}
1032
Jason M. Bills9647ba72019-08-29 14:19:19 -07001033static void cpu2MemEFGHVRHotHandler()
1034{
Jason M. Bills84951142020-04-17 15:57:11 -07001035 gpiod::line_event gpioLineEvent = cpu2MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001036
Jason M. Bills84951142020-04-17 15:57:11 -07001037 bool cpu2MemEFGHVRHot =
1038 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1039 if (cpu2MemEFGHVRHot)
1040 {
1041 cpu2MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001042 }
Jason M. Bills84951142020-04-17 15:57:11 -07001043
Jason M. Bills9647ba72019-08-29 14:19:19 -07001044 cpu2MemEFGHVRHotEvent.async_wait(
1045 boost::asio::posix::stream_descriptor::wait_read,
1046 [](const boost::system::error_code ec) {
1047 if (ec)
1048 {
1049 std::cerr << "CPU 2 Memory EFGH VRHot handler error: "
1050 << ec.message() << "\n";
1051 return;
1052 }
1053 cpu2MemEFGHVRHotHandler();
1054 });
1055}
1056
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001057static void pchThermtripHandler()
1058{
Jason M. Bills84951142020-04-17 15:57:11 -07001059 gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001060
Jason M. Bills84951142020-04-17 15:57:11 -07001061 bool pchThermtrip =
1062 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1063 if (pchThermtrip)
1064 {
1065 ssbThermTripLog();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001066 }
Jason M. Bills84951142020-04-17 15:57:11 -07001067
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001068 pchThermtripEvent.async_wait(
1069 boost::asio::posix::stream_descriptor::wait_read,
1070 [](const boost::system::error_code ec) {
1071 if (ec)
1072 {
1073 std::cerr << "PCH Thermal trip handler error: " << ec.message()
1074 << "\n";
1075 return;
1076 }
1077 pchThermtripHandler();
1078 });
1079}
1080
Jason M. Billscbf78532019-08-16 15:32:11 -07001081static std::bitset<MAX_CPUS> checkERRPinCPUs(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001082{
Jason M. Billscbf78532019-08-16 15:32:11 -07001083 int errPinSts = (1 << errPin);
1084 std::bitset<MAX_CPUS> errPinCPUs = 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001085 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
1086 cpu++, addr++)
1087 {
1088 if (peci_Ping(addr) == PECI_CC_SUCCESS)
1089 {
1090 uint8_t cc = 0;
1091 CPUModel model{};
1092 uint8_t stepping = 0;
1093 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
1094 {
1095 std::cerr << "Cannot get CPUID!\n";
1096 continue;
1097 }
1098
1099 switch (model)
1100 {
1101 case skx:
1102 {
1103 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001104 // the ERRx (B(0) D8 F0 offset 210h)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001105 uint32_t errpinsts = 0;
1106 if (peci_RdPCIConfigLocal(
1107 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
1108 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1109 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001110 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001111 }
1112 break;
1113 }
1114 case icx:
1115 {
1116 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001117 // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001118 // accessed on PECI as bus 13)
1119 uint32_t errpinsts = 0;
1120 if (peci_RdEndPointConfigPciLocal(
1121 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
1122 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1123 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001124 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001125 }
1126 break;
1127 }
1128 }
1129 }
1130 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001131 return errPinCPUs;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001132}
1133
Jason M. Billscbf78532019-08-16 15:32:11 -07001134static void errXAssertHandler(const int errPin,
1135 boost::asio::steady_timer& errXAssertTimer)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001136{
Jason M. Billscbf78532019-08-16 15:32:11 -07001137 // ERRx status is not guaranteed through the timeout, so save which
1138 // CPUs have it asserted
1139 std::bitset<MAX_CPUS> errPinCPUs = checkERRPinCPUs(errPin);
1140 errXAssertTimer.expires_after(std::chrono::milliseconds(errTimeoutMs));
1141 errXAssertTimer.async_wait([errPin, errPinCPUs](
1142 const boost::system::error_code ec) {
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001143 if (ec)
1144 {
1145 // operation_aborted is expected if timer is canceled before
1146 // completion.
1147 if (ec != boost::asio::error::operation_aborted)
1148 {
1149 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1150 << "\n";
1151 }
1152 return;
1153 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001154 std::cerr << "ERR" << std::to_string(errPin) << " asserted for "
1155 << std::to_string(errTimeoutMs) << " ms\n";
1156 if (errPinCPUs.count())
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001157 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001158 for (int i = 0; i < errPinCPUs.size(); i++)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001159 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001160 if (errPinCPUs[i])
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001161 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001162 cpuERRXLog(errPin, i);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001163 }
1164 }
1165 }
1166 else
1167 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001168 cpuERRXLog(errPin);
1169 }
1170 });
1171}
1172
Jason M. Bills8c584392019-08-19 11:05:51 -07001173static void err0AssertHandler()
1174{
1175 // Handle the standard ERR0 detection and logging
1176 const static constexpr int err0 = 0;
1177 errXAssertHandler(err0, err0AssertTimer);
1178}
1179
1180static void err0Handler()
1181{
1182 if (!hostOff)
1183 {
1184 gpiod::line_event gpioLineEvent = err0Line.event_read();
1185
1186 bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1187 if (err0)
1188 {
1189 err0AssertHandler();
1190 }
1191 else
1192 {
1193 err0AssertTimer.cancel();
1194 }
1195 }
1196 err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1197 [](const boost::system::error_code ec) {
1198 if (ec)
1199 {
1200 std::cerr
1201 << "err0 handler error: " << ec.message()
1202 << "\n";
1203 return;
1204 }
1205 err0Handler();
1206 });
1207}
1208
Jason M. Bills75af3962019-08-19 11:07:17 -07001209static void err1AssertHandler()
1210{
1211 // Handle the standard ERR1 detection and logging
1212 const static constexpr int err1 = 1;
1213 errXAssertHandler(err1, err1AssertTimer);
1214}
1215
1216static void err1Handler()
1217{
1218 if (!hostOff)
1219 {
1220 gpiod::line_event gpioLineEvent = err1Line.event_read();
1221
1222 bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1223 if (err1)
1224 {
1225 err1AssertHandler();
1226 }
1227 else
1228 {
1229 err1AssertTimer.cancel();
1230 }
1231 }
1232 err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1233 [](const boost::system::error_code ec) {
1234 if (ec)
1235 {
1236 std::cerr
1237 << "err1 handler error: " << ec.message()
1238 << "\n";
1239 return;
1240 }
1241 err1Handler();
1242 });
1243}
1244
Jason M. Billscbf78532019-08-16 15:32:11 -07001245static void err2AssertHandler()
1246{
1247 // Handle the standard ERR2 detection and logging
1248 const static constexpr int err2 = 2;
1249 errXAssertHandler(err2, err2AssertTimer);
1250 // Also handle reset for ERR2
1251 err2AssertTimer.async_wait([](const boost::system::error_code ec) {
1252 if (ec)
1253 {
1254 // operation_aborted is expected if timer is canceled before
1255 // completion.
1256 if (ec != boost::asio::error::operation_aborted)
1257 {
1258 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1259 << "\n";
1260 }
1261 return;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001262 }
1263 conn->async_method_call(
1264 [](boost::system::error_code ec,
1265 const std::variant<bool>& property) {
1266 if (ec)
1267 {
1268 return;
1269 }
1270 const bool* reset = std::get_if<bool>(&property);
1271 if (reset == nullptr)
1272 {
1273 std::cerr << "Unable to read reset on ERR2 value\n";
1274 return;
1275 }
Jason M. Billsb61766b2019-11-26 17:02:44 -08001276 startCrashdumpAndRecovery(*reset, "ERR2 Timeout");
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001277 },
1278 "xyz.openbmc_project.Settings",
1279 "/xyz/openbmc_project/control/processor_error_config",
1280 "org.freedesktop.DBus.Properties", "Get",
1281 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnERR2");
Yong Li061eb032020-02-26 15:06:18 +08001282
1283 beep(beepCPUErr2);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001284 });
1285}
1286
1287static void err2Handler()
1288{
1289 if (!hostOff)
1290 {
1291 gpiod::line_event gpioLineEvent = err2Line.event_read();
1292
1293 bool err2 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1294 if (err2)
1295 {
1296 err2AssertHandler();
1297 }
1298 else
1299 {
1300 err2AssertTimer.cancel();
1301 }
1302 }
1303 err2Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1304 [](const boost::system::error_code ec) {
1305 if (ec)
1306 {
1307 std::cerr
1308 << "err2 handler error: " << ec.message()
1309 << "\n";
1310 return;
1311 }
1312 err2Handler();
1313 });
1314}
1315
Jason M. Bills89922f82019-08-06 11:10:02 -07001316static void smiAssertHandler()
1317{
1318 smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
1319 smiAssertTimer.async_wait([](const boost::system::error_code ec) {
1320 if (ec)
1321 {
1322 // operation_aborted is expected if timer is canceled before
1323 // completion.
1324 if (ec != boost::asio::error::operation_aborted)
1325 {
1326 std::cerr << "smi timeout async_wait failed: " << ec.message()
1327 << "\n";
1328 }
1329 return;
1330 }
1331 std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs)
1332 << " ms\n";
1333 smiTimeoutLog();
1334 conn->async_method_call(
1335 [](boost::system::error_code ec,
1336 const std::variant<bool>& property) {
1337 if (ec)
1338 {
1339 return;
1340 }
1341 const bool* reset = std::get_if<bool>(&property);
1342 if (reset == nullptr)
1343 {
1344 std::cerr << "Unable to read reset on SMI value\n";
1345 return;
1346 }
Jason M. Bills94785442020-01-07 15:22:09 -08001347#ifdef HOST_ERROR_CRASHDUMP_ON_SMI_TIMEOUT
Jason M. Billsb61766b2019-11-26 17:02:44 -08001348 startCrashdumpAndRecovery(*reset, "SMI Timeout");
Jason M. Bills94785442020-01-07 15:22:09 -08001349#else
1350 if (*reset)
1351 {
1352 std::cout << "Recovering the system\n";
1353 startPowerCycle();
1354 }
1355#endif
Jason M. Bills89922f82019-08-06 11:10:02 -07001356 },
1357 "xyz.openbmc_project.Settings",
1358 "/xyz/openbmc_project/control/bmc_reset_disables",
1359 "org.freedesktop.DBus.Properties", "Get",
1360 "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
1361 });
1362}
1363
1364static void smiHandler()
1365{
1366 if (!hostOff)
1367 {
1368 gpiod::line_event gpioLineEvent = smiLine.event_read();
1369
1370 bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1371 if (smi)
1372 {
1373 smiAssertHandler();
1374 }
1375 else
1376 {
1377 smiAssertTimer.cancel();
1378 }
1379 }
1380 smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1381 [](const boost::system::error_code ec) {
1382 if (ec)
1383 {
1384 std::cerr
1385 << "smi handler error: " << ec.message()
1386 << "\n";
1387 return;
1388 }
1389 smiHandler();
1390 });
1391}
1392
Jason M. Billsa15c2522019-08-16 10:01:44 -07001393static void initializeErrorState()
1394{
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001395 // Handle CPU1_MISMATCH if it's asserted now
1396 if (cpu1MismatchLine.get_value() == 1)
1397 {
1398 cpuMismatchLog(1);
1399 }
1400
1401 // Handle CPU2_MISMATCH if it's asserted now
1402 if (cpu2MismatchLine.get_value() == 1)
1403 {
1404 cpuMismatchLog(2);
1405 }
1406
Jason M. Billsa15c2522019-08-16 10:01:44 -07001407 // Handle CPU_CATERR if it's asserted now
1408 if (caterrLine.get_value() == 0)
1409 {
1410 caterrAssertHandler();
1411 }
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001412
Jason M. Bills8c584392019-08-19 11:05:51 -07001413 // Handle CPU_ERR0 if it's asserted now
1414 if (err0Line.get_value() == 0)
1415 {
1416 err0AssertHandler();
1417 }
1418
Jason M. Bills75af3962019-08-19 11:07:17 -07001419 // Handle CPU_ERR1 if it's asserted now
1420 if (err1Line.get_value() == 0)
1421 {
1422 err1AssertHandler();
1423 }
1424
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001425 // Handle CPU_ERR2 if it's asserted now
1426 if (err2Line.get_value() == 0)
1427 {
1428 err2AssertHandler();
1429 }
Jason M. Bills89922f82019-08-06 11:10:02 -07001430
1431 // Handle SMI if it's asserted now
1432 if (smiLine.get_value() == 0)
1433 {
1434 smiAssertHandler();
1435 }
Jason M. Bills08866542019-08-16 12:04:19 -07001436
Jason M. Billse94f5e12019-09-13 11:11:34 -07001437 // Handle CPU1_THERMTRIP if it's asserted now
1438 if (cpu1ThermtripLine.get_value() == 0)
1439 {
1440 cpu1ThermtripAssertHandler();
1441 }
1442
1443 // Handle CPU2_THERMTRIP if it's asserted now
1444 if (cpu2ThermtripLine.get_value() == 0)
1445 {
1446 cpu2ThermtripAssertHandler();
1447 }
1448
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001449 // Handle CPU1_MEM_THERM_EVENT (CPU1 DIMM Thermal trip) if it's asserted now
1450 if (cpu1MemtripLine.get_value() == 0)
1451 {
1452 memThermTripLog(1);
1453 }
1454
1455 // Handle CPU2_MEM_THERM_EVENT (CPU2 DIMM Thermal trip) if it's asserted now
1456 if (cpu2MemtripLine.get_value() == 0)
1457 {
1458 memThermTripLog(2);
1459 }
1460
Jason M. Billse94f5e12019-09-13 11:11:34 -07001461 // Handle CPU1_VRHOT if it's asserted now
1462 if (cpu1VRHotLine.get_value() == 0)
1463 {
1464 cpu1VRHotAssertHandler();
1465 }
1466
1467 // Handle CPU1_MEM_ABCD_VRHOT if it's asserted now
1468 if (cpu1MemABCDVRHotLine.get_value() == 0)
1469 {
1470 cpu1MemABCDVRHotAssertHandler();
1471 }
1472
1473 // Handle CPU1_MEM_EFGH_VRHOT if it's asserted now
1474 if (cpu1MemEFGHVRHotLine.get_value() == 0)
1475 {
1476 cpu1MemEFGHVRHotAssertHandler();
1477 }
1478
1479 // Handle CPU2_VRHOT if it's asserted now
1480 if (cpu2VRHotLine.get_value() == 0)
1481 {
1482 cpu2VRHotAssertHandler();
1483 }
1484
1485 // Handle CPU2_MEM_ABCD_VRHOT if it's asserted now
1486 if (cpu2MemABCDVRHotLine.get_value() == 0)
1487 {
1488 cpu2MemABCDVRHotAssertHandler();
1489 }
1490
1491 // Handle CPU2_MEM_EFGH_VRHOT if it's asserted now
1492 if (cpu2MemEFGHVRHotLine.get_value() == 0)
1493 {
1494 cpu2MemEFGHVRHotAssertHandler();
1495 }
1496
Jason M. Bills08866542019-08-16 12:04:19 -07001497 // Handle PCH_BMC_THERMTRIP if it's asserted now
1498 if (pchThermtripLine.get_value() == 0)
1499 {
1500 ssbThermTripLog();
1501 }
Jason M. Billsa15c2522019-08-16 10:01:44 -07001502}
Jason M. Bills1490b142019-07-01 15:48:43 -07001503} // namespace host_error_monitor
1504
1505int main(int argc, char* argv[])
1506{
1507 // setup connection to dbus
1508 host_error_monitor::conn =
1509 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
1510
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001511 // Host Error Monitor Service
Jason M. Bills1490b142019-07-01 15:48:43 -07001512 host_error_monitor::conn->request_name(
1513 "xyz.openbmc_project.HostErrorMonitor");
1514 sdbusplus::asio::object_server server =
1515 sdbusplus::asio::object_server(host_error_monitor::conn);
1516
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001517 // Restart Cause Interface
1518 host_error_monitor::hostErrorTimeoutIface =
1519 server.add_interface("/xyz/openbmc_project/host_error_monitor",
1520 "xyz.openbmc_project.HostErrorMonitor.Timeout");
1521
1522 host_error_monitor::hostErrorTimeoutIface->register_property(
1523 "IERRTimeoutMs", host_error_monitor::caterrTimeoutMs,
1524 [](const std::size_t& requested, std::size_t& resp) {
1525 if (requested > host_error_monitor::caterrTimeoutMsMax)
1526 {
1527 std::cerr << "IERRTimeoutMs update to " << requested
1528 << "ms rejected. Cannot be greater than "
1529 << host_error_monitor::caterrTimeoutMsMax << "ms.\n";
1530 return 0;
1531 }
1532 std::cerr << "IERRTimeoutMs updated to " << requested << "ms\n";
1533 host_error_monitor::caterrTimeoutMs = requested;
1534 resp = requested;
1535 return 1;
1536 },
1537 [](std::size_t& resp) { return host_error_monitor::caterrTimeoutMs; });
1538 host_error_monitor::hostErrorTimeoutIface->initialize();
1539
Jason M. Bills1490b142019-07-01 15:48:43 -07001540 // Start tracking host state
1541 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
1542 host_error_monitor::startHostStateMonitor();
1543
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001544 // Request CPU1_MISMATCH GPIO events
1545 if (!host_error_monitor::requestGPIOInput(
1546 "CPU1_MISMATCH", host_error_monitor::cpu1MismatchLine))
1547 {
1548 return -1;
1549 }
1550
1551 // Request CPU2_MISMATCH GPIO events
1552 if (!host_error_monitor::requestGPIOInput(
1553 "CPU2_MISMATCH", host_error_monitor::cpu2MismatchLine))
1554 {
1555 return -1;
1556 }
1557
Jason M. Bills1490b142019-07-01 15:48:43 -07001558 // Initialize the host state
1559 host_error_monitor::initializeHostState();
1560
1561 // Request CPU_CATERR GPIO events
1562 if (!host_error_monitor::requestGPIOEvents(
1563 "CPU_CATERR", host_error_monitor::caterrHandler,
1564 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
1565 {
1566 return -1;
1567 }
1568
Jason M. Bills8c584392019-08-19 11:05:51 -07001569 // Request CPU_ERR0 GPIO events
1570 if (!host_error_monitor::requestGPIOEvents(
1571 "CPU_ERR0", host_error_monitor::err0Handler,
1572 host_error_monitor::err0Line, host_error_monitor::err0Event))
1573 {
1574 return -1;
1575 }
1576
Jason M. Bills75af3962019-08-19 11:07:17 -07001577 // Request CPU_ERR1 GPIO events
1578 if (!host_error_monitor::requestGPIOEvents(
1579 "CPU_ERR1", host_error_monitor::err1Handler,
1580 host_error_monitor::err1Line, host_error_monitor::err1Event))
1581 {
1582 return -1;
1583 }
1584
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001585 // Request CPU_ERR2 GPIO events
1586 if (!host_error_monitor::requestGPIOEvents(
1587 "CPU_ERR2", host_error_monitor::err2Handler,
1588 host_error_monitor::err2Line, host_error_monitor::err2Event))
1589 {
1590 return -1;
1591 }
1592
Jason M. Bills89922f82019-08-06 11:10:02 -07001593 // Request SMI GPIO events
1594 if (!host_error_monitor::requestGPIOEvents(
1595 "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
1596 host_error_monitor::smiEvent))
1597 {
1598 return -1;
1599 }
1600
Jason M. Bills45e87e02019-09-09 14:45:38 -07001601 // Request CPU1_FIVR_FAULT GPIO input
1602 if (!host_error_monitor::requestGPIOInput(
1603 "CPU1_FIVR_FAULT", host_error_monitor::cpu1FIVRFaultLine))
1604 {
1605 return -1;
1606 }
1607
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001608 // Request CPU1_THERMTRIP GPIO events
1609 if (!host_error_monitor::requestGPIOEvents(
1610 "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
1611 host_error_monitor::cpu1ThermtripLine,
1612 host_error_monitor::cpu1ThermtripEvent))
1613 {
1614 return -1;
1615 }
1616
Jason M. Bills45e87e02019-09-09 14:45:38 -07001617 // Request CPU2_FIVR_FAULT GPIO input
1618 if (!host_error_monitor::requestGPIOInput(
1619 "CPU2_FIVR_FAULT", host_error_monitor::cpu2FIVRFaultLine))
1620 {
1621 return -1;
1622 }
1623
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001624 // Request CPU2_THERMTRIP GPIO events
1625 if (!host_error_monitor::requestGPIOEvents(
1626 "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
1627 host_error_monitor::cpu2ThermtripLine,
1628 host_error_monitor::cpu2ThermtripEvent))
1629 {
1630 return -1;
1631 }
1632
Jason M. Bills250fa632019-08-28 15:58:25 -07001633 // Request CPU1_VRHOT GPIO events
1634 if (!host_error_monitor::requestGPIOEvents(
1635 "CPU1_VRHOT", host_error_monitor::cpu1VRHotHandler,
1636 host_error_monitor::cpu1VRHotLine,
1637 host_error_monitor::cpu1VRHotEvent))
1638 {
1639 return -1;
1640 }
1641
Jason M. Bills9647ba72019-08-29 14:19:19 -07001642 // Request CPU1_MEM_ABCD_VRHOT GPIO events
1643 if (!host_error_monitor::requestGPIOEvents(
1644 "CPU1_MEM_ABCD_VRHOT", host_error_monitor::cpu1MemABCDVRHotHandler,
1645 host_error_monitor::cpu1MemABCDVRHotLine,
1646 host_error_monitor::cpu1MemABCDVRHotEvent))
1647 {
1648 return -1;
1649 }
1650
1651 // Request CPU1_MEM_EFGH_VRHOT GPIO events
1652 if (!host_error_monitor::requestGPIOEvents(
1653 "CPU1_MEM_EFGH_VRHOT", host_error_monitor::cpu1MemEFGHVRHotHandler,
1654 host_error_monitor::cpu1MemEFGHVRHotLine,
1655 host_error_monitor::cpu1MemEFGHVRHotEvent))
1656 {
1657 return -1;
1658 }
1659
Jason M. Bills250fa632019-08-28 15:58:25 -07001660 // Request CPU2_VRHOT GPIO events
1661 if (!host_error_monitor::requestGPIOEvents(
1662 "CPU2_VRHOT", host_error_monitor::cpu2VRHotHandler,
1663 host_error_monitor::cpu2VRHotLine,
1664 host_error_monitor::cpu2VRHotEvent))
1665 {
1666 return -1;
1667 }
1668
Jason M. Bills9647ba72019-08-29 14:19:19 -07001669 // Request CPU2_MEM_ABCD_VRHOT GPIO events
1670 if (!host_error_monitor::requestGPIOEvents(
1671 "CPU2_MEM_ABCD_VRHOT", host_error_monitor::cpu2MemABCDVRHotHandler,
1672 host_error_monitor::cpu2MemABCDVRHotLine,
1673 host_error_monitor::cpu2MemABCDVRHotEvent))
1674 {
1675 return -1;
1676 }
1677
1678 // Request CPU2_MEM_EFGH_VRHOT GPIO events
1679 if (!host_error_monitor::requestGPIOEvents(
1680 "CPU2_MEM_EFGH_VRHOT", host_error_monitor::cpu2MemEFGHVRHotHandler,
1681 host_error_monitor::cpu2MemEFGHVRHotLine,
1682 host_error_monitor::cpu2MemEFGHVRHotEvent))
1683 {
1684 return -1;
1685 }
1686
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001687 // Request PCH_BMC_THERMTRIP GPIO events
1688 if (!host_error_monitor::requestGPIOEvents(
1689 "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,
1690 host_error_monitor::pchThermtripLine,
1691 host_error_monitor::pchThermtripEvent))
1692 {
1693 return -1;
1694 }
1695
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001696 // Request CPU1_MEM_THERM_EVENT GPIO events
1697 if (!host_error_monitor::requestGPIOEvents(
1698 "CPU1_MEM_THERM_EVENT", host_error_monitor::cpu1MemtripHandler,
1699 host_error_monitor::cpu1MemtripLine,
1700 host_error_monitor::cpu1MemtripEvent))
1701 {
1702 return -1;
1703 }
1704
1705 // Request CPU2_MEM_THERM_EVENT GPIO events
1706 if (!host_error_monitor::requestGPIOEvents(
1707 "CPU2_MEM_THERM_EVENT", host_error_monitor::cpu2MemtripHandler,
1708 host_error_monitor::cpu2MemtripLine,
1709 host_error_monitor::cpu2MemtripEvent))
1710 {
1711 return -1;
1712 }
1713
Jason M. Bills1490b142019-07-01 15:48:43 -07001714 host_error_monitor::io.run();
1715
1716 return 0;
1717}