blob: 90476774d7643691b15e45b6c7b81ded5c338c71 [file] [log] [blame]
Jason M. Bills1490b142019-07-01 15:48:43 -07001/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
Jason M. Bills6a2cb692019-08-06 11:03:49 -070016#include <peci.h>
Chen Yugange6c0f1c2019-08-02 20:36:42 +080017#include <systemd/sd-journal.h>
18
Jason M. Bills1490b142019-07-01 15:48:43 -070019#include <boost/asio/posix/stream_descriptor.hpp>
20#include <gpiod.hpp>
Jason M. Bills1490b142019-07-01 15:48:43 -070021#include <sdbusplus/asio/object_server.hpp>
Jason M. Bills48e5dff2020-06-10 13:47:47 -070022
23#include <bitset>
24#include <iostream>
Jason M. Billsd1a19f62019-08-06 11:52:58 -070025#include <variant>
Jason M. Bills1490b142019-07-01 15:48:43 -070026
27namespace host_error_monitor
28{
// Event loop object that every timer and GPIO event descriptor in this file
// is constructed on.
static boost::asio::io_service io;
// D-Bus connection used for the method calls and signal matches in this file.
static std::shared_ptr<sdbusplus::asio::connection> conn;
// NOTE(review): not referenced in this part of the file; presumably exposes
// the configurable CATERR timeout over D-Bus -- confirm where it is created.
static std::shared_ptr<sdbusplus::asio::dbus_interface> hostErrorTimeoutIface;

// One entry of the "Associations" property published by caterrHandler().
using Association = std::tuple<std::string, std::string, std::string>;
static std::shared_ptr<sdbusplus::asio::dbus_interface> associationSSBThermTrip;
static std::shared_ptr<sdbusplus::asio::dbus_interface> associationCATAssert;

// Object path used as the target of the CATERR association entry.
static const constexpr char* rootPath = "/xyz/openbmc_project/CallbackManager";

// Cached host power state. Starts as "off" and is updated from the
// CurrentHostState D-Bus property.
static bool hostOff = true;

// How long CATERR must stay asserted before it is handled (milliseconds).
static size_t caterrTimeoutMs = 2000;
const static constexpr size_t caterrTimeoutMsMax = 600000; // 10 minutes maximum
// NOTE(review): errTimeoutMs and smiTimeoutMs are not used in this part of the
// file; presumably the ERRx and SMI assert timeouts in milliseconds.
const static constexpr size_t errTimeoutMs = 90000;
const static constexpr size_t smiTimeoutMs = 90000;
// How long to wait for a crashdump to complete (seconds).
const static constexpr size_t crashdumpTimeoutS = 300;
46
// Timers
// Timer for CATERR asserted
static boost::asio::steady_timer caterrAssertTimer(io);
// Timer for ERR0 asserted
static boost::asio::steady_timer err0AssertTimer(io);
// Timer for ERR1 asserted
static boost::asio::steady_timer err1AssertTimer(io);
// Timer for ERR2 asserted
static boost::asio::steady_timer err2AssertTimer(io);
// Timer for SMI asserted
static boost::asio::steady_timer smiAssertTimer(io);

// GPIO Lines and Event Descriptors
// Each monitored signal has a gpiod::line plus a stream descriptor that is
// used to wait for edge events on that line. Lines without a descriptor are
// only polled for their current value.
static gpiod::line caterrLine;
static boost::asio::posix::stream_descriptor caterrEvent(io);
static gpiod::line err0Line;
static boost::asio::posix::stream_descriptor err0Event(io);
static gpiod::line err1Line;
static boost::asio::posix::stream_descriptor err1Event(io);
static gpiod::line err2Line;
static boost::asio::posix::stream_descriptor err2Event(io);
static gpiod::line smiLine;
static boost::asio::posix::stream_descriptor smiEvent(io);
// The FIVR fault lines have no event descriptor; they are sampled with
// get_value() when the matching thermtrip line asserts.
static gpiod::line cpu1FIVRFaultLine;
static gpiod::line cpu1ThermtripLine;
static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io);
static gpiod::line cpu2FIVRFaultLine;
static gpiod::line cpu2ThermtripLine;
static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io);
// NOTE(review): the VRHot line/event declarations below are interleaved out
// of their natural pairing. Every line still has exactly one matching event
// descriptor; only the ordering is untidy.
static gpiod::line cpu1VRHotLine;
static boost::asio::posix::stream_descriptor cpu1VRHotEvent(io);
static gpiod::line cpu2VRHotLine;
static boost::asio::posix::stream_descriptor cpu1MemABCDVRHotEvent(io);
static gpiod::line cpu1MemEFGHVRHotLine;
static boost::asio::posix::stream_descriptor cpu1MemEFGHVRHotEvent(io);
static gpiod::line cpu2MemABCDVRHotLine;
static boost::asio::posix::stream_descriptor cpu2VRHotEvent(io);
static gpiod::line cpu1MemABCDVRHotLine;
static boost::asio::posix::stream_descriptor cpu2MemABCDVRHotEvent(io);
static gpiod::line cpu2MemEFGHVRHotLine;
static boost::asio::posix::stream_descriptor cpu2MemEFGHVRHotEvent(io);
//----------------------------------
// PCH_BMC_THERMTRIP function related definition
//----------------------------------
static gpiod::line pchThermtripLine;
static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
//----------------------------------
// CPU_MEM_THERM_EVENT function related definition
//----------------------------------
static gpiod::line cpu1MemtripLine;
static boost::asio::posix::stream_descriptor cpu1MemtripEvent(io);
static gpiod::line cpu2MemtripLine;
static boost::asio::posix::stream_descriptor cpu2MemtripEvent(io);
//---------------------------------
// CPU_MISMATCH function related definition
//---------------------------------
// No event descriptors are declared for the mismatch lines.
static gpiod::line cpu1MismatchLine;
static gpiod::line cpu2MismatchLine;
Jason M. Bills1490b142019-07-01 15:48:43 -0700105
// beep function for CPU error
// Values passed to beep() as the "Beep" method argument of the BeepCode
// service.
const static constexpr uint8_t beepCPUIERR = 4;
const static constexpr uint8_t beepCPUErr2 = 5;
109
110static void beep(const uint8_t& beepPriority)
111{
112 conn->async_method_call(
113 [](boost::system::error_code ec) {
114 if (ec)
115 {
116 std::cerr << "beep returned error with "
117 "async_method_call (ec = "
118 << ec << ")\n";
119 return;
120 }
121 },
122 "xyz.openbmc_project.BeepCode", "/xyz/openbmc_project/BeepCode",
123 "xyz.openbmc_project.BeepCode", "Beep", uint8_t(beepPriority));
124}
125
Jason M. Billsa3397932019-08-06 11:07:21 -0700126static void cpuIERRLog()
127{
128 sd_journal_send("MESSAGE=HostError: IERR", "PRIORITY=%i", LOG_INFO,
129 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
130 "REDFISH_MESSAGE_ARGS=%s", "IERR", NULL);
131}
132
133static void cpuIERRLog(const int cpuNum)
134{
135 std::string msg = "IERR on CPU " + std::to_string(cpuNum + 1);
136
137 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
138 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
139 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
140}
141
142static void cpuIERRLog(const int cpuNum, const std::string& type)
143{
144 std::string msg = type + " IERR on CPU " + std::to_string(cpuNum + 1);
145
146 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
147 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
148 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
149}
150
Jason M. Billscbf78532019-08-16 15:32:11 -0700151static void cpuERRXLog(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700152{
Jason M. Billscbf78532019-08-16 15:32:11 -0700153 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
154
155 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
156 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
157 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700158}
159
Jason M. Billscbf78532019-08-16 15:32:11 -0700160static void cpuERRXLog(const int errPin, const int cpuNum)
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700161{
Jason M. Billscbf78532019-08-16 15:32:11 -0700162 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
163 std::to_string(cpuNum + 1);
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700164
165 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
166 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
167 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
168}
169
Jason M. Bills89922f82019-08-06 11:10:02 -0700170static void smiTimeoutLog()
171{
172 sd_journal_send("MESSAGE=HostError: SMI Timeout", "PRIORITY=%i", LOG_INFO,
173 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
174 "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL);
175}
176
Jason M. Bills45e87e02019-09-09 14:45:38 -0700177static void cpuBootFIVRFaultLog(const int cpuNum)
178{
179 std::string msg = "Boot FIVR Fault on CPU " + std::to_string(cpuNum);
180
181 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
182 LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError",
183 "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
184}
185
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700186static void cpuThermTripLog(const int cpuNum)
187{
188 std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip";
189
190 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
191 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
192 "OpenBMC.0.1.CPUThermalTrip", "REDFISH_MESSAGE_ARGS=%d",
193 cpuNum, NULL);
194}
195
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000196static void memThermTripLog(const int cpuNum)
197{
198 std::string cpuNumber = "CPU " + std::to_string(cpuNum);
199 std::string msg = cpuNumber + " Memory Thermal trip.";
200
201 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
202 LOG_ERR, "REDFISH_MESSAGE_ID=%s",
203 "OpenBMC.0.1.MemoryThermTrip", "REDFISH_MESSAGE_ARGS=%s",
204 cpuNumber.c_str(), NULL);
205}
206
jayaprakash Mutyala53099c42020-03-15 00:16:26 +0000207static void cpuMismatchLog(const int cpuNum)
208{
209 std::string msg = "CPU " + std::to_string(cpuNum) + " mismatch";
210
211 sd_journal_send("MESSAGE= %s", msg.c_str(), "PRIORITY=%i", LOG_ERR,
212 "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUMismatch",
213 "REDFISH_MESSAGE_ARGS=%d", cpuNum, NULL);
214}
215
Jason M. Bills250fa632019-08-28 15:58:25 -0700216static void cpuVRHotLog(const std::string& vr)
217{
218 std::string msg = vr + " Voltage Regulator Overheated.";
219
220 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
221 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
222 "OpenBMC.0.1.VoltageRegulatorOverheated",
223 "REDFISH_MESSAGE_ARGS=%s", vr.c_str(), NULL);
224}
225
Jason M. Bills08866542019-08-16 12:04:19 -0700226static void ssbThermTripLog()
227{
228 sd_journal_send("MESSAGE=HostError: SSB thermal trip", "PRIORITY=%i",
229 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
230 "OpenBMC.0.1.SsbThermalTrip", NULL);
231}
232
Jason M. Billsa15c2522019-08-16 10:01:44 -0700233static void initializeErrorState();
Jason M. Bills1490b142019-07-01 15:48:43 -0700234static void initializeHostState()
235{
236 conn->async_method_call(
237 [](boost::system::error_code ec,
238 const std::variant<std::string>& property) {
239 if (ec)
240 {
241 return;
242 }
243 const std::string* state = std::get_if<std::string>(&property);
244 if (state == nullptr)
245 {
246 std::cerr << "Unable to read host state value\n";
247 return;
248 }
249 hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
Jason M. Billsa15c2522019-08-16 10:01:44 -0700250 // If the system is on, initialize the error state
251 if (!hostOff)
252 {
253 initializeErrorState();
254 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700255 },
256 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
257 "org.freedesktop.DBus.Properties", "Get",
258 "xyz.openbmc_project.State.Host", "CurrentHostState");
259}
260
261static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
262{
263 return std::make_shared<sdbusplus::bus::match::match>(
264 *conn,
265 "type='signal',interface='org.freedesktop.DBus.Properties',"
266 "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
267 "Host'",
268 [](sdbusplus::message::message& msg) {
269 std::string interfaceName;
270 boost::container::flat_map<std::string, std::variant<std::string>>
271 propertiesChanged;
272 std::string state;
273 try
274 {
275 msg.read(interfaceName, propertiesChanged);
276 state =
277 std::get<std::string>(propertiesChanged.begin()->second);
278 }
279 catch (std::exception& e)
280 {
281 std::cerr << "Unable to read host state\n";
282 return;
283 }
284 hostOff = state == "xyz.openbmc_project.State.Host.HostState.Off";
285
Jason M. Bills1490b142019-07-01 15:48:43 -0700286 if (hostOff)
287 {
Jason M. Billse94f5e12019-09-13 11:11:34 -0700288 // No host events should fire while off, so cancel any pending
289 // timers
Jason M. Bills1490b142019-07-01 15:48:43 -0700290 caterrAssertTimer.cancel();
Jason M. Bills8c584392019-08-19 11:05:51 -0700291 err0AssertTimer.cancel();
Jason M. Bills75af3962019-08-19 11:07:17 -0700292 err1AssertTimer.cancel();
Jason M. Bills6a2cb692019-08-06 11:03:49 -0700293 err2AssertTimer.cancel();
Jason M. Bills89922f82019-08-06 11:10:02 -0700294 smiAssertTimer.cancel();
Jason M. Bills1490b142019-07-01 15:48:43 -0700295 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700296 else
297 {
298 // Handle any initial errors when the host turns on
299 initializeErrorState();
300 }
Jason M. Bills1490b142019-07-01 15:48:43 -0700301 });
302}
303
304static bool requestGPIOEvents(
305 const std::string& name, const std::function<void()>& handler,
306 gpiod::line& gpioLine,
307 boost::asio::posix::stream_descriptor& gpioEventDescriptor)
308{
309 // Find the GPIO line
310 gpioLine = gpiod::find_line(name);
311 if (!gpioLine)
312 {
313 std::cerr << "Failed to find the " << name << " line\n";
314 return false;
315 }
316
317 try
318 {
319 gpioLine.request(
320 {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
321 }
322 catch (std::exception&)
323 {
324 std::cerr << "Failed to request events for " << name << "\n";
325 return false;
326 }
327
328 int gpioLineFd = gpioLine.event_get_fd();
329 if (gpioLineFd < 0)
330 {
331 std::cerr << "Failed to get " << name << " fd\n";
332 return false;
333 }
334
335 gpioEventDescriptor.assign(gpioLineFd);
336
337 gpioEventDescriptor.async_wait(
338 boost::asio::posix::stream_descriptor::wait_read,
339 [&name, handler](const boost::system::error_code ec) {
340 if (ec)
341 {
342 std::cerr << name << " fd handler error: " << ec.message()
343 << "\n";
344 return;
345 }
346 handler();
347 });
348 return true;
349}
350
Jason M. Bills45e87e02019-09-09 14:45:38 -0700351static bool requestGPIOInput(const std::string& name, gpiod::line& gpioLine)
352{
353 // Find the GPIO line
354 gpioLine = gpiod::find_line(name);
355 if (!gpioLine)
356 {
357 std::cerr << "Failed to find the " << name << " line.\n";
358 return false;
359 }
360
361 // Request GPIO input
362 try
363 {
364 gpioLine.request({__FUNCTION__, gpiod::line_request::DIRECTION_INPUT});
365 }
366 catch (std::exception&)
367 {
368 std::cerr << "Failed to request " << name << " input\n";
369 return false;
370 }
371
372 return true;
373}
374
Jason M. Bills1490b142019-07-01 15:48:43 -0700375static void startPowerCycle()
376{
377 conn->async_method_call(
378 [](boost::system::error_code ec) {
379 if (ec)
380 {
381 std::cerr << "failed to set Chassis State\n";
382 }
383 },
384 "xyz.openbmc_project.State.Chassis",
385 "/xyz/openbmc_project/state/chassis0",
386 "org.freedesktop.DBus.Properties", "Set",
387 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
388 std::variant<std::string>{
389 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
390}
391
Jason M. Billsb61766b2019-11-26 17:02:44 -0800392static void startCrashdumpAndRecovery(bool recoverSystem,
393 const std::string& triggerType)
Jason M. Bills1490b142019-07-01 15:48:43 -0700394{
395 std::cout << "Starting crashdump\n";
396 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
397 static boost::asio::steady_timer crashdumpTimer(io);
398
399 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
400 *conn,
401 "type='signal',interface='org.freedesktop.DBus.Properties',"
402 "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
403 [recoverSystem](sdbusplus::message::message& msg) {
404 crashdumpTimer.cancel();
405 std::cout << "Crashdump completed\n";
406 if (recoverSystem)
407 {
408 std::cout << "Recovering the system\n";
409 startPowerCycle();
410 }
411 crashdumpCompleteMatch.reset();
412 });
413
414 crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
415 crashdumpTimer.async_wait([](const boost::system::error_code ec) {
416 if (ec)
417 {
418 // operation_aborted is expected if timer is canceled
419 if (ec != boost::asio::error::operation_aborted)
420 {
421 std::cerr << "Crashdump async_wait failed: " << ec.message()
422 << "\n";
423 }
424 std::cout << "Crashdump timer canceled\n";
425 return;
426 }
427 std::cerr << "Crashdump failed to complete before timeout\n";
428 crashdumpCompleteMatch.reset();
429 });
430
431 conn->async_method_call(
432 [](boost::system::error_code ec) {
433 if (ec)
434 {
435 std::cerr << "failed to start Crashdump\n";
436 crashdumpTimer.cancel();
437 crashdumpCompleteMatch.reset();
438 }
439 },
440 "com.intel.crashdump", "/com/intel/crashdump",
Jason M. Billsb61766b2019-11-26 17:02:44 -0800441 "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
Jason M. Bills1490b142019-07-01 15:48:43 -0700442}
443
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700444static void incrementCPUErrorCount(int cpuNum)
445{
446 std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
447
448 // Get the current count
449 conn->async_method_call(
450 [propertyName](boost::system::error_code ec,
451 const std::variant<uint8_t>& property) {
452 if (ec)
453 {
454 std::cerr << "Failed to read " << propertyName << ": "
455 << ec.message() << "\n";
456 return;
457 }
458 const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
459 if (errorCountVariant == nullptr)
460 {
461 std::cerr << propertyName << " invalid\n";
462 return;
463 }
464 uint8_t errorCount = *errorCountVariant;
465 if (errorCount == std::numeric_limits<uint8_t>::max())
466 {
467 std::cerr << "Maximum error count reached\n";
468 return;
469 }
470 // Increment the count
471 errorCount++;
472 conn->async_method_call(
473 [propertyName](boost::system::error_code ec) {
474 if (ec)
475 {
476 std::cerr << "Failed to set " << propertyName << ": "
477 << ec.message() << "\n";
478 }
479 },
480 "xyz.openbmc_project.Settings",
481 "/xyz/openbmc_project/control/processor_error_config",
482 "org.freedesktop.DBus.Properties", "Set",
483 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
484 std::variant<uint8_t>{errorCount});
485 },
486 "xyz.openbmc_project.Settings",
487 "/xyz/openbmc_project/control/processor_error_config",
488 "org.freedesktop.DBus.Properties", "Get",
489 "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
490}
491
Jason M. Billsa3397932019-08-06 11:07:21 -0700492static bool checkIERRCPUs()
493{
494 bool cpuIERRFound = false;
495 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
496 cpu++, addr++)
497 {
498 uint8_t cc = 0;
499 CPUModel model{};
500 uint8_t stepping = 0;
501 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
502 {
503 std::cerr << "Cannot get CPUID!\n";
504 continue;
505 }
506
507 switch (model)
508 {
509 case skx:
510 {
511 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
512 // that caused the IERR
513 uint32_t mcaErrSrcLog = 0;
514 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
515 &cc) != PECI_CC_SUCCESS)
516 {
517 continue;
518 }
519 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
520 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
521 {
522 // TODO: Light the CPU fault LED?
523 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700524 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700525 // Next check if it's a CPU/VR mismatch by reading the
526 // IA32_MC4_STATUS MSR (0x411)
527 uint64_t mc4Status = 0;
528 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
529 PECI_CC_SUCCESS)
530 {
531 continue;
532 }
533 // Check MSEC bits 31:24 for
534 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
535 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
536 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
537 if ((mc4Status & (0x40 << 24)) ||
538 (mc4Status & (0x42 << 24)) ||
539 (mc4Status & (0x43 << 24)))
540 {
541 cpuIERRLog(cpu, "CPU/VR Mismatch");
542 continue;
543 }
544
545 // Next check if it's a Core FIVR fault by looking for a
546 // non-zero value of CORE_FIVR_ERR_LOG (B(1) D30 F2 offset
547 // 80h)
548 uint32_t coreFIVRErrLog = 0;
549 if (peci_RdPCIConfigLocal(
550 addr, 1, 30, 2, 0x80, sizeof(uint32_t),
551 (uint8_t*)&coreFIVRErrLog, &cc) != PECI_CC_SUCCESS)
552 {
553 continue;
554 }
555 if (coreFIVRErrLog)
556 {
557 cpuIERRLog(cpu, "Core FIVR Fault");
558 continue;
559 }
560
561 // Next check if it's an Uncore FIVR fault by looking for a
562 // non-zero value of UNCORE_FIVR_ERR_LOG (B(1) D30 F2 offset
563 // 84h)
564 uint32_t uncoreFIVRErrLog = 0;
565 if (peci_RdPCIConfigLocal(addr, 1, 30, 2, 0x84,
566 sizeof(uint32_t),
567 (uint8_t*)&uncoreFIVRErrLog,
568 &cc) != PECI_CC_SUCCESS)
569 {
570 continue;
571 }
572 if (uncoreFIVRErrLog)
573 {
574 cpuIERRLog(cpu, "Uncore FIVR Fault");
575 continue;
576 }
577
578 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
579 // both zero, but MSEC bits 31:24 have either
580 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
581 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
582 // uncore FIVR fault
583 if (!coreFIVRErrLog && !uncoreFIVRErrLog &&
584 ((mc4Status & (0x51 << 24)) ||
585 (mc4Status & (0x52 << 24))))
586 {
587 cpuIERRLog(cpu, "Uncore FIVR Fault");
588 continue;
589 }
590 cpuIERRLog(cpu);
591 }
592 break;
593 }
594 case icx:
595 {
596 // First check the MCA_ERR_SRC_LOG to see if this is the CPU
597 // that caused the IERR
598 uint32_t mcaErrSrcLog = 0;
599 if (peci_RdPkgConfig(addr, 0, 5, 4, (uint8_t*)&mcaErrSrcLog,
600 &cc) != PECI_CC_SUCCESS)
601 {
602 continue;
603 }
604 // Check MSMI_INTERNAL (20) and IERR_INTERNAL (27)
605 if ((mcaErrSrcLog & (1 << 20)) || (mcaErrSrcLog & (1 << 27)))
606 {
607 // TODO: Light the CPU fault LED?
608 cpuIERRFound = true;
Jason M. Billsd1a19f62019-08-06 11:52:58 -0700609 incrementCPUErrorCount(cpu);
Jason M. Billsa3397932019-08-06 11:07:21 -0700610 // Next check if it's a CPU/VR mismatch by reading the
611 // IA32_MC4_STATUS MSR (0x411)
612 uint64_t mc4Status = 0;
613 if (peci_RdIAMSR(addr, 0, 0x411, &mc4Status, &cc) !=
614 PECI_CC_SUCCESS)
615 {
616 continue;
617 }
618 // TODO: Update MSEC/MSCOD_31_24 check
619 // Check MSEC bits 31:24 for
620 // MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE (0x40),
621 // MCA_SVID_VCCIN_VR_VOUT_FAILURE (0x42), or
622 // MCA_SVID_CPU_VR_CAPABILITY_ERROR (0x43)
623 if ((mc4Status & (0x40 << 24)) ||
624 (mc4Status & (0x42 << 24)) ||
625 (mc4Status & (0x43 << 24)))
626 {
627 cpuIERRLog(cpu, "CPU/VR Mismatch");
628 continue;
629 }
630
631 // Next check if it's a Core FIVR fault by looking for a
632 // non-zero value of CORE_FIVR_ERR_LOG (B(31) D30 F2 offsets
633 // C0h and C4h) (Note: Bus 31 is accessed on PECI as bus 14)
634 uint32_t coreFIVRErrLog0 = 0;
635 uint32_t coreFIVRErrLog1 = 0;
636 if (peci_RdEndPointConfigPciLocal(
637 addr, 0, 14, 30, 2, 0xC0, sizeof(uint32_t),
638 (uint8_t*)&coreFIVRErrLog0, &cc) != PECI_CC_SUCCESS)
639 {
640 continue;
641 }
642 if (peci_RdEndPointConfigPciLocal(
643 addr, 0, 14, 30, 2, 0xC4, sizeof(uint32_t),
644 (uint8_t*)&coreFIVRErrLog1, &cc) != PECI_CC_SUCCESS)
645 {
646 continue;
647 }
648 if (coreFIVRErrLog0 || coreFIVRErrLog1)
649 {
650 cpuIERRLog(cpu, "Core FIVR Fault");
651 continue;
652 }
653
654 // Next check if it's an Uncore FIVR fault by looking for a
655 // non-zero value of UNCORE_FIVR_ERR_LOG (B(31) D30 F2
656 // offset 84h) (Note: Bus 31 is accessed on PECI as bus 14)
657 uint32_t uncoreFIVRErrLog = 0;
658 if (peci_RdEndPointConfigPciLocal(
659 addr, 0, 14, 30, 2, 0x84, sizeof(uint32_t),
660 (uint8_t*)&uncoreFIVRErrLog,
661 &cc) != PECI_CC_SUCCESS)
662 {
663 continue;
664 }
665 if (uncoreFIVRErrLog)
666 {
667 cpuIERRLog(cpu, "Uncore FIVR Fault");
668 continue;
669 }
670
671 // TODO: Update MSEC/MSCOD_31_24 check
672 // Last if CORE_FIVR_ERR_LOG and UNCORE_FIVR_ERR_LOG are
673 // both zero, but MSEC bits 31:24 have either
674 // MCA_FIVR_CATAS_OVERVOL_FAULT (0x51) or
675 // MCA_FIVR_CATAS_OVERCUR_FAULT (0x52), then log it as an
676 // uncore FIVR fault
677 if (!coreFIVRErrLog0 && !coreFIVRErrLog1 &&
678 !uncoreFIVRErrLog &&
679 ((mc4Status & (0x51 << 24)) ||
680 (mc4Status & (0x52 << 24))))
681 {
682 cpuIERRLog(cpu, "Uncore FIVR Fault");
683 continue;
684 }
685 cpuIERRLog(cpu);
686 }
687 break;
688 }
689 }
690 }
691 return cpuIERRFound;
692}
693
Jason M. Billsa15c2522019-08-16 10:01:44 -0700694static void caterrAssertHandler()
695{
Jason M. Billsa15c2522019-08-16 10:01:44 -0700696 caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs));
697 caterrAssertTimer.async_wait([](const boost::system::error_code ec) {
698 if (ec)
699 {
700 // operation_aborted is expected if timer is canceled
701 // before completion.
702 if (ec != boost::asio::error::operation_aborted)
703 {
704 std::cerr << "caterr timeout async_wait failed: "
705 << ec.message() << "\n";
706 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700707 return;
708 }
Jason M. Billsa3397932019-08-06 11:07:21 -0700709 std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs)
710 << " ms\n";
Yong Li8c798c72020-04-22 15:29:07 +0800711 beep(beepCPUIERR);
Jason M. Billsa3397932019-08-06 11:07:21 -0700712 if (!checkIERRCPUs())
713 {
714 cpuIERRLog();
715 }
Jason M. Billsa15c2522019-08-16 10:01:44 -0700716 conn->async_method_call(
717 [](boost::system::error_code ec,
718 const std::variant<bool>& property) {
719 if (ec)
720 {
721 return;
722 }
723 const bool* reset = std::get_if<bool>(&property);
724 if (reset == nullptr)
725 {
726 std::cerr << "Unable to read reset on CATERR value\n";
727 return;
728 }
Jason M. Billsb61766b2019-11-26 17:02:44 -0800729 startCrashdumpAndRecovery(*reset, "IERR");
Jason M. Billsa15c2522019-08-16 10:01:44 -0700730 },
731 "xyz.openbmc_project.Settings",
732 "/xyz/openbmc_project/control/processor_error_config",
733 "org.freedesktop.DBus.Properties", "Get",
734 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR");
735 });
736}
737
Jason M. Bills1490b142019-07-01 15:48:43 -0700738static void caterrHandler()
739{
740 if (!hostOff)
741 {
742 gpiod::line_event gpioLineEvent = caterrLine.event_read();
743
744 bool caterr =
745 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
Yong Li1429ca82020-04-27 16:49:45 +0800746
747 std::vector<Association> associations;
Jason M. Bills1490b142019-07-01 15:48:43 -0700748 if (caterr)
749 {
Jason M. Billsa15c2522019-08-16 10:01:44 -0700750 caterrAssertHandler();
Yong Li1429ca82020-04-27 16:49:45 +0800751 associations.emplace_back(
752 "", "critical",
753 "/xyz/openbmc_project/host_error_monitor/cat_error");
754 associations.emplace_back("", "critical",
755 host_error_monitor::rootPath);
Jason M. Bills1490b142019-07-01 15:48:43 -0700756 }
757 else
758 {
759 caterrAssertTimer.cancel();
Yong Li1429ca82020-04-27 16:49:45 +0800760 associations.emplace_back("", "", "");
Jason M. Bills1490b142019-07-01 15:48:43 -0700761 }
Yong Li1429ca82020-04-27 16:49:45 +0800762 host_error_monitor::associationCATAssert->set_property("Associations",
763 associations);
Jason M. Bills1490b142019-07-01 15:48:43 -0700764 }
765 caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
766 [](const boost::system::error_code ec) {
767 if (ec)
768 {
769 std::cerr << "caterr handler error: "
770 << ec.message() << "\n";
771 return;
772 }
773 caterrHandler();
774 });
775}
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700776
Jason M. Billse94f5e12019-09-13 11:11:34 -0700777static void cpu1ThermtripAssertHandler()
778{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700779 if (cpu1FIVRFaultLine.get_value() == 0)
780 {
781 cpuBootFIVRFaultLog(1);
782 }
783 else
784 {
785 cpuThermTripLog(1);
786 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700787}
788
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700789static void cpu1ThermtripHandler()
790{
Jason M. Bills84951142020-04-17 15:57:11 -0700791 gpiod::line_event gpioLineEvent = cpu1ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700792
Jason M. Bills84951142020-04-17 15:57:11 -0700793 bool cpu1Thermtrip =
794 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
795 if (cpu1Thermtrip)
796 {
797 cpu1ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700798 }
Jason M. Bills84951142020-04-17 15:57:11 -0700799
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700800 cpu1ThermtripEvent.async_wait(
801 boost::asio::posix::stream_descriptor::wait_read,
802 [](const boost::system::error_code ec) {
803 if (ec)
804 {
805 std::cerr << "CPU 1 Thermtrip handler error: " << ec.message()
806 << "\n";
807 return;
808 }
809 cpu1ThermtripHandler();
810 });
811}
812
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000813static void cpu1MemtripHandler()
814{
815 if (!hostOff)
816 {
817 gpiod::line_event gpioLineEvent = cpu1MemtripLine.event_read();
818
819 bool cpu1Memtrip =
820 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
821 if (cpu1Memtrip)
822 {
823 memThermTripLog(1);
824 }
825 }
826 cpu1MemtripEvent.async_wait(
827 boost::asio::posix::stream_descriptor::wait_read,
828 [](const boost::system::error_code ec) {
829 if (ec)
830 {
831 std::cerr << "CPU 1 Memory Thermaltrip handler error: "
832 << ec.message() << "\n";
833 return;
834 }
835 cpu1MemtripHandler();
836 });
837}
838
Jason M. Billse94f5e12019-09-13 11:11:34 -0700839static void cpu2ThermtripAssertHandler()
840{
Jason M. Bills45e87e02019-09-09 14:45:38 -0700841 if (cpu2FIVRFaultLine.get_value() == 0)
842 {
843 cpuBootFIVRFaultLog(2);
844 }
845 else
846 {
847 cpuThermTripLog(2);
848 }
Jason M. Billse94f5e12019-09-13 11:11:34 -0700849}
850
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700851static void cpu2ThermtripHandler()
852{
Jason M. Bills84951142020-04-17 15:57:11 -0700853 gpiod::line_event gpioLineEvent = cpu2ThermtripLine.event_read();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700854
Jason M. Bills84951142020-04-17 15:57:11 -0700855 bool cpu2Thermtrip =
856 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
857 if (cpu2Thermtrip)
858 {
859 cpu2ThermtripAssertHandler();
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700860 }
Jason M. Bills84951142020-04-17 15:57:11 -0700861
Jason M. Bills78c5eed2019-08-28 14:00:40 -0700862 cpu2ThermtripEvent.async_wait(
863 boost::asio::posix::stream_descriptor::wait_read,
864 [](const boost::system::error_code ec) {
865 if (ec)
866 {
867 std::cerr << "CPU 2 Thermtrip handler error: " << ec.message()
868 << "\n";
869 return;
870 }
871 cpu2ThermtripHandler();
872 });
873}
874
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +0000875static void cpu2MemtripHandler()
876{
877 if (!hostOff)
878 {
879 gpiod::line_event gpioLineEvent = cpu2MemtripLine.event_read();
880
881 bool cpu2Memtrip =
882 gpioLineEvent.event_type == gpiod::line_event::RISING_EDGE;
883 if (cpu2Memtrip)
884 {
885 memThermTripLog(2);
886 }
887 }
888 cpu2MemtripEvent.async_wait(
889 boost::asio::posix::stream_descriptor::wait_read,
890 [](const boost::system::error_code ec) {
891 if (ec)
892 {
893 std::cerr << "CPU 2 Memory Thermaltrip handler error: "
894 << ec.message() << "\n";
895 return;
896 }
897 cpu2MemtripHandler();
898 });
899}
900
Jason M. Billse94f5e12019-09-13 11:11:34 -0700901static void cpu1VRHotAssertHandler()
902{
903 cpuVRHotLog("CPU 1");
904}
905
Jason M. Bills250fa632019-08-28 15:58:25 -0700906static void cpu1VRHotHandler()
907{
Jason M. Bills84951142020-04-17 15:57:11 -0700908 gpiod::line_event gpioLineEvent = cpu1VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700909
Jason M. Bills84951142020-04-17 15:57:11 -0700910 bool cpu1VRHot =
911 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
912 if (cpu1VRHot)
913 {
914 cpu1VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -0700915 }
Jason M. Bills84951142020-04-17 15:57:11 -0700916
Jason M. Bills250fa632019-08-28 15:58:25 -0700917 cpu1VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
918 [](const boost::system::error_code ec) {
919 if (ec)
920 {
921 std::cerr << "CPU 1 VRHot handler error: "
922 << ec.message() << "\n";
923 return;
924 }
925 cpu1VRHotHandler();
926 });
927}
928
Jason M. Billse94f5e12019-09-13 11:11:34 -0700929static void cpu1MemABCDVRHotAssertHandler()
930{
931 cpuVRHotLog("CPU 1 Memory ABCD");
932}
933
Jason M. Bills9647ba72019-08-29 14:19:19 -0700934static void cpu1MemABCDVRHotHandler()
935{
Jason M. Bills84951142020-04-17 15:57:11 -0700936 gpiod::line_event gpioLineEvent = cpu1MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700937
Jason M. Bills84951142020-04-17 15:57:11 -0700938 bool cpu1MemABCDVRHot =
939 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
940 if (cpu1MemABCDVRHot)
941 {
942 cpu1MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700943 }
Jason M. Bills84951142020-04-17 15:57:11 -0700944
Jason M. Bills9647ba72019-08-29 14:19:19 -0700945 cpu1MemABCDVRHotEvent.async_wait(
946 boost::asio::posix::stream_descriptor::wait_read,
947 [](const boost::system::error_code ec) {
948 if (ec)
949 {
950 std::cerr << "CPU 1 Memory ABCD VRHot handler error: "
951 << ec.message() << "\n";
952 return;
953 }
954 cpu1MemABCDVRHotHandler();
955 });
956}
957
Jason M. Billse94f5e12019-09-13 11:11:34 -0700958static void cpu1MemEFGHVRHotAssertHandler()
959{
960 cpuVRHotLog("CPU 1 Memory EFGH");
961}
962
Jason M. Bills9647ba72019-08-29 14:19:19 -0700963static void cpu1MemEFGHVRHotHandler()
964{
Jason M. Bills84951142020-04-17 15:57:11 -0700965 gpiod::line_event gpioLineEvent = cpu1MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700966
Jason M. Bills84951142020-04-17 15:57:11 -0700967 bool cpu1MemEFGHVRHot =
968 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
969 if (cpu1MemEFGHVRHot)
970 {
971 cpu1MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -0700972 }
Jason M. Bills84951142020-04-17 15:57:11 -0700973
Jason M. Bills9647ba72019-08-29 14:19:19 -0700974 cpu1MemEFGHVRHotEvent.async_wait(
975 boost::asio::posix::stream_descriptor::wait_read,
976 [](const boost::system::error_code ec) {
977 if (ec)
978 {
979 std::cerr << "CPU 1 Memory EFGH VRHot handler error: "
980 << ec.message() << "\n";
981 return;
982 }
983 cpu1MemEFGHVRHotHandler();
984 });
985}
986
Jason M. Billse94f5e12019-09-13 11:11:34 -0700987static void cpu2VRHotAssertHandler()
988{
989 cpuVRHotLog("CPU 2");
990}
991
Jason M. Bills250fa632019-08-28 15:58:25 -0700992static void cpu2VRHotHandler()
993{
Jason M. Bills84951142020-04-17 15:57:11 -0700994 gpiod::line_event gpioLineEvent = cpu2VRHotLine.event_read();
Jason M. Bills250fa632019-08-28 15:58:25 -0700995
Jason M. Bills84951142020-04-17 15:57:11 -0700996 bool cpu2VRHot =
997 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
998 if (cpu2VRHot)
999 {
1000 cpu2VRHotAssertHandler();
Jason M. Bills250fa632019-08-28 15:58:25 -07001001 }
Jason M. Bills84951142020-04-17 15:57:11 -07001002
Jason M. Bills250fa632019-08-28 15:58:25 -07001003 cpu2VRHotEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1004 [](const boost::system::error_code ec) {
1005 if (ec)
1006 {
1007 std::cerr << "CPU 2 VRHot handler error: "
1008 << ec.message() << "\n";
1009 return;
1010 }
1011 cpu2VRHotHandler();
1012 });
1013}
1014
Jason M. Billse94f5e12019-09-13 11:11:34 -07001015static void cpu2MemABCDVRHotAssertHandler()
1016{
1017 cpuVRHotLog("CPU 2 Memory ABCD");
1018}
1019
Jason M. Bills9647ba72019-08-29 14:19:19 -07001020static void cpu2MemABCDVRHotHandler()
1021{
Jason M. Bills84951142020-04-17 15:57:11 -07001022 gpiod::line_event gpioLineEvent = cpu2MemABCDVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001023
Jason M. Bills84951142020-04-17 15:57:11 -07001024 bool cpu2MemABCDVRHot =
1025 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1026 if (cpu2MemABCDVRHot)
1027 {
1028 cpu2MemABCDVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001029 }
Jason M. Bills84951142020-04-17 15:57:11 -07001030
Jason M. Bills9647ba72019-08-29 14:19:19 -07001031 cpu2MemABCDVRHotEvent.async_wait(
1032 boost::asio::posix::stream_descriptor::wait_read,
1033 [](const boost::system::error_code ec) {
1034 if (ec)
1035 {
1036 std::cerr << "CPU 2 Memory ABCD VRHot handler error: "
1037 << ec.message() << "\n";
1038 return;
1039 }
1040 cpu2MemABCDVRHotHandler();
1041 });
1042}
1043
Jason M. Billse94f5e12019-09-13 11:11:34 -07001044static void cpu2MemEFGHVRHotAssertHandler()
1045{
1046 cpuVRHotLog("CPU 2 Memory EFGH");
1047}
1048
Jason M. Bills9647ba72019-08-29 14:19:19 -07001049static void cpu2MemEFGHVRHotHandler()
1050{
Jason M. Bills84951142020-04-17 15:57:11 -07001051 gpiod::line_event gpioLineEvent = cpu2MemEFGHVRHotLine.event_read();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001052
Jason M. Bills84951142020-04-17 15:57:11 -07001053 bool cpu2MemEFGHVRHot =
1054 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1055 if (cpu2MemEFGHVRHot)
1056 {
1057 cpu2MemEFGHVRHotAssertHandler();
Jason M. Bills9647ba72019-08-29 14:19:19 -07001058 }
Jason M. Bills84951142020-04-17 15:57:11 -07001059
Jason M. Bills9647ba72019-08-29 14:19:19 -07001060 cpu2MemEFGHVRHotEvent.async_wait(
1061 boost::asio::posix::stream_descriptor::wait_read,
1062 [](const boost::system::error_code ec) {
1063 if (ec)
1064 {
1065 std::cerr << "CPU 2 Memory EFGH VRHot handler error: "
1066 << ec.message() << "\n";
1067 return;
1068 }
1069 cpu2MemEFGHVRHotHandler();
1070 });
1071}
1072
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001073static void pchThermtripHandler()
1074{
Yong Li1429ca82020-04-27 16:49:45 +08001075 std::vector<Association> associations;
1076
Jason M. Bills84951142020-04-17 15:57:11 -07001077 gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001078
Jason M. Bills84951142020-04-17 15:57:11 -07001079 bool pchThermtrip =
1080 gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1081 if (pchThermtrip)
1082 {
1083 ssbThermTripLog();
Yong Li1429ca82020-04-27 16:49:45 +08001084 associations.emplace_back(
1085 "", "critical",
1086 "/xyz/openbmc_project/host_error_monitor/ssb_thermal_trip");
1087 associations.emplace_back("", "critical", host_error_monitor::rootPath);
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001088 }
Yong Li1429ca82020-04-27 16:49:45 +08001089 else
1090 {
1091 associations.emplace_back("", "", "");
1092 }
1093 host_error_monitor::associationSSBThermTrip->set_property("Associations",
1094 associations);
Jason M. Bills84951142020-04-17 15:57:11 -07001095
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001096 pchThermtripEvent.async_wait(
1097 boost::asio::posix::stream_descriptor::wait_read,
1098 [](const boost::system::error_code ec) {
1099 if (ec)
1100 {
1101 std::cerr << "PCH Thermal trip handler error: " << ec.message()
1102 << "\n";
1103 return;
1104 }
1105 pchThermtripHandler();
1106 });
1107}
1108
Jason M. Billscbf78532019-08-16 15:32:11 -07001109static std::bitset<MAX_CPUS> checkERRPinCPUs(const int errPin)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001110{
Jason M. Billscbf78532019-08-16 15:32:11 -07001111 int errPinSts = (1 << errPin);
1112 std::bitset<MAX_CPUS> errPinCPUs = 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001113 for (int cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
1114 cpu++, addr++)
1115 {
1116 if (peci_Ping(addr) == PECI_CC_SUCCESS)
1117 {
1118 uint8_t cc = 0;
1119 CPUModel model{};
1120 uint8_t stepping = 0;
1121 if (peci_GetCPUID(addr, &model, &stepping, &cc) != PECI_CC_SUCCESS)
1122 {
1123 std::cerr << "Cannot get CPUID!\n";
1124 continue;
1125 }
1126
1127 switch (model)
1128 {
1129 case skx:
1130 {
1131 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001132 // the ERRx (B(0) D8 F0 offset 210h)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001133 uint32_t errpinsts = 0;
1134 if (peci_RdPCIConfigLocal(
1135 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
1136 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1137 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001138 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001139 }
1140 break;
1141 }
1142 case icx:
1143 {
1144 // Check the ERRPINSTS to see if this is the CPU that caused
Jason M. Billscbf78532019-08-16 15:32:11 -07001145 // the ERRx (B(30) D0 F3 offset 274h) (Note: Bus 30 is
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001146 // accessed on PECI as bus 13)
1147 uint32_t errpinsts = 0;
1148 if (peci_RdEndPointConfigPciLocal(
1149 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
1150 (uint8_t*)&errpinsts, &cc) == PECI_CC_SUCCESS)
1151 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001152 errPinCPUs[cpu] = (errpinsts & errPinSts) != 0;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001153 }
1154 break;
1155 }
1156 }
1157 }
1158 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001159 return errPinCPUs;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001160}
1161
Jason M. Billscbf78532019-08-16 15:32:11 -07001162static void errXAssertHandler(const int errPin,
1163 boost::asio::steady_timer& errXAssertTimer)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001164{
Jason M. Billscbf78532019-08-16 15:32:11 -07001165 // ERRx status is not guaranteed through the timeout, so save which
1166 // CPUs have it asserted
1167 std::bitset<MAX_CPUS> errPinCPUs = checkERRPinCPUs(errPin);
1168 errXAssertTimer.expires_after(std::chrono::milliseconds(errTimeoutMs));
1169 errXAssertTimer.async_wait([errPin, errPinCPUs](
1170 const boost::system::error_code ec) {
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001171 if (ec)
1172 {
1173 // operation_aborted is expected if timer is canceled before
1174 // completion.
1175 if (ec != boost::asio::error::operation_aborted)
1176 {
1177 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1178 << "\n";
1179 }
1180 return;
1181 }
Jason M. Billscbf78532019-08-16 15:32:11 -07001182 std::cerr << "ERR" << std::to_string(errPin) << " asserted for "
1183 << std::to_string(errTimeoutMs) << " ms\n";
1184 if (errPinCPUs.count())
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001185 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001186 for (int i = 0; i < errPinCPUs.size(); i++)
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001187 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001188 if (errPinCPUs[i])
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001189 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001190 cpuERRXLog(errPin, i);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001191 }
1192 }
1193 }
1194 else
1195 {
Jason M. Billscbf78532019-08-16 15:32:11 -07001196 cpuERRXLog(errPin);
1197 }
1198 });
1199}
1200
Jason M. Bills8c584392019-08-19 11:05:51 -07001201static void err0AssertHandler()
1202{
1203 // Handle the standard ERR0 detection and logging
1204 const static constexpr int err0 = 0;
1205 errXAssertHandler(err0, err0AssertTimer);
1206}
1207
1208static void err0Handler()
1209{
1210 if (!hostOff)
1211 {
1212 gpiod::line_event gpioLineEvent = err0Line.event_read();
1213
1214 bool err0 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1215 if (err0)
1216 {
1217 err0AssertHandler();
1218 }
1219 else
1220 {
1221 err0AssertTimer.cancel();
1222 }
1223 }
1224 err0Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1225 [](const boost::system::error_code ec) {
1226 if (ec)
1227 {
1228 std::cerr
1229 << "err0 handler error: " << ec.message()
1230 << "\n";
1231 return;
1232 }
1233 err0Handler();
1234 });
1235}
1236
Jason M. Bills75af3962019-08-19 11:07:17 -07001237static void err1AssertHandler()
1238{
1239 // Handle the standard ERR1 detection and logging
1240 const static constexpr int err1 = 1;
1241 errXAssertHandler(err1, err1AssertTimer);
1242}
1243
1244static void err1Handler()
1245{
1246 if (!hostOff)
1247 {
1248 gpiod::line_event gpioLineEvent = err1Line.event_read();
1249
1250 bool err1 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1251 if (err1)
1252 {
1253 err1AssertHandler();
1254 }
1255 else
1256 {
1257 err1AssertTimer.cancel();
1258 }
1259 }
1260 err1Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1261 [](const boost::system::error_code ec) {
1262 if (ec)
1263 {
1264 std::cerr
1265 << "err1 handler error: " << ec.message()
1266 << "\n";
1267 return;
1268 }
1269 err1Handler();
1270 });
1271}
1272
Jason M. Billscbf78532019-08-16 15:32:11 -07001273static void err2AssertHandler()
1274{
1275 // Handle the standard ERR2 detection and logging
1276 const static constexpr int err2 = 2;
1277 errXAssertHandler(err2, err2AssertTimer);
1278 // Also handle reset for ERR2
1279 err2AssertTimer.async_wait([](const boost::system::error_code ec) {
1280 if (ec)
1281 {
1282 // operation_aborted is expected if timer is canceled before
1283 // completion.
1284 if (ec != boost::asio::error::operation_aborted)
1285 {
1286 std::cerr << "err2 timeout async_wait failed: " << ec.message()
1287 << "\n";
1288 }
1289 return;
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001290 }
1291 conn->async_method_call(
1292 [](boost::system::error_code ec,
1293 const std::variant<bool>& property) {
1294 if (ec)
1295 {
1296 return;
1297 }
1298 const bool* reset = std::get_if<bool>(&property);
1299 if (reset == nullptr)
1300 {
1301 std::cerr << "Unable to read reset on ERR2 value\n";
1302 return;
1303 }
Jason M. Billsb61766b2019-11-26 17:02:44 -08001304 startCrashdumpAndRecovery(*reset, "ERR2 Timeout");
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001305 },
1306 "xyz.openbmc_project.Settings",
1307 "/xyz/openbmc_project/control/processor_error_config",
1308 "org.freedesktop.DBus.Properties", "Get",
1309 "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnERR2");
Yong Li061eb032020-02-26 15:06:18 +08001310
1311 beep(beepCPUErr2);
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001312 });
1313}
1314
1315static void err2Handler()
1316{
1317 if (!hostOff)
1318 {
1319 gpiod::line_event gpioLineEvent = err2Line.event_read();
1320
1321 bool err2 = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1322 if (err2)
1323 {
1324 err2AssertHandler();
1325 }
1326 else
1327 {
1328 err2AssertTimer.cancel();
1329 }
1330 }
1331 err2Event.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1332 [](const boost::system::error_code ec) {
1333 if (ec)
1334 {
1335 std::cerr
1336 << "err2 handler error: " << ec.message()
1337 << "\n";
1338 return;
1339 }
1340 err2Handler();
1341 });
1342}
1343
Jason M. Bills89922f82019-08-06 11:10:02 -07001344static void smiAssertHandler()
1345{
1346 smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
1347 smiAssertTimer.async_wait([](const boost::system::error_code ec) {
1348 if (ec)
1349 {
1350 // operation_aborted is expected if timer is canceled before
1351 // completion.
1352 if (ec != boost::asio::error::operation_aborted)
1353 {
1354 std::cerr << "smi timeout async_wait failed: " << ec.message()
1355 << "\n";
1356 }
1357 return;
1358 }
1359 std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs)
1360 << " ms\n";
1361 smiTimeoutLog();
1362 conn->async_method_call(
1363 [](boost::system::error_code ec,
1364 const std::variant<bool>& property) {
1365 if (ec)
1366 {
1367 return;
1368 }
1369 const bool* reset = std::get_if<bool>(&property);
1370 if (reset == nullptr)
1371 {
1372 std::cerr << "Unable to read reset on SMI value\n";
1373 return;
1374 }
Jason M. Bills94785442020-01-07 15:22:09 -08001375#ifdef HOST_ERROR_CRASHDUMP_ON_SMI_TIMEOUT
Jason M. Billsb61766b2019-11-26 17:02:44 -08001376 startCrashdumpAndRecovery(*reset, "SMI Timeout");
Jason M. Bills94785442020-01-07 15:22:09 -08001377#else
1378 if (*reset)
1379 {
1380 std::cout << "Recovering the system\n";
1381 startPowerCycle();
1382 }
1383#endif
Jason M. Bills89922f82019-08-06 11:10:02 -07001384 },
1385 "xyz.openbmc_project.Settings",
1386 "/xyz/openbmc_project/control/bmc_reset_disables",
1387 "org.freedesktop.DBus.Properties", "Get",
1388 "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
1389 });
1390}
1391
1392static void smiHandler()
1393{
1394 if (!hostOff)
1395 {
1396 gpiod::line_event gpioLineEvent = smiLine.event_read();
1397
1398 bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
1399 if (smi)
1400 {
1401 smiAssertHandler();
1402 }
1403 else
1404 {
1405 smiAssertTimer.cancel();
1406 }
1407 }
1408 smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
1409 [](const boost::system::error_code ec) {
1410 if (ec)
1411 {
1412 std::cerr
1413 << "smi handler error: " << ec.message()
1414 << "\n";
1415 return;
1416 }
1417 smiHandler();
1418 });
1419}
1420
Jason M. Billsa15c2522019-08-16 10:01:44 -07001421static void initializeErrorState()
1422{
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001423 // Handle CPU1_MISMATCH if it's asserted now
1424 if (cpu1MismatchLine.get_value() == 1)
1425 {
1426 cpuMismatchLog(1);
1427 }
1428
1429 // Handle CPU2_MISMATCH if it's asserted now
1430 if (cpu2MismatchLine.get_value() == 1)
1431 {
1432 cpuMismatchLog(2);
1433 }
1434
Jason M. Billsa15c2522019-08-16 10:01:44 -07001435 // Handle CPU_CATERR if it's asserted now
1436 if (caterrLine.get_value() == 0)
1437 {
1438 caterrAssertHandler();
Yong Li1429ca82020-04-27 16:49:45 +08001439 std::vector<Association> associations;
1440 associations.emplace_back(
1441 "", "critical", "/xyz/openbmc_project/host_error_monitor/cat_err");
1442 associations.emplace_back("", "critical", host_error_monitor::rootPath);
1443 host_error_monitor::associationCATAssert->set_property("Associations",
1444 associations);
Jason M. Billsa15c2522019-08-16 10:01:44 -07001445 }
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001446
Jason M. Bills8c584392019-08-19 11:05:51 -07001447 // Handle CPU_ERR0 if it's asserted now
1448 if (err0Line.get_value() == 0)
1449 {
1450 err0AssertHandler();
1451 }
1452
Jason M. Bills75af3962019-08-19 11:07:17 -07001453 // Handle CPU_ERR1 if it's asserted now
1454 if (err1Line.get_value() == 0)
1455 {
1456 err1AssertHandler();
1457 }
1458
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001459 // Handle CPU_ERR2 if it's asserted now
1460 if (err2Line.get_value() == 0)
1461 {
1462 err2AssertHandler();
1463 }
Jason M. Bills89922f82019-08-06 11:10:02 -07001464
1465 // Handle SMI if it's asserted now
1466 if (smiLine.get_value() == 0)
1467 {
1468 smiAssertHandler();
1469 }
Jason M. Bills08866542019-08-16 12:04:19 -07001470
Jason M. Billse94f5e12019-09-13 11:11:34 -07001471 // Handle CPU1_THERMTRIP if it's asserted now
1472 if (cpu1ThermtripLine.get_value() == 0)
1473 {
1474 cpu1ThermtripAssertHandler();
1475 }
1476
1477 // Handle CPU2_THERMTRIP if it's asserted now
1478 if (cpu2ThermtripLine.get_value() == 0)
1479 {
1480 cpu2ThermtripAssertHandler();
1481 }
1482
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001483 // Handle CPU1_MEM_THERM_EVENT (CPU1 DIMM Thermal trip) if it's asserted now
1484 if (cpu1MemtripLine.get_value() == 0)
1485 {
1486 memThermTripLog(1);
1487 }
1488
1489 // Handle CPU2_MEM_THERM_EVENT (CPU2 DIMM Thermal trip) if it's asserted now
1490 if (cpu2MemtripLine.get_value() == 0)
1491 {
1492 memThermTripLog(2);
1493 }
1494
Jason M. Billse94f5e12019-09-13 11:11:34 -07001495 // Handle CPU1_VRHOT if it's asserted now
1496 if (cpu1VRHotLine.get_value() == 0)
1497 {
1498 cpu1VRHotAssertHandler();
1499 }
1500
1501 // Handle CPU1_MEM_ABCD_VRHOT if it's asserted now
1502 if (cpu1MemABCDVRHotLine.get_value() == 0)
1503 {
1504 cpu1MemABCDVRHotAssertHandler();
1505 }
1506
1507 // Handle CPU1_MEM_EFGH_VRHOT if it's asserted now
1508 if (cpu1MemEFGHVRHotLine.get_value() == 0)
1509 {
1510 cpu1MemEFGHVRHotAssertHandler();
1511 }
1512
1513 // Handle CPU2_VRHOT if it's asserted now
1514 if (cpu2VRHotLine.get_value() == 0)
1515 {
1516 cpu2VRHotAssertHandler();
1517 }
1518
1519 // Handle CPU2_MEM_ABCD_VRHOT if it's asserted now
1520 if (cpu2MemABCDVRHotLine.get_value() == 0)
1521 {
1522 cpu2MemABCDVRHotAssertHandler();
1523 }
1524
1525 // Handle CPU2_MEM_EFGH_VRHOT if it's asserted now
1526 if (cpu2MemEFGHVRHotLine.get_value() == 0)
1527 {
1528 cpu2MemEFGHVRHotAssertHandler();
1529 }
1530
Jason M. Bills08866542019-08-16 12:04:19 -07001531 // Handle PCH_BMC_THERMTRIP if it's asserted now
1532 if (pchThermtripLine.get_value() == 0)
1533 {
1534 ssbThermTripLog();
Yong Li1429ca82020-04-27 16:49:45 +08001535 std::vector<Association> associations;
1536 associations.emplace_back(
1537 "", "critical",
1538 "/xyz/openbmc_project/host_error_monitor/ssb_thermal_trip");
1539 associations.emplace_back("", "critical", host_error_monitor::rootPath);
1540 host_error_monitor::associationSSBThermTrip->set_property(
1541 "Associations", associations);
Jason M. Bills08866542019-08-16 12:04:19 -07001542 }
Jason M. Billsa15c2522019-08-16 10:01:44 -07001543}
Jason M. Bills1490b142019-07-01 15:48:43 -07001544} // namespace host_error_monitor
1545
1546int main(int argc, char* argv[])
1547{
1548 // setup connection to dbus
1549 host_error_monitor::conn =
1550 std::make_shared<sdbusplus::asio::connection>(host_error_monitor::io);
1551
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001552 // Host Error Monitor Service
Jason M. Bills1490b142019-07-01 15:48:43 -07001553 host_error_monitor::conn->request_name(
1554 "xyz.openbmc_project.HostErrorMonitor");
1555 sdbusplus::asio::object_server server =
1556 sdbusplus::asio::object_server(host_error_monitor::conn);
1557
Yong Li1429ca82020-04-27 16:49:45 +08001558 // Associations interface for led status
1559 std::vector<host_error_monitor::Association> associations;
1560 associations.emplace_back("", "", "");
1561 host_error_monitor::associationSSBThermTrip = server.add_interface(
1562 "/xyz/openbmc_project/host_error_monitor/ssb_thermal_trip",
1563 "xyz.openbmc_project.Association.Definitions");
1564 host_error_monitor::associationSSBThermTrip->register_property(
1565 "Associations", associations);
1566 host_error_monitor::associationSSBThermTrip->initialize();
1567
1568 host_error_monitor::associationCATAssert = server.add_interface(
1569 "/xyz/openbmc_project/host_error_monitor/cat_assert",
1570 "xyz.openbmc_project.Association.Definitions");
1571 host_error_monitor::associationCATAssert->register_property("Associations",
1572 associations);
1573 host_error_monitor::associationCATAssert->initialize();
1574
Jason M. Billsc4b91f22019-11-26 17:04:50 -08001575 // Restart Cause Interface
1576 host_error_monitor::hostErrorTimeoutIface =
1577 server.add_interface("/xyz/openbmc_project/host_error_monitor",
1578 "xyz.openbmc_project.HostErrorMonitor.Timeout");
1579
1580 host_error_monitor::hostErrorTimeoutIface->register_property(
1581 "IERRTimeoutMs", host_error_monitor::caterrTimeoutMs,
1582 [](const std::size_t& requested, std::size_t& resp) {
1583 if (requested > host_error_monitor::caterrTimeoutMsMax)
1584 {
1585 std::cerr << "IERRTimeoutMs update to " << requested
1586 << "ms rejected. Cannot be greater than "
1587 << host_error_monitor::caterrTimeoutMsMax << "ms.\n";
1588 return 0;
1589 }
1590 std::cerr << "IERRTimeoutMs updated to " << requested << "ms\n";
1591 host_error_monitor::caterrTimeoutMs = requested;
1592 resp = requested;
1593 return 1;
1594 },
1595 [](std::size_t& resp) { return host_error_monitor::caterrTimeoutMs; });
1596 host_error_monitor::hostErrorTimeoutIface->initialize();
1597
Jason M. Bills1490b142019-07-01 15:48:43 -07001598 // Start tracking host state
1599 std::shared_ptr<sdbusplus::bus::match::match> hostStateMonitor =
1600 host_error_monitor::startHostStateMonitor();
1601
jayaprakash Mutyala53099c42020-03-15 00:16:26 +00001602 // Request CPU1_MISMATCH GPIO events
1603 if (!host_error_monitor::requestGPIOInput(
1604 "CPU1_MISMATCH", host_error_monitor::cpu1MismatchLine))
1605 {
1606 return -1;
1607 }
1608
1609 // Request CPU2_MISMATCH GPIO events
1610 if (!host_error_monitor::requestGPIOInput(
1611 "CPU2_MISMATCH", host_error_monitor::cpu2MismatchLine))
1612 {
1613 return -1;
1614 }
1615
Jason M. Bills1490b142019-07-01 15:48:43 -07001616 // Initialize the host state
1617 host_error_monitor::initializeHostState();
1618
1619 // Request CPU_CATERR GPIO events
1620 if (!host_error_monitor::requestGPIOEvents(
1621 "CPU_CATERR", host_error_monitor::caterrHandler,
1622 host_error_monitor::caterrLine, host_error_monitor::caterrEvent))
1623 {
1624 return -1;
1625 }
1626
Jason M. Bills8c584392019-08-19 11:05:51 -07001627 // Request CPU_ERR0 GPIO events
1628 if (!host_error_monitor::requestGPIOEvents(
1629 "CPU_ERR0", host_error_monitor::err0Handler,
1630 host_error_monitor::err0Line, host_error_monitor::err0Event))
1631 {
1632 return -1;
1633 }
1634
Jason M. Bills75af3962019-08-19 11:07:17 -07001635 // Request CPU_ERR1 GPIO events
1636 if (!host_error_monitor::requestGPIOEvents(
1637 "CPU_ERR1", host_error_monitor::err1Handler,
1638 host_error_monitor::err1Line, host_error_monitor::err1Event))
1639 {
1640 return -1;
1641 }
1642
Jason M. Bills6a2cb692019-08-06 11:03:49 -07001643 // Request CPU_ERR2 GPIO events
1644 if (!host_error_monitor::requestGPIOEvents(
1645 "CPU_ERR2", host_error_monitor::err2Handler,
1646 host_error_monitor::err2Line, host_error_monitor::err2Event))
1647 {
1648 return -1;
1649 }
1650
Jason M. Bills89922f82019-08-06 11:10:02 -07001651 // Request SMI GPIO events
1652 if (!host_error_monitor::requestGPIOEvents(
1653 "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
1654 host_error_monitor::smiEvent))
1655 {
1656 return -1;
1657 }
1658
Jason M. Bills45e87e02019-09-09 14:45:38 -07001659 // Request CPU1_FIVR_FAULT GPIO input
1660 if (!host_error_monitor::requestGPIOInput(
1661 "CPU1_FIVR_FAULT", host_error_monitor::cpu1FIVRFaultLine))
1662 {
1663 return -1;
1664 }
1665
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001666 // Request CPU1_THERMTRIP GPIO events
1667 if (!host_error_monitor::requestGPIOEvents(
1668 "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler,
1669 host_error_monitor::cpu1ThermtripLine,
1670 host_error_monitor::cpu1ThermtripEvent))
1671 {
1672 return -1;
1673 }
1674
Jason M. Bills45e87e02019-09-09 14:45:38 -07001675 // Request CPU2_FIVR_FAULT GPIO input
1676 if (!host_error_monitor::requestGPIOInput(
1677 "CPU2_FIVR_FAULT", host_error_monitor::cpu2FIVRFaultLine))
1678 {
1679 return -1;
1680 }
1681
Jason M. Bills78c5eed2019-08-28 14:00:40 -07001682 // Request CPU2_THERMTRIP GPIO events
1683 if (!host_error_monitor::requestGPIOEvents(
1684 "CPU2_THERMTRIP", host_error_monitor::cpu2ThermtripHandler,
1685 host_error_monitor::cpu2ThermtripLine,
1686 host_error_monitor::cpu2ThermtripEvent))
1687 {
1688 return -1;
1689 }
1690
Jason M. Bills250fa632019-08-28 15:58:25 -07001691 // Request CPU1_VRHOT GPIO events
1692 if (!host_error_monitor::requestGPIOEvents(
1693 "CPU1_VRHOT", host_error_monitor::cpu1VRHotHandler,
1694 host_error_monitor::cpu1VRHotLine,
1695 host_error_monitor::cpu1VRHotEvent))
1696 {
1697 return -1;
1698 }
1699
Jason M. Bills9647ba72019-08-29 14:19:19 -07001700 // Request CPU1_MEM_ABCD_VRHOT GPIO events
1701 if (!host_error_monitor::requestGPIOEvents(
1702 "CPU1_MEM_ABCD_VRHOT", host_error_monitor::cpu1MemABCDVRHotHandler,
1703 host_error_monitor::cpu1MemABCDVRHotLine,
1704 host_error_monitor::cpu1MemABCDVRHotEvent))
1705 {
1706 return -1;
1707 }
1708
1709 // Request CPU1_MEM_EFGH_VRHOT GPIO events
1710 if (!host_error_monitor::requestGPIOEvents(
1711 "CPU1_MEM_EFGH_VRHOT", host_error_monitor::cpu1MemEFGHVRHotHandler,
1712 host_error_monitor::cpu1MemEFGHVRHotLine,
1713 host_error_monitor::cpu1MemEFGHVRHotEvent))
1714 {
1715 return -1;
1716 }
1717
Jason M. Bills250fa632019-08-28 15:58:25 -07001718 // Request CPU2_VRHOT GPIO events
1719 if (!host_error_monitor::requestGPIOEvents(
1720 "CPU2_VRHOT", host_error_monitor::cpu2VRHotHandler,
1721 host_error_monitor::cpu2VRHotLine,
1722 host_error_monitor::cpu2VRHotEvent))
1723 {
1724 return -1;
1725 }
1726
Jason M. Bills9647ba72019-08-29 14:19:19 -07001727 // Request CPU2_MEM_ABCD_VRHOT GPIO events
1728 if (!host_error_monitor::requestGPIOEvents(
1729 "CPU2_MEM_ABCD_VRHOT", host_error_monitor::cpu2MemABCDVRHotHandler,
1730 host_error_monitor::cpu2MemABCDVRHotLine,
1731 host_error_monitor::cpu2MemABCDVRHotEvent))
1732 {
1733 return -1;
1734 }
1735
1736 // Request CPU2_MEM_EFGH_VRHOT GPIO events
1737 if (!host_error_monitor::requestGPIOEvents(
1738 "CPU2_MEM_EFGH_VRHOT", host_error_monitor::cpu2MemEFGHVRHotHandler,
1739 host_error_monitor::cpu2MemEFGHVRHotLine,
1740 host_error_monitor::cpu2MemEFGHVRHotEvent))
1741 {
1742 return -1;
1743 }
1744
Chen Yugange6c0f1c2019-08-02 20:36:42 +08001745 // Request PCH_BMC_THERMTRIP GPIO events
1746 if (!host_error_monitor::requestGPIOEvents(
1747 "PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,
1748 host_error_monitor::pchThermtripLine,
1749 host_error_monitor::pchThermtripEvent))
1750 {
1751 return -1;
1752 }
1753
jayaprakash Mutyala009adbc2019-12-24 22:08:07 +00001754 // Request CPU1_MEM_THERM_EVENT GPIO events
1755 if (!host_error_monitor::requestGPIOEvents(
1756 "CPU1_MEM_THERM_EVENT", host_error_monitor::cpu1MemtripHandler,
1757 host_error_monitor::cpu1MemtripLine,
1758 host_error_monitor::cpu1MemtripEvent))
1759 {
1760 return -1;
1761 }
1762
1763 // Request CPU2_MEM_THERM_EVENT GPIO events
1764 if (!host_error_monitor::requestGPIOEvents(
1765 "CPU2_MEM_THERM_EVENT", host_error_monitor::cpu2MemtripHandler,
1766 host_error_monitor::cpu2MemtripLine,
1767 host_error_monitor::cpu2MemtripEvent))
1768 {
1769 return -1;
1770 }
1771
Jason M. Bills1490b142019-07-01 15:48:43 -07001772 host_error_monitor::io.run();
1773
1774 return 0;
1775}