blob: 294dbef28ef42eb1b7a2f3fcff63e2ce305e5b2c [file] [log] [blame]
Jason M. Billsd711cc82020-12-04 16:46:39 -08001/*
2// Copyright (c) 2021 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
16#pragma once
Jason M. Billsdae0e922020-12-14 17:16:41 -080017#include <peci.h>
18
Jason M. Billsd711cc82020-12-04 16:46:39 -080019#include <sdbusplus/asio/object_server.hpp>
20
Jason M. Bills32a90c62022-03-14 13:33:31 -070021#include <bitset>
Jason M. Billsd711cc82020-12-04 16:46:39 -080022#include <iostream>
23
24namespace host_error_monitor
25{
// Alias for a tuple of three strings.
// NOTE(review): presumably one D-Bus association entry (forward name,
// reverse name, object path) -- confirm against the users of this alias.
Jason M. Bills8fa1c962020-12-10 14:33:56 -080026using Association = std::tuple<std::string, std::string, std::string>;
27
// Declaration only; no definition appears in the visible part of this file.
// NOTE(review): presumably returns true while the host is powered off --
// confirm against the definition.
Jason M. Billsd711cc82020-12-04 16:46:39 -080028bool hostIsOff();
29
30void startPowerCycle(std::shared_ptr<sdbusplus::asio::connection> conn)
31{
32 conn->async_method_call(
33 [](boost::system::error_code ec) {
34 if (ec)
35 {
36 std::cerr << "failed to set Chassis State\n";
37 }
38 },
39 "xyz.openbmc_project.State.Chassis",
40 "/xyz/openbmc_project/state/chassis0",
41 "org.freedesktop.DBus.Properties", "Set",
42 "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
43 std::variant<std::string>{
44 "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"});
45}
46
47void startWarmReset(std::shared_ptr<sdbusplus::asio::connection> conn)
48{
49 conn->async_method_call(
50 [](boost::system::error_code ec) {
51 if (ec)
52 {
53 std::cerr << "failed to set Host State\n";
54 }
55 },
56 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
57 "org.freedesktop.DBus.Properties", "Set",
58 "xyz.openbmc_project.State.Host", "RequestedHostTransition",
59 std::variant<std::string>{
60 "xyz.openbmc_project.State.Host.Transition.ForceWarmReboot"});
61}
62
63void startCrashdumpAndRecovery(
64 std::shared_ptr<sdbusplus::asio::connection> conn, bool recoverSystem,
65 const std::string& triggerType)
66{
67 static bool recover;
68 recover = recoverSystem;
69 std::cerr << "Starting crashdump\n";
70 static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
71
72 if (!crashdumpCompleteMatch)
73 {
74 crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
75 *conn,
76 "type='signal',interface='com.intel.crashdump.Stored',member='"
77 "CrashdumpComplete'",
78 [conn](sdbusplus::message::message& msg) {
79 std::cerr << "Crashdump completed\n";
80 if (recover)
81 {
82 std::cerr << "Recovering the system\n";
83 startWarmReset(conn);
84 }
85 crashdumpCompleteMatch.reset();
86 });
87 }
88
89 conn->async_method_call(
90 [](boost::system::error_code ec) {
91 if (ec)
92 {
93 std::cerr << "failed to start Crashdump\n";
94 }
95 },
96 "com.intel.crashdump", "/com/intel/crashdump",
97 "com.intel.crashdump.Stored", "GenerateStoredLog", triggerType);
98}
99
Jason M. Bills0e06b842020-10-02 16:30:06 -0700100static inline bool peciError(EPECIStatus peciStatus, uint8_t cc)
101{
102 return (
103 peciStatus != PECI_CC_SUCCESS ||
104 (cc != PECI_DEV_CC_SUCCESS && cc != PECI_DEV_CC_FATAL_MCA_DETECTED));
105}
106
107static void printPECIError(const std::string& reg, const size_t addr,
108 const EPECIStatus peciStatus, const size_t cc)
109{
Jason M. Bills4a6e45c2021-03-17 16:00:38 -0700110 std::cerr << "Failed to read " << reg << " on CPU address " << std::dec
111 << addr << ". Error: " << peciStatus << ": cc: 0x" << std::hex
112 << cc << "\n";
Jason M. Bills0e06b842020-10-02 16:30:06 -0700113}
114
Jason M. Bills47008522020-10-07 16:42:34 -0700115static void beep(std::shared_ptr<sdbusplus::asio::connection> conn,
116 const uint8_t& beepPriority)
117{
118 conn->async_method_call(
119 [](boost::system::error_code ec) {
120 if (ec)
121 {
122 std::cerr << "beep returned error with "
123 "async_method_call (ec = "
124 << ec << ")\n";
125 return;
126 }
127 },
128 "xyz.openbmc_project.BeepCode", "/xyz/openbmc_project/BeepCode",
129 "xyz.openbmc_project.BeepCode", "Beep", uint8_t(beepPriority));
130}
131
Jason M. Bills32a90c62022-03-14 13:33:31 -0700132static void checkErrPinCPUs(const size_t errPin,
133 std::bitset<MAX_CPUS>& errPinCPUs)
134{
135 errPinCPUs.reset();
136 for (size_t cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
137 cpu++, addr++)
138 {
139 EPECIStatus peciStatus = PECI_CC_SUCCESS;
140 uint8_t cc = 0;
141 CPUModel model{};
142 uint8_t stepping = 0;
143 peciStatus = peci_GetCPUID(addr, &model, &stepping, &cc);
144 if (peciStatus != PECI_CC_SUCCESS)
145 {
146 if (peciStatus != PECI_CC_CPU_NOT_PRESENT)
147 {
148 printPECIError("CPUID", addr, peciStatus, cc);
149 }
150 continue;
151 }
152
153 switch (model)
154 {
155 case skx:
156 {
157 // Check the ERRPINSTS to see if this is the CPU that
158 // caused the ERRx (B(0) D8 F0 offset 210h)
159 uint32_t errpinsts = 0;
160 peciStatus = peci_RdPCIConfigLocal(addr, 0, 8, 0, 0x210,
161 sizeof(uint32_t),
162 (uint8_t*)&errpinsts, &cc);
163 if (peciError(peciStatus, cc))
164 {
165 printPECIError("ERRPINSTS", addr, peciStatus, cc);
166 continue;
167 }
168
169 errPinCPUs[cpu] = (errpinsts & (1 << errPin)) != 0;
170 break;
171 }
172 case icx:
173 {
174 // Check the ERRPINSTS to see if this is the CPU that
175 // caused the ERRx (B(30) D0 F3 offset 274h) (Note: Bus
176 // 30 is accessed on PECI as bus 13)
177 uint32_t errpinsts = 0;
178 peciStatus = peci_RdEndPointConfigPciLocal(
179 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
180 (uint8_t*)&errpinsts, &cc);
181 if (peciError(peciStatus, cc))
182 {
183 printPECIError("ERRPINSTS", addr, peciStatus, cc);
184 continue;
185 }
186
187 errPinCPUs[cpu] = (errpinsts & (1 << errPin)) != 0;
188 break;
189 }
190 }
191 }
192}
193
Jason M. Billsd711cc82020-12-04 16:46:39 -0800194} // namespace host_error_monitor