/*
// Copyright (c) 2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16#pragma once
17#include <systemd/sd-journal.h>
18
19#include <error_monitors/base_gpio_poll_monitor.hpp>
20#include <host_error_monitor.hpp>
21#include <sdbusplus/asio/object_server.hpp>
22
23#include <bitset>
24
25namespace host_error_monitor::err_pin_monitor
26{
27static constexpr bool debug = false;
28
29class ErrPinMonitor :
30 public host_error_monitor::base_gpio_poll_monitor::BaseGPIOPollMonitor
31{
32 size_t errPin;
33 std::bitset<MAX_CPUS> errPinCPUs;
34 const static host_error_monitor::base_gpio_poll_monitor::AssertValue
35 assertValue =
36 host_error_monitor::base_gpio_poll_monitor::AssertValue::lowAssert;
37 const static constexpr size_t errPinPollingTimeMs = 1000;
38 const static constexpr size_t errPinTimeoutMs = 90000;
39
40 void logEvent()
41 {
42 if (errPinCPUs.none())
43 {
44 return errPinTimeoutLog();
45 }
46
47 for (size_t i = 0; i < errPinCPUs.size(); i++)
48 {
49 if (errPinCPUs[i])
50 {
51 errPinTimeoutLog(i);
52 }
53 }
54 }
55
56 void errPinTimeoutLog()
57 {
58 std::string msg = "ERR" + std::to_string(errPin) + " Timeout";
59
60 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
61 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
62 "OpenBMC.0.1.CPUError", "REDFISH_MESSAGE_ARGS=%s",
63 msg.c_str(), NULL);
64 }
65
66 void errPinTimeoutLog(const int cpuNum)
67 {
68 std::string msg = "ERR" + std::to_string(errPin) + " Timeout on CPU " +
69 std::to_string(cpuNum + 1);
70
71 sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i",
72 LOG_INFO, "REDFISH_MESSAGE_ID=%s",
73 "OpenBMC.0.1.CPUError", "REDFISH_MESSAGE_ARGS=%s",
74 msg.c_str(), NULL);
75 }
76
77 void checkErrPinCPUs()
78 {
79 errPinCPUs.reset();
80 for (size_t cpu = 0, addr = MIN_CLIENT_ADDR; addr <= MAX_CLIENT_ADDR;
81 cpu++, addr++)
82 {
83 EPECIStatus peciStatus = PECI_CC_SUCCESS;
84 uint8_t cc = 0;
85 CPUModel model{};
86 uint8_t stepping = 0;
87 peciStatus = peci_GetCPUID(addr, &model, &stepping, &cc);
88 if (peciStatus != PECI_CC_SUCCESS)
89 {
90 if (peciStatus != PECI_CC_CPU_NOT_PRESENT)
91 {
92 printPECIError("CPUID", addr, peciStatus, cc);
93 }
94 continue;
95 }
96
97 switch (model)
98 {
99 case skx:
100 {
101 // Check the ERRPINSTS to see if this is the CPU that
102 // caused the ERRx (B(0) D8 F0 offset 210h)
103 uint32_t errpinsts = 0;
104 peciStatus = peci_RdPCIConfigLocal(
105 addr, 0, 8, 0, 0x210, sizeof(uint32_t),
106 (uint8_t*)&errpinsts, &cc);
107 if (peciError(peciStatus, cc))
108 {
109 printPECIError("ERRPINSTS", addr, peciStatus, cc);
110 continue;
111 }
112
113 errPinCPUs[cpu] = (errpinsts & (1 << errPin)) != 0;
114 break;
115 }
116 case icx:
117 {
118 // Check the ERRPINSTS to see if this is the CPU that
119 // caused the ERRx (B(30) D0 F3 offset 274h) (Note: Bus
120 // 30 is accessed on PECI as bus 13)
121 uint32_t errpinsts = 0;
122 peciStatus = peci_RdEndPointConfigPciLocal(
123 addr, 0, 13, 0, 3, 0x274, sizeof(uint32_t),
124 (uint8_t*)&errpinsts, &cc);
125 if (peciError(peciStatus, cc))
126 {
127 printPECIError("ERRPINSTS", addr, peciStatus, cc);
128 continue;
129 }
130
131 errPinCPUs[cpu] = (errpinsts & (1 << errPin)) != 0;
132 break;
133 }
134 }
135 }
136 }
137
138 void startPolling() override
139 {
140 checkErrPinCPUs();
141 host_error_monitor::base_gpio_poll_monitor::BaseGPIOPollMonitor::
142 startPolling();
143 }
144
145 public:
146 ErrPinMonitor(boost::asio::io_service& io,
147 std::shared_ptr<sdbusplus::asio::connection> conn,
148 const std::string& signalName, const size_t errPin) :
149 BaseGPIOPollMonitor(io, conn, signalName, assertValue,
150 errPinPollingTimeMs, errPinTimeoutMs),
151 errPin(errPin)
152 {
153 if (valid)
154 {
155 startPolling();
156 }
157 }
158};
159} // namespace host_error_monitor::err_pin_monitor