blob: 0f09442c835587fadca62f2bb23f48a7354a10c1 [file] [log] [blame]
#include "config.h"

#include "ecc_manager.hpp"

#include <phosphor-logging/elog-errors.hpp>

#include <chrono>
#include <ctime>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>

#ifdef ECC_PHOSPHOR_LOGGING
#include <xyz/openbmc_project/Memory/MemoryECC/error.hpp>
#endif

using namespace phosphor::logging;
19
20namespace phosphor
21{
22namespace memory
23{
/** Path of the file from which getMaxLogValue() reads the logging limit. */
static constexpr const char ECC_FILE[] = "/etc/ecc/maxlog.conf";
/** Value written to the sysfs `reset_counters` node by resetCounter(). */
static constexpr const auto RESET_COUNT = "1";
/** Value written to the EDAC report node once the logging limit is hit. */
static constexpr const char CLOSE_EDAC_REPORT[] = "off";

/**
 * Retry budget used by the sysfs read/write helpers.
 * Given internal linkage like its siblings so the generic name cannot
 * collide with a symbol from another translation unit.
 */
static auto retries = 3;
/** Pause between two attempts of a sysfs read/write. */
static constexpr auto delay = std::chrono::milliseconds{100};
/** Period with which ECC::run() arms the polling timer. */
static constexpr auto interval = std::chrono::seconds{1};
#ifdef ECC_PHOSPHOR_LOGGING
/** Time that must elapse after the CE limit was reached before correctable
 *  errors are logged again (see ECC::checkCeCount()). */
static constexpr auto ceInterval = std::chrono::hours{1};
#endif
/** Generator ID handed to addSELLog() with every SEL record. */
static constexpr uint16_t selBMCGenID = 0x0020;
35void ECC::init()
36{
Will Lianga1d42022019-06-13 14:17:12 +080037 namespace fs = std::filesystem;
38
39 if (fs::exists(sysfsRootPath))
40 {
41 try
42 {
43 resetCounter();
44 getMaxLogValue();
45 }
46 catch (const std::system_error& e)
47 {
Will Lianga1d42022019-06-13 14:17:12 +080048 log<level::INFO>(
49 "Logging failing sysfs file",
50 phosphor::logging::entry("FILE=%s", sysfsRootPath));
51 }
52 }
53 _bus.request_name(BUSNAME);
54}
55
56std::string ECC::getValue(std::string fullPath)
57{
58 std::string val;
59 std::ifstream ifs;
60
61 while (true)
62 {
63 try
64 {
65 if (!ifs.is_open())
66 ifs.open(fullPath);
67 ifs.clear();
68 ifs.seekg(0);
69 ifs >> val;
70 }
71 catch (const std::exception& e)
72 {
73 --retries;
74 std::this_thread::sleep_for(delay);
75 continue;
76 }
77 break;
78 }
79
80 ifs.close();
81 return val;
82}
83
84void ECC::writeValue(std::string fullPath, std::string value)
85{
86 std::ofstream ofs;
87 while (true)
88 {
89 try
90 {
91 if (!ofs.is_open())
92 ofs.open(fullPath);
93 ofs.clear();
94 ofs.seekp(0);
95 ofs << value;
96 ofs.flush();
97 }
98 catch (const std::exception& e)
99 {
100 --retries;
101 std::this_thread::sleep_for(delay);
102 continue;
103 }
104 break;
105 }
106 ofs.close();
107}
108
109void ECC::run()
110{
111 init();
112 std::function<void()> callback(std::bind(&ECC::read, this));
113 try
114 {
Eddie James28b153c2024-07-22 11:37:09 -0500115 _timer.restart(interval);
Will Lianga1d42022019-06-13 14:17:12 +0800116
117 _bus.attach_event(_event.get(), SD_EVENT_PRIORITY_IMPORTANT);
118 _event.loop();
119 }
120 catch (const std::exception& e)
121 {
122 log<level::ERR>("Error in sysfs polling loop",
123 entry("ERROR=%s", e.what()));
124 throw;
125 }
126}
127
128void ECC::checkEccLogFull(int64_t ceCount, int64_t ueCount)
129{
130 std::string errorMsg = "ECC error(memory error logging limit reached)";
131 std::vector<uint8_t> eccLogFullEventData{0x05, 0xff, 0xfe};
Will Lianga1d42022019-06-13 14:17:12 +0800132 auto total = ceCount + ueCount;
Eddie James28b153c2024-07-22 11:37:09 -0500133
Will Lianga1d42022019-06-13 14:17:12 +0800134 if (total == 0)
135 {
Eddie James28b153c2024-07-22 11:37:09 -0500136 // someone reset edac report from driver, so clear all parameter
Will Lianga1d42022019-06-13 14:17:12 +0800137 EccInterface::ceCount(ceCount);
138 EccInterface::ueCount(ueCount);
139 previousCeCounter = 0;
140 previousUeCounter = 0;
Eddie James28b153c2024-07-22 11:37:09 -0500141 EccInterface::isLoggingLimitReached(false);
142#ifdef ECC_PHOSPHOR_LOGGING
143 startCeCount = 0;
144 maxCeLimitReached = false;
145#endif
146 return;
Will Lianga1d42022019-06-13 14:17:12 +0800147 }
Eddie James28b153c2024-07-22 11:37:09 -0500148
149 if (total >= maxECCLog)
Will Lianga1d42022019-06-13 14:17:12 +0800150 {
Eddie James28b153c2024-07-22 11:37:09 -0500151#ifdef ECC_PHOSPHOR_LOGGING
152 if (((previousCeCounter - startCeCount) >= maxECCLog) &&
153 !maxCeLimitReached)
154 {
155 using error = sdbusplus::xyz::openbmc_project::Memory::MemoryECC::
156 Error::isLoggingLimitReached;
157 report<error>();
158
159 maxCeLimitReached = true;
160 maxCeLimitReachedTime = std::chrono::system_clock::now();
161 }
162#else
Will Lianga1d42022019-06-13 14:17:12 +0800163 // add SEL log
Eddie James28b153c2024-07-22 11:37:09 -0500164 addSELLog(errorMsg, OBJPATH, eccLogFullEventData, true, selBMCGenID);
165#endif
Will Lianga1d42022019-06-13 14:17:12 +0800166 // set ECC state
Eddie James28b153c2024-07-22 11:37:09 -0500167 EccInterface::isLoggingLimitReached(true);
168 controlEDACReport(CLOSE_EDAC_REPORT);
Will Lianga1d42022019-06-13 14:17:12 +0800169 EccInterface::state(MemoryECC::ECCStatus::LogFull);
170 }
171}
172
173int ECC::checkCeCount()
174{
175 std::string item = "ce_count";
176 std::string errorMsg = "ECC error(correctable)";
177 int64_t value = 0;
178 std::string fullPath = sysfsRootPath;
179 fullPath.append(item);
180 value = std::stoi(getValue(fullPath));
181 std::vector<uint8_t> eccCeEventData{0x00, 0xff, 0xfe};
Will Lianga1d42022019-06-13 14:17:12 +0800182
Eddie James28b153c2024-07-22 11:37:09 -0500183#ifdef ECC_PHOSPHOR_LOGGING
184 auto currentTime = std::chrono::system_clock::now();
185
186 // Start logging CE after user defined elaspsed time
187 if (maxCeLimitReached &&
188 (currentTime - maxCeLimitReachedTime >= ceInterval))
Will Lianga1d42022019-06-13 14:17:12 +0800189 {
Eddie James28b153c2024-07-22 11:37:09 -0500190 if (value)
191 startCeCount = previousCeCounter = value;
192 else
193 startCeCount = previousCeCounter = 0;
194 maxCeLimitReached = false;
195 }
196#endif
197 for (int64_t i = previousCeCounter + 1; i <= value; i++)
198 {
199 previousCeCounter = i;
200 EccInterface::ceCount(i);
201#ifdef ECC_PHOSPHOR_LOGGING
202 if ((i - startCeCount) < maxECCLog)
203 {
204 using warning = sdbusplus::xyz::openbmc_project::Memory::MemoryECC::
205 Error::ceCount;
206 report<warning>();
207 }
208#else
Will Lianga1d42022019-06-13 14:17:12 +0800209 // add SEL log
Eddie James28b153c2024-07-22 11:37:09 -0500210 addSELLog(errorMsg, OBJPATH, eccCeEventData, true, selBMCGenID);
211#endif
Will Lianga1d42022019-06-13 14:17:12 +0800212 // set ECC state
213 EccInterface::state(MemoryECC::ECCStatus::CE);
214 }
215 return value;
216}
217
218int ECC::checkUeCount()
219{
220 std::string item = "ue_count";
221 std::string errorMsg = "ECC error(uncorrectable)";
222 int64_t value = 0;
223 std::string fullPath = sysfsRootPath;
224 fullPath.append(item);
225 value = std::stoi(getValue(fullPath));
226 std::vector<uint8_t> eccUeEventData{0x01, 0xff, 0xfe};
Will Lianga1d42022019-06-13 14:17:12 +0800227
228 while (previousUeCounter < value)
229 {
230 previousUeCounter++;
231 // add phosphor-logging log
232 EccInterface::ueCount(previousUeCounter);
Eddie James28b153c2024-07-22 11:37:09 -0500233#ifdef ECC_PHOSPHOR_LOGGING
234 if (previousUeCounter == 1)
235 {
236 using error = sdbusplus::xyz::openbmc_project::Memory::MemoryECC::
237 Error::ueCount;
238 report<error>();
239 }
240#else
Will Lianga1d42022019-06-13 14:17:12 +0800241 // add SEL log
Eddie James28b153c2024-07-22 11:37:09 -0500242 addSELLog(errorMsg, OBJPATH, eccUeEventData, true, selBMCGenID);
243#endif
Will Lianga1d42022019-06-13 14:17:12 +0800244 // set ECC state
245 EccInterface::state(MemoryECC::ECCStatus::UE);
246 }
247 return value;
248}
249
250void ECC::resetCounter()
251{
252 std::string item = "reset_counters";
253 std::string fullPath = sysfsRootPath;
254 fullPath.append(item);
255 writeValue(fullPath, RESET_COUNT);
256}
257
258void ECC::read()
259{
260 int64_t ceCount = 0;
261 int64_t ueCount = 0;
262 ceCount = checkCeCount();
263 ueCount = checkUeCount();
264 checkEccLogFull(ceCount, ueCount);
265}
266
267void ECC::controlEDACReport(std::string op)
268{
269 writeValue(sysfsEDACReportPath, op);
270}
271
272// get max log from file
273void ECC::getMaxLogValue()
274{
275 maxECCLog = std::stoi(getValue(ECC_FILE));
276}
277
278void ECC::addSELLog(std::string message, std::string path,
279 std::vector<uint8_t> selData, bool assert, uint16_t genId)
280{
Patrick Williamsc5d295b2022-11-26 09:41:58 -0600281 // sdbusplus::bus_t bus = sdbusplus::bus::new_default();
Will Lianga1d42022019-06-13 14:17:12 +0800282
283 auto selCall = _bus.new_method_call(
284 "xyz.openbmc_project.Logging.IPMI", "/xyz/openbmc_project/Logging/IPMI",
285 "xyz.openbmc_project.Logging.IPMI", "IpmiSelAdd");
286 selCall.append(message, path, selData, assert, genId);
287
288 auto selReply = _bus.call(selCall);
289 if (selReply.is_method_error())
290 {
291 log<level::ERR>("add SEL log error\n");
292 }
293}
294
295} // namespace memory
296} // namespace phosphor