blob: 3772265e3e2d34718598313bfa373a753da241d4 [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Chris Cain4b82f3e2024-04-22 14:44:29 -05007#include "occ_errors.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05008#include "utils.hpp"
9
George Liub5ca1012021-09-10 12:53:11 +080010#include <phosphor-logging/elog-errors.hpp>
11#include <phosphor-logging/log.hpp>
12#include <xyz/openbmc_project/Common/error.hpp>
13
Matt Spinlerd267cec2021-09-01 14:49:19 -050014#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080015#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080016#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060017#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080018#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050019
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// FRU type value that is compared against the fru_type file contents to
// detect a sensor whose FRU type is not available.
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// File-name suffixes appended to a sensor's sysfs path prefix.
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";

// Marker file whose existence is tested by Manager::findAndCreateObjects().
const char* const HOST_ON_FILE = "/run/openbmc/host@0-on";
32
Chris Caina8857c52021-01-27 11:53:05 -060033using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060034using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060035
/**
 * @brief Read one formatted value of type T from the start of a file.
 *
 * The stream is configured to raise on failbit, badbit and eofbit, so an
 * open failure, an unparsable value, or hitting end-of-file while extracting
 * are all reported as errors.
 *
 * @tparam T    Type extracted with operator>>.
 * @param[in] path  File to read.
 *
 * @return The extracted value.
 *
 * @throws std::system_error (generic category) carrying errno when the
 *         failure set it, otherwise EIO. The code is never 0.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data;

    // Start from a clean errno so a stale value left by an unrelated earlier
    // call is not reported as the cause of a failure here.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Parse/EOF failures do not set errno; report them as a generic I/O
        // error instead of a "success" (0) error code.
        const auto err = errno;
        throw std::system_error((err != 0) ? err : EIO,
                                std::generic_category());
    }

    return data;
}
58
/**
 * @brief Create the per-OCC objects.
 *
 * Without POWER10: creates objects for ids 0 .. MAX_CPUS-1.
 *
 * With POWER10: creates the PowerMode object if needed, then either re-arms
 * the discovery timer (HOST_ON_FILE exists, or the set of /dev/occX devices
 * is empty / changed size since the previous pass) or creates the status
 * objects and proceeds to wait for the host and the OCC active sensors.
 */
void Manager::findAndCreateObjects()
{
#ifndef POWER10
    // One OCC object per CPU
    for (auto cpu = 0; cpu < MAX_CPUS; ++cpu)
    {
        createObjects(std::string(OCC_NAME) + std::to_string(cpu));
    }
#else
    // State remembered across invocations (this function is re-entered
    // after the discovery timer is restarted below).
    static bool statusObjCreated = false;
    static bool tracedHostWait = false;

    if (!pmode)
    {
        // Create the power mode object
        pmode = std::make_unique<powermode::PowerMode>(
            *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
    }

    if (fs::exists(HOST_ON_FILE))
    {
        log<level::INFO>(
            std::format(
                "Manager::findAndCreateObjects(): Waiting for {} to complete...",
                HOST_ON_FILE)
                .c_str());
        discoverTimer->restartOnce(10s);
        return;
    }

    if (!statusObjCreated)
    {
        // Create the OCCs based on the /dev/occX devices
        auto found = findOCCsInDev();

        const bool settled = !found.empty() &&
                             (prevOCCSearch.size() == found.size());
        if (settled)
        {
            // All OCCs appear to be available, create status objects.
            // createObjects requires OCC0 first.
            std::sort(found.begin(), found.end());

            log<level::INFO>(
                std::format(
                    "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
                    found.size())
                    .c_str());
            for (auto occId : found)
            {
                createObjects(std::string(OCC_NAME) + std::to_string(occId));
            }
            statusObjCreated = true;
            waitingForAllOccActiveSensors = true;

            // Find/update the processor path associated with each OCC
            for (auto& statusObj : statusObjects)
            {
                statusObj->updateProcAssociation();
            }
        }
        else
        {
            // Something changed or no OCCs yet, try again in 10s.
            // On the first pass prevOCCSearch is empty, so there is always
            // at least one delay to give things a chance to settle.
            prevOCCSearch = found;

            log<level::INFO>(
                std::format(
                    "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
                    found.size())
                    .c_str());

            discoverTimer->restartOnce(10s);
        }
    }

    if (!(statusObjCreated && waitingForAllOccActiveSensors))
    {
        return;
    }

    if (!utils::isHostRunning())
    {
        // Trace the wait only once until the host is seen running
        if (!tracedHostWait)
        {
            log<level::INFO>(
                "Manager::findAndCreateObjects(): Waiting for host to start");
            tracedHostWait = true;
        }
        discoverTimer->restartOnce(30s);
        return;
    }

    if (tracedHostWait)
    {
        log<level::INFO>("Manager::findAndCreateObjects(): Host is running");
        tracedHostWait = false;
    }
    checkAllActiveSensors();
#endif
}
163
#ifdef POWER10
/**
 * @brief Check if all occActive sensors are available.
 *
 * While the host is running, walks the status objects and stops at the first
 * one that is neither active, nor has received a PLDM sensor, nor has a
 * queued active state. If none is found the discovery timer is disabled;
 * otherwise it is re-armed for 10s. When the host is not running the result
 * of the previous invocation is reused.
 */
void Manager::checkAllActiveSensors()
{
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (!utils::isHostRunning())
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for host to start");
        }
    }
    else
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            log<level::INFO>("checkAllActiveSensors(): Host is now running");
        }

        // Start with the assumption that all are available
        allActiveSensorAvailable = true;
        for (auto& statusObj : statusObjects)
        {
            if (statusObj->occActive() || statusObj->getPldmSensorReceived())
            {
                continue;
            }

            auto instance = statusObj->getOccInstanceID();

            // Check if sensor was queued while waiting for discovery
            auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                queuedActiveState.erase(queued);
                log<level::INFO>(
                    std::format(
                        "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
                        instance)
                        .c_str());
                statusObj->occActive(true);
                continue;
            }

            // This OCC has no sensor yet: stop scanning and keep waiting
            allActiveSensorAvailable = false;
            if (!tracedSensorWait)
            {
                log<level::INFO>(
                    std::format(
                        "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
                        instance)
                        .c_str());
                tracedSensorWait = true;
                // Make sure traces are not throttled
#ifdef PLDM
                pldmHandle->setTraceThrottle(false);
                // Start timer to throttle pldm traces when timer expires
                throttleTraceTimer->restartOnce(5min);
#endif
            }
#ifdef PLDM
            pldmHandle->checkActiveSensor(statusObj->getOccInstanceID());
#endif
            break;
        }
    }

    if (!allActiveSensorAvailable)
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
        return;
    }

    // All sensors were found, disable the discovery timer
    if (discoverTimer->isEnabled())
    {
        discoverTimer->setEnabled(false);
    }
#ifdef PLDM
    if (throttleTraceTimer->isEnabled())
    {
        // Disable throttle timer and make sure traces are not throttled
        throttleTraceTimer->setEnabled(false);
        pldmHandle->setTraceThrottle(false);
    }
#endif

    if (waitingForAllOccActiveSensors)
    {
        log<level::INFO>(
            "checkAllActiveSensors(): OCC Active sensors are available");
        waitingForAllOccActiveSensors = false;
    }
    queuedActiveState.clear();
    tracedSensorWait = false;
}
#endif
274
Matt Spinlerd267cec2021-09-01 14:49:19 -0500275std::vector<int> Manager::findOCCsInDev()
276{
277 std::vector<int> occs;
278 std::regex expr{R"(occ(\d+)$)"};
279
280 for (auto& file : fs::directory_iterator("/dev"))
281 {
282 std::smatch match;
283 std::string path{file.path().string()};
284 if (std::regex_search(path, match, expr))
285 {
286 auto num = std::stoi(match[1].str());
287
288 // /dev numbering starts at 1, ours starts at 0.
289 occs.push_back(num - 1);
290 }
291 }
292
293 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530294}
295
/**
 * @brief Create the OCC objects matching a CPU object path read from a
 *        D-Bus message.
 *
 * The last component of the object path has CPU_NAME replaced by OCC_NAME
 * and the result is handed to createObjects().
 *
 * @param[in] msg  Message whose first field is the CPU object path.
 *
 * @return Always 0.
 */
int Manager::cpuCreated(sdbusplus::message_t& msg)
{
    namespace fs = std::filesystem;

    sdbusplus::message::object_path o;
    msg.read(o);
    fs::path cpuPath(std::string(std::move(o)));

    auto name = cpuPath.filename().string();
    const auto index = name.find(CPU_NAME);
    if (index == std::string::npos)
    {
        // std::string::replace() throws std::out_of_range for npos; an
        // object that is not named after a CPU is ignored instead of letting
        // that exception escape.
        log<level::ERR>(
            std::format(
                "Manager::cpuCreated(): {} not found in object name {}",
                CPU_NAME, name)
                .c_str());
        return 0;
    }
    name.replace(index, std::strlen(CPU_NAME), OCC_NAME);

    createObjects(name);

    return 0;
}
312
/**
 * @brief Create the Status and PassThrough objects for one OCC.
 *
 * Also creates the PowerCap object from the first Status object created,
 * and, when the new Status object reports itself as master, disables the
 * poll timer and (POWER10) records the master path on the PowerMode object.
 *
 * @param[in] occ  OCC name; appended to OCC_CONTROL_ROOT to form the path.
 */
void Manager::createObjects(const std::string& occ)
{
    const auto path = fs::path(OCC_CONTROL_ROOT) / occ;

    statusObjects.emplace_back(std::make_unique<Status>(
        event, path.c_str(), *this,
#ifdef POWER10
        pmode,
#endif
        std::bind(&Manager::statusCallBack, this, std::placeholders::_1,
                  std::placeholders::_2)
#ifdef PLDM
            ,
        std::bind(&pldm::Interface::resetOCC, pldmHandle.get(),
                  std::placeholders::_1)
#endif
            ));

    auto& newStatus = statusObjects.back();

    // Create the power cap monitor object
    if (!pcap)
    {
        pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
            *newStatus);
    }

    if (newStatus->isMasterOcc())
    {
        log<level::INFO>(
            std::format("Manager::createObjects(): OCC{} is the master",
                        newStatus->getOccInstanceID())
                .c_str());
        _pollTimer->setEnabled(false);

#ifdef POWER10
        // Set the master OCC on the PowerMode object
        pmode->setMasterOcc(path);
#endif
    }

    passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
#ifdef POWER10
                                                                      ,
                                                                  pmode
#endif
                                                                  ));
}
359
/**
 * @brief Callback invoked when an OCC's active status changes.
 *
 * Maintains activeCount, starts/stops the poll timer (and, on POWER10, the
 * wait-for-all-OCCs timer) and validates the master OCC once every status
 * object is active.
 *
 * @param[in] instance  OCC instance whose status changed.
 * @param[in] status    true if the OCC went active, false if it went away.
 */
void Manager::statusCallBack(instanceID instance, bool status)
{
    if (!status)
    {
        // OCC went away
        if (activeCount > 0)
        {
            --activeCount;
        }
        else
        {
            log<level::ERR>(
                std::format("OCC{} disabled, but currently no active OCCs",
                            instance)
                    .c_str());
        }

        if (activeCount == 0)
        {
            // No OCCs are running: stop OCC poll timer
            if (_pollTimer->isEnabled())
            {
                log<level::INFO>(
                    "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
                _pollTimer->setEnabled(false);
            }

#ifdef POWER10
            // stop wait timer
            if (waitForAllOccsTimer->isEnabled())
            {
                waitForAllOccsTimer->setEnabled(false);
            }
#endif
        }
#ifdef READ_OCC_SENSORS
        // Clear OCC sensors
        setSensorValueToNaN(instance);
#endif
    }
    else
    {
        // OCC went active
        ++activeCount;

#ifdef POWER10
        if (activeCount == 1)
        {
            // First OCC went active (allow some time for all OCCs to go
            // active)
            waitForAllOccsTimer->restartOnce(60s);
        }
#endif

        if (activeCount == statusObjects.size())
        {
#ifdef POWER10
            // All OCCs are now running, stop occ wait timer
            if (waitForAllOccsTimer->isEnabled())
            {
                waitForAllOccsTimer->setEnabled(false);
            }
#endif

            // Verify master OCC and start presence monitor
            validateOccMaster();
        }

        // Start poll timer if not already started
        if (!_pollTimer->isEnabled())
        {
            log<level::INFO>(
                std::format("Manager: OCCs will be polled every {} seconds",
                            pollInterval)
                    .c_str());

            // Send poll and start OCC poll timer
            pollerTimerExpired();
        }
    }

#ifdef POWER10
    if (waitingForAllOccActiveSensors && utils::isHostRunning())
    {
        checkAllActiveSensors();
    }
#endif
}
453
#ifdef I2C_OCC
/**
 * @brief Create one Status object per OCC hwmon device found under DEV_PATH,
 *        then the PowerCap object (and, on POWER10, the PowerMode object)
 *        for the first device, which is treated as the master OCC.
 *
 * If no device is found nothing beyond the (empty) scan is done.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }

    if (deviceNames.empty())
    {
        // front() on an empty container is undefined behaviour; without any
        // device there is no master OCC to attach PowerCap/PowerMode to.
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC hwmon devices found");
        return;
    }

    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front());
#ifdef POWER10
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    // Set the master OCC on the PowerMode object. The names were rewritten
    // in place by the loop above, so the first entry is the master's name.
    pmode->setMasterOcc(fs::path(OCC_CONTROL_ROOT) / deviceNames.front());
#endif
}
#endif
480
Tom Joseph815f9f52020-07-27 12:12:13 +0530481#ifdef PLDM
/**
 * @brief Handle an SBE timeout for an OCC instance.
 *
 * If the first status object with a matching instance id is active, the SBE
 * state is set to SBE_STATE_NOT_USABLE and an HRESET is requested via PLDM.
 *
 * @param[in] instance  OCC instance that timed out.
 */
void Manager::sbeTimeout(unsigned int instance)
{
    for (const auto& statusObj : statusObjects)
    {
        if (instance != statusObj->getOccInstanceID())
        {
            continue;
        }

        // Only the first matching status object is considered
        if (statusObj->occActive())
        {
            log<level::INFO>(
                std::format("SBE timeout, requesting HRESET (OCC{})", instance)
                    .c_str());

            setSBEState(instance, SBE_STATE_NOT_USABLE);

            pldmHandle->sendHRESET(instance);
        }
        break;
    }
}
500
/**
 * @brief Apply an OCC active/inactive notification to the matching status
 *        object, or queue it when no such object exists yet.
 *
 * @param[in] instance  OCC instance the notification is for.
 * @param[in] status    true if the OCC is reported active.
 *
 * @return The result of Status::occActive(status) when the update was
 *         applied; false when it was deferred or queued.
 */
bool Manager::updateOCCActive(instanceID instance, bool status)
{
    const auto target = std::find_if(
        statusObjects.begin(), statusObjects.end(),
        [instance](const auto& candidate) {
            return instance == candidate->getOccInstanceID();
        });

    const bool hostRunning = open_power::occ::utils::isHostRunning();

    if (target == statusObjects.end())
    {
        // No status object yet
        if (hostRunning)
        {
            log<level::WARNING>(
                std::format(
                    "updateOCCActive: No status object to update for OCC{} (active={})",
                    instance, status)
                    .c_str());
        }
        else if (status)
        {
            log<level::WARNING>(
                std::format(
                    "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
                    instance, status)
                    .c_str());
        }

        if (status)
        {
            // OCC went active
            queuedActiveState.insert(instance);
        }
        else
        {
            // OCC was disabled: drop any queued active state
            const auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                queuedActiveState.erase(queued);
            }
        }
        return false;
    }

    if (status && !hostRunning)
    {
        log<level::WARNING>(
            std::format(
                "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
                instance, status)
                .c_str());
        (*target)->setPldmSensorReceived(false);
        if (!waitingForAllOccActiveSensors)
        {
            log<level::INFO>(
                "updateOCCActive: Waiting for Host and all OCC Active Sensors");
            waitingForAllOccActiveSensors = true;
        }
#ifdef POWER10
        discoverTimer->restartOnce(30s);
#endif
        return false;
    }

    log<level::INFO>(
        std::format("updateOCCActive: OCC{} active={}", instance, status)
            .c_str());
    (*target)->setPldmSensorReceived(true);
    return (*target)->occActive(status);
}
Eddie Jamescbad2192021-10-07 09:39:39 -0500577
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500578// Called upon pldm event To set powermode Safe Mode State for system.
579void Manager::updateOccSafeMode(bool safeMode)
580{
581#ifdef POWER10
582 pmode->updateDbusSafeMode(safeMode);
583#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500584 // Update the processor throttle status on dbus
585 for (auto& obj : statusObjects)
586 {
587 obj->updateThrottle(safeMode, THROTTLED_SAFE);
588 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500589}
590
/**
 * @brief Handle the result of an SBE HRESET request.
 *
 * On success the SBE state is set to SBE_STATE_BOOTED. On failure it is set
 * to SBE_STATE_FAILED and, if sbeCanDump() allows it, a PEL is created and
 * an SBE dump is requested over D-Bus.
 *
 * @param[in] instance  OCC instance the HRESET was issued for.
 * @param[in] success   true if the HRESET succeeded.
 */
void Manager::sbeHRESETResult(instanceID instance, bool success)
{
    if (success)
    {
        log<level::INFO>(
            std::format("HRESET succeeded (OCC{})", instance).c_str());

        setSBEState(instance, SBE_STATE_BOOTED);

        return;
    }

    setSBEState(instance, SBE_STATE_FAILED);

    if (sbeCanDump(instance))
    {
        log<level::INFO>(
            std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
                .c_str());

        auto& bus = utils::getBus();
        uint32_t src6 = instance << 16;
        uint32_t logId =
            FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
                            src6, "SBE command timeout");

        try
        {
            constexpr auto path = "/org/openpower/dump";
            constexpr auto interface = "xyz.openbmc_project.Dump.Create";
            constexpr auto function = "CreateDump";

            std::string service = utils::getService(path, interface);
            auto method = bus.new_method_call(service.c_str(), path, interface,
                                              function);

            std::map<std::string, std::variant<std::string, uint64_t>>
                createParams{
                    {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
                     uint64_t(logId)},
                    {"com.ibm.Dump.Create.CreateParameters.DumpType",
                     "com.ibm.Dump.Create.DumpType.SBE"},
                    {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
                     uint64_t(instance)},
                };

            method.append(createParams);

            auto response = bus.call(method);
        }
        catch (const sdbusplus::exception_t& e)
        {
            constexpr auto ERROR_DUMP_DISABLED =
                "xyz.openbmc_project.Dump.Create.Error.Disabled";
            // name() yields a C string; compare the characters, not the
            // pointers, otherwise the "disabled" case is never recognised.
            const char* errName = e.name();
            if ((errName != nullptr) &&
                (std::string(errName) == ERROR_DUMP_DISABLED))
            {
                log<level::INFO>("Dump is disabled, skipping");
            }
            else
            {
                log<level::ERR>("Dump failed");
            }
        }
    }
}
656
657bool Manager::sbeCanDump(unsigned int instance)
658{
659 struct pdbg_target* proc = getPdbgTarget(instance);
660
661 if (!proc)
662 {
663 // allow the dump in the error case
664 return true;
665 }
666
667 try
668 {
669 if (!openpower::phal::sbe::isDumpAllowed(proc))
670 {
671 return false;
672 }
673
674 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
675 {
676 return false;
677 }
678 }
679 catch (openpower::phal::exception::SbeError& e)
680 {
681 log<level::INFO>("Failed to query SBE state");
682 }
683
684 // allow the dump in the error case
685 return true;
686}
687
688void Manager::setSBEState(unsigned int instance, enum sbe_state state)
689{
690 struct pdbg_target* proc = getPdbgTarget(instance);
691
692 if (!proc)
693 {
694 return;
695 }
696
697 try
698 {
699 openpower::phal::sbe::setState(proc, state);
700 }
701 catch (const openpower::phal::exception::SbeError& e)
702 {
703 log<level::ERR>("Failed to set SBE state");
704 }
705}
706
707struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
708{
709 if (!pdbgInitialized)
710 {
711 try
712 {
713 openpower::phal::pdbg::init();
714 pdbgInitialized = true;
715 }
716 catch (const openpower::phal::exception::PdbgError& e)
717 {
718 log<level::ERR>("pdbg initialization failed");
719 return nullptr;
720 }
721 }
722
723 struct pdbg_target* proc = nullptr;
724 pdbg_for_each_class_target("proc", proc)
725 {
726 if (pdbg_target_index(proc) == instance)
727 {
728 return proc;
729 }
730 }
731
732 log<level::ERR>("Failed to get pdbg target");
733 return nullptr;
734}
Tom Joseph815f9f52020-07-27 12:12:13 +0530735#endif
736
Chris Caina8857c52021-01-27 11:53:05 -0600737void Manager::pollerTimerExpired()
738{
Chris Caina8857c52021-01-27 11:53:05 -0600739 if (!_pollTimer)
740 {
741 log<level::ERR>(
742 "Manager::pollerTimerExpired() ERROR: Timer not defined");
743 return;
744 }
745
746 for (auto& obj : statusObjects)
747 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600748 if (!obj->occActive())
749 {
750 // OCC is not running yet
751#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600752 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500753 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600754#endif
755 continue;
756 }
757
Chris Caina8857c52021-01-27 11:53:05 -0600758 // Read sysfs to force kernel to poll OCC
759 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800760
761#ifdef READ_OCC_SENSORS
762 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600763 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800764#endif
Chris Caina8857c52021-01-27 11:53:05 -0600765 }
766
Chris Caina7b74dc2021-11-10 17:03:43 -0600767 if (activeCount > 0)
768 {
769 // Restart OCC poll timer
770 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
771 }
772 else
773 {
774 // No OCCs running, so poll timer will not be restarted
775 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600776 std::format(
Chris Caina7b74dc2021-11-10 17:03:43 -0600777 "Manager::pollerTimerExpired: poll timer will not be restarted")
778 .c_str());
779 }
Chris Caina8857c52021-01-27 11:53:05 -0600780}
781
Chicago Duanbb895cb2021-06-18 19:37:16 +0800782#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600783void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800784{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500785 // There may be more than one sensor with the same FRU type
786 // and label so make two passes: the first to read the temps
787 // from sysfs, and the second to put them on D-Bus after
788 // resolving any conflicts.
789 std::map<std::string, double> sensorData;
790
Chicago Duanbb895cb2021-06-18 19:37:16 +0800791 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
792 for (auto& file : fs::directory_iterator(path))
793 {
794 if (!std::regex_search(file.path().string(), expr))
795 {
796 continue;
797 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800798
Matt Spinlera26f1522021-08-25 15:50:20 -0500799 uint32_t labelValue{0};
800
801 try
802 {
803 labelValue = readFile<uint32_t>(file.path());
804 }
805 catch (const std::system_error& e)
806 {
807 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600808 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500809 file.path().string(), e.code().value())
810 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800811 continue;
812 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800813
814 const std::string& tempLabel = "label";
815 const std::string filePathString = file.path().string().substr(
816 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500817
818 uint32_t fruTypeValue{0};
819 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800820 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500821 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
822 }
823 catch (const std::system_error& e)
824 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800825 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600826 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500827 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800828 .c_str());
829 continue;
830 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800831
Patrick Williamsa49c9872023-05-10 07:50:35 -0500832 std::string sensorPath = OCC_SENSORS_ROOT +
833 std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800834
Matt Spinlerace67d82021-10-18 13:41:57 -0500835 std::string dvfsTempPath;
836
Chicago Duanbb895cb2021-06-18 19:37:16 +0800837 if (fruTypeValue == VRMVdd)
838 {
Chris Cainae157b62024-01-23 16:05:12 -0600839 sensorPath.append("vrm_vdd" + std::to_string(occInstance) +
840 "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800841 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500842 else if (fruTypeValue == processorIoRing)
843 {
Chris Cainae157b62024-01-23 16:05:12 -0600844 sensorPath.append("proc" + std::to_string(occInstance) +
845 "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500846 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600847 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500848 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800849 else
850 {
Matt Spinler14d14022021-08-25 15:38:29 -0500851 uint16_t type = (labelValue & 0xFF000000) >> 24;
852 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800853
854 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
855 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500856 if (fruTypeValue == fruTypeNotAvailable)
857 {
858 // Not all DIMM related temps are available to read
859 // (no _input file in this case)
860 continue;
861 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800862 auto iter = dimmTempSensorName.find(fruTypeValue);
863 if (iter == dimmTempSensorName.end())
864 {
George Liub5ca1012021-09-10 12:53:11 +0800865 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600866 std::format(
George Liub5ca1012021-09-10 12:53:11 +0800867 "readTempSensors: Fru type error! fruTypeValue = {}) ",
868 fruTypeValue)
869 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800870 continue;
871 }
872
873 sensorPath.append("dimm" + std::to_string(instanceID) +
874 iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500875
876 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
877 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800878 }
879 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
880 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500881 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800882 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500883 // The OCC reports small core temps, of which there are
884 // two per big core. All current P10 systems are in big
885 // core mode, so use a big core name.
886 uint16_t coreNum = instanceID / 2;
887 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600888 sensorPath.append("proc" + std::to_string(occInstance) +
889 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500890 std::to_string(tempNum) + "_temp");
891
Chris Cainae157b62024-01-23 16:05:12 -0600892 dvfsTempPath =
893 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
894 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500895 }
896 else
897 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800898 continue;
899 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800900 }
901 else
902 {
903 continue;
904 }
905 }
906
Matt Spinlerace67d82021-10-18 13:41:57 -0500907 // The dvfs temp file only needs to be read once per chip per type.
908 if (!dvfsTempPath.empty() &&
909 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
910 {
911 try
912 {
913 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
914
915 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
916 dvfsTempPath, dvfsValue * std::pow(10, -3));
917 }
918 catch (const std::system_error& e)
919 {
920 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600921 std::format(
Matt Spinlerace67d82021-10-18 13:41:57 -0500922 "readTempSensors: Failed reading {}, errno = {}",
923 filePathString + maxSuffix, e.code().value())
924 .c_str());
925 }
926 }
927
Matt Spinlera26f1522021-08-25 15:50:20 -0500928 uint32_t faultValue{0};
929 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800930 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500931 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
932 }
933 catch (const std::system_error& e)
934 {
935 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600936 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500937 filePathString + faultSuffix, e.code().value())
938 .c_str());
939 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800940 }
941
Chris Cainae157b62024-01-23 16:05:12 -0600942 double tempValue{0};
943 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -0500944 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800945 {
Chris Cainae157b62024-01-23 16:05:12 -0600946 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800947 }
Chris Cainae157b62024-01-23 16:05:12 -0600948 else
Chicago Duanbb895cb2021-06-18 19:37:16 +0800949 {
Chris Cainae157b62024-01-23 16:05:12 -0600950 // Read the temperature
951 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500952 {
Chris Cainae157b62024-01-23 16:05:12 -0600953 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500954 }
Chris Cainae157b62024-01-23 16:05:12 -0600955 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500956 {
Chris Cainae157b62024-01-23 16:05:12 -0600957 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600958 std::format(
Chris Cainae157b62024-01-23 16:05:12 -0600959 "readTempSensors: Failed reading {}, errno = {}",
960 filePathString + inputSuffix, e.code().value())
961 .c_str());
962
963 // if errno == EAGAIN(Resource temporarily unavailable) then set
964 // temp to 0, to avoid using old temp, and affecting FAN
965 // Control.
966 if (e.code().value() == EAGAIN)
967 {
968 tempValue = 0;
969 }
970 // else the errno would be something like
971 // EBADF(Bad file descriptor)
972 // or ENOENT(No such file or directory)
973 else
974 {
975 continue;
976 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500977 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500978 }
979
Matt Spinler818cc8d2023-10-23 11:43:39 -0500980 // If this object path already has a value, only overwite
981 // it if the previous one was an NaN or a smaller value.
982 auto existing = sensorData.find(sensorPath);
983 if (existing != sensorData.end())
984 {
Chris Cainae157b62024-01-23 16:05:12 -0600985 // Multiple sensors found for this FRU type
986 if ((std::isnan(existing->second) && (tempValue == 0)) ||
987 ((existing->second == 0) && std::isnan(tempValue)))
988 {
989 // One of the redundant sensors has failed (0xFF/nan), and the
990 // other sensor has no reading (0), so set the FRU to NaN to
991 // force fan increase
992 tempValue = std::numeric_limits<double>::quiet_NaN();
993 existing->second = tempValue;
994 }
Matt Spinler818cc8d2023-10-23 11:43:39 -0500995 if (std::isnan(existing->second) || (tempValue > existing->second))
996 {
997 existing->second = tempValue;
998 }
999 }
1000 else
1001 {
Chris Cainae157b62024-01-23 16:05:12 -06001002 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001003 sensorData[sensorPath] = tempValue;
1004 }
1005 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001006
Matt Spinler818cc8d2023-10-23 11:43:39 -05001007 // Now publish the values on D-Bus.
1008 for (const auto& [objectPath, value] : sensorData)
1009 {
1010 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1011 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001012
Matt Spinler818cc8d2023-10-23 11:43:39 -05001013 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1014 objectPath, !std::isnan(value));
1015
1016 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001017 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001018 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Matt Spinler818cc8d2023-10-23 11:43:39 -05001019 objectPath);
Chris Cain6fa848a2022-01-24 14:54:38 -06001020 }
1021
Chris Cainae157b62024-01-23 16:05:12 -06001022 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001023 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001024}
1025
1026std::optional<std::string>
1027 Manager::getPowerLabelFunctionID(const std::string& value)
1028{
1029 // If the value is "system", then the FunctionID is "system".
1030 if (value == "system")
1031 {
1032 return value;
1033 }
1034
1035 // If the value is not "system", then the label value have 3 numbers, of
1036 // which we only care about the middle one:
1037 // <sensor id>_<function id>_<apss channel>
1038 // eg: The value is "0_10_5" , then the FunctionID is "10".
1039 if (value.find("_") == std::string::npos)
1040 {
1041 return std::nullopt;
1042 }
1043
1044 auto powerLabelValue = value.substr((value.find("_") + 1));
1045
1046 if (powerLabelValue.find("_") == std::string::npos)
1047 {
1048 return std::nullopt;
1049 }
1050
1051 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1052}
1053
/** Publish the power sensors found in an OCC hwmon directory on D-Bus.
 *
 *  Every file matching powerN_label is read to obtain a function ID, which
 *  is looked up in powerSensorName to get the D-Bus sensor name.  The
 *  matching powerN_input file supplies the reading.  Entries that cannot
 *  be read or mapped are skipped.
 *
 *  @param[in] path - directory holding the powerN_* files
 *  @param[in] id   - OCC instance recorded as the owner of each sensor
 */
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    // Example match: power5_label
    const std::regex labelPattern{"power\\d+_label$"};
    // Trailing text removed from a label path to get the common prefix
    const std::string labelSuffix{"label"};

    for (const auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelPattern))
        {
            continue;
        }

        std::string labelText;
        try
        {
            labelText = readFile<std::string>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        const auto functionID = getPowerLabelFunctionID(labelText);
        if (!functionID.has_value())
        {
            continue;
        }

        // Only function IDs with a known sensor name are published
        const auto nameEntry = powerSensorName.find(*functionID);
        if (nameEntry == powerSensorName.end())
        {
            continue;
        }

        // "<dir>/power5_label" -> "<dir>/power5_"
        const std::string filePrefix =
            labelFile.substr(0, labelFile.length() - labelSuffix.length());

        std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
        sensorPath.append(nameEntry->second);

        double rawPower{0};
        try
        {
            rawPower = readFile<double>(filePrefix + inputSuffix);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            filePrefix + inputSuffix, e.code().value())
                    .c_str());
            continue;
        }

        dbus::OccDBusSensors::getOccDBus().setUnit(
            sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        // The raw reading is scaled down by 10^-3 twice before publishing
        const double scaledPower = rawPower * std::pow(10, -3) *
                                   std::pow(10, -3);
        dbus::OccDBusSensors::getOccDBus().setValue(sensorPath, scaledPower);

        dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
                                                                true);

        // The chassis association is only created the first time a sensor
        // path is seen
        const bool firstSighting = existingSensors.find(sensorPath) ==
                                   existingSensors.end();
        if (firstSighting)
        {
            dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
                sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}
1131
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001132void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001133{
1134 for (const auto& [sensorPath, occId] : existingSensors)
1135 {
1136 if (occId == id)
1137 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001138 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001139 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001140
1141 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1142 true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001143 }
1144 }
1145 return;
1146}
1147
Sheldon Bailey373af752022-02-21 15:14:00 -06001148void Manager::setSensorValueToNonFunctional(uint32_t id) const
1149{
1150 for (const auto& [sensorPath, occId] : existingSensors)
1151 {
1152 if (occId == id)
1153 {
1154 dbus::OccDBusSensors::getOccDBus().setValue(
1155 sensorPath, std::numeric_limits<double>::quiet_NaN());
1156
1157 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1158 false);
1159 }
1160 }
1161 return;
1162}
1163
/** Read and publish the sensors for one OCC.
 *
 *  When the OCC's hwmon path exists the temperature sensors are read, and
 *  the power sensors as well if this OCC is the master.  When the path is
 *  missing an error is traced, but only once per OCC until the path shows
 *  up again.
 *
 *  @param[in] occ - status object of the OCC to read
 */
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    // One "missing path already traced" flag per OCC instance.  The extra
    // final slot is shared by any instance ID that falls outside the
    // expected range so that such an ID can never index past the array.
    constexpr uint32_t maxTrackedOccs = 8;
    static bool tracedError[maxTrackedOccs + 1] = {false};

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();
    const uint32_t slot = (id < maxTrackedOccs) ? id : maxTrackedOccs;

    if (fs::exists(sensorPath))
    {
        // Read temperature sensors
        readTempSensors(sensorPath, id);

        if (occ->isMasterOcc())
        {
            // Read power sensors
            readPowerSensors(sensorPath, id);
        }

        // Path is present again, so re-arm the trace for this OCC
        tracedError[slot] = false;
    }
    else
    {
        if (!tracedError[slot])
        {
            log<level::ERR>(
                std::format(
                    "Manager::getSensorValues: OCC{} sensor path missing: {}",
                    id, sensorPath.c_str())
                    .c_str());
            tracedError[slot] = true;
        }
    }

    return;
}
1197#endif
Chris Cain17257672021-10-22 13:41:03 -05001198
// Fetch the altitude property from D-Bus and cache it in 'altitude'.
// Readings below 0xFFFF are clamped at 0 and rounded to the nearest meter.
// If the D-Bus read throws, altitude is set to 0xFFFF (not available).
void Manager::readAltitude()
{
    // True while the next failure should be traced; cleared after tracing
    // and set again by the next good reading
    static bool traceAltitudeErr = true;

    utils::PropertyValue altitudeProperty{};
    try
    {
        altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
                                              ALTITUDE_PROP);
        const auto sensorVal = std::get<double>(altitudeProperty);

        if (!(sensorVal < 0xFFFF))
        {
            // Reading is out of range; the cached altitude is left as is
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
            return;
        }

        if (sensorVal >= 0)
        {
            // Round to nearest meter
            altitude = static_cast<uint16_t>(sensorVal + 0.5);
        }
        else
        {
            altitude = 0;
        }

        log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                      sensorVal, altitude)
                              .c_str());
        traceAltitudeErr = true;
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}
1248
// Invoked when the ambient temperature property changes.
// The new reading is reduced to a whole number of degrees C (0xFF when the
// reading is NaN) and, if that differs from the cached ambient value, the
// cache is updated and the OCCs are notified.
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    const auto valPropMap = msgData.find(AMBIENT_PROP);
    if (valPropMap == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }

    const double currentTemp = std::get<double>(valPropMap->second);

    // Stays at 0xFF when the reading is NaN
    uint8_t truncatedTemp = 0xFF;
    if (!std::isnan(currentTemp))
    {
        // Negative readings become 0, everything else is rounded to the
        // nearest degree C
        truncatedTemp = (currentTemp < 0)
                            ? uint8_t{0}
                            : static_cast<uint8_t>(currentTemp + 0.5);
    }

    // Nothing to do unless the whole-degree value changed
    if (truncatedTemp == ambient)
    {
        return;
    }

    log<level::DEBUG>(
        std::format("ambientCallback: Ambient change from {} to {}C", ambient,
                    currentTemp)
            .c_str());

    ambient = truncatedTemp;
    if (altitude == 0xFFFF)
    {
        // No altitude yet, try reading again
        readAltitude();
    }

    log<level::DEBUG>(
        std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
                    altitude)
            .c_str());
#ifdef POWER10
    // Send ambient and altitude to all active OCCs
    for (auto& obj : statusObjects)
    {
        if (obj->occActive())
        {
            obj->sendAmbient(ambient, altitude);
        }
    }
#endif // POWER10
}
1313
1314// return the current ambient and altitude readings
1315void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1316 uint16_t& altitudeValue) const
1317{
1318 ambientValid = true;
1319 ambientTemp = ambient;
1320 altitudeValue = altitude;
1321
1322 if (ambient == 0xFF)
1323 {
1324 ambientValid = false;
1325 }
1326}
1327
Chris Caina7b74dc2021-11-10 17:03:43 -06001328#ifdef POWER10
// Handler for expiration of waitForAllOccsTimer.
// That timer is started (60 seconds) after the first OCC goes active.
void Manager::occsNotAllRunning()
{
    const auto expectedCount = statusObjects.size();
    if (activeCount != expectedCount)
    {
        // Some OCCs never went active.  Procs may be garded, so this is
        // not necessarily a problem; trace it and carry on.
        log<level::WARNING>(
            std::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, expectedCount)
                .c_str());
    }

    validateOccMaster();
}
Chris Cain755af102024-02-27 16:09:51 -06001346
1347#ifdef PLDM
1348// Called when throttleTraceTimer expires.
1349// If this timer expires, that indicates there is still no confirmed OCC status
1350// which will trigger pldm traces to be throttled.
1351void Manager::throttleTraceExpired()
1352{
1353 // Throttle traces
1354 pldmHandle->setTraceThrottle(true);
Chris Cain4b82f3e2024-04-22 14:44:29 -05001355 // Create PEL
1356 createPldmSensorPEL();
1357}
1358
// Create a Warning severity PEL for MISSING_OCC_SENSORS_PATH, with the
// occ-control journal entries attached as FFDC.  A D-Bus failure while
// creating the PEL is logged and otherwise ignored.
void Manager::createPldmSensorPEL()
{
    namespace LogServer = sdbusplus::xyz::openbmc_project::Logging::server;

    static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
    static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";

    auto d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
    std::map<std::string, std::string> additionalData{
        {"_PID", std::to_string(getpid())}};

    log<level::INFO>(
        "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");

    auto& bus = utils::getBus();

    try
    {
        FFDCFiles ffdc;
        // Add occ-control journal traces to PEL FFDC.  The returned object
        // is kept in scope until the method call below has completed.
        auto occJournalFile =
            FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);

        std::string service = utils::getService(loggingObjectPath,
                                                opLoggingInterface);
        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
                                          opLoggingInterface,
                                          "CreatePELWithFFDCFiles");

        // Set level to Warning (Predictive).
        auto severity =
            LogServer::convertForMessage(LogServer::Entry::Level::Warning);

        method.append(d.path, severity, additionalData, ffdc);
        bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create MISSING_OCC_SENSORS PEL: {}",
                        e.what())
                .c_str());
    }
}
1406#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001407#endif // POWER10
1408
// Verify single master OCC and start presence monitor.
//
// Walks every status object.  On POWER10, an OCC that is not yet active is
// marked active if its state was queued while waiting for discovery,
// otherwise (with PLDM) its active sensor is checked; if the host is not
// running the function returns immediately.  Each master OCC gets a
// presence watch; a second master, or no master at all, results in a
// reset request via deviceError().
void Manager::validateOccMaster()
{
    int masterInstance = -1;
    for (auto& obj : statusObjects)
    {
        auto instance = obj->getOccInstanceID();
#ifdef POWER10
        if (!obj->occActive())
        {
            if (utils::isHostRunning())
            {
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    log<level::INFO>(
                        std::format(
                            "validateOccMaster: OCC{} is ACTIVE (queued)",
                            instance)
                            .c_str());
                    obj->occActive(true);
                }
                else
                {
                    // OCC does not appear to be active yet, check active sensor
#ifdef PLDM
                    pldmHandle->checkActiveSensor(instance);
#endif
                    if (obj->occActive())
                    {
                        log<level::INFO>(
                            std::format(
                                "validateOccMaster: OCC{} is ACTIVE after reading sensor",
                                instance)
                                .c_str());
                    }
                }
            }
            else
            {
                log<level::WARNING>(
                    std::format(
                        "validateOccMaster: HOST is not running (OCC{})",
                        instance)
                        .c_str());
                return;
            }
        }
#endif // POWER10

        if (obj->isMasterOcc())
        {
            obj->addPresenceWatchMaster();

            if (masterInstance == -1)
            {
                masterInstance = instance;
            }
            else
            {
                log<level::ERR>(
                    std::format(
                        "validateOccMaster: Multiple OCC masters! ({} and {})",
                        masterInstance, instance)
                        .c_str());
                // request reset
                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
            }
        }
    }

    if (masterInstance < 0)
    {
        log<level::ERR>(
            std::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
                        statusObjects.size())
                .c_str());
        // request reset; with no status objects at all there is nothing to
        // request it from, and calling front() would be undefined behavior
        if (!statusObjects.empty())
        {
            statusObjects.front()->deviceError(
                Error::Descriptor(PRESENCE_ERROR_PATH));
        }
    }
    else
    {
        log<level::INFO>(
            std::format("validateOccMaster: OCC{} is master of {} OCCs",
                        masterInstance, activeCount)
                .c_str());
#ifdef POWER10
        pmode->updateDbusSafeMode(false);
#endif
    }
}
1503
Chris Cain40501a22022-03-14 17:33:27 -05001504void Manager::updatePcapBounds() const
1505{
1506 if (pcap)
1507 {
1508 pcap->updatePcapBounds();
1509 }
1510}
1511
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301512} // namespace occ
1513} // namespace open_power