blob: 630d85d0d14b24eb79c26971d0c0c99bd8d4436f [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature
// (no _input file exists for such sensors).
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// Suffixes appended to a sensor's sysfs file prefix to select one of its
// attribute files (e.g. "<prefix>fru_type").
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// While this file exists, findAndCreateObjects() postpones OCC discovery
// and retries later.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read one value of type T from the start of a file.
 *
 * The stream throws on failbit, badbit and eofbit. Any exception raised
 * while opening, extracting or closing is converted into a
 * std::system_error. Because eofbit is part of the exception mask, a value
 * that runs up to end-of-file without trailing whitespace is also reported
 * as a failure.
 *
 * @tparam T        Type that can be extracted with operator>>.
 * @param[in] path  File to read.
 *
 * @return The extracted value.
 *
 * @throws std::system_error (generic_category) carrying errno when a system
 *         call failed, or EIO when the stream failed without setting errno
 *         (for example unparsable contents).
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data{};

    // Clear errno first so that a stale value left by unrelated code is
    // never reported as the reason this read failed.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Pure stream failures leave errno at 0; never throw an error that
        // carries a "success" code.
        const auto err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain613dc902022-04-08 09:56:22 -050068 if (!pmode)
69 {
70 // Create the power mode object
71 pmode = std::make_unique<powermode::PowerMode>(
72 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
73 }
74
Chris Cain1718fd82022-02-16 16:39:50 -060075 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050076 {
Chris Cainbae4d072022-02-28 09:46:50 -060077 static bool statusObjCreated = false;
78 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060079 {
Chris Cainbae4d072022-02-28 09:46:50 -060080 // Create the OCCs based on on the /dev/occX devices
81 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060082
Chris Cainbae4d072022-02-28 09:46:50 -060083 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060084 {
Chris Cainbae4d072022-02-28 09:46:50 -060085 // Something changed or no OCCs yet, try again in 10s.
86 // Note on the first pass prevOCCSearch will be empty,
87 // so there will be at least one delay to give things
88 // a chance to settle.
89 prevOCCSearch = occs;
90
91 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -060092 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -060093 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
94 occs.size())
95 .c_str());
96
97 discoverTimer->restartOnce(10s);
98 }
99 else
100 {
101 // All OCCs appear to be available, create status objects
102
103 // createObjects requires OCC0 first.
104 std::sort(occs.begin(), occs.end());
105
106 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600107 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -0600108 "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
109 occs.size())
110 .c_str());
111 for (auto id : occs)
112 {
113 createObjects(std::string(OCC_NAME) + std::to_string(id));
114 }
115 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500116 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500117
118 // Find/update the processor path associated with each OCC
119 for (auto& obj : statusObjects)
120 {
121 obj->updateProcAssociation();
122 }
Chris Cainbae4d072022-02-28 09:46:50 -0600123 }
124 }
125
Chris Cain6d8f37a2022-04-29 13:46:01 -0500126 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600127 {
128 static bool tracedHostWait = false;
129 if (utils::isHostRunning())
130 {
131 if (tracedHostWait)
132 {
133 log<level::INFO>(
134 "Manager::findAndCreateObjects(): Host is running");
135 tracedHostWait = false;
136 }
Chris Cainbae4d072022-02-28 09:46:50 -0600137 checkAllActiveSensors();
138 }
139 else
140 {
141 if (!tracedHostWait)
142 {
143 log<level::INFO>(
144 "Manager::findAndCreateObjects(): Waiting for host to start");
145 tracedHostWait = true;
146 }
147 discoverTimer->restartOnce(30s);
Chris Cain1718fd82022-02-16 16:39:50 -0600148 }
149 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500150 }
151 else
152 {
Chris Cain1718fd82022-02-16 16:39:50 -0600153 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600154 std::format(
Chris Cain1718fd82022-02-16 16:39:50 -0600155 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
156 HOST_ON_FILE)
157 .c_str());
158 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500159 }
160#endif
161}
162
#ifdef POWER10
/**
 * @brief Check if all occActive sensors are available.
 *
 * While the host is running, every status object that is neither active nor
 * has received its PLDM sensor is examined. An instance found in
 * queuedActiveState is marked active; the first instance that is still
 * missing stops the scan. If anything is missing, discoverTimer is re-armed
 * for 10s; otherwise the discovery (and, with PLDM, throttle) timers are
 * disabled and the queued state is cleared.
 *
 * The function-local statics keep their values between calls, so the result
 * of the last scan is reused when the host is not running.
 */
void Manager::checkAllActiveSensors()
{
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (!open_power::occ::utils::isHostRunning())
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for host to start");
        }
    }
    else
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            log<level::INFO>("checkAllActiveSensors(): Host is now running");
        }

        // Start with the assumption that all are available
        allActiveSensorAvailable = true;
        for (auto& status : statusObjects)
        {
            if (status->occActive() || status->getPldmSensorReceived())
            {
                continue;
            }

            auto instance = status->getOccInstanceID();
            // Check if sensor was queued while waiting for discovery
            auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                queuedActiveState.erase(queued);
                log<level::INFO>(
                    std::format(
                        "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
                        instance)
                        .c_str());
                status->occActive(true);
                continue;
            }

            // This sensor is still missing; stop at the first one found
            allActiveSensorAvailable = false;
            if (!tracedSensorWait)
            {
                log<level::INFO>(
                    std::format(
                        "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
                        instance)
                        .c_str());
                tracedSensorWait = true;
                // Make sure traces are not throttled
#ifdef PLDM
                pldmHandle->setTraceThrottle(false);
                // Start timer to throttle pldm traces when timer
                // expires
                throttleTraceTimer->restartOnce(5min);
#endif
            }
#ifdef PLDM
            pldmHandle->checkActiveSensor(status->getOccInstanceID());
#endif
            break;
        }
    }

    if (!allActiveSensorAvailable)
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
        return;
    }

    // All sensors were found, disable the discovery timer
    if (discoverTimer->isEnabled())
    {
        discoverTimer->setEnabled(false);
    }
#ifdef PLDM
    if (throttleTraceTimer->isEnabled())
    {
        // Disable throttle timer and make sure traces are not throttled
        throttleTraceTimer->setEnabled(false);
        pldmHandle->setTraceThrottle(false);
    }
#endif

    if (waitingForAllOccActiveSensors)
    {
        log<level::INFO>(
            "checkAllActiveSensors(): OCC Active sensors are available");
        waitingForAllOccActiveSensors = false;
    }
    queuedActiveState.clear();
    tracedSensorWait = false;
}
#endif
273
Matt Spinlerd267cec2021-09-01 14:49:19 -0500274std::vector<int> Manager::findOCCsInDev()
275{
276 std::vector<int> occs;
277 std::regex expr{R"(occ(\d+)$)"};
278
279 for (auto& file : fs::directory_iterator("/dev"))
280 {
281 std::smatch match;
282 std::string path{file.path().string()};
283 if (std::regex_search(path, match, expr))
284 {
285 auto num = std::stoi(match[1].str());
286
287 // /dev numbering starts at 1, ours starts at 0.
288 occs.push_back(num - 1);
289 }
290 }
291
292 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530293}
294
Patrick Williamsaf408082022-07-22 19:26:54 -0500295int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530296{
George Liubcef3b42021-09-10 12:39:02 +0800297 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530298
299 sdbusplus::message::object_path o;
300 msg.read(o);
301 fs::path cpuPath(std::string(std::move(o)));
302
303 auto name = cpuPath.filename().string();
304 auto index = name.find(CPU_NAME);
305 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
306
307 createObjects(name);
308
309 return 0;
310}
311
312void Manager::createObjects(const std::string& occ)
313{
314 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
315
Gunnar Mills94df8c92018-09-14 14:50:03 -0500316 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800317 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600318#ifdef POWER10
319 pmode,
320#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500321 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600322 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530323#ifdef PLDM
324 ,
325 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
326 std::placeholders::_1)
327#endif
328 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530329
Chris Cain40501a22022-03-14 17:33:27 -0500330 // Create the power cap monitor object
331 if (!pcap)
332 {
333 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
334 *statusObjects.back());
335 }
336
Chris Cain36f9cde2021-11-22 11:18:21 -0600337 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530338 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600339 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600340 std::format("Manager::createObjects(): OCC{} is the master",
Chris Cain36f9cde2021-11-22 11:18:21 -0600341 statusObjects.back()->getOccInstanceID())
342 .c_str());
343 _pollTimer->setEnabled(false);
344
Chris Cain78e86012021-03-04 16:15:31 -0600345#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600346 // Set the master OCC on the PowerMode object
347 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600348#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600349 }
350
351 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
352#ifdef POWER10
353 ,
354 pmode
355#endif
356 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530357}
358
Sheldon Bailey373af752022-02-21 15:14:00 -0600359void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530360{
Chris Caina7b74dc2021-11-10 17:03:43 -0600361 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600362 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600363 // OCC went active
364 ++activeCount;
365
366#ifdef POWER10
367 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600368 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600369 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500370 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500371 }
372#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600373
374 if (activeCount == statusObjects.size())
375 {
376#ifdef POWER10
377 // All OCCs are now running
378 if (waitForAllOccsTimer->isEnabled())
379 {
380 // stop occ wait timer
381 waitForAllOccsTimer->setEnabled(false);
382 }
383#endif
384
385 // Verify master OCC and start presence monitor
386 validateOccMaster();
387 }
388
389 // Start poll timer if not already started
390 if (!_pollTimer->isEnabled())
391 {
392 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600393 std::format("Manager: OCCs will be polled every {} seconds",
Chris Cain36f9cde2021-11-22 11:18:21 -0600394 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600395 .c_str());
396
397 // Send poll and start OCC poll timer
398 pollerTimerExpired();
399 }
400 }
401 else
402 {
403 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500404 if (activeCount > 0)
405 {
406 --activeCount;
407 }
408 else
409 {
410 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600411 std::format("OCC{} disabled, but currently no active OCCs",
Chris Cain082a6ca2023-03-21 10:27:26 -0500412 instance)
413 .c_str());
414 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600415
416 if (activeCount == 0)
417 {
418 // No OCCs are running
419
420 // Stop OCC poll timer
421 if (_pollTimer->isEnabled())
422 {
423 log<level::INFO>(
424 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
425 _pollTimer->setEnabled(false);
426 }
427
428#ifdef POWER10
429 // stop wait timer
430 if (waitForAllOccsTimer->isEnabled())
431 {
432 waitForAllOccsTimer->setEnabled(false);
433 }
434#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600435 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600436#ifdef READ_OCC_SENSORS
437 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500438 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600439#endif
Chris Caina8857c52021-01-27 11:53:05 -0600440 }
Chris Cainbae4d072022-02-28 09:46:50 -0600441
442#ifdef POWER10
443 if (waitingForAllOccActiveSensors)
444 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500445 if (utils::isHostRunning())
446 {
447 checkAllActiveSensors();
448 }
Chris Cainbae4d072022-02-28 09:46:50 -0600449 }
450#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530451}
452
#ifdef I2C_OCC
/**
 * @brief Create a Status object for every OCC hwmon device (I2C builds).
 *
 * Each device name returned by i2c_occ::getOccHwmonDevices() is converted
 * in place to its D-Bus form and prefixed with OCC_NAME. The first device
 * is treated as the master OCC: it backs the power cap object and, with
 * POWER10, is set as the master on the PowerMode object.
 *
 * If no device is found, an error is logged and neither the power cap
 * object nor the master OCC is set up.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }

    if (statusObjects.empty())
    {
        // front() on an empty container is undefined behaviour
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC hwmon devices found");
    }
    else
    {
        // The first device is master occ
        pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
            *statusObjects.front());
    }
#ifdef POWER10
    // NOTE(review): findAndCreateObjects() passes `event` as a fourth
    // constructor argument; confirm this three-argument form is valid.
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    if (!deviceNames.empty())
    {
        // Set the master OCC on the PowerMode object. The names were
        // rewritten in place above, so the first entry is the master's
        // D-Bus name.
        pmode->setMasterOcc(fs::path(OCC_CONTROL_ROOT) / deviceNames.front());
    }
#endif
}
#endif
479
Tom Joseph815f9f52020-07-27 12:12:13 +0530480#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500481void Manager::sbeTimeout(unsigned int instance)
482{
Eddie James2a751d72022-03-04 09:16:12 -0600483 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
484 [instance](const auto& obj) {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500485 return instance == obj->getOccInstanceID();
486 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500487
Eddie Jamescb018da2022-03-05 11:49:37 -0600488 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600489 {
Chris Cainbae4d072022-02-28 09:46:50 -0600490 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600491 std::format("SBE timeout, requesting HRESET (OCC{})", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600492 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500493
Eddie James2a751d72022-03-04 09:16:12 -0600494 setSBEState(instance, SBE_STATE_NOT_USABLE);
495
496 pldmHandle->sendHRESET(instance);
497 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500498}
499
Tom Joseph815f9f52020-07-27 12:12:13 +0530500bool Manager::updateOCCActive(instanceID instance, bool status)
501{
Chris Cain7e374fb2022-04-07 09:47:23 -0500502 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
503 [instance](const auto& obj) {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500504 return instance == obj->getOccInstanceID();
505 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500506
Chris Cain082a6ca2023-03-21 10:27:26 -0500507 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500508 if (obj != statusObjects.end())
509 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500510 if (!hostRunning && (status == true))
511 {
512 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600513 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500514 "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
515 instance, status)
516 .c_str());
517 (*obj)->setPldmSensorReceived(false);
518 if (!waitingForAllOccActiveSensors)
519 {
520 log<level::INFO>(
521 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
522 waitingForAllOccActiveSensors = true;
523 }
Chris Cain755af102024-02-27 16:09:51 -0600524#ifdef POWER10
Chris Cain082a6ca2023-03-21 10:27:26 -0500525 discoverTimer->restartOnce(30s);
Chris Cain755af102024-02-27 16:09:51 -0600526#endif
Chris Cain082a6ca2023-03-21 10:27:26 -0500527 return false;
528 }
529 else
530 {
Patrick Williams48002492024-02-13 21:43:32 -0600531 log<level::INFO>(std::format("updateOCCActive: OCC{} active={}",
Chris Cain082a6ca2023-03-21 10:27:26 -0500532 instance, status)
533 .c_str());
534 (*obj)->setPldmSensorReceived(true);
535 return (*obj)->occActive(status);
536 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500537 }
538 else
539 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500540 if (hostRunning)
541 {
542 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600543 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500544 "updateOCCActive: No status object to update for OCC{} (active={})",
545 instance, status)
546 .c_str());
547 }
548 else
549 {
550 if (status == true)
551 {
552 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600553 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500554 "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
555 instance, status)
556 .c_str());
557 }
558 }
Chris Cainbd551de2022-04-26 13:41:16 -0500559 if (status == true)
560 {
561 // OCC went active
562 queuedActiveState.insert(instance);
563 }
564 else
565 {
566 auto match = queuedActiveState.find(instance);
567 if (match != queuedActiveState.end())
568 {
569 // OCC was disabled
570 queuedActiveState.erase(match);
571 }
572 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500573 return false;
574 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530575}
Eddie Jamescbad2192021-10-07 09:39:39 -0500576
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500577// Called upon pldm event To set powermode Safe Mode State for system.
578void Manager::updateOccSafeMode(bool safeMode)
579{
580#ifdef POWER10
581 pmode->updateDbusSafeMode(safeMode);
582#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500583 // Update the processor throttle status on dbus
584 for (auto& obj : statusObjects)
585 {
586 obj->updateThrottle(safeMode, THROTTLED_SAFE);
587 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500588}
589
Eddie Jamescbad2192021-10-07 09:39:39 -0500590void Manager::sbeHRESETResult(instanceID instance, bool success)
591{
592 if (success)
593 {
Chris Cainbae4d072022-02-28 09:46:50 -0600594 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600595 std::format("HRESET succeeded (OCC{})", instance).c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500596
597 setSBEState(instance, SBE_STATE_BOOTED);
598
599 return;
600 }
601
602 setSBEState(instance, SBE_STATE_FAILED);
603
604 if (sbeCanDump(instance))
605 {
Chris Cainbae4d072022-02-28 09:46:50 -0600606 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600607 std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600608 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500609
610 auto& bus = utils::getBus();
611 uint32_t src6 = instance << 16;
612 uint32_t logId =
613 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
614 src6, "SBE command timeout");
615
616 try
617 {
George Liuf3a4a692021-12-28 13:59:51 +0800618 constexpr auto path = "/org/openpower/dump";
619 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
620 constexpr auto function = "CreateDump";
621
Eddie Jamescbad2192021-10-07 09:39:39 -0500622 std::string service = utils::getService(path, interface);
Patrick Williamsa49c9872023-05-10 07:50:35 -0500623 auto method = bus.new_method_call(service.c_str(), path, interface,
624 function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500625
626 std::map<std::string, std::variant<std::string, uint64_t>>
627 createParams{
628 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
629 uint64_t(logId)},
630 {"com.ibm.Dump.Create.CreateParameters.DumpType",
631 "com.ibm.Dump.Create.DumpType.SBE"},
632 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
633 uint64_t(instance)},
634 };
635
636 method.append(createParams);
637
638 auto response = bus.call(method);
639 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500640 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500641 {
642 constexpr auto ERROR_DUMP_DISABLED =
643 "xyz.openbmc_project.Dump.Create.Error.Disabled";
644 if (e.name() == ERROR_DUMP_DISABLED)
645 {
646 log<level::INFO>("Dump is disabled, skipping");
647 }
648 else
649 {
650 log<level::ERR>("Dump failed");
651 }
652 }
653 }
654}
655
656bool Manager::sbeCanDump(unsigned int instance)
657{
658 struct pdbg_target* proc = getPdbgTarget(instance);
659
660 if (!proc)
661 {
662 // allow the dump in the error case
663 return true;
664 }
665
666 try
667 {
668 if (!openpower::phal::sbe::isDumpAllowed(proc))
669 {
670 return false;
671 }
672
673 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
674 {
675 return false;
676 }
677 }
678 catch (openpower::phal::exception::SbeError& e)
679 {
680 log<level::INFO>("Failed to query SBE state");
681 }
682
683 // allow the dump in the error case
684 return true;
685}
686
687void Manager::setSBEState(unsigned int instance, enum sbe_state state)
688{
689 struct pdbg_target* proc = getPdbgTarget(instance);
690
691 if (!proc)
692 {
693 return;
694 }
695
696 try
697 {
698 openpower::phal::sbe::setState(proc, state);
699 }
700 catch (const openpower::phal::exception::SbeError& e)
701 {
702 log<level::ERR>("Failed to set SBE state");
703 }
704}
705
706struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
707{
708 if (!pdbgInitialized)
709 {
710 try
711 {
712 openpower::phal::pdbg::init();
713 pdbgInitialized = true;
714 }
715 catch (const openpower::phal::exception::PdbgError& e)
716 {
717 log<level::ERR>("pdbg initialization failed");
718 return nullptr;
719 }
720 }
721
722 struct pdbg_target* proc = nullptr;
723 pdbg_for_each_class_target("proc", proc)
724 {
725 if (pdbg_target_index(proc) == instance)
726 {
727 return proc;
728 }
729 }
730
731 log<level::ERR>("Failed to get pdbg target");
732 return nullptr;
733}
Tom Joseph815f9f52020-07-27 12:12:13 +0530734#endif
735
Chris Caina8857c52021-01-27 11:53:05 -0600736void Manager::pollerTimerExpired()
737{
Chris Caina8857c52021-01-27 11:53:05 -0600738 if (!_pollTimer)
739 {
740 log<level::ERR>(
741 "Manager::pollerTimerExpired() ERROR: Timer not defined");
742 return;
743 }
744
745 for (auto& obj : statusObjects)
746 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600747 if (!obj->occActive())
748 {
749 // OCC is not running yet
750#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600751 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500752 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600753#endif
754 continue;
755 }
756
Chris Caina8857c52021-01-27 11:53:05 -0600757 // Read sysfs to force kernel to poll OCC
758 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800759
760#ifdef READ_OCC_SENSORS
761 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600762 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800763#endif
Chris Caina8857c52021-01-27 11:53:05 -0600764 }
765
Chris Caina7b74dc2021-11-10 17:03:43 -0600766 if (activeCount > 0)
767 {
768 // Restart OCC poll timer
769 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
770 }
771 else
772 {
773 // No OCCs running, so poll timer will not be restarted
774 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600775 std::format(
Chris Caina7b74dc2021-11-10 17:03:43 -0600776 "Manager::pollerTimerExpired: poll timer will not be restarted")
777 .c_str());
778 }
Chris Caina8857c52021-01-27 11:53:05 -0600779}
780
Chicago Duanbb895cb2021-06-18 19:37:16 +0800781#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600782void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800783{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500784 // There may be more than one sensor with the same FRU type
785 // and label so make two passes: the first to read the temps
786 // from sysfs, and the second to put them on D-Bus after
787 // resolving any conflicts.
788 std::map<std::string, double> sensorData;
789
Chicago Duanbb895cb2021-06-18 19:37:16 +0800790 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
791 for (auto& file : fs::directory_iterator(path))
792 {
793 if (!std::regex_search(file.path().string(), expr))
794 {
795 continue;
796 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800797
Matt Spinlera26f1522021-08-25 15:50:20 -0500798 uint32_t labelValue{0};
799
800 try
801 {
802 labelValue = readFile<uint32_t>(file.path());
803 }
804 catch (const std::system_error& e)
805 {
806 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600807 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500808 file.path().string(), e.code().value())
809 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800810 continue;
811 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800812
813 const std::string& tempLabel = "label";
814 const std::string filePathString = file.path().string().substr(
815 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500816
817 uint32_t fruTypeValue{0};
818 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800819 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500820 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
821 }
822 catch (const std::system_error& e)
823 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800824 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600825 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500826 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800827 .c_str());
828 continue;
829 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800830
Patrick Williamsa49c9872023-05-10 07:50:35 -0500831 std::string sensorPath = OCC_SENSORS_ROOT +
832 std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800833
Matt Spinlerace67d82021-10-18 13:41:57 -0500834 std::string dvfsTempPath;
835
Chicago Duanbb895cb2021-06-18 19:37:16 +0800836 if (fruTypeValue == VRMVdd)
837 {
Chris Cainae157b62024-01-23 16:05:12 -0600838 sensorPath.append("vrm_vdd" + std::to_string(occInstance) +
839 "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800840 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500841 else if (fruTypeValue == processorIoRing)
842 {
Chris Cainae157b62024-01-23 16:05:12 -0600843 sensorPath.append("proc" + std::to_string(occInstance) +
844 "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500845 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600846 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500847 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800848 else
849 {
Matt Spinler14d14022021-08-25 15:38:29 -0500850 uint16_t type = (labelValue & 0xFF000000) >> 24;
851 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800852
853 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
854 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500855 if (fruTypeValue == fruTypeNotAvailable)
856 {
857 // Not all DIMM related temps are available to read
858 // (no _input file in this case)
859 continue;
860 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800861 auto iter = dimmTempSensorName.find(fruTypeValue);
862 if (iter == dimmTempSensorName.end())
863 {
George Liub5ca1012021-09-10 12:53:11 +0800864 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600865 std::format(
George Liub5ca1012021-09-10 12:53:11 +0800866 "readTempSensors: Fru type error! fruTypeValue = {}) ",
867 fruTypeValue)
868 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800869 continue;
870 }
871
872 sensorPath.append("dimm" + std::to_string(instanceID) +
873 iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500874
875 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
876 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800877 }
878 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
879 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500880 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800881 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500882 // The OCC reports small core temps, of which there are
883 // two per big core. All current P10 systems are in big
884 // core mode, so use a big core name.
885 uint16_t coreNum = instanceID / 2;
886 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600887 sensorPath.append("proc" + std::to_string(occInstance) +
888 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500889 std::to_string(tempNum) + "_temp");
890
Chris Cainae157b62024-01-23 16:05:12 -0600891 dvfsTempPath =
892 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
893 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500894 }
895 else
896 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800897 continue;
898 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800899 }
900 else
901 {
902 continue;
903 }
904 }
905
Matt Spinlerace67d82021-10-18 13:41:57 -0500906 // The dvfs temp file only needs to be read once per chip per type.
907 if (!dvfsTempPath.empty() &&
908 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
909 {
910 try
911 {
912 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
913
914 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
915 dvfsTempPath, dvfsValue * std::pow(10, -3));
916 }
917 catch (const std::system_error& e)
918 {
919 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600920 std::format(
Matt Spinlerace67d82021-10-18 13:41:57 -0500921 "readTempSensors: Failed reading {}, errno = {}",
922 filePathString + maxSuffix, e.code().value())
923 .c_str());
924 }
925 }
926
Matt Spinlera26f1522021-08-25 15:50:20 -0500927 uint32_t faultValue{0};
928 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800929 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500930 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
931 }
932 catch (const std::system_error& e)
933 {
934 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600935 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500936 filePathString + faultSuffix, e.code().value())
937 .c_str());
938 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800939 }
940
Chris Cainae157b62024-01-23 16:05:12 -0600941 double tempValue{0};
942 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -0500943 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800944 {
Chris Cainae157b62024-01-23 16:05:12 -0600945 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800946 }
Chris Cainae157b62024-01-23 16:05:12 -0600947 else
Chicago Duanbb895cb2021-06-18 19:37:16 +0800948 {
Chris Cainae157b62024-01-23 16:05:12 -0600949 // Read the temperature
950 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500951 {
Chris Cainae157b62024-01-23 16:05:12 -0600952 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500953 }
Chris Cainae157b62024-01-23 16:05:12 -0600954 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500955 {
Chris Cainae157b62024-01-23 16:05:12 -0600956 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600957 std::format(
Chris Cainae157b62024-01-23 16:05:12 -0600958 "readTempSensors: Failed reading {}, errno = {}",
959 filePathString + inputSuffix, e.code().value())
960 .c_str());
961
962 // if errno == EAGAIN(Resource temporarily unavailable) then set
963 // temp to 0, to avoid using old temp, and affecting FAN
964 // Control.
965 if (e.code().value() == EAGAIN)
966 {
967 tempValue = 0;
968 }
969 // else the errno would be something like
970 // EBADF(Bad file descriptor)
971 // or ENOENT(No such file or directory)
972 else
973 {
974 continue;
975 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500976 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500977 }
978
Matt Spinler818cc8d2023-10-23 11:43:39 -0500979 // If this object path already has a value, only overwite
980 // it if the previous one was an NaN or a smaller value.
981 auto existing = sensorData.find(sensorPath);
982 if (existing != sensorData.end())
983 {
Chris Cainae157b62024-01-23 16:05:12 -0600984 // Multiple sensors found for this FRU type
985 if ((std::isnan(existing->second) && (tempValue == 0)) ||
986 ((existing->second == 0) && std::isnan(tempValue)))
987 {
988 // One of the redundant sensors has failed (0xFF/nan), and the
989 // other sensor has no reading (0), so set the FRU to NaN to
990 // force fan increase
991 tempValue = std::numeric_limits<double>::quiet_NaN();
992 existing->second = tempValue;
993 }
Matt Spinler818cc8d2023-10-23 11:43:39 -0500994 if (std::isnan(existing->second) || (tempValue > existing->second))
995 {
996 existing->second = tempValue;
997 }
998 }
999 else
1000 {
Chris Cainae157b62024-01-23 16:05:12 -06001001 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001002 sensorData[sensorPath] = tempValue;
1003 }
1004 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001005
Matt Spinler818cc8d2023-10-23 11:43:39 -05001006 // Now publish the values on D-Bus.
1007 for (const auto& [objectPath, value] : sensorData)
1008 {
1009 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1010 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001011
Matt Spinler818cc8d2023-10-23 11:43:39 -05001012 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1013 objectPath, !std::isnan(value));
1014
1015 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001016 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001017 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Matt Spinler818cc8d2023-10-23 11:43:39 -05001018 objectPath);
Chris Cain6fa848a2022-01-24 14:54:38 -06001019 }
1020
Chris Cainae157b62024-01-23 16:05:12 -06001021 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001022 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001023}
1024
1025std::optional<std::string>
1026 Manager::getPowerLabelFunctionID(const std::string& value)
1027{
1028 // If the value is "system", then the FunctionID is "system".
1029 if (value == "system")
1030 {
1031 return value;
1032 }
1033
1034 // If the value is not "system", then the label value have 3 numbers, of
1035 // which we only care about the middle one:
1036 // <sensor id>_<function id>_<apss channel>
1037 // eg: The value is "0_10_5" , then the FunctionID is "10".
1038 if (value.find("_") == std::string::npos)
1039 {
1040 return std::nullopt;
1041 }
1042
1043 auto powerLabelValue = value.substr((value.find("_") + 1));
1044
1045 if (powerLabelValue.find("_") == std::string::npos)
1046 {
1047 return std::nullopt;
1048 }
1049
1050 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1051}
1052
/** Read the power sensors found in an OCC hwmon directory and publish them.
 *
 *  For every "power<N>_label" file in the directory the label is mapped to a
 *  sensor name through powerSensorName, the matching "power<N>_input" file is
 *  read, and the scaled value is published on D-Bus in Watts. Files that
 *  cannot be read, or labels that do not map to a known sensor, are skipped.
 *
 *  @param[in] path - directory holding the power<N>_* files
 *  @param[in] id   - OCC instance recorded as the owner of each sensor
 */
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    const std::regex labelFilePattern{"power\\d+_label$"}; // e.g. power5_label
    const std::string labelSuffix{"label"};

    for (auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelFilePattern))
        {
            continue;
        }

        std::string labelValue;
        try
        {
            labelValue = readFile<std::string>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        const auto functionID = getPowerLabelFunctionID(labelValue);
        if (!functionID.has_value())
        {
            continue;
        }

        // Only function IDs with a known sensor name are published.
        const auto nameEntry = powerSensorName.find(*functionID);
        if (nameEntry == powerSensorName.end())
        {
            continue;
        }

        std::string sensorPath{OCC_SENSORS_ROOT};
        sensorPath.append("/power/");
        sensorPath.append(nameEntry->second);

        // Strip the trailing "label" to get the common "power<N>_" prefix
        // shared by the other files of this sensor.
        const std::string filePrefix =
            labelFile.substr(0, labelFile.length() - labelSuffix.length());

        double rawValue{0};
        try
        {
            rawValue = readFile<double>(filePrefix + inputSuffix);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            filePrefix + inputSuffix, e.code().value())
                    .c_str());
            continue;
        }

        auto&& occDBus = dbus::OccDBusSensors::getOccDBus();

        occDBus.setUnit(sensorPath,
                        "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        // Scale the raw reading by 10^-6 (applied as two 10^-3 steps).
        occDBus.setValue(sensorPath,
                         rawValue * std::pow(10, -3) * std::pow(10, -3));

        occDBus.setOperationalStatus(sensorPath, true);

        // The chassis association is only created the first time a sensor
        // path is seen.
        if (existingSensors.find(sensorPath) == existingSensors.end())
        {
            occDBus.setChassisAssociation(sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}
1130
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001131void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001132{
1133 for (const auto& [sensorPath, occId] : existingSensors)
1134 {
1135 if (occId == id)
1136 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001137 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001138 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001139
1140 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1141 true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001142 }
1143 }
1144 return;
1145}
1146
Sheldon Bailey373af752022-02-21 15:14:00 -06001147void Manager::setSensorValueToNonFunctional(uint32_t id) const
1148{
1149 for (const auto& [sensorPath, occId] : existingSensors)
1150 {
1151 if (occId == id)
1152 {
1153 dbus::OccDBusSensors::getOccDBus().setValue(
1154 sensorPath, std::numeric_limits<double>::quiet_NaN());
1155
1156 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1157 false);
1158 }
1159 }
1160 return;
1161}
1162
/** Read and publish the sensors of one OCC.
 *
 *  Temperature sensors are read for every OCC; power sensors are read only
 *  for the master OCC. When the hwmon path does not exist an error is logged
 *  once per OCC until the path shows up again.
 *
 *  @param[in] occ - status object of the OCC whose sensors are read
 */
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    // One "already traced" flag per OCC instance so a missing hwmon path is
    // not logged on every poll.
    constexpr uint32_t maxTracedOccs = 8;
    static bool tracedError[maxTracedOccs] = {false};

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();

    // The instance ID is used as an index into tracedError; never index
    // outside of the table. An out-of-range instance simply is not throttled.
    const bool canThrottle = (id < maxTracedOccs);

    if (fs::exists(sensorPath))
    {
        // Read temperature sensors
        readTempSensors(sensorPath, id);

        if (occ->isMasterOcc())
        {
            // Read power sensors
            readPowerSensors(sensorPath, id);
        }

        // Path is back: allow the next failure to be traced again.
        if (canThrottle)
        {
            tracedError[id] = false;
        }
        return;
    }

    if (!canThrottle || !tracedError[id])
    {
        log<level::ERR>(
            std::format(
                "Manager::getSensorValues: OCC{} sensor path missing: {}", id,
                sensorPath.c_str())
                .c_str());
        if (canThrottle)
        {
            tracedError[id] = true;
        }
    }
}
1196#endif
Chris Cain17257672021-10-22 13:41:03 -05001197
/** Read the altitude property from D-Bus and cache it in `altitude`.
 *
 *  Readings below 0xFFFF are stored rounded to the nearest meter (negative
 *  readings are stored as 0). Any other reading leaves the cached value
 *  untouched. A D-Bus failure sets the cached value to 0xFFFF. Failures are
 *  traced only once until a good reading is seen again.
 */
void Manager::readAltitude()
{
    // true: the next failure should be traced; cleared after tracing one.
    static bool traceAltitudeErr = true;

    try
    {
        const utils::PropertyValue altitudeProperty = utils::getProperty(
            ALTITUDE_PATH, ALTITUDE_INTERFACE, ALTITUDE_PROP);
        const auto sensorVal = std::get<double>(altitudeProperty);

        // Written as a negated '<' so that NaN is also treated as invalid.
        if (!(sensorVal < 0xFFFF))
        {
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
            return;
        }

        // Negative altitudes are reported as 0, everything else is rounded
        // to the nearest meter.
        altitude = (sensorVal < 0) ? uint16_t{0}
                                   : static_cast<uint16_t>(sensorVal + 0.5);

        log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                      sensorVal, altitude)
                              .c_str());
        traceAltitudeErr = true;
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}
1247
/** Callback invoked when a property of the ambient temperature sensor
 *  changes.
 *
 *  The new reading is converted to a whole number of degrees C that fits in
 *  one byte (0xFF means "not available"). When that differs from the cached
 *  value, the cache is updated and, on POWER10, the ambient temperature and
 *  altitude are sent to every active OCC.
 *
 *  @param[in] msg - PropertiesChanged message holding the interface name
 *                   followed by the map of changed properties
 */
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    constexpr uint8_t ambientNotAvailable = 0xFF;
    // Largest temperature that can be reported without colliding with the
    // "not available" marker.
    constexpr uint8_t ambientMax = 0xFE;

    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    const auto valPropMap = msgData.find(AMBIENT_PROP);
    if (valPropMap == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }

    const double currentTemp = std::get<double>(valPropMap->second);

    uint8_t truncatedTemp = ambientNotAvailable;
    if (!std::isnan(currentTemp))
    {
        if (currentTemp < 0)
        {
            truncatedTemp = 0;
        }
        else if ((currentTemp + 0.5) >= ambientNotAvailable)
        {
            // Converting a double that does not fit in the destination type
            // is undefined behaviour, and a result of 0xFF would be
            // mistaken for "not available": clamp instead.
            truncatedTemp = ambientMax;
        }
        else
        {
            // Round to nearest degree C
            truncatedTemp = static_cast<uint8_t>(currentTemp + 0.5);
        }
    }

    // If ambient changes, notify OCCs
    if (truncatedTemp != ambient)
    {
        log<level::DEBUG>(
            std::format("ambientCallback: Ambient change from {} to {}C",
                        ambient, currentTemp)
                .c_str());

        ambient = truncatedTemp;
        if (altitude == 0xFFFF)
        {
            // No altitude yet, try reading again
            readAltitude();
        }

        log<level::DEBUG>(
            std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
                        altitude)
                .c_str());
#ifdef POWER10
        // Send ambient and altitude to all OCCs
        for (auto& obj : statusObjects)
        {
            if (obj->occActive())
            {
                obj->sendAmbient(ambient, altitude);
            }
        }
#endif // POWER10
    }
}
1312
1313// return the current ambient and altitude readings
1314void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1315 uint16_t& altitudeValue) const
1316{
1317 ambientValid = true;
1318 ambientTemp = ambient;
1319 altitudeValue = altitude;
1320
1321 if (ambient == 0xFF)
1322 {
1323 ambientValid = false;
1324 }
1325}
1326
Chris Caina7b74dc2021-11-10 17:03:43 -06001327#ifdef POWER10
// Called when waitForAllOccsTimer expires.
// After the first OCC goes active, this timer will be started (60 seconds).
// Traces a warning when the number of active OCCs differs from the number of
// status objects, then validates the master OCC.
void Manager::occsNotAllRunning()
{
    const auto expectedCount = statusObjects.size();
    if (activeCount != expectedCount)
    {
        // Not all OCCs went active.
        // Procs may be garded, so this may be expected.
        log<level::WARNING>(
            std::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, expectedCount)
                .c_str());
    }

    validateOccMaster();
}
Chris Cain755af102024-02-27 16:09:51 -06001345
1346#ifdef PLDM
1347// Called when throttleTraceTimer expires.
1348// If this timer expires, that indicates there is still no confirmed OCC status
1349// which will trigger pldm traces to be throttled.
1350void Manager::throttleTraceExpired()
1351{
1352 // Throttle traces
1353 pldmHandle->setTraceThrottle(true);
1354}
1355#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001356#endif // POWER10
1357
/** Verify that exactly one OCC is the master and start presence monitoring.
 *
 *  On POWER10, OCCs that are not yet marked active are first given a chance
 *  to become active: either from a queued active state or by checking the
 *  active sensor (when PLDM is enabled). If the host is not running the
 *  validation is abandoned.
 *
 *  A reset is requested (via deviceError) when more than one master is found
 *  or when no master is found at all.
 */
void Manager::validateOccMaster()
{
    int masterInstance = -1;
    for (auto& obj : statusObjects)
    {
        auto instance = obj->getOccInstanceID();
#ifdef POWER10
        if (!obj->occActive())
        {
            if (utils::isHostRunning())
            {
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    log<level::INFO>(
                        std::format(
                            "validateOccMaster: OCC{} is ACTIVE (queued)",
                            instance)
                            .c_str());
                    obj->occActive(true);
                }
                else
                {
                    // OCC does not appear to be active yet, check active sensor
#ifdef PLDM
                    pldmHandle->checkActiveSensor(instance);
#endif
                    if (obj->occActive())
                    {
                        log<level::INFO>(
                            std::format(
                                "validateOccMaster: OCC{} is ACTIVE after reading sensor",
                                instance)
                                .c_str());
                    }
                }
            }
            else
            {
                log<level::WARNING>(
                    std::format(
                        "validateOccMaster: HOST is not running (OCC{})",
                        instance)
                        .c_str());
                return;
            }
        }
#endif // POWER10

        if (obj->isMasterOcc())
        {
            obj->addPresenceWatchMaster();

            if (masterInstance == -1)
            {
                masterInstance = instance;
            }
            else
            {
                log<level::ERR>(
                    std::format(
                        "validateOccMaster: Multiple OCC masters! ({} and {})",
                        masterInstance, instance)
                        .c_str());
                // request reset
                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
            }
        }
    }

    if (masterInstance < 0)
    {
        log<level::ERR>(
            std::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
                        statusObjects.size())
                .c_str());
        // request reset; there is nothing to reset when no status objects
        // exist, and calling front() on an empty container is undefined
        if (!statusObjects.empty())
        {
            statusObjects.front()->deviceError(
                Error::Descriptor(PRESENCE_ERROR_PATH));
        }
    }
    else
    {
        log<level::INFO>(
            std::format("validateOccMaster: OCC{} is master of {} OCCs",
                        masterInstance, activeCount)
                .c_str());
#ifdef POWER10
        pmode->updateDbusSafeMode(false);
#endif
    }
}
1452
Chris Cain40501a22022-03-14 17:33:27 -05001453void Manager::updatePcapBounds() const
1454{
1455 if (pcap)
1456 {
1457 pcap->updatePcapBounds();
1458 }
1459}
1460
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301461} // namespace occ
1462} // namespace open_power