blob: 50076826e8053a2c420178ef4a06293173602309 [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Chris Cain4b82f3e2024-04-22 14:44:29 -05007#include "occ_errors.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05008#include "utils.hpp"
9
George Liub5ca1012021-09-10 12:53:11 +080010#include <phosphor-logging/elog-errors.hpp>
11#include <phosphor-logging/log.hpp>
12#include <xyz/openbmc_project/Common/error.hpp>
13
Matt Spinlerd267cec2021-09-01 14:49:19 -050014#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080015#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080016#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060017#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080018#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050019
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// FRU type value compared against when deciding whether a DIMM related
// temperature can be read at all.
constexpr uint32_t fruTypeNotAvailable{0xFF};

// Suffixes appended to a sensor's file name prefix to build the name of a
// specific attribute file.
constexpr const char* fruTypeSuffix{"fru_type"};
constexpr const char* faultSuffix{"fault"};
constexpr const char* inputSuffix{"input"};
constexpr const char* maxSuffix{"max"};

// While this file exists, OCC discovery is postponed and retried later.
constexpr const char* HOST_ON_FILE{"/run/openbmc/host@0-on"};
32
Chris Caina8857c52021-01-27 11:53:05 -060033using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060034using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060035
/**
 * @brief Read a single value of type T from the start of a file.
 *
 * The value is extracted with operator>>, so leading whitespace is skipped
 * and extraction stops at the first character that is not part of the value.
 *
 * @tparam T - type of the value to extract
 * @param[in] path - path of the file to read
 *
 * @return the extracted value
 *
 * @throws std::system_error (generic category) when the file cannot be
 *         opened or the value cannot be extracted. The error code is the
 *         errno of the failing operation, or EIO when the failure did not
 *         set errno (for example when the content is not parsable as T).
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // eofbit is deliberately not part of the exception mask: hitting end of
    // file directly after a complete value (a file without a trailing
    // newline) is not an error. An empty file still raises failbit.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data{};

    // Clear errno so that a stale value from an unrelated earlier call
    // cannot be reported as the cause of a failure below.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        const auto err = errno;
        // Never report "success" (0) for a failed read
        throw std::system_error(
            (err != 0) ? err : static_cast<int>(std::errc::io_error),
            std::generic_category());
    }

    return data;
}
58
/**
 * @brief Discover the OCCs and create the objects that represent them.
 *
 * Without POWER10 one set of objects is created for every possible CPU.
 * With POWER10 the function is re-entered through discoverTimer until the
 * OCC devices are present and stable, the status objects exist, and the
 * host is running so the OCC active sensors can be checked.
 */
void Manager::findAndCreateObjects()
{
#ifndef POWER10
    // One OCC per cpu
    for (auto cpu = 0; cpu < MAX_CPUS; ++cpu)
    {
        createObjects(std::string(OCC_NAME) + std::to_string(cpu));
    }
#else
    // Progress is remembered across the timer driven re-entries
    static bool statusObjCreated = false;
    static bool tracedHostWait = false;

    if (!pmode)
    {
        // Create the power mode object
        pmode = std::make_unique<powermode::PowerMode>(
            *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
    }

    if (fs::exists(HOST_ON_FILE))
    {
        // Try again later, the file is still present
        log<level::INFO>(
            std::format(
                "Manager::findAndCreateObjects(): Waiting for {} to complete...",
                HOST_ON_FILE)
                .c_str());
        discoverTimer->restartOnce(10s);
        return;
    }

    if (!statusObjCreated)
    {
        // Create the OCCs based on the /dev/occX devices
        auto foundOccs = findOCCsInDev();

        const bool searchSettled =
            !foundOccs.empty() && (prevOCCSearch.size() == foundOccs.size());

        if (!searchSettled)
        {
            // Something changed or no OCCs yet, try again in 10s.
            // On the first pass prevOCCSearch is empty, so there is always
            // at least one delay to give things a chance to settle.
            prevOCCSearch = foundOccs;

            log<level::INFO>(
                std::format(
                    "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
                    foundOccs.size())
                    .c_str());

            discoverTimer->restartOnce(10s);
        }
        else
        {
            // All OCCs appear to be available, create status objects.
            // createObjects requires OCC0 first.
            std::sort(foundOccs.begin(), foundOccs.end());

            log<level::INFO>(
                std::format(
                    "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
                    foundOccs.size())
                    .c_str());
            for (auto occId : foundOccs)
            {
                createObjects(std::string(OCC_NAME) + std::to_string(occId));
            }
            statusObjCreated = true;
            waitingForAllOccActiveSensors = true;

            // Find/update the processor path associated with each OCC
            for (auto& statusObj : statusObjects)
            {
                statusObj->updateProcAssociation();
            }
        }
    }

    if (!(statusObjCreated && waitingForAllOccActiveSensors))
    {
        return;
    }

    if (utils::isHostRunning())
    {
        if (tracedHostWait)
        {
            log<level::INFO>(
                "Manager::findAndCreateObjects(): Host is running");
            tracedHostWait = false;
        }
        checkAllActiveSensors();
        return;
    }

    // Host is not running: trace that once and poll again in 30s
    if (!tracedHostWait)
    {
        log<level::INFO>(
            "Manager::findAndCreateObjects(): Waiting for host to start");
        tracedHostWait = true;
    }
    discoverTimer->restartOnce(30s);
#ifdef PLDM
    if (throttleTraceTimer->isEnabled())
    {
        // Host is no longer running, disable throttle timer and
        // make sure traces are not throttled
        log<level::INFO>("findAndCreateObjects(): disabling sensor timer");
        throttleTraceTimer->setEnabled(false);
        pldmHandle->setTraceThrottle(false);
    }
#endif
#endif
}
174
#ifdef POWER10
/**
 * @brief Check whether the OCC active sensor of every status object is
 *        available.
 *
 * While the host is running, each status object that is neither active nor
 * has received its PLDM sensor is looked up in queuedActiveState. A queued
 * entry marks that OCC active; otherwise the scan stops at that OCC and the
 * check is repeated through discoverTimer. Once nothing is missing, the
 * discovery timer is stopped and the queue is cleared.
 */
void Manager::checkAllActiveSensors()
{
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (!open_power::occ::utils::isHostRunning())
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for host to start");
#ifdef PLDM
            if (throttleTraceTimer->isEnabled())
            {
                // Host is no longer running, disable throttle timer and
                // make sure traces are not throttled
                log<level::INFO>(
                    "checkAllActiveSensors(): disabling sensor timer");
                throttleTraceTimer->setEnabled(false);
                pldmHandle->setTraceThrottle(false);
            }
#endif
        }
    }
    else
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            log<level::INFO>("checkAllActiveSensors(): Host is now running");
        }

        // Assume every sensor is available until one is found missing
        allActiveSensorAvailable = true;
        for (auto& statusObj : statusObjects)
        {
            if (statusObj->occActive() || statusObj->getPldmSensorReceived())
            {
                // Nothing outstanding for this OCC
                continue;
            }

            auto instance = statusObj->getOccInstanceID();
            // Check if sensor was queued while waiting for discovery
            auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                queuedActiveState.erase(queued);
                log<level::INFO>(
                    std::format(
                        "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
                        instance)
                        .c_str());
                statusObj->occActive(true);
                continue;
            }

            // This sensor is still missing
            allActiveSensorAvailable = false;
            if (!tracedSensorWait)
            {
                log<level::INFO>(
                    std::format(
                        "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
                        instance)
                        .c_str());
                tracedSensorWait = true;
#ifdef PLDM
                // Make sure traces are not throttled
                pldmHandle->setTraceThrottle(false);
                // Start timer to throttle pldm traces when timer
                // expires
                throttleTraceTimer->restartOnce(40min);
#endif
            }
#ifdef PLDM
            pldmHandle->checkActiveSensor(statusObj->getOccInstanceID());
#endif
            // Stop at the first missing sensor
            break;
        }
    }

    if (!allActiveSensorAvailable)
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
        return;
    }

    // All sensors were found, disable the discovery timer
    if (discoverTimer->isEnabled())
    {
        discoverTimer->setEnabled(false);
    }
#ifdef PLDM
    if (throttleTraceTimer->isEnabled())
    {
        // Disable throttle timer and make sure traces are not throttled
        throttleTraceTimer->setEnabled(false);
        pldmHandle->setTraceThrottle(false);
    }
#endif

    if (waitingForAllOccActiveSensors)
    {
        log<level::INFO>(
            "checkAllActiveSensors(): OCC Active sensors are available");
        waitingForAllOccActiveSensors = false;
    }
    queuedActiveState.clear();
    tracedSensorWait = false;
}
#endif
296
Matt Spinlerd267cec2021-09-01 14:49:19 -0500297std::vector<int> Manager::findOCCsInDev()
298{
299 std::vector<int> occs;
300 std::regex expr{R"(occ(\d+)$)"};
301
302 for (auto& file : fs::directory_iterator("/dev"))
303 {
304 std::smatch match;
305 std::string path{file.path().string()};
306 if (std::regex_search(path, match, expr))
307 {
308 auto num = std::stoi(match[1].str());
309
310 // /dev numbering starts at 1, ours starts at 0.
311 occs.push_back(num - 1);
312 }
313 }
314
315 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530316}
317
/**
 * @brief Callback for a newly created CPU object: creates the OCC objects
 *        that belong to it.
 *
 * The OCC name is derived from the last element of the object path read
 * from the message by replacing CPU_NAME with OCC_NAME.
 *
 * @param[in] msg - message whose first argument is the CPU object path
 *
 * @return 0 always
 */
int Manager::cpuCreated(sdbusplus::message_t& msg)
{
    namespace fs = std::filesystem;

    sdbusplus::message::object_path o;
    msg.read(o);
    const fs::path cpuPath{std::string(std::move(o))};

    auto name = cpuPath.filename().string();
    const auto index = name.find(CPU_NAME);
    if (index == std::string::npos)
    {
        // replace() would throw std::out_of_range for npos; an object that
        // is not named like a CPU is ignored instead of letting the
        // exception escape from the callback.
        log<level::ERR>(
            std::format(
                "Manager::cpuCreated(): {} does not contain {}, ignoring",
                cpuPath.string(), CPU_NAME)
                .c_str());
        return 0;
    }
    name.replace(index, std::strlen(CPU_NAME), OCC_NAME);

    createObjects(name);

    return 0;
}
334
/**
 * @brief Create the Status and PassThrough objects for one OCC.
 *
 * The first Status object created is also used to create the power cap
 * object. When the new Status object reports that it is the master OCC,
 * the poll timer is disabled and (POWER10) the master path is handed to
 * the power mode object.
 *
 * @param[in] occ - name of the OCC, appended to OCC_CONTROL_ROOT to form
 *                  the object path
 */
void Manager::createObjects(const std::string& occ)
{
    auto objectPath = fs::path(OCC_CONTROL_ROOT) / occ;

    statusObjects.emplace_back(std::make_unique<Status>(
        event, objectPath.c_str(), *this,
#ifdef POWER10
        pmode,
#endif
        [this](instanceID instance, bool status) {
            statusCallBack(instance, status);
        }
#ifdef PLDM
        ,
        std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
                  std::placeholders::_1)
#endif
            ));

    auto& newStatus = *statusObjects.back();

    // Create the power cap monitor object
    if (!pcap)
    {
        pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
            newStatus);
    }

    if (newStatus.isMasterOcc())
    {
        log<level::INFO>(
            std::format("Manager::createObjects(): OCC{} is the master",
                        newStatus.getOccInstanceID())
                .c_str());
        _pollTimer->setEnabled(false);

#ifdef POWER10
        // Set the master OCC on the PowerMode object
        pmode->setMasterOcc(objectPath);
#endif
    }

    passThroughObjects.emplace_back(
        std::make_unique<PassThrough>(objectPath.c_str()
#ifdef POWER10
                                          ,
                                      pmode
#endif
                                      ));
}
381
/**
 * @brief Called when an OCC changes its active state.
 *
 * Maintains activeCount, starts or stops the poll timer and (POWER10) the
 * timer that waits for all OCCs, and validates the master OCC once every
 * status object is active.
 *
 * @param[in] instance - instance of the OCC that changed
 * @param[in] status - true when the OCC went active, false when it went away
 */
void Manager::statusCallBack(instanceID instance, bool status)
{
    if (!status)
    {
        // OCC went away
        if (activeCount > 0)
        {
            --activeCount;
        }
        else
        {
            log<level::ERR>(
                std::format("OCC{} disabled, but currently no active OCCs",
                            instance)
                    .c_str());
        }

        if (activeCount == 0)
        {
            // No OCCs are running, stop the OCC poll timer
            if (_pollTimer->isEnabled())
            {
                log<level::INFO>(
                    "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
                _pollTimer->setEnabled(false);
            }

#ifdef POWER10
            // stop wait timer
            if (waitForAllOccsTimer->isEnabled())
            {
                waitForAllOccsTimer->setEnabled(false);
            }
#endif
        }
#ifdef READ_OCC_SENSORS
        // Clear OCC sensors
        setSensorValueToNaN(instance);
#endif
    }
    else
    {
        // OCC went active
        ++activeCount;

#ifdef POWER10
        if (activeCount == 1)
        {
            // First OCC went active (allow some time for all OCCs to go
            // active)
            waitForAllOccsTimer->restartOnce(60s);
        }
#endif

        if (activeCount == statusObjects.size())
        {
#ifdef POWER10
            // All OCCs are now running, stop occ wait timer
            if (waitForAllOccsTimer->isEnabled())
            {
                waitForAllOccsTimer->setEnabled(false);
            }
#endif

            // Verify master OCC and start presence monitor
            validateOccMaster();
        }

        // Start poll timer if not already started
        if (!_pollTimer->isEnabled())
        {
            log<level::INFO>(
                std::format("Manager: OCCs will be polled every {} seconds",
                            pollInterval)
                    .c_str());

            // Send poll and start OCC poll timer
            pollerTimerExpired();
        }
    }

#ifdef POWER10
    if (waitingForAllOccActiveSensors && utils::isHostRunning())
    {
        checkAllActiveSensors();
    }
#endif
}
475
#ifdef I2C_OCC
/**
 * @brief Create one Status object per OCC hwmon device found under
 *        DEV_PATH, plus the power cap object (and, with POWER10, the power
 *        mode object).
 *
 * The first device is treated as the master OCC. When no device is found
 * nothing is created.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    if (deviceNames.empty())
    {
        // Without a device there is no master OCC; statusObjects.front()
        // below must not be evaluated on an empty container.
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC hwmon devices found");
        return;
    }

#ifdef POWER10
    // Object path of the first (master) device, needed after the loop
    fs::path masterPath;
#endif

    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
#ifdef POWER10
        if (masterPath.empty())
        {
            masterPath = path;
        }
#endif
    }

    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front());
#ifdef POWER10
    // Same constructor arguments as used in findAndCreateObjects()
    pmode = std::make_unique<powermode::PowerMode>(
        *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
    // Set the master OCC on the PowerMode object
    pmode->setMasterOcc(masterPath);
#endif
}
#endif
502
Tom Joseph815f9f52020-07-27 12:12:13 +0530503#ifdef PLDM
/**
 * @brief Handle an SBE timeout for an OCC.
 *
 * When a status object exists for the instance and its OCC is active, the
 * SBE state is set to SBE_STATE_NOT_USABLE and an HRESET is requested.
 * Otherwise nothing is done.
 *
 * @param[in] instance - OCC instance that timed out
 */
void Manager::sbeTimeout(unsigned int instance)
{
    const auto isInstance = [instance](const auto& statusObj) {
        return instance == statusObj->getOccInstanceID();
    };
    const auto found = std::find_if(statusObjects.begin(),
                                    statusObjects.end(), isInstance);

    if (found == statusObjects.end() || !(*found)->occActive())
    {
        return;
    }

    log<level::INFO>(
        std::format("SBE timeout, requesting HRESET (OCC{})", instance)
            .c_str());

    setSBEState(instance, SBE_STATE_NOT_USABLE);

    pldmHandle->sendHRESET(instance);
}
522
/**
 * @brief Process an OCC active state change.
 *
 * - No status object for the instance yet: the state is remembered in
 *   queuedActiveState (inserted when active, removed when not).
 * - Status object exists, host not running, OCC reported active: the
 *   sensor received flag is cleared and the wait for the host and all OCC
 *   active sensors is (re)started.
 * - Otherwise the state is passed on to the status object.
 *
 * @param[in] instance - OCC instance the state belongs to
 * @param[in] status - true when the OCC went active
 *
 * @return result of Status::occActive(status) when the state was passed
 *         on, false in all other cases
 */
bool Manager::updateOCCActive(instanceID instance, bool status)
{
    const auto isInstance = [instance](const auto& statusObj) {
        return instance == statusObj->getOccInstanceID();
    };
    auto found = std::find_if(statusObjects.begin(), statusObjects.end(),
                              isInstance);

    const bool hostRunning = open_power::occ::utils::isHostRunning();

    if (found == statusObjects.end())
    {
        if (hostRunning)
        {
            log<level::WARNING>(
                std::format(
                    "updateOCCActive: No status object to update for OCC{} (active={})",
                    instance, status)
                    .c_str());
        }
        else if (status)
        {
            log<level::WARNING>(
                std::format(
                    "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
                    instance, status)
                    .c_str());
        }

        if (status)
        {
            // OCC went active
            queuedActiveState.insert(instance);
        }
        else
        {
            auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                // OCC was disabled
                queuedActiveState.erase(queued);
            }
        }
        return false;
    }

    if (!hostRunning && status)
    {
        log<level::WARNING>(
            std::format(
                "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
                instance, status)
                .c_str());
        (*found)->setPldmSensorReceived(false);
        if (!waitingForAllOccActiveSensors)
        {
            log<level::INFO>(
                "updateOCCActive: Waiting for Host and all OCC Active Sensors");
            waitingForAllOccActiveSensors = true;
        }
#ifdef POWER10
        discoverTimer->restartOnce(30s);
#endif
        return false;
    }

    log<level::INFO>(
        std::format("updateOCCActive: OCC{} active={}", instance, status)
            .c_str());
    (*found)->setPldmSensorReceived(true);
    return (*found)->occActive(status);
}
Eddie Jamescbad2192021-10-07 09:39:39 -0500599
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500600// Called upon pldm event To set powermode Safe Mode State for system.
601void Manager::updateOccSafeMode(bool safeMode)
602{
603#ifdef POWER10
604 pmode->updateDbusSafeMode(safeMode);
605#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500606 // Update the processor throttle status on dbus
607 for (auto& obj : statusObjects)
608 {
609 obj->updateThrottle(safeMode, THROTTLED_SAFE);
610 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500611}
612
/**
 * @brief Process the result of an HRESET request.
 *
 * On success the SBE state is set to SBE_STATE_BOOTED. On failure it is set
 * to SBE_STATE_FAILED and, when a dump is allowed, a PEL is created and an
 * SBE dump is requested from the dump service.
 *
 * @param[in] instance - OCC instance the HRESET was requested for
 * @param[in] success - true when the HRESET succeeded
 */
void Manager::sbeHRESETResult(instanceID instance, bool success)
{
    if (success)
    {
        log<level::INFO>(
            std::format("HRESET succeeded (OCC{})", instance).c_str());

        setSBEState(instance, SBE_STATE_BOOTED);

        return;
    }

    setSBEState(instance, SBE_STATE_FAILED);

    if (!sbeCanDump(instance))
    {
        return;
    }

    log<level::INFO>(
        std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
            .c_str());

    auto& bus = utils::getBus();
    // The instance is placed in the upper 16 bits of the value handed to
    // createPEL
    uint32_t src6 = instance << 16;
    uint32_t logId =
        FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
                        src6, "SBE command timeout");

    try
    {
        constexpr auto path = "/org/openpower/dump";
        constexpr auto interface = "xyz.openbmc_project.Dump.Create";
        constexpr auto function = "CreateDump";

        std::string service = utils::getService(path, interface);
        auto method = bus.new_method_call(service.c_str(), path, interface,
                                          function);

        std::map<std::string, std::variant<std::string, uint64_t>>
            createParams{
                {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
                 uint64_t(logId)},
                {"com.ibm.Dump.Create.CreateParameters.DumpType",
                 "com.ibm.Dump.Create.DumpType.SBE"},
                {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
                 uint64_t(instance)},
            };

        method.append(createParams);

        [[maybe_unused]] auto response = bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        constexpr auto ERROR_DUMP_DISABLED =
            "xyz.openbmc_project.Dump.Create.Error.Disabled";
        // Both sides are C strings: compare the characters, not the
        // pointers (operator== on two const char* only compares addresses).
        const char* errorName = e.name();
        if ((errorName != nullptr) &&
            (std::strcmp(errorName, ERROR_DUMP_DISABLED) == 0))
        {
            log<level::INFO>("Dump is disabled, skipping");
        }
        else
        {
            log<level::ERR>("Dump failed");
        }
    }
}
678
679bool Manager::sbeCanDump(unsigned int instance)
680{
681 struct pdbg_target* proc = getPdbgTarget(instance);
682
683 if (!proc)
684 {
685 // allow the dump in the error case
686 return true;
687 }
688
689 try
690 {
691 if (!openpower::phal::sbe::isDumpAllowed(proc))
692 {
693 return false;
694 }
695
696 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
697 {
698 return false;
699 }
700 }
701 catch (openpower::phal::exception::SbeError& e)
702 {
703 log<level::INFO>("Failed to query SBE state");
704 }
705
706 // allow the dump in the error case
707 return true;
708}
709
710void Manager::setSBEState(unsigned int instance, enum sbe_state state)
711{
712 struct pdbg_target* proc = getPdbgTarget(instance);
713
714 if (!proc)
715 {
716 return;
717 }
718
719 try
720 {
721 openpower::phal::sbe::setState(proc, state);
722 }
723 catch (const openpower::phal::exception::SbeError& e)
724 {
725 log<level::ERR>("Failed to set SBE state");
726 }
727}
728
729struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
730{
731 if (!pdbgInitialized)
732 {
733 try
734 {
735 openpower::phal::pdbg::init();
736 pdbgInitialized = true;
737 }
738 catch (const openpower::phal::exception::PdbgError& e)
739 {
740 log<level::ERR>("pdbg initialization failed");
741 return nullptr;
742 }
743 }
744
745 struct pdbg_target* proc = nullptr;
746 pdbg_for_each_class_target("proc", proc)
747 {
748 if (pdbg_target_index(proc) == instance)
749 {
750 return proc;
751 }
752 }
753
754 log<level::ERR>("Failed to get pdbg target");
755 return nullptr;
756}
Tom Joseph815f9f52020-07-27 12:12:13 +0530757#endif
758
Chris Caina8857c52021-01-27 11:53:05 -0600759void Manager::pollerTimerExpired()
760{
Chris Caina8857c52021-01-27 11:53:05 -0600761 if (!_pollTimer)
762 {
763 log<level::ERR>(
764 "Manager::pollerTimerExpired() ERROR: Timer not defined");
765 return;
766 }
767
768 for (auto& obj : statusObjects)
769 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600770 if (!obj->occActive())
771 {
772 // OCC is not running yet
773#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600774 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500775 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600776#endif
777 continue;
778 }
779
Chris Caina8857c52021-01-27 11:53:05 -0600780 // Read sysfs to force kernel to poll OCC
781 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800782
783#ifdef READ_OCC_SENSORS
784 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600785 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800786#endif
Chris Caina8857c52021-01-27 11:53:05 -0600787 }
788
Chris Caina7b74dc2021-11-10 17:03:43 -0600789 if (activeCount > 0)
790 {
791 // Restart OCC poll timer
792 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
793 }
794 else
795 {
796 // No OCCs running, so poll timer will not be restarted
797 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600798 std::format(
Chris Caina7b74dc2021-11-10 17:03:43 -0600799 "Manager::pollerTimerExpired: poll timer will not be restarted")
800 .c_str());
801 }
Chris Caina8857c52021-01-27 11:53:05 -0600802}
803
Chicago Duanbb895cb2021-06-18 19:37:16 +0800804#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600805void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800806{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500807 // There may be more than one sensor with the same FRU type
808 // and label so make two passes: the first to read the temps
809 // from sysfs, and the second to put them on D-Bus after
810 // resolving any conflicts.
811 std::map<std::string, double> sensorData;
812
Chicago Duanbb895cb2021-06-18 19:37:16 +0800813 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
814 for (auto& file : fs::directory_iterator(path))
815 {
816 if (!std::regex_search(file.path().string(), expr))
817 {
818 continue;
819 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800820
Matt Spinlera26f1522021-08-25 15:50:20 -0500821 uint32_t labelValue{0};
822
823 try
824 {
825 labelValue = readFile<uint32_t>(file.path());
826 }
827 catch (const std::system_error& e)
828 {
829 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600830 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500831 file.path().string(), e.code().value())
832 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800833 continue;
834 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800835
836 const std::string& tempLabel = "label";
837 const std::string filePathString = file.path().string().substr(
838 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500839
840 uint32_t fruTypeValue{0};
841 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800842 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500843 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
844 }
845 catch (const std::system_error& e)
846 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800847 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600848 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500849 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800850 .c_str());
851 continue;
852 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800853
Patrick Williamsa49c9872023-05-10 07:50:35 -0500854 std::string sensorPath = OCC_SENSORS_ROOT +
855 std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800856
Matt Spinlerace67d82021-10-18 13:41:57 -0500857 std::string dvfsTempPath;
858
Chicago Duanbb895cb2021-06-18 19:37:16 +0800859 if (fruTypeValue == VRMVdd)
860 {
Chris Cainae157b62024-01-23 16:05:12 -0600861 sensorPath.append("vrm_vdd" + std::to_string(occInstance) +
862 "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800863 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500864 else if (fruTypeValue == processorIoRing)
865 {
Chris Cainae157b62024-01-23 16:05:12 -0600866 sensorPath.append("proc" + std::to_string(occInstance) +
867 "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500868 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600869 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500870 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800871 else
872 {
Matt Spinler14d14022021-08-25 15:38:29 -0500873 uint16_t type = (labelValue & 0xFF000000) >> 24;
874 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800875
876 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
877 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500878 if (fruTypeValue == fruTypeNotAvailable)
879 {
880 // Not all DIMM related temps are available to read
881 // (no _input file in this case)
882 continue;
883 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800884 auto iter = dimmTempSensorName.find(fruTypeValue);
885 if (iter == dimmTempSensorName.end())
886 {
George Liub5ca1012021-09-10 12:53:11 +0800887 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600888 std::format(
George Liub5ca1012021-09-10 12:53:11 +0800889 "readTempSensors: Fru type error! fruTypeValue = {}) ",
890 fruTypeValue)
891 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800892 continue;
893 }
894
895 sensorPath.append("dimm" + std::to_string(instanceID) +
896 iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500897
898 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
899 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800900 }
901 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
902 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500903 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800904 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500905 // The OCC reports small core temps, of which there are
906 // two per big core. All current P10 systems are in big
907 // core mode, so use a big core name.
908 uint16_t coreNum = instanceID / 2;
909 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600910 sensorPath.append("proc" + std::to_string(occInstance) +
911 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500912 std::to_string(tempNum) + "_temp");
913
Chris Cainae157b62024-01-23 16:05:12 -0600914 dvfsTempPath =
915 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
916 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500917 }
918 else
919 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800920 continue;
921 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800922 }
923 else
924 {
925 continue;
926 }
927 }
928
Matt Spinlerace67d82021-10-18 13:41:57 -0500929 // The dvfs temp file only needs to be read once per chip per type.
930 if (!dvfsTempPath.empty() &&
931 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
932 {
933 try
934 {
935 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
936
937 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
938 dvfsTempPath, dvfsValue * std::pow(10, -3));
939 }
940 catch (const std::system_error& e)
941 {
942 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600943 std::format(
Matt Spinlerace67d82021-10-18 13:41:57 -0500944 "readTempSensors: Failed reading {}, errno = {}",
945 filePathString + maxSuffix, e.code().value())
946 .c_str());
947 }
948 }
949
Matt Spinlera26f1522021-08-25 15:50:20 -0500950 uint32_t faultValue{0};
951 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800952 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500953 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
954 }
955 catch (const std::system_error& e)
956 {
957 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600958 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500959 filePathString + faultSuffix, e.code().value())
960 .c_str());
961 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800962 }
963
Chris Cainae157b62024-01-23 16:05:12 -0600964 double tempValue{0};
965 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -0500966 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800967 {
Chris Cainae157b62024-01-23 16:05:12 -0600968 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800969 }
Chris Cainae157b62024-01-23 16:05:12 -0600970 else
Chicago Duanbb895cb2021-06-18 19:37:16 +0800971 {
Chris Cainae157b62024-01-23 16:05:12 -0600972 // Read the temperature
973 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500974 {
Chris Cainae157b62024-01-23 16:05:12 -0600975 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500976 }
Chris Cainae157b62024-01-23 16:05:12 -0600977 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500978 {
Chris Cainae157b62024-01-23 16:05:12 -0600979 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600980 std::format(
Chris Cainae157b62024-01-23 16:05:12 -0600981 "readTempSensors: Failed reading {}, errno = {}",
982 filePathString + inputSuffix, e.code().value())
983 .c_str());
984
985 // if errno == EAGAIN(Resource temporarily unavailable) then set
986 // temp to 0, to avoid using old temp, and affecting FAN
987 // Control.
988 if (e.code().value() == EAGAIN)
989 {
990 tempValue = 0;
991 }
992 // else the errno would be something like
993 // EBADF(Bad file descriptor)
994 // or ENOENT(No such file or directory)
995 else
996 {
997 continue;
998 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500999 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001000 }
1001
Matt Spinler818cc8d2023-10-23 11:43:39 -05001002 // If this object path already has a value, only overwite
1003 // it if the previous one was an NaN or a smaller value.
1004 auto existing = sensorData.find(sensorPath);
1005 if (existing != sensorData.end())
1006 {
Chris Cainae157b62024-01-23 16:05:12 -06001007 // Multiple sensors found for this FRU type
1008 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1009 ((existing->second == 0) && std::isnan(tempValue)))
1010 {
1011 // One of the redundant sensors has failed (0xFF/nan), and the
1012 // other sensor has no reading (0), so set the FRU to NaN to
1013 // force fan increase
1014 tempValue = std::numeric_limits<double>::quiet_NaN();
1015 existing->second = tempValue;
1016 }
Matt Spinler818cc8d2023-10-23 11:43:39 -05001017 if (std::isnan(existing->second) || (tempValue > existing->second))
1018 {
1019 existing->second = tempValue;
1020 }
1021 }
1022 else
1023 {
Chris Cainae157b62024-01-23 16:05:12 -06001024 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001025 sensorData[sensorPath] = tempValue;
1026 }
1027 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001028
Matt Spinler818cc8d2023-10-23 11:43:39 -05001029 // Now publish the values on D-Bus.
1030 for (const auto& [objectPath, value] : sensorData)
1031 {
1032 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1033 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001034
Matt Spinler818cc8d2023-10-23 11:43:39 -05001035 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1036 objectPath, !std::isnan(value));
1037
1038 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001039 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001040 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Matt Spinler818cc8d2023-10-23 11:43:39 -05001041 objectPath);
Chris Cain6fa848a2022-01-24 14:54:38 -06001042 }
1043
Chris Cainae157b62024-01-23 16:05:12 -06001044 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001045 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001046}
1047
1048std::optional<std::string>
1049 Manager::getPowerLabelFunctionID(const std::string& value)
1050{
1051 // If the value is "system", then the FunctionID is "system".
1052 if (value == "system")
1053 {
1054 return value;
1055 }
1056
1057 // If the value is not "system", then the label value have 3 numbers, of
1058 // which we only care about the middle one:
1059 // <sensor id>_<function id>_<apss channel>
1060 // eg: The value is "0_10_5" , then the FunctionID is "10".
1061 if (value.find("_") == std::string::npos)
1062 {
1063 return std::nullopt;
1064 }
1065
1066 auto powerLabelValue = value.substr((value.find("_") + 1));
1067
1068 if (powerLabelValue.find("_") == std::string::npos)
1069 {
1070 return std::nullopt;
1071 }
1072
1073 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1074}
1075
// Read the OCC power sensors below the directory `path` and publish them on
// D-Bus.
//
// Each directory entry whose name ends in power<N>_label is examined. Its
// label content is mapped to a function ID and then, through powerSensorName,
// to a sensor name under OCC_SENSORS_ROOT/power/. The matching power<N>_input
// file supplies the reading. Entries that cannot be read or mapped are
// skipped.
//
// path - directory to scan for power label files
// id   - value recorded in existingSensors for every sensor published here
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    const std::regex labelPattern{"power\\d+_label$"}; // Example: power5_label
    const std::string labelSuffix{"label"};
    const double milli = std::pow(10, -3);

    for (const auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelPattern))
        {
            continue;
        }

        std::string labelValue;
        try
        {
            labelValue = readFile<std::string>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        const auto functionID = getPowerLabelFunctionID(labelValue);
        if (!functionID)
        {
            continue;
        }

        // Only function IDs with a known sensor name are published.
        const auto nameEntry = powerSensorName.find(*functionID);
        if (nameEntry == powerSensorName.end())
        {
            continue;
        }

        std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
        sensorPath.append(nameEntry->second);

        // Strip the trailing "label" so other attribute suffixes can be
        // appended, e.g. ".../power5_" + "input".
        const std::string attributePrefix =
            labelFile.substr(0, labelFile.length() - labelSuffix.length());
        const std::string inputFile = attributePrefix + inputSuffix;

        double rawValue{0};
        try
        {
            rawValue = readFile<double>(inputFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            inputFile, e.code().value())
                    .c_str());
            continue;
        }

        dbus::OccDBusSensors::getOccDBus().setUnit(
            sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        // The raw reading is scaled down by 10^-3 twice before publishing
        // (presumably microwatts to watts, given the unit above - confirm).
        dbus::OccDBusSensors::getOccDBus().setValue(sensorPath,
                                                    rawValue * milli * milli);

        dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
                                                                true);

        // The chassis association is only set the first time a sensor path
        // is seen.
        if (existingSensors.count(sensorPath) == 0)
        {
            dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
                sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}
1153
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001154void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001155{
1156 for (const auto& [sensorPath, occId] : existingSensors)
1157 {
1158 if (occId == id)
1159 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001160 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001161 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001162
1163 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1164 true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001165 }
1166 }
1167 return;
1168}
1169
Sheldon Bailey373af752022-02-21 15:14:00 -06001170void Manager::setSensorValueToNonFunctional(uint32_t id) const
1171{
1172 for (const auto& [sensorPath, occId] : existingSensors)
1173 {
1174 if (occId == id)
1175 {
1176 dbus::OccDBusSensors::getOccDBus().setValue(
1177 sensorPath, std::numeric_limits<double>::quiet_NaN());
1178
1179 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1180 false);
1181 }
1182 }
1183 return;
1184}
1185
// Read the sensors for one OCC and publish them on D-Bus.
//
// When the OCC's hwmon path exists, the temperature sensors are read, and
// the power sensors as well if this OCC is the master. When the path is
// missing, an error is logged once per OCC instance until the path shows up
// again.
//
// The "already traced" flags live in a fixed-size table indexed by instance
// ID. An ID beyond the table cannot be remembered, so the table is never
// accessed for it and a missing path is then logged on every call.
//
// occ - status object for the OCC whose sensors should be read
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    constexpr uint32_t maxTrackedOccs = 8;
    static bool tracedError[maxTrackedOccs] = {false};

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();

    // Guard against indexing past the end of tracedError
    const bool canTrack = (id < maxTrackedOccs);

    if (fs::exists(sensorPath))
    {
        // Read temperature sensors
        readTempSensors(sensorPath, id);

        if (occ->isMasterOcc())
        {
            // Read power sensors
            readPowerSensors(sensorPath, id);
        }

        if (canTrack)
        {
            // Path is back, so allow a future failure to be traced again
            tracedError[id] = false;
        }
        return;
    }

    if (canTrack && tracedError[id])
    {
        // Already reported for this OCC
        return;
    }

    log<level::ERR>(
        std::format("Manager::getSensorValues: OCC{} sensor path missing: {}",
                    id, sensorPath.c_str())
            .c_str());

    if (canTrack)
    {
        tracedError[id] = true;
    }
}
1219#endif
Chris Cain17257672021-10-22 13:41:03 -05001220
// Read the altitude from DBus and cache it in `altitude`.
//
// A reading below 0xFFFF is stored rounded to the nearest meter, with
// negative readings stored as 0. Any other reading is reported as invalid
// and leaves the cached altitude unchanged. If the D-Bus read throws, the
// cached altitude is set to 0xFFFF (not available).
//
// Failure traces are emitted once and re-armed by the next valid reading.
void Manager::readAltitude()
{
    static bool traceAltitudeErr = true;

    try
    {
        const utils::PropertyValue altitudeProperty = utils::getProperty(
            ALTITUDE_PATH, ALTITUDE_INTERFACE, ALTITUDE_PROP);
        const auto sensorVal = std::get<double>(altitudeProperty);

        // Written as a positive test so that a NaN reading (for which the
        // comparison is false) is treated as invalid.
        const bool inRange = (sensorVal < 0xFFFF);
        if (!inRange)
        {
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
            return;
        }

        // Clamp negatives to 0, otherwise round to nearest meter
        altitude = (sensorVal < 0) ? 0
                                   : static_cast<uint16_t>(sensorVal + 0.5);

        log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                      sensorVal, altitude)
                              .c_str());
        traceAltitudeErr = true;
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}
1270
// Callback function when ambient temperature changes.
//
// Reads the sensor name and the changed properties from `msg`. If the
// ambient property is among them, its value is converted to a single byte:
//   - NaN            -> 0xFF (the value getAmbientData() reports as invalid)
//   - negative       -> 0
//   - 254 or higher  -> 0xFE, so the result can neither overflow the byte
//                       nor collide with the 0xFF marker
//   - anything else  -> rounded to the nearest degree C
// When the converted value differs from the cached ambient, the cache is
// updated, the altitude is re-read if it is still unavailable (0xFFFF), and
// on POWER10 builds the new ambient/altitude are sent to every active OCC.
//
// msg - message carrying the changed properties
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    constexpr uint8_t ambientNotAvailable = 0xFF;
    constexpr uint8_t ambientMax = 0xFE;

    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    const auto valPropMap = msgData.find(AMBIENT_PROP);
    if (valPropMap == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }

    const double currentTemp = std::get<double>(valPropMap->second);

    uint8_t truncatedTemp = ambientNotAvailable;
    if (!std::isnan(currentTemp))
    {
        if (currentTemp < 0)
        {
            truncatedTemp = 0;
        }
        else if (currentTemp >= ambientMax)
        {
            // Converting an out-of-range double to uint8_t is undefined
            // behavior, so saturate instead (this also covers +infinity).
            truncatedTemp = ambientMax;
        }
        else
        {
            // Round to nearest degree C
            truncatedTemp = static_cast<uint8_t>(currentTemp + 0.5);
        }
    }

    // If ambient changes, notify OCCs
    if (truncatedTemp != ambient)
    {
        log<level::DEBUG>(
            std::format("ambientCallback: Ambient change from {} to {}C",
                        ambient, currentTemp)
                .c_str());

        ambient = truncatedTemp;
        if (altitude == 0xFFFF)
        {
            // No altitude yet, try reading again
            readAltitude();
        }

        log<level::DEBUG>(
            std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
                        altitude)
                .c_str());
#ifdef POWER10
        // Send ambient and altitude to all OCCs
        for (auto& obj : statusObjects)
        {
            if (obj->occActive())
            {
                obj->sendAmbient(ambient, altitude);
            }
        }
#endif // POWER10
    }
}
1335
1336// return the current ambient and altitude readings
1337void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1338 uint16_t& altitudeValue) const
1339{
1340 ambientValid = true;
1341 ambientTemp = ambient;
1342 altitudeValue = altitude;
1343
1344 if (ambient == 0xFF)
1345 {
1346 ambientValid = false;
1347 }
1348}
1349
Chris Caina7b74dc2021-11-10 17:03:43 -06001350#ifdef POWER10
// Called when waitForAllOccsTimer expires.
// After the first OCC goes active, this timer will be started (60 seconds).
//
// Logs a warning if the number of active OCCs differs from the number of
// status objects, then validates the master OCC in either case.
void Manager::occsNotAllRunning()
{
    const auto expectedCount = statusObjects.size();
    if (activeCount != expectedCount)
    {
        // Not all OCCs went active. Procs may be garded, so this may be
        // expected.
        log<level::WARNING>(
            std::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, expectedCount)
                .c_str());
    }

    validateOccMaster();
}
Chris Cain755af102024-02-27 16:09:51 -06001368
1369#ifdef PLDM
1370// Called when throttleTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001371// If this timer expires, that indicates there are no OCC active sensor PDRs
1372// found which will trigger pldm traces to be throttled and PEL to be created
Chris Cain755af102024-02-27 16:09:51 -06001373void Manager::throttleTraceExpired()
1374{
Chris Cain7651c062024-05-02 14:14:06 -05001375 if (utils::isHostRunning())
1376 {
1377 // Throttle traces
1378 pldmHandle->setTraceThrottle(true);
1379 // Create PEL
1380 createPldmSensorPEL();
1381 }
1382 else
1383 {
1384 // Make sure traces are not throttled
1385 pldmHandle->setTraceThrottle(false);
1386 log<level::INFO>(
1387 "throttleTraceExpired(): host it not running ignoring sensor timer");
1388 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001389}
1390
// Create a PEL reporting that the PLDM sensors for the OCCs were not found.
//
// The PEL is requested through the CreatePELWithFFDCFiles method of the
// org.open_power.Logging.PEL interface, at Warning level, with the process
// ID as additional data and recent occ-control journal entries as FFDC.
// A D-Bus failure while creating the PEL is logged and otherwise ignored.
void Manager::createPldmSensorPEL()
{
    namespace LoggingServer = sdbusplus::xyz::openbmc_project::Logging::server;

    Error::Descriptor d(MISSING_OCC_SENSORS_PATH);

    std::map<std::string, std::string> additionalData;
    additionalData.emplace("_PID", std::to_string(getpid()));

    log<level::INFO>(
        "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");

    auto& bus = utils::getBus();

    try
    {
        static constexpr auto loggingObjectPath =
            "/xyz/openbmc_project/logging";
        static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";

        FFDCFiles ffdc;
        // Add occ-control journal traces to PEL FFDC. The returned object is
        // held in a local until the call below has completed.
        auto occJournalFile =
            FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);

        const std::string service = utils::getService(loggingObjectPath,
                                                      opLoggingInterface);
        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
                                          opLoggingInterface,
                                          "CreatePELWithFFDCFiles");

        // Set level to Warning (Predictive).
        auto level = LoggingServer::convertForMessage(
            LoggingServer::Entry::Level::Warning);

        method.append(d.path, level, additionalData, ffdc);
        bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create MISSING_OCC_SENSORS PEL: {}",
                        e.what())
                .c_str());
    }
}
1438#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001439#endif // POWER10
1440
// Verify single master OCC and start presence monitor.
//
// Walks every status object. On POWER10 builds an OCC that is not yet active
// is first given the chance to become active: a queued active state is
// applied, or (with PLDM) its active sensor is checked. If the host is not
// running the validation stops immediately.
//
// Each OCC that reports itself as master gets a presence watch. Finding a
// second master, or no master at all, is logged and a device error with
// PRESENCE_ERROR_PATH is raised to request a reset. When no master is found
// the error is raised on the first status object, provided one exists.
void Manager::validateOccMaster()
{
    int masterInstance = -1;
    for (auto& obj : statusObjects)
    {
        auto instance = obj->getOccInstanceID();
#ifdef POWER10
        if (!obj->occActive())
        {
            if (utils::isHostRunning())
            {
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    log<level::INFO>(
                        std::format(
                            "validateOccMaster: OCC{} is ACTIVE (queued)",
                            instance)
                            .c_str());
                    obj->occActive(true);
                }
                else
                {
                    // OCC does not appear to be active yet, check active sensor
#ifdef PLDM
                    pldmHandle->checkActiveSensor(instance);
#endif
                    if (obj->occActive())
                    {
                        log<level::INFO>(
                            std::format(
                                "validateOccMaster: OCC{} is ACTIVE after reading sensor",
                                instance)
                                .c_str());
                    }
                }
            }
            else
            {
                log<level::WARNING>(
                    std::format(
                        "validateOccMaster: HOST is not running (OCC{})",
                        instance)
                        .c_str());
                return;
            }
        }
#endif // POWER10

        if (obj->isMasterOcc())
        {
            obj->addPresenceWatchMaster();

            if (masterInstance == -1)
            {
                masterInstance = instance;
            }
            else
            {
                log<level::ERR>(
                    std::format(
                        "validateOccMaster: Multiple OCC masters! ({} and {})",
                        masterInstance, instance)
                        .c_str());
                // request reset
                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
            }
        }
    }

    if (masterInstance < 0)
    {
        log<level::ERR>(
            std::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
                        statusObjects.size())
                .c_str());
        // request reset; front() must not be called on an empty container,
        // so there is nothing to raise the error on when no OCCs exist
        if (!statusObjects.empty())
        {
            statusObjects.front()->deviceError(
                Error::Descriptor(PRESENCE_ERROR_PATH));
        }
    }
    else
    {
        log<level::INFO>(
            std::format("validateOccMaster: OCC{} is master of {} OCCs",
                        masterInstance, activeCount)
                .c_str());
#ifdef POWER10
        pmode->updateDbusSafeMode(false);
#endif
    }
}
1535
Chris Cain40501a22022-03-14 17:33:27 -05001536void Manager::updatePcapBounds() const
1537{
1538 if (pcap)
1539 {
1540 pcap->updatePcapBounds();
1541 }
1542}
1543
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301544} // namespace occ
1545} // namespace open_power