blob: 308c67e33e62edec56f16ad6edbb1beaa9f36c8f [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Chris Cain4b82f3e2024-04-22 14:44:29 -05007#include "occ_errors.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05008#include "utils.hpp"
9
George Liub5ca1012021-09-10 12:53:11 +080010#include <phosphor-logging/elog-errors.hpp>
11#include <phosphor-logging/log.hpp>
12#include <xyz/openbmc_project/Common/error.hpp>
13
Matt Spinlerd267cec2021-09-01 14:49:19 -050014#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080015#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080016#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060017#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080018#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050019
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// Sentinel FRU type value (0xFF).
// NOTE(review): the name suggests "no FRU type available"; its users are
// outside this section - confirm.
constexpr uint32_t fruTypeNotAvailable{0xFF};

// Suffix strings.
// NOTE(review): presumably appended to sensor file names; the code that
// uses them is outside this section - confirm.
constexpr const char* fruTypeSuffix{"fru_type"};
constexpr const char* faultSuffix{"fault"};
constexpr const char* inputSuffix{"input"};
constexpr const char* maxSuffix{"max"};

// While this file exists, findAndCreateObjects() postpones OCC discovery and
// re-arms the discover timer instead.
const char* const HOST_ON_FILE{"/run/openbmc/host@0-on"};
32
Chris Caina8857c52021-01-27 11:53:05 -060033using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060034using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060035
/**
 * @brief Read one value of type T from a file using stream extraction.
 *
 * @tparam T - Any type supported by `std::ifstream::operator>>`
 * @param[in] path - File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error (generic category) if the file cannot be opened
 *         or no value can be extracted.  The error code is the errno seen
 *         during the operation, or EIO when the failure did not set errno
 *         (for example a parse failure or an empty file).
 */
template <typename T>
T readFile(const std::string& path)
{
    T data;

    // Start from a clean errno so that a stale value left behind by some
    // unrelated earlier call is never reported as the cause of a failure.
    errno = 0;

    std::ifstream ifs;
    // eofbit is intentionally NOT part of the mask: extraction sets eofbit
    // whenever the value runs up to the end of the file (no trailing
    // newline), which is a successful read.  An empty file still raises
    // failbit and is reported as an error.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        const auto err = errno;
        // Never throw a system_error that carries "success" as its code.
        throw std::system_error((err != 0) ? err : EIO,
                                std::generic_category());
    }

    return data;
}
58
Chris Cainc33171b2024-05-24 16:14:50 -050059// findAndCreateObjects():
60// Takes care of getting the required objects created and
61// finds the available devices/processors.
62// (function is called everytime the discoverTimer expires)
63// - create the PowerMode object to control OCC modes
64// - create statusObjects for each OCC device found
65// - waits for OCC Active sensors PDRs to become available
66// - restart discoverTimer if all data is not available yet
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053067void Manager::findAndCreateObjects()
68{
Matt Spinlerd267cec2021-09-01 14:49:19 -050069#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050070 for (auto id = 0; id < MAX_CPUS; ++id)
71 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060072 // Create one occ per cpu
73 auto occ = std::string(OCC_NAME) + std::to_string(id);
74 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053075 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050076#else
Chris Cain613dc902022-04-08 09:56:22 -050077 if (!pmode)
78 {
79 // Create the power mode object
80 pmode = std::make_unique<powermode::PowerMode>(
81 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
82 }
83
Chris Cain1718fd82022-02-16 16:39:50 -060084 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050085 {
Chris Cainbae4d072022-02-28 09:46:50 -060086 static bool statusObjCreated = false;
87 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060088 {
Chris Cainbae4d072022-02-28 09:46:50 -060089 // Create the OCCs based on on the /dev/occX devices
90 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060091
Chris Cainbae4d072022-02-28 09:46:50 -060092 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060093 {
Chris Cainbae4d072022-02-28 09:46:50 -060094 // Something changed or no OCCs yet, try again in 10s.
95 // Note on the first pass prevOCCSearch will be empty,
96 // so there will be at least one delay to give things
97 // a chance to settle.
98 prevOCCSearch = occs;
99
100 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600101 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -0600102 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
103 occs.size())
104 .c_str());
105
106 discoverTimer->restartOnce(10s);
107 }
108 else
109 {
110 // All OCCs appear to be available, create status objects
111
112 // createObjects requires OCC0 first.
113 std::sort(occs.begin(), occs.end());
114
115 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600116 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -0600117 "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
118 occs.size())
119 .c_str());
120 for (auto id : occs)
121 {
122 createObjects(std::string(OCC_NAME) + std::to_string(id));
123 }
124 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500125 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500126
127 // Find/update the processor path associated with each OCC
128 for (auto& obj : statusObjects)
129 {
130 obj->updateProcAssociation();
131 }
Chris Cainbae4d072022-02-28 09:46:50 -0600132 }
133 }
134
Chris Cain6d8f37a2022-04-29 13:46:01 -0500135 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600136 {
137 static bool tracedHostWait = false;
138 if (utils::isHostRunning())
139 {
140 if (tracedHostWait)
141 {
142 log<level::INFO>(
143 "Manager::findAndCreateObjects(): Host is running");
144 tracedHostWait = false;
145 }
Chris Cainbae4d072022-02-28 09:46:50 -0600146 checkAllActiveSensors();
147 }
148 else
149 {
150 if (!tracedHostWait)
151 {
152 log<level::INFO>(
153 "Manager::findAndCreateObjects(): Waiting for host to start");
154 tracedHostWait = true;
155 }
156 discoverTimer->restartOnce(30s);
Chris Cain7651c062024-05-02 14:14:06 -0500157#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -0500158 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500159 {
160 // Host is no longer running, disable throttle timer and
161 // make sure traces are not throttled
162 log<level::INFO>(
163 "findAndCreateObjects(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500164 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500165 pldmHandle->setTraceThrottle(false);
166 }
167#endif
Chris Cain1718fd82022-02-16 16:39:50 -0600168 }
169 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500170 }
171 else
172 {
Chris Cain1718fd82022-02-16 16:39:50 -0600173 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600174 std::format(
Chris Cain1718fd82022-02-16 16:39:50 -0600175 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
176 HOST_ON_FILE)
177 .c_str());
178 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500179 }
180#endif
181}
182
#ifdef POWER10
// Check whether every status object has an OCC active indication.
// While the host is running, each status object that is neither active nor
// has received its PLDM sensor is looked up in queuedActiveState; a queued
// entry is consumed and the OCC is marked active, otherwise the scan stops
// at that OCC and the function keeps waiting (discoverTimer re-armed, 10s).
// When the host is not running, the result of the previous scan is reused.
void Manager::checkAllActiveSensors()
{
    // State kept across calls
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (!open_power::occ::utils::isHostRunning())
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for host to start");
#ifdef PLDM
            if (throttlePldmTraceTimer->isEnabled())
            {
                // Host is no longer running, disable throttle timer and
                // make sure traces are not throttled
                log<level::INFO>(
                    "checkAllActiveSensors(): disabling sensor timer");
                throttlePldmTraceTimer->setEnabled(false);
                pldmHandle->setTraceThrottle(false);
            }
#endif
        }
    }
    else
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            log<level::INFO>("checkAllActiveSensors(): Host is now running");
        }

        // Assume every sensor is available until one is found missing
        allActiveSensorAvailable = true;
        for (auto& statusObj : statusObjects)
        {
            if (statusObj->occActive() || statusObj->getPldmSensorReceived())
            {
                // Nothing outstanding for this OCC
                continue;
            }

            auto instance = statusObj->getOccInstanceID();
            // Check if sensor was queued while waiting for discovery
            auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                queuedActiveState.erase(queued);
                const auto msg = std::format(
                    "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
                    instance);
                log<level::INFO>(msg.c_str());
                statusObj->occActive(true);
                continue;
            }

            // This OCC has no active indication yet
            allActiveSensorAvailable = false;
            if (!tracedSensorWait)
            {
                const auto msg = std::format(
                    "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
                    instance);
                log<level::INFO>(msg.c_str());
                tracedSensorWait = true;
#ifdef PLDM
                // Make sure PLDM traces are not throttled
                pldmHandle->setTraceThrottle(false);
                // Start timer to throttle PLDM traces when timer
                // expires
                onPldmTimeoutCreatePel = false;
                throttlePldmTraceTimer->restartOnce(5min);
#endif
            }
#ifdef PLDM
            // Ignore active sensor check if the OCCs are being reset
            if (!resetInProgress)
            {
                pldmHandle->checkActiveSensor(statusObj->getOccInstanceID());
            }
#endif
            // Stop at the first OCC that is still outstanding
            break;
        }
    }

    if (!allActiveSensorAvailable)
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
        return;
    }

    // All sensors were found, disable the discovery timer
    if (discoverTimer->isEnabled())
    {
        discoverTimer->setEnabled(false);
    }
#ifdef PLDM
    if (throttlePldmTraceTimer->isEnabled())
    {
        // Disable throttle timer and make sure traces are not throttled
        throttlePldmTraceTimer->setEnabled(false);
        pldmHandle->setTraceThrottle(false);
    }
#endif
    if (waitingForAllOccActiveSensors)
    {
        log<level::INFO>(
            "checkAllActiveSensors(): OCC Active sensors are available");
        waitingForAllOccActiveSensors = false;

        if (resetRequired)
        {
            // A reset was requested while waiting; start it now
            initiateOccRequest(resetInstance);

            if (!waitForAllOccsTimer->isEnabled())
            {
                log<level::WARNING>(
                    "occsNotAllRunning: Restarting waitForAllOccTimer");
                // restart occ wait timer to check status after reset
                // completes
                waitForAllOccsTimer->restartOnce(60s);
            }
        }
    }
    queuedActiveState.clear();
    tracedSensorWait = false;
}
#endif
322
Matt Spinlerd267cec2021-09-01 14:49:19 -0500323std::vector<int> Manager::findOCCsInDev()
324{
325 std::vector<int> occs;
326 std::regex expr{R"(occ(\d+)$)"};
327
328 for (auto& file : fs::directory_iterator("/dev"))
329 {
330 std::smatch match;
331 std::string path{file.path().string()};
332 if (std::regex_search(path, match, expr))
333 {
334 auto num = std::stoi(match[1].str());
335
336 // /dev numbering starts at 1, ours starts at 0.
337 occs.push_back(num - 1);
338 }
339 }
340
341 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530342}
343
Patrick Williamsaf408082022-07-22 19:26:54 -0500344int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530345{
George Liubcef3b42021-09-10 12:39:02 +0800346 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530347
348 sdbusplus::message::object_path o;
349 msg.read(o);
350 fs::path cpuPath(std::string(std::move(o)));
351
352 auto name = cpuPath.filename().string();
353 auto index = name.find(CPU_NAME);
354 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
355
356 createObjects(name);
357
358 return 0;
359}
360
361void Manager::createObjects(const std::string& occ)
362{
363 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
364
Gunnar Mills94df8c92018-09-14 14:50:03 -0500365 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800366 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600367#ifdef POWER10
368 pmode,
369#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500370 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600371 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530372#ifdef PLDM
373 ,
Chris Cainf0295f52024-09-12 15:41:14 -0500374 // Callback will set flag indicating reset needs to be done
375 // instead of immediately issuing a reset via PLDM.
376 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
Tom Joseph00325232020-07-29 17:51:48 +0530377 std::placeholders::_1)
378#endif
379 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530380
Chris Cain40501a22022-03-14 17:33:27 -0500381 // Create the power cap monitor object
382 if (!pcap)
383 {
384 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
385 *statusObjects.back());
386 }
387
Chris Cain36f9cde2021-11-22 11:18:21 -0600388 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530389 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600390 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600391 std::format("Manager::createObjects(): OCC{} is the master",
Chris Cain36f9cde2021-11-22 11:18:21 -0600392 statusObjects.back()->getOccInstanceID())
393 .c_str());
394 _pollTimer->setEnabled(false);
395
Chris Cain78e86012021-03-04 16:15:31 -0600396#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600397 // Set the master OCC on the PowerMode object
398 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600399#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600400 }
401
Patrick Williamsd7542c82024-08-16 15:20:28 -0400402 passThroughObjects.emplace_back(std::make_unique<PassThrough>(
403 path.c_str()
Chris Cain36f9cde2021-11-22 11:18:21 -0600404#ifdef POWER10
Patrick Williamsd7542c82024-08-16 15:20:28 -0400405 ,
406 pmode
Chris Cain36f9cde2021-11-22 11:18:21 -0600407#endif
Patrick Williamsd7542c82024-08-16 15:20:28 -0400408 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530409}
410
Chris Cainf0295f52024-09-12 15:41:14 -0500411// If a reset is not already outstanding, set a flag to indicate that a reset is
412// needed.
413void Manager::resetOccRequest(instanceID instance)
414{
415 if (!resetRequired)
416 {
417 resetRequired = true;
418 resetInstance = instance;
419 log<level::ERR>(
420 std::format(
421 "resetOccRequest: PM Complex reset was requested due to OCC{}",
422 instance)
423 .c_str());
424 }
425 else if (instance != resetInstance)
426 {
427 log<level::WARNING>(
428 std::format(
429 "resetOccRequest: Ignoring PM Complex reset request for OCC{}, because reset already outstanding for OCC{}",
430 instance, resetInstance)
431 .c_str());
432 }
433}
434
435// If a reset has not been started, initiate an OCC reset via PLDM
436void Manager::initiateOccRequest(instanceID instance)
437{
438 if (!resetInProgress)
439 {
440 resetInProgress = true;
441 resetInstance = instance;
442 log<level::ERR>(
443 std::format(
444 "initiateOccRequest: Initiating PM Complex reset due to OCC{}",
445 instance)
446 .c_str());
447#ifdef PLDM
448 pldmHandle->resetOCC(instance);
449#endif
450 resetRequired = false;
451 }
452 else
453 {
454 log<level::WARNING>(
455 std::format(
456 "initiateOccRequest: Ignoring PM Complex reset request for OCC{}, because reset already in process for OCC{}",
457 instance, resetInstance)
458 .c_str());
459 }
460}
461
Sheldon Bailey373af752022-02-21 15:14:00 -0600462void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530463{
Chris Caina7b74dc2021-11-10 17:03:43 -0600464 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600465 {
Chris Cainf0295f52024-09-12 15:41:14 -0500466 if (resetInProgress)
467 {
468 log<level::INFO>(
469 std::format(
470 "statusCallBack: Ignoring OCC{} activate because a reset has been initiated due to OCC{}",
471 instance, resetInstance)
472 .c_str());
473 return;
474 }
475
Chris Caina7b74dc2021-11-10 17:03:43 -0600476 // OCC went active
477 ++activeCount;
478
479#ifdef POWER10
480 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600481 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600482 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500483 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500484 }
485#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600486
487 if (activeCount == statusObjects.size())
488 {
489#ifdef POWER10
490 // All OCCs are now running
491 if (waitForAllOccsTimer->isEnabled())
492 {
493 // stop occ wait timer
494 waitForAllOccsTimer->setEnabled(false);
495 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600496
Chris Cainf0295f52024-09-12 15:41:14 -0500497 // All OCCs have been found, check if we need a reset
498 if (resetRequired)
499 {
500 initiateOccRequest(resetInstance);
501
502 if (!waitForAllOccsTimer->isEnabled())
503 {
504 log<level::WARNING>(
505 "occsNotAllRunning: Restarting waitForAllOccTimer");
506 // restart occ wait timer
507 waitForAllOccsTimer->restartOnce(60s);
508 }
509 }
510 else
511 {
512 // Verify master OCC and start presence monitor
513 validateOccMaster();
514 }
515#else
Chris Caina7b74dc2021-11-10 17:03:43 -0600516 // Verify master OCC and start presence monitor
517 validateOccMaster();
Chris Cainf0295f52024-09-12 15:41:14 -0500518#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600519 }
520
521 // Start poll timer if not already started
522 if (!_pollTimer->isEnabled())
523 {
524 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600525 std::format("Manager: OCCs will be polled every {} seconds",
Chris Cain36f9cde2021-11-22 11:18:21 -0600526 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600527 .c_str());
528
529 // Send poll and start OCC poll timer
530 pollerTimerExpired();
531 }
532 }
533 else
534 {
535 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500536 if (activeCount > 0)
537 {
538 --activeCount;
539 }
540 else
541 {
Chris Cainf0295f52024-09-12 15:41:14 -0500542 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600543 std::format("OCC{} disabled, but currently no active OCCs",
Chris Cain082a6ca2023-03-21 10:27:26 -0500544 instance)
545 .c_str());
546 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600547
548 if (activeCount == 0)
549 {
550 // No OCCs are running
551
Chris Cainf0295f52024-09-12 15:41:14 -0500552 if (resetInProgress)
553 {
554 // All OCC active sensors are clear (reset should be in
555 // progress)
556 log<level::INFO>(
557 std::format(
558 "statusCallBack: Clearing resetInProgress (activeCount={}, OCC{}, status={})",
559 activeCount, instance, status)
560 .c_str());
561 resetInProgress = false;
562 resetInstance = 255;
563 }
564
Chris Caina7b74dc2021-11-10 17:03:43 -0600565 // Stop OCC poll timer
566 if (_pollTimer->isEnabled())
567 {
568 log<level::INFO>(
569 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
570 _pollTimer->setEnabled(false);
571 }
572
573#ifdef POWER10
574 // stop wait timer
575 if (waitForAllOccsTimer->isEnabled())
576 {
577 waitForAllOccsTimer->setEnabled(false);
578 }
579#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600580 }
Chris Cainf0295f52024-09-12 15:41:14 -0500581 else if (resetInProgress)
582 {
583 log<level::INFO>(
584 std::format(
585 "statusCallBack: Skipping clear of resetInProgress (activeCount={}, OCC{}, status={})",
586 activeCount, instance, status)
587 .c_str());
588 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600589#ifdef READ_OCC_SENSORS
590 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500591 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600592#endif
Chris Caina8857c52021-01-27 11:53:05 -0600593 }
Chris Cainbae4d072022-02-28 09:46:50 -0600594
595#ifdef POWER10
596 if (waitingForAllOccActiveSensors)
597 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500598 if (utils::isHostRunning())
599 {
600 checkAllActiveSensors();
601 }
Chris Cainbae4d072022-02-28 09:46:50 -0600602 }
603#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530604}
605
#ifdef I2C_OCC
/**
 * @brief Create one Status object per OCC hwmon device found under DEV_PATH.
 *
 * The first device is treated as the master OCC: the power cap monitor is
 * bound to its status object and (POWER10) its path is set on the PowerMode
 * object.  When no status object exists, the power cap monitor is not
 * created and no master path is set.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    // NOTE(review): sizeof never yields 0, so this assertion cannot fail;
    // confirm whether a stricter check was intended.
    static_assert(sizeof(DEV_PATH) != 0);

#ifdef POWER10
    // Path of the first (master) device.  It has to be captured inside the
    // loop because the per-device path goes out of scope when the loop ends.
    fs::path masterPath;
#endif

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
#ifdef POWER10
        if (masterPath.empty())
        {
            masterPath = path;
        }
#endif
    }

    if (!statusObjects.empty())
    {
        // The first device is master occ
        pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
            *statusObjects.front());
    }
    else
    {
        // front() on an empty container is undefined behavior
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC devices found, power cap monitor not created");
    }
#ifdef POWER10
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    if (!masterPath.empty())
    {
        // Set the master OCC on the PowerMode object
        pmode->setMasterOcc(masterPath);
    }
#endif
}
#endif
632
Tom Joseph815f9f52020-07-27 12:12:13 +0530633#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500634void Manager::sbeTimeout(unsigned int instance)
635{
Eddie James2a751d72022-03-04 09:16:12 -0600636 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
637 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400638 return instance == obj->getOccInstanceID();
639 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500640
Eddie Jamescb018da2022-03-05 11:49:37 -0600641 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600642 {
Chris Cainbae4d072022-02-28 09:46:50 -0600643 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600644 std::format("SBE timeout, requesting HRESET (OCC{})", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600645 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500646
Eddie James2a751d72022-03-04 09:16:12 -0600647 setSBEState(instance, SBE_STATE_NOT_USABLE);
648
649 pldmHandle->sendHRESET(instance);
650 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500651}
652
Tom Joseph815f9f52020-07-27 12:12:13 +0530653bool Manager::updateOCCActive(instanceID instance, bool status)
654{
Chris Cain7e374fb2022-04-07 09:47:23 -0500655 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
656 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400657 return instance == obj->getOccInstanceID();
658 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500659
Chris Cain082a6ca2023-03-21 10:27:26 -0500660 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500661 if (obj != statusObjects.end())
662 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500663 if (!hostRunning && (status == true))
664 {
665 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600666 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500667 "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
668 instance, status)
669 .c_str());
670 (*obj)->setPldmSensorReceived(false);
671 if (!waitingForAllOccActiveSensors)
672 {
673 log<level::INFO>(
674 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
675 waitingForAllOccActiveSensors = true;
676 }
Chris Cain755af102024-02-27 16:09:51 -0600677#ifdef POWER10
Chris Cain082a6ca2023-03-21 10:27:26 -0500678 discoverTimer->restartOnce(30s);
Chris Cain755af102024-02-27 16:09:51 -0600679#endif
Chris Cain082a6ca2023-03-21 10:27:26 -0500680 return false;
681 }
682 else
683 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500684 (*obj)->setPldmSensorReceived(true);
685 return (*obj)->occActive(status);
686 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500687 }
688 else
689 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500690 if (hostRunning)
691 {
692 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600693 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500694 "updateOCCActive: No status object to update for OCC{} (active={})",
695 instance, status)
696 .c_str());
697 }
698 else
699 {
700 if (status == true)
701 {
702 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600703 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500704 "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
705 instance, status)
706 .c_str());
707 }
708 }
Chris Cainbd551de2022-04-26 13:41:16 -0500709 if (status == true)
710 {
711 // OCC went active
712 queuedActiveState.insert(instance);
713 }
714 else
715 {
716 auto match = queuedActiveState.find(instance);
717 if (match != queuedActiveState.end())
718 {
719 // OCC was disabled
720 queuedActiveState.erase(match);
721 }
722 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500723 return false;
724 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530725}
Eddie Jamescbad2192021-10-07 09:39:39 -0500726
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500727// Called upon pldm event To set powermode Safe Mode State for system.
728void Manager::updateOccSafeMode(bool safeMode)
729{
730#ifdef POWER10
731 pmode->updateDbusSafeMode(safeMode);
732#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500733 // Update the processor throttle status on dbus
734 for (auto& obj : statusObjects)
735 {
736 obj->updateThrottle(safeMode, THROTTLED_SAFE);
737 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500738}
739
Eddie Jamescbad2192021-10-07 09:39:39 -0500740void Manager::sbeHRESETResult(instanceID instance, bool success)
741{
742 if (success)
743 {
Chris Cainbae4d072022-02-28 09:46:50 -0600744 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600745 std::format("HRESET succeeded (OCC{})", instance).c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500746
747 setSBEState(instance, SBE_STATE_BOOTED);
748
749 return;
750 }
751
752 setSBEState(instance, SBE_STATE_FAILED);
753
754 if (sbeCanDump(instance))
755 {
Chris Cainbae4d072022-02-28 09:46:50 -0600756 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600757 std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600758 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500759
760 auto& bus = utils::getBus();
761 uint32_t src6 = instance << 16;
762 uint32_t logId =
763 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
764 src6, "SBE command timeout");
765
766 try
767 {
George Liuf3a4a692021-12-28 13:59:51 +0800768 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
769 constexpr auto function = "CreateDump";
770
Patrick Williamsd7542c82024-08-16 15:20:28 -0400771 std::string service =
772 utils::getService(OP_DUMP_OBJ_PATH, interface);
Dhruvaraj Subhashchandran1173b2b2024-06-01 11:12:13 -0500773 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
774 interface, function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500775
776 std::map<std::string, std::variant<std::string, uint64_t>>
777 createParams{
778 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
779 uint64_t(logId)},
780 {"com.ibm.Dump.Create.CreateParameters.DumpType",
781 "com.ibm.Dump.Create.DumpType.SBE"},
782 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
783 uint64_t(instance)},
784 };
785
786 method.append(createParams);
787
788 auto response = bus.call(method);
789 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500790 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500791 {
792 constexpr auto ERROR_DUMP_DISABLED =
793 "xyz.openbmc_project.Dump.Create.Error.Disabled";
794 if (e.name() == ERROR_DUMP_DISABLED)
795 {
796 log<level::INFO>("Dump is disabled, skipping");
797 }
798 else
799 {
800 log<level::ERR>("Dump failed");
801 }
802 }
803 }
Chris Cainf0295f52024-09-12 15:41:14 -0500804
805 // SBE Reset failed, try PM Complex reset
806 log<level::ERR>("sbeHRESETResult: Forcing PM Complex reset");
807 resetOccRequest(instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500808}
809
810bool Manager::sbeCanDump(unsigned int instance)
811{
812 struct pdbg_target* proc = getPdbgTarget(instance);
813
814 if (!proc)
815 {
816 // allow the dump in the error case
817 return true;
818 }
819
820 try
821 {
822 if (!openpower::phal::sbe::isDumpAllowed(proc))
823 {
824 return false;
825 }
826
827 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
828 {
829 return false;
830 }
831 }
832 catch (openpower::phal::exception::SbeError& e)
833 {
834 log<level::INFO>("Failed to query SBE state");
835 }
836
837 // allow the dump in the error case
838 return true;
839}
840
// Set the SBE state of the processor identified by 'instance'.
//
// Nothing is done when no pdbg target can be obtained for the instance.
// An SbeError raised while setting the state is logged and not propagated.
void Manager::setSBEState(unsigned int instance, enum sbe_state state)
{
    struct pdbg_target* target = getPdbgTarget(instance);
    if (target != nullptr)
    {
        try
        {
            openpower::phal::sbe::setState(target, state);
        }
        catch (const openpower::phal::exception::SbeError& err)
        {
            const std::string msg =
                std::format("Failed to set SBE state: {}", err.what());
            log<level::ERR>(msg.c_str());
        }
    }
}
860
861struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
862{
863 if (!pdbgInitialized)
864 {
865 try
866 {
867 openpower::phal::pdbg::init();
868 pdbgInitialized = true;
869 }
870 catch (const openpower::phal::exception::PdbgError& e)
871 {
872 log<level::ERR>("pdbg initialization failed");
873 return nullptr;
874 }
875 }
876
877 struct pdbg_target* proc = nullptr;
878 pdbg_for_each_class_target("proc", proc)
879 {
880 if (pdbg_target_index(proc) == instance)
881 {
882 return proc;
883 }
884 }
885
886 log<level::ERR>("Failed to get pdbg target");
887 return nullptr;
888}
Tom Joseph815f9f52020-07-27 12:12:13 +0530889#endif
890
Chris Caina8857c52021-01-27 11:53:05 -0600891void Manager::pollerTimerExpired()
892{
Chris Caina8857c52021-01-27 11:53:05 -0600893 if (!_pollTimer)
894 {
Chris Cainf0295f52024-09-12 15:41:14 -0500895 log<level::ERR>("pollerTimerExpired() ERROR: Timer not defined");
Chris Caina8857c52021-01-27 11:53:05 -0600896 return;
897 }
898
Chris Cainf0295f52024-09-12 15:41:14 -0500899#ifdef POWER10
900 if (resetRequired)
901 {
902 log<level::ERR>("pollerTimerExpired() - Initiating PM Complex reset");
903 initiateOccRequest(resetInstance);
904
905 if (!waitForAllOccsTimer->isEnabled())
906 {
907 log<level::WARNING>(
908 "pollerTimerExpired: Restarting waitForAllOccTimer");
909 // restart occ wait timer
910 waitForAllOccsTimer->restartOnce(60s);
911 }
912 return;
913 }
914#endif
915
Chris Caina8857c52021-01-27 11:53:05 -0600916 for (auto& obj : statusObjects)
917 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600918 if (!obj->occActive())
919 {
920 // OCC is not running yet
921#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600922 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500923 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600924#endif
925 continue;
926 }
927
Chris Caina8857c52021-01-27 11:53:05 -0600928 // Read sysfs to force kernel to poll OCC
929 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800930
931#ifdef READ_OCC_SENSORS
932 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600933 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800934#endif
Chris Caina8857c52021-01-27 11:53:05 -0600935 }
936
Chris Caina7b74dc2021-11-10 17:03:43 -0600937 if (activeCount > 0)
938 {
939 // Restart OCC poll timer
940 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
941 }
942 else
943 {
944 // No OCCs running, so poll timer will not be restarted
945 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600946 std::format(
Chris Caina7b74dc2021-11-10 17:03:43 -0600947 "Manager::pollerTimerExpired: poll timer will not be restarted")
948 .c_str());
949 }
Chris Caina8857c52021-01-27 11:53:05 -0600950}
951
Chicago Duanbb895cb2021-06-18 19:37:16 +0800952#ifdef READ_OCC_SENSORS
// Read the temperature sensors found under an OCC hwmon directory and
// publish them on D-Bus.
//
// path        - directory scanned for temp<N>_label files
// occInstance - OCC instance number; used in the proc/vrm sensor names and
//               recorded in existingSensors as the owner of each sensor
void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
{
    // More than one hwmon entry may resolve to the same D-Bus sensor name,
    // so the work is done in two passes: the first collects the readings
    // (resolving duplicates as they show up), the second writes the result
    // to D-Bus.
    std::map<std::string, double> collected;

    const std::string temperatureRoot =
        std::string{OCC_SENSORS_ROOT} + "/temperature/";
    const std::string labelSuffix{"label"};
    const std::regex labelPattern{"temp\\d+_label$"}; // Example: temp5_label

    for (auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelPattern))
        {
            continue;
        }

        uint32_t labelValue{0};
        try
        {
            labelValue = readFile<uint32_t>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readTempSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        // Drop the trailing "label" so the other per-sensor files
        // (fru_type, fault, input, max) can be derived from the same prefix.
        const std::string filePrefix =
            labelFile.substr(0, labelFile.length() - labelSuffix.length());

        const std::string fruTypeFile = filePrefix + fruTypeSuffix;
        uint32_t fruTypeValue{0};
        try
        {
            fruTypeValue = readFile<uint32_t>(fruTypeFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readTempSensors: Failed reading {}, errno = {}",
                            fruTypeFile, e.code().value())
                    .c_str());
            continue;
        }

        std::string sensorPath = temperatureRoot;
        std::string dvfsTempPath;

        if (fruTypeValue == VRMVdd)
        {
            sensorPath.append(
                "vrm_vdd" + std::to_string(occInstance) + "_temp");
        }
        else if (fruTypeValue == processorIoRing)
        {
            sensorPath.append(
                "proc" + std::to_string(occInstance) + "_ioring_temp");
            dvfsTempPath = temperatureRoot + "proc" +
                           std::to_string(occInstance) + "_ioring_dvfs_temp";
        }
        else
        {
            // The label packs the sensor type in the top byte and the
            // instance number in the low 16 bits.
            const uint16_t sensorType = (labelValue & 0xFF000000) >> 24;
            const uint16_t sensorIndex = labelValue & 0x0000FFFF;

            if (sensorType == OCC_DIMM_TEMP_SENSOR_TYPE)
            {
                if (fruTypeValue == fruTypeNotAvailable)
                {
                    // Not all DIMM related temps are available to read
                    // (no _input file in this case)
                    continue;
                }

                const auto nameEntry = dimmTempSensorName.find(fruTypeValue);
                if (nameEntry == dimmTempSensorName.end())
                {
                    log<level::ERR>(
                        std::format(
                            "readTempSensors: Fru type error! fruTypeValue = {}) ",
                            fruTypeValue)
                            .c_str());
                    continue;
                }

                sensorPath.append(
                    "dimm" + std::to_string(sensorIndex) + nameEntry->second);

                dvfsTempPath =
                    temperatureRoot + dimmDVFSSensorName.at(fruTypeValue);
            }
            else if (sensorType == OCC_CPU_TEMP_SENSOR_TYPE)
            {
                if (fruTypeValue != processorCore)
                {
                    continue;
                }

                // The OCC reports small core temps, of which there are
                // two per big core. All current P10 systems are in big
                // core mode, so use a big core name.
                const uint16_t coreNum = sensorIndex / 2;
                const uint16_t tempNum = sensorIndex % 2;
                sensorPath.append("proc" + std::to_string(occInstance) +
                                  "_core" + std::to_string(coreNum) + "_" +
                                  std::to_string(tempNum) + "_temp");

                dvfsTempPath = temperatureRoot + "proc" +
                               std::to_string(occInstance) + "_core_dvfs_temp";
            }
            else
            {
                continue;
            }
        }

        // The dvfs temp file only needs to be read once per chip per type.
        if (!dvfsTempPath.empty() &&
            !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
        {
            const std::string maxFile = filePrefix + maxSuffix;
            try
            {
                const auto dvfsValue = readFile<double>(maxFile);

                dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
                    dvfsTempPath, dvfsValue * std::pow(10, -3));
            }
            catch (const std::system_error& e)
            {
                // A missing/unreadable max file is not fatal for the sensor
                log<level::DEBUG>(
                    std::format(
                        "readTempSensors: Failed reading {}, errno = {}",
                        maxFile, e.code().value())
                        .c_str());
            }
        }

        const std::string faultFile = filePrefix + faultSuffix;
        uint32_t faultValue{0};
        try
        {
            faultValue = readFile<uint32_t>(faultFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readTempSensors: Failed reading {}, errno = {}",
                            faultFile, e.code().value())
                    .c_str());
            continue;
        }

        double tempValue{0};
        // NOTE: if OCC sends back 0xFF, kernel sets this fault value to 1.
        if (faultValue != 0)
        {
            tempValue = std::numeric_limits<double>::quiet_NaN();
        }
        else
        {
            // Read the temperature
            const std::string inputFile = filePrefix + inputSuffix;
            try
            {
                tempValue = readFile<double>(inputFile);
            }
            catch (const std::system_error& e)
            {
                log<level::DEBUG>(
                    std::format(
                        "readTempSensors: Failed reading {}, errno = {}",
                        inputFile, e.code().value())
                        .c_str());

                // if errno == EAGAIN(Resource temporarily unavailable) then
                // set temp to 0, to avoid using old temp, and affecting FAN
                // Control.
                // Any other errno would be something like
                // EBADF(Bad file descriptor)
                // or ENOENT(No such file or directory)
                // and the sensor is skipped.
                if (e.code().value() != EAGAIN)
                {
                    continue;
                }
                tempValue = 0;
            }
        }

        // If this object path already has a value, only overwrite
        // it if the previous one was an NaN or a smaller value.
        const auto [slot, firstForPath] =
            collected.try_emplace(sensorPath, tempValue);
        if (!firstForPath)
        {
            // Multiple sensors found for this FRU type
            double& stored = slot->second;
            if ((std::isnan(stored) && (tempValue == 0)) ||
                ((stored == 0) && std::isnan(tempValue)))
            {
                // One of the redundant sensors has failed (0xFF/nan), and the
                // other sensor has no reading (0), so set the FRU to NaN to
                // force fan increase
                tempValue = std::numeric_limits<double>::quiet_NaN();
                stored = tempValue;
            }
            if (std::isnan(stored) || (tempValue > stored))
            {
                stored = tempValue;
            }
        }
    }

    // Now publish the values on D-Bus.
    for (const auto& [objectPath, value] : collected)
    {
        dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
                                                    value * std::pow(10, -3));

        dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
            objectPath, !std::isnan(value));

        // The chassis association is only set the first time a sensor
        // path is seen.
        if (existingSensors.count(objectPath) == 0)
        {
            dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
                objectPath);
        }

        existingSensors[objectPath] = occInstance;
    }
}
1195
1196std::optional<std::string>
1197 Manager::getPowerLabelFunctionID(const std::string& value)
1198{
1199 // If the value is "system", then the FunctionID is "system".
1200 if (value == "system")
1201 {
1202 return value;
1203 }
1204
1205 // If the value is not "system", then the label value have 3 numbers, of
1206 // which we only care about the middle one:
1207 // <sensor id>_<function id>_<apss channel>
1208 // eg: The value is "0_10_5" , then the FunctionID is "10".
1209 if (value.find("_") == std::string::npos)
1210 {
1211 return std::nullopt;
1212 }
1213
1214 auto powerLabelValue = value.substr((value.find("_") + 1));
1215
1216 if (powerLabelValue.find("_") == std::string::npos)
1217 {
1218 return std::nullopt;
1219 }
1220
1221 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1222}
1223
// Read the power sensors found under an OCC hwmon directory and publish
// them on D-Bus.
//
// path - directory scanned for power<N>_label files
// id   - value recorded in existingSensors as the owner of each sensor
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    const std::string labelSuffix{"label"};
    const std::regex labelPattern{"power\\d+_label$"}; // Example: power5_label

    for (auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelPattern))
        {
            continue;
        }

        std::string labelValue;
        try
        {
            labelValue = readFile<std::string>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        const auto functionID = getPowerLabelFunctionID(labelValue);
        if (!functionID)
        {
            continue;
        }

        // Only function IDs present in powerSensorName are published
        const auto nameEntry = powerSensorName.find(*functionID);
        if (nameEntry == powerSensorName.end())
        {
            continue;
        }

        std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
        sensorPath.append(nameEntry->second);

        // Drop the trailing "label" to derive the matching input file
        const std::string inputFile =
            labelFile.substr(0, labelFile.length() - labelSuffix.length()) +
            inputSuffix;

        double tempValue{0};
        try
        {
            tempValue = readFile<double>(inputFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            inputFile, e.code().value())
                    .c_str());
            continue;
        }

        dbus::OccDBusSensors::getOccDBus().setUnit(
            sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        // The raw reading is scaled by 10^-3 twice before being published
        dbus::OccDBusSensors::getOccDBus().setValue(
            sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));

        dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
            sensorPath, true);

        // The chassis association is only set the first time a sensor
        // path is seen.
        if (existingSensors.count(sensorPath) == 0)
        {
            dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
                sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}
1301
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001302void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001303{
1304 for (const auto& [sensorPath, occId] : existingSensors)
1305 {
1306 if (occId == id)
1307 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001308 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001309 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001310
Patrick Williamsd7542c82024-08-16 15:20:28 -04001311 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1312 sensorPath, true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001313 }
1314 }
1315 return;
1316}
1317
Sheldon Bailey373af752022-02-21 15:14:00 -06001318void Manager::setSensorValueToNonFunctional(uint32_t id) const
1319{
1320 for (const auto& [sensorPath, occId] : existingSensors)
1321 {
1322 if (occId == id)
1323 {
1324 dbus::OccDBusSensors::getOccDBus().setValue(
1325 sensorPath, std::numeric_limits<double>::quiet_NaN());
1326
Patrick Williamsd7542c82024-08-16 15:20:28 -04001327 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1328 sensorPath, false);
Sheldon Bailey373af752022-02-21 15:14:00 -06001329 }
1330 }
1331 return;
1332}
1333
// Read the sensors of one OCC from its hwmon directory.
//
// occ - status object of the OCC to read; supplies the hwmon path, the
//       instance ID and whether this OCC is the master
//
// Temperature sensors are read for every OCC, power sensors only for the
// master. When the hwmon path does not exist an error is logged once per
// OCC; the trace is re-armed the next time the path is found.
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    // Remembers, per OCC instance ID, whether the "path missing" error has
    // already been traced. Keyed by ID rather than held in a fixed-size
    // array so that an ID outside any assumed range cannot index out of
    // bounds.
    static std::map<uint32_t, bool> tracedError;

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();

    if (!fs::exists(sensorPath))
    {
        bool& alreadyTraced = tracedError[id];
        if (!alreadyTraced)
        {
            log<level::ERR>(
                std::format(
                    "Manager::getSensorValues: OCC{} sensor path missing: {}",
                    id, sensorPath.c_str())
                    .c_str());
            alreadyTraced = true;
        }
        return;
    }

    // Read temperature sensors
    readTempSensors(sensorPath, id);

    if (occ->isMasterOcc())
    {
        // Read power sensors
        readPowerSensors(sensorPath, id);
    }

    // Path is present again: allow a future failure to be traced
    tracedError[id] = false;
}
1367#endif
Chris Cain17257672021-10-22 13:41:03 -05001368
// Read the altitude from DBus
//
// A reading below 0xFFFF is clamped at zero, rounded to the nearest meter
// and stored in 'altitude'. Any other reading is traced as invalid and
// leaves 'altitude' unchanged. When the D-Bus read itself fails, 'altitude'
// is set to 0xFFFF (not available). Failure traces are emitted once and
// re-armed by the next valid reading.
void Manager::readAltitude()
{
    static bool traceAltitudeErr = true;

    utils::PropertyValue altitudeProperty{};
    try
    {
        altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
                                              ALTITUDE_PROP);
        const auto sensorVal = std::get<double>(altitudeProperty);

        // Written as a negated '<' so a NaN reading is treated as invalid
        if (!(sensorVal < 0xFFFF))
        {
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
            return;
        }

        // Negative altitudes are reported as 0, everything else is
        // rounded to nearest meter
        altitude = (sensorVal < 0) ? uint16_t{0}
                                   : static_cast<uint16_t>(sensorVal + 0.5);

        log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                      sensorVal, altitude)
                              .c_str());
        traceAltitudeErr = true;
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}
1418
// Callback function when ambient temperature changes
//
// The new value is reduced to a whole number of degrees C (0xFF when the
// reading is NaN, 0 when it is negative). When that differs from the stored
// ambient value, the stored value is updated, the altitude is re-read if it
// is still unavailable, and (POWER10 builds) both are sent to every active
// OCC.
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    const auto ambientEntry = msgData.find(AMBIENT_PROP);
    if (ambientEntry == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }

    const double currentTemp = std::get<double>(ambientEntry->second);

    // 0xFF marks the ambient reading as not valid
    uint8_t truncatedTemp = 0xFF;
    if (!std::isnan(currentTemp))
    {
        // Negative readings are reported as 0, everything else is
        // rounded to nearest degree C
        truncatedTemp = (currentTemp < 0)
                            ? uint8_t{0}
                            : static_cast<uint8_t>(currentTemp + 0.5);
    }

    // Only a change in ambient is reported to the OCCs
    if (truncatedTemp == ambient)
    {
        return;
    }

    log<level::DEBUG>(
        std::format("ambientCallback: Ambient change from {} to {}C", ambient,
                    currentTemp)
            .c_str());

    ambient = truncatedTemp;
    if (altitude == 0xFFFF)
    {
        // No altitude yet, try reading again
        readAltitude();
    }

    log<level::DEBUG>(
        std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
                    altitude)
            .c_str());
#ifdef POWER10
    // Send ambient and altitude to all OCCs
    for (auto& occStatus : statusObjects)
    {
        if (occStatus->occActive())
        {
            occStatus->sendAmbient(ambient, altitude);
        }
    }
#endif // POWER10
}
1483
1484// return the current ambient and altitude readings
1485void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1486 uint16_t& altitudeValue) const
1487{
1488 ambientValid = true;
1489 ambientTemp = ambient;
1490 altitudeValue = altitude;
1491
1492 if (ambient == 0xFF)
1493 {
1494 ambientValid = false;
1495 }
1496}
1497
Chris Caina7b74dc2021-11-10 17:03:43 -06001498#ifdef POWER10
// Called when waitForAllOccsTimer expires
// After the first OCC goes active, this timer will be started (60 seconds)
//
// The expiry is ignored while a reset is in progress. Otherwise a pending
// reset request is initiated (re-arming the timer if it is not running), or,
// when no reset is required, the OCC master is validated.
void Manager::occsNotAllRunning()
{
    if (resetInProgress)
    {
        log<level::WARNING>(
            "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
        return;
    }

    if (activeCount != statusObjects.size())
    {
        // Not all OCCs went active
        // Procs may be garded, so may be expected
        log<level::WARNING>(
            std::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, statusObjects.size())
                .c_str());
    }

    if (!resetRequired)
    {
        validateOccMaster();
        return;
    }

    initiateOccRequest(resetInstance);

    if (!waitForAllOccsTimer->isEnabled())
    {
        log<level::WARNING>(
            "occsNotAllRunning: Restarting waitForAllOccTimer");
        // restart occ wait timer
        waitForAllOccsTimer->restartOnce(60s);
    }
}
Chris Cain755af102024-02-27 16:09:51 -06001537
1538#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -05001539// Called when throttlePldmTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001540// If this timer expires, that indicates there are no OCC active sensor PDRs
Chris Cainc33171b2024-05-24 16:14:50 -05001541// found which will trigger pldm traces to be throttled.
1542// The second time this timer expires, a PEL will get created.
1543void Manager::throttlePldmTraceExpired()
Chris Cain755af102024-02-27 16:09:51 -06001544{
Chris Cain7651c062024-05-02 14:14:06 -05001545 if (utils::isHostRunning())
1546 {
Chris Cainc33171b2024-05-24 16:14:50 -05001547 if (!onPldmTimeoutCreatePel)
1548 {
1549 // Throttle traces
1550 pldmHandle->setTraceThrottle(true);
1551 // Restart timer to log a PEL when timer expires
1552 onPldmTimeoutCreatePel = true;
1553 throttlePldmTraceTimer->restartOnce(40min);
1554 }
1555 else
1556 {
1557 log<level::ERR>(
1558 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1559 // Create PEL
1560 createPldmSensorPEL();
1561 }
Chris Cain7651c062024-05-02 14:14:06 -05001562 }
1563 else
1564 {
1565 // Make sure traces are not throttled
1566 pldmHandle->setTraceThrottle(false);
1567 log<level::INFO>(
Chris Cainc33171b2024-05-24 16:14:50 -05001568 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
Chris Cain7651c062024-05-02 14:14:06 -05001569 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001570}
1571
// Create a PEL (with occ-control journal entries attached as FFDC) reporting
// that the PLDM sensors for the OCCs could not be found.
// A D-Bus failure while creating the PEL is logged and not propagated.
void Manager::createPldmSensorPEL()
{
    static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
    static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";

    Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);

    std::map<std::string, std::string> additionalData;
    additionalData.emplace("_PID", std::to_string(getpid()));

    log<level::INFO>(
        "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");

    auto& bus = utils::getBus();

    try
    {
        FFDCFiles ffdc;
        // Add occ-control journal traces to PEL FFDC.
        // The returned object is kept until after the D-Bus call below.
        auto occJournalFile =
            FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);

        const std::string service =
            utils::getService(loggingObjectPath, opLoggingInterface);
        auto method =
            bus.new_method_call(service.c_str(), loggingObjectPath,
                                opLoggingInterface, "CreatePELWithFFDCFiles");

        // Set level to Warning (Predictive).
        auto level =
            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
                    Warning);

        method.append(d.path, level, additionalData, ffdc);
        bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create MISSING_OCC_SENSORS PEL: {}",
                        e.what())
                .c_str());
    }
}
1619#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001620#endif // POWER10
1621
// Verify single master OCC and start presence monitor
//
// Every status object is visited. On POWER10 builds an OCC that is not yet
// active is first given a chance to become active (from a queued active
// state or, with PLDM, by checking its active sensor); if the host is not
// running the validation is abandoned. A presence watch is added for each
// OCC reporting itself as master. A reset is requested when more than one
// master, or no master at all, is found.
void Manager::validateOccMaster()
{
    int masterInstance = -1;
    for (auto& obj : statusObjects)
    {
        auto instance = obj->getOccInstanceID();
#ifdef POWER10
        if (!obj->occActive())
        {
            if (utils::isHostRunning())
            {
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    log<level::INFO>(
                        std::format(
                            "validateOccMaster: OCC{} is ACTIVE (queued)",
                            instance)
                            .c_str());
                    obj->occActive(true);
                }
                else
                {
                    // OCC does not appear to be active yet, check active sensor
#ifdef PLDM
                    pldmHandle->checkActiveSensor(instance);
#endif
                    if (obj->occActive())
                    {
                        log<level::INFO>(
                            std::format(
                                "validateOccMaster: OCC{} is ACTIVE after reading sensor",
                                instance)
                                .c_str());
                    }
                }
            }
            else
            {
                log<level::WARNING>(
                    std::format(
                        "validateOccMaster: HOST is not running (OCC{})",
                        instance)
                        .c_str());
                return;
            }
        }
#endif // POWER10

        if (obj->isMasterOcc())
        {
            obj->addPresenceWatchMaster();

            if (masterInstance == -1)
            {
                masterInstance = instance;
            }
            else
            {
                log<level::ERR>(
                    std::format(
                        "validateOccMaster: Multiple OCC masters! ({} and {})",
                        masterInstance, instance)
                        .c_str());
                // request reset
                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
            }
        }
    }

    if (masterInstance < 0)
    {
        log<level::ERR>(
            std::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
                        statusObjects.size())
                .c_str());
        // request reset; front() must not be called on an empty container,
        // so the request is skipped when there are no status objects at all
        if (!statusObjects.empty())
        {
            statusObjects.front()->deviceError(
                Error::Descriptor(PRESENCE_ERROR_PATH));
        }
    }
    else
    {
        log<level::INFO>(
            std::format("validateOccMaster: OCC{} is master of {} OCCs",
                        masterInstance, activeCount)
                .c_str());
#ifdef POWER10
        pmode->updateDbusSafeMode(false);
#endif
    }
}
1716
Chris Cain40501a22022-03-14 17:33:27 -05001717void Manager::updatePcapBounds() const
1718{
1719 if (pcap)
1720 {
1721 pcap->updatePcapBounds();
1722 }
1723}
1724
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301725} // namespace occ
1726} // namespace open_power