blob: 3ae11f68ba0ca00e1696e9190bd459ae6497f1ce [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Chris Cain4b82f3e2024-04-22 14:44:29 -05007#include "occ_errors.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05008#include "utils.hpp"
9
George Liub5ca1012021-09-10 12:53:11 +080010#include <phosphor-logging/elog-errors.hpp>
Chris Cain37abe9b2024-10-31 17:20:31 -050011#include <phosphor-logging/lg2.hpp>
George Liub5ca1012021-09-10 12:53:11 +080012#include <xyz/openbmc_project/Common/error.hpp>
13
Matt Spinlerd267cec2021-09-01 14:49:19 -050014#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080015#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080016#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060017#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080018#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050019
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// Value 0xFF is used to mean "FRU type not available".
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// File-name suffixes (presumably of OCC hwmon sensor files - confirm against
// the sensor-reading code later in this file).
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// Marker file checked by Manager::findAndCreateObjects() before it searches
// for OCC devices.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";
32
Chris Caina8857c52021-01-27 11:53:05 -060033using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060034using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060035
// Read a single whitespace-delimited value of type T from a file.
//
// @tparam T    Default-constructible type readable with operator>>
// @param path  File to read
// @return      The value extracted from the start of the file
// @throws std::system_error (generic_category) if the file cannot be opened,
//         the value cannot be extracted, or end-of-file is hit while
//         extracting. The error code is errno when the failing call set it,
//         otherwise EIO, so the code is never 0 ("Success").
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // Any stream error (including reaching EOF) is reported as an exception
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data{};

    // Clear errno so a stale value from an unrelated earlier call is never
    // reported as the cause of a failure here.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Format/parse failures do not set errno; report them as EIO
        const int err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category(),
                                "readFile: " + path);
    }

    return data;
}
58
Chris Cain720a3842025-01-09 10:23:36 -060059void Manager::createPldmHandle()
60{
61#ifdef PLDM
62 pldmHandle = std::make_unique<pldm::Interface>(
63 std::bind(std::mem_fn(&Manager::updateOCCActive), this,
64 std::placeholders::_1, std::placeholders::_2),
65 std::bind(std::mem_fn(&Manager::sbeHRESETResult), this,
66 std::placeholders::_1, std::placeholders::_2),
67 std::bind(std::mem_fn(&Manager::updateOccSafeMode), this,
68 std::placeholders::_1),
Chris Cainc488bac2025-03-17 09:01:15 -050069 std::bind(std::mem_fn(&Manager::hostPoweredOff), this), event);
Chris Cain720a3842025-01-09 10:23:36 -060070#endif
71}
72
Chris Cainc33171b2024-05-24 16:14:50 -050073// findAndCreateObjects():
74// Takes care of getting the required objects created and
75// finds the available devices/processors.
76// (function is called everytime the discoverTimer expires)
77// - create the PowerMode object to control OCC modes
78// - create statusObjects for each OCC device found
79// - waits for OCC Active sensors PDRs to become available
80// - restart discoverTimer if all data is not available yet
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053081void Manager::findAndCreateObjects()
82{
Matt Spinlerd267cec2021-09-01 14:49:19 -050083#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050084 for (auto id = 0; id < MAX_CPUS; ++id)
85 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060086 // Create one occ per cpu
87 auto occ = std::string(OCC_NAME) + std::to_string(id);
88 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053089 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050090#else
Chris Cain613dc902022-04-08 09:56:22 -050091 if (!pmode)
92 {
93 // Create the power mode object
94 pmode = std::make_unique<powermode::PowerMode>(
95 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
96 }
97
Chris Cain1718fd82022-02-16 16:39:50 -060098 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050099 {
Chris Cainbae4d072022-02-28 09:46:50 -0600100 static bool statusObjCreated = false;
101 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -0600102 {
Chris Cainbae4d072022-02-28 09:46:50 -0600103 // Create the OCCs based on on the /dev/occX devices
104 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -0600105
Chris Cainbae4d072022-02-28 09:46:50 -0600106 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -0600107 {
Chris Cainbae4d072022-02-28 09:46:50 -0600108 // Something changed or no OCCs yet, try again in 10s.
109 // Note on the first pass prevOCCSearch will be empty,
110 // so there will be at least one delay to give things
111 // a chance to settle.
112 prevOCCSearch = occs;
113
Chris Cain37abe9b2024-10-31 17:20:31 -0500114 lg2::info(
115 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})",
116 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600117
118 discoverTimer->restartOnce(10s);
119 }
120 else
121 {
122 // All OCCs appear to be available, create status objects
123
124 // createObjects requires OCC0 first.
125 std::sort(occs.begin(), occs.end());
126
Chris Cain37abe9b2024-10-31 17:20:31 -0500127 lg2::info(
128 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects",
129 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600130 for (auto id : occs)
131 {
132 createObjects(std::string(OCC_NAME) + std::to_string(id));
133 }
134 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500135 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500136
137 // Find/update the processor path associated with each OCC
138 for (auto& obj : statusObjects)
139 {
140 obj->updateProcAssociation();
141 }
Chris Cainbae4d072022-02-28 09:46:50 -0600142 }
143 }
144
Chris Cain6d8f37a2022-04-29 13:46:01 -0500145 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600146 {
147 static bool tracedHostWait = false;
148 if (utils::isHostRunning())
149 {
150 if (tracedHostWait)
151 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500152 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600153 "Manager::findAndCreateObjects(): Host is running");
154 tracedHostWait = false;
155 }
Chris Cainbae4d072022-02-28 09:46:50 -0600156 checkAllActiveSensors();
157 }
158 else
159 {
160 if (!tracedHostWait)
161 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500162 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600163 "Manager::findAndCreateObjects(): Waiting for host to start");
164 tracedHostWait = true;
165 }
166 discoverTimer->restartOnce(30s);
Chris Cain7651c062024-05-02 14:14:06 -0500167#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -0500168 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500169 {
170 // Host is no longer running, disable throttle timer and
171 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500172 lg2::info("findAndCreateObjects(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500173 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500174 pldmHandle->setTraceThrottle(false);
175 }
176#endif
Chris Cain1718fd82022-02-16 16:39:50 -0600177 }
178 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500179 }
180 else
181 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500182 lg2::info(
183 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...",
184 "FILE", HOST_ON_FILE);
Chris Cain1718fd82022-02-16 16:39:50 -0600185 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500186 }
187#endif
188}
189
#ifdef POWER10
// Check if all occActive sensors are available
//
// While the host is running, every status object that is neither active nor
// has received its PLDM sensor is examined:
// - if its instance was queued in queuedActiveState it is marked active
// - otherwise the scan stops at that OCC and discovery keeps waiting
// When all sensors are available the discovery timer is disabled and, if a
// reset was requested in the meantime, the reset is initiated.
// When not all are available the discovery timer is restarted (10s).
void Manager::checkAllActiveSensors()
{
    // State kept across calls
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (open_power::occ::utils::isHostRunning())
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            lg2::info("checkAllActiveSensors(): Host is now running");
        }

        // Start with the assumption that all are available
        allActiveSensorAvailable = true;
        for (auto& obj : statusObjects)
        {
            if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
            {
                auto instance = obj->getOccInstanceID();
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    lg2::info(
                        "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)",
                        "INST", instance);
                    obj->occActive(true);
                }
                else
                {
                    allActiveSensorAvailable = false;
                    if (!tracedSensorWait)
                    {
                        lg2::info(
                            "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor",
                            "INST", instance);
                        tracedSensorWait = true;
#ifdef PLDM
                        // Make sure PLDM traces are not throttled
                        pldmHandle->setTraceThrottle(false);
                        // Start timer to throttle PLDM traces when timer
                        // expires
                        onPldmTimeoutCreatePel = false;
                        throttlePldmTraceTimer->restartOnce(5min);
#endif
                    }
#ifdef PLDM
                    // Ignore active sensor check if the OCCs are being reset
                    if (!resetInProgress)
                    {
                        pldmHandle->checkActiveSensor(instance);
                    }
#endif
                    // Only the first missing sensor is handled per call
                    break;
                }
            }
        }
    }
    else
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            lg2::info("checkAllActiveSensors(): Waiting for host to start");
#ifdef PLDM
            if (throttlePldmTraceTimer->isEnabled())
            {
                // Host is no longer running, disable throttle timer and
                // make sure traces are not throttled
                lg2::info("checkAllActiveSensors(): disabling sensor timer");
                throttlePldmTraceTimer->setEnabled(false);
                pldmHandle->setTraceThrottle(false);
            }
#endif
        }
    }

    if (allActiveSensorAvailable)
    {
        // All sensors were found, disable the discovery timer
        if (discoverTimer->isEnabled())
        {
            discoverTimer->setEnabled(false);
        }
#ifdef PLDM
        if (throttlePldmTraceTimer->isEnabled())
        {
            // Disable throttle timer and make sure traces are not throttled
            throttlePldmTraceTimer->setEnabled(false);
            pldmHandle->setTraceThrottle(false);
        }
#endif
        if (waitingForAllOccActiveSensors)
        {
            lg2::info(
                "checkAllActiveSensors(): OCC Active sensors are available");
            waitingForAllOccActiveSensors = false;

            if (resetRequired)
            {
                initiateOccRequest(resetInstance);

                if (!waitForAllOccsTimer->isEnabled())
                {
                    lg2::warning(
                        "occsNotAllRunning: Restarting waitForAllOccTimer");
                    // restart occ wait timer to check status after reset
                    // completes
                    waitForAllOccsTimer->restartOnce(60s);
                }
            }
        }
        queuedActiveState.clear();
        tracedSensorWait = false;
    }
    else
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            lg2::info(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
    }
}
#endif
323
Matt Spinlerd267cec2021-09-01 14:49:19 -0500324std::vector<int> Manager::findOCCsInDev()
325{
326 std::vector<int> occs;
327 std::regex expr{R"(occ(\d+)$)"};
328
329 for (auto& file : fs::directory_iterator("/dev"))
330 {
331 std::smatch match;
332 std::string path{file.path().string()};
333 if (std::regex_search(path, match, expr))
334 {
335 auto num = std::stoi(match[1].str());
336
337 // /dev numbering starts at 1, ours starts at 0.
338 occs.push_back(num - 1);
339 }
340 }
341
342 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530343}
344
Patrick Williamsaf408082022-07-22 19:26:54 -0500345int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530346{
George Liubcef3b42021-09-10 12:39:02 +0800347 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530348
349 sdbusplus::message::object_path o;
350 msg.read(o);
351 fs::path cpuPath(std::string(std::move(o)));
352
353 auto name = cpuPath.filename().string();
354 auto index = name.find(CPU_NAME);
355 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
356
357 createObjects(name);
358
359 return 0;
360}
361
362void Manager::createObjects(const std::string& occ)
363{
364 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
365
Gunnar Mills94df8c92018-09-14 14:50:03 -0500366 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800367 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600368#ifdef POWER10
369 pmode,
370#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500371 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600372 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530373#ifdef PLDM
374 ,
Chris Cainf0295f52024-09-12 15:41:14 -0500375 // Callback will set flag indicating reset needs to be done
376 // instead of immediately issuing a reset via PLDM.
377 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
Tom Joseph00325232020-07-29 17:51:48 +0530378 std::placeholders::_1)
379#endif
380 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530381
Chris Cain40501a22022-03-14 17:33:27 -0500382 // Create the power cap monitor object
383 if (!pcap)
384 {
385 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
386 *statusObjects.back());
387 }
388
Chris Cain36f9cde2021-11-22 11:18:21 -0600389 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530390 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500391 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST",
392 statusObjects.back()->getOccInstanceID());
Chris Cain36f9cde2021-11-22 11:18:21 -0600393 _pollTimer->setEnabled(false);
394
Chris Cain78e86012021-03-04 16:15:31 -0600395#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600396 // Set the master OCC on the PowerMode object
397 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600398#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600399 }
400
Patrick Williamsd7542c82024-08-16 15:20:28 -0400401 passThroughObjects.emplace_back(std::make_unique<PassThrough>(
402 path.c_str()
Chris Cain36f9cde2021-11-22 11:18:21 -0600403#ifdef POWER10
Patrick Williamsd7542c82024-08-16 15:20:28 -0400404 ,
405 pmode
Chris Cain36f9cde2021-11-22 11:18:21 -0600406#endif
Patrick Williamsd7542c82024-08-16 15:20:28 -0400407 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530408}
409
Chris Cainf0295f52024-09-12 15:41:14 -0500410// If a reset is not already outstanding, set a flag to indicate that a reset is
411// needed.
412void Manager::resetOccRequest(instanceID instance)
413{
414 if (!resetRequired)
415 {
416 resetRequired = true;
417 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500418 lg2::error(
419 "resetOccRequest: PM Complex reset was requested due to OCC{INST}",
420 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500421 }
422 else if (instance != resetInstance)
423 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500424 lg2::warning(
425 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}",
426 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500427 }
428}
429
430// If a reset has not been started, initiate an OCC reset via PLDM
431void Manager::initiateOccRequest(instanceID instance)
432{
433 if (!resetInProgress)
434 {
435 resetInProgress = true;
436 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500437 lg2::error(
438 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}",
439 "INST", instance);
Chris Cainf7881502025-04-16 14:48:30 -0500440
441 // Make sure ALL OCC comm stops to all OCCs before the reset
442 for (auto& obj : statusObjects)
443 {
444 if (obj->occActive())
445 {
446 obj->occActive(false);
447 }
448 }
449
Chris Cainf0295f52024-09-12 15:41:14 -0500450#ifdef PLDM
451 pldmHandle->resetOCC(instance);
452#endif
453 resetRequired = false;
454 }
455 else
456 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500457 lg2::warning(
458 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}",
459 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500460 }
461}
462
Sheldon Bailey373af752022-02-21 15:14:00 -0600463void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530464{
Chris Caina7b74dc2021-11-10 17:03:43 -0600465 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600466 {
Chris Cainf0295f52024-09-12 15:41:14 -0500467 if (resetInProgress)
468 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500469 lg2::info(
Chris Cain92dfb272025-02-13 12:20:27 -0600470 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{RINST}",
Chris Cain37abe9b2024-10-31 17:20:31 -0500471 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500472 return;
473 }
474
Chris Caina7b74dc2021-11-10 17:03:43 -0600475 // OCC went active
476 ++activeCount;
477
478#ifdef POWER10
479 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600480 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600481 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500482 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500483 }
484#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600485
486 if (activeCount == statusObjects.size())
487 {
488#ifdef POWER10
489 // All OCCs are now running
490 if (waitForAllOccsTimer->isEnabled())
491 {
492 // stop occ wait timer
493 waitForAllOccsTimer->setEnabled(false);
494 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600495
Chris Cainf0295f52024-09-12 15:41:14 -0500496 // All OCCs have been found, check if we need a reset
497 if (resetRequired)
498 {
499 initiateOccRequest(resetInstance);
500
501 if (!waitForAllOccsTimer->isEnabled())
502 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500503 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500504 "occsNotAllRunning: Restarting waitForAllOccTimer");
505 // restart occ wait timer
506 waitForAllOccsTimer->restartOnce(60s);
507 }
508 }
509 else
510 {
511 // Verify master OCC and start presence monitor
512 validateOccMaster();
513 }
514#else
Chris Caina7b74dc2021-11-10 17:03:43 -0600515 // Verify master OCC and start presence monitor
516 validateOccMaster();
Chris Cainf0295f52024-09-12 15:41:14 -0500517#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600518 }
519
Chris Cainf7881502025-04-16 14:48:30 -0500520 // Start poll timer if not already started (since at least one OCC is
521 // running)
Chris Caina7b74dc2021-11-10 17:03:43 -0600522 if (!_pollTimer->isEnabled())
523 {
Chris Cainf7881502025-04-16 14:48:30 -0500524 // An OCC just went active, PM Complex is just coming online so
525 // clear any outstanding reset requests
526 if (resetRequired)
527 {
528 resetRequired = false;
529 lg2::error(
530 "statusCallBack: clearing resetRequired (since OCC{INST} went active, resetInProgress={RIP})",
531 "INST", instance, "RIP", resetInProgress);
532 }
533
Chris Cain37abe9b2024-10-31 17:20:31 -0500534 lg2::info("Manager: OCCs will be polled every {TIME} seconds",
535 "TIME", pollInterval);
Chris Caina7b74dc2021-11-10 17:03:43 -0600536
537 // Send poll and start OCC poll timer
538 pollerTimerExpired();
539 }
540 }
541 else
542 {
543 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500544 if (activeCount > 0)
545 {
546 --activeCount;
547 }
548 else
549 {
Sheldon Baileyb89d6192025-03-05 09:33:19 -0600550 lg2::info("OCC{INST} disabled, and no other OCCs are active",
Chris Cain37abe9b2024-10-31 17:20:31 -0500551 "INST", instance);
Chris Cain082a6ca2023-03-21 10:27:26 -0500552 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600553
554 if (activeCount == 0)
555 {
556 // No OCCs are running
557
Chris Cainf0295f52024-09-12 15:41:14 -0500558 if (resetInProgress)
559 {
560 // All OCC active sensors are clear (reset should be in
561 // progress)
Chris Cain37abe9b2024-10-31 17:20:31 -0500562 lg2::info(
563 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
564 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500565 resetInProgress = false;
566 resetInstance = 255;
567 }
568
Chris Caina7b74dc2021-11-10 17:03:43 -0600569 // Stop OCC poll timer
570 if (_pollTimer->isEnabled())
571 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500572 lg2::info(
Chris Caina7b74dc2021-11-10 17:03:43 -0600573 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
574 _pollTimer->setEnabled(false);
575 }
576
577#ifdef POWER10
578 // stop wait timer
579 if (waitForAllOccsTimer->isEnabled())
580 {
581 waitForAllOccsTimer->setEnabled(false);
582 }
583#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600584 }
Chris Cainf0295f52024-09-12 15:41:14 -0500585 else if (resetInProgress)
586 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500587 lg2::info(
588 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
589 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500590 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600591#ifdef READ_OCC_SENSORS
592 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500593 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600594#endif
Chris Caina8857c52021-01-27 11:53:05 -0600595 }
Chris Cainbae4d072022-02-28 09:46:50 -0600596
597#ifdef POWER10
598 if (waitingForAllOccActiveSensors)
599 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500600 if (utils::isHostRunning())
601 {
602 checkAllActiveSensors();
603 }
Chris Cainbae4d072022-02-28 09:46:50 -0600604 }
605#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530606}
607
#ifdef I2C_OCC
// initStatusObjects():
// Creates one Status object per OCC hwmon device reported by
// i2c_occ::getOccHwmonDevices(DEV_PATH), then creates the PowerCap object
// (and, for POWER10, the PowerMode object) using the first device as the
// master OCC.
// If no status objects exist after the scan, an error is logged and nothing
// further is created.
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    // D-Bus path of the first device created here (the master OCC)
    [[maybe_unused]] fs::path masterPath;

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
        if (masterPath.empty())
        {
            masterPath = path;
        }
    }

    if (statusObjects.empty())
    {
        // front() on an empty container is undefined behavior
        lg2::error(
            "Manager::initStatusObjects(): No OCC hwmon devices found in {PATH}",
            "PATH", DEV_PATH);
        return;
    }

    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front());
#ifdef POWER10
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    // Set the master OCC on the PowerMode object
    pmode->setMasterOcc(masterPath);
#endif
}
#endif
634
Tom Joseph815f9f52020-07-27 12:12:13 +0530635#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500636void Manager::sbeTimeout(unsigned int instance)
637{
Eddie James2a751d72022-03-04 09:16:12 -0600638 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
639 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400640 return instance == obj->getOccInstanceID();
641 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500642
Eddie Jamescb018da2022-03-05 11:49:37 -0600643 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600644 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500645 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST",
646 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500647
Chris Cain720a3842025-01-09 10:23:36 -0600648#ifdef PHAL_SUPPORT
Eddie James2a751d72022-03-04 09:16:12 -0600649 setSBEState(instance, SBE_STATE_NOT_USABLE);
Chris Cain720a3842025-01-09 10:23:36 -0600650#endif
Eddie James2a751d72022-03-04 09:16:12 -0600651
Chris Cain92dfb272025-02-13 12:20:27 -0600652 // Stop communication with this OCC
653 (*obj)->occActive(false);
654
Eddie James2a751d72022-03-04 09:16:12 -0600655 pldmHandle->sendHRESET(instance);
656 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500657}
658
Tom Joseph815f9f52020-07-27 12:12:13 +0530659bool Manager::updateOCCActive(instanceID instance, bool status)
660{
Chris Cain7e374fb2022-04-07 09:47:23 -0500661 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
662 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400663 return instance == obj->getOccInstanceID();
664 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500665
Chris Cain082a6ca2023-03-21 10:27:26 -0500666 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500667 if (obj != statusObjects.end())
668 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500669 if (!hostRunning && (status == true))
670 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500671 lg2::warning(
672 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received",
673 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500674 (*obj)->setPldmSensorReceived(false);
675 if (!waitingForAllOccActiveSensors)
676 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500677 lg2::info(
Chris Cain082a6ca2023-03-21 10:27:26 -0500678 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
679 waitingForAllOccActiveSensors = true;
680 }
Chris Cain755af102024-02-27 16:09:51 -0600681#ifdef POWER10
Chris Cain082a6ca2023-03-21 10:27:26 -0500682 discoverTimer->restartOnce(30s);
Chris Cain755af102024-02-27 16:09:51 -0600683#endif
Chris Cain082a6ca2023-03-21 10:27:26 -0500684 return false;
685 }
686 else
687 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500688 (*obj)->setPldmSensorReceived(true);
689 return (*obj)->occActive(status);
690 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500691 }
692 else
693 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500694 if (hostRunning)
695 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500696 lg2::warning(
697 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})",
698 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500699 }
700 else
701 {
702 if (status == true)
703 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500704 lg2::warning(
705 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})",
706 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500707 }
708 }
Chris Cainbd551de2022-04-26 13:41:16 -0500709 if (status == true)
710 {
711 // OCC went active
712 queuedActiveState.insert(instance);
713 }
714 else
715 {
716 auto match = queuedActiveState.find(instance);
717 if (match != queuedActiveState.end())
718 {
719 // OCC was disabled
720 queuedActiveState.erase(match);
721 }
722 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500723 return false;
724 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530725}
Eddie Jamescbad2192021-10-07 09:39:39 -0500726
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500727// Called upon pldm event To set powermode Safe Mode State for system.
728void Manager::updateOccSafeMode(bool safeMode)
729{
730#ifdef POWER10
731 pmode->updateDbusSafeMode(safeMode);
732#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500733 // Update the processor throttle status on dbus
734 for (auto& obj : statusObjects)
735 {
736 obj->updateThrottle(safeMode, THROTTLED_SAFE);
737 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500738}
739
Eddie Jamescbad2192021-10-07 09:39:39 -0500740void Manager::sbeHRESETResult(instanceID instance, bool success)
741{
742 if (success)
743 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500744 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500745
Chris Cain720a3842025-01-09 10:23:36 -0600746#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500747 setSBEState(instance, SBE_STATE_BOOTED);
Chris Cain720a3842025-01-09 10:23:36 -0600748#endif
Eddie Jamescbad2192021-10-07 09:39:39 -0500749
Chris Cain92dfb272025-02-13 12:20:27 -0600750 // Re-enable communication with this OCC
751 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
752 [instance](const auto& obj) {
753 return instance == obj->getOccInstanceID();
754 });
755 if (obj != statusObjects.end() && (!(*obj)->occActive()))
756 {
757 (*obj)->occActive(true);
758 }
759
Eddie Jamescbad2192021-10-07 09:39:39 -0500760 return;
761 }
762
Chris Cain720a3842025-01-09 10:23:36 -0600763#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500764 setSBEState(instance, SBE_STATE_FAILED);
765
766 if (sbeCanDump(instance))
767 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500768 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST",
769 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500770
771 auto& bus = utils::getBus();
772 uint32_t src6 = instance << 16;
773 uint32_t logId =
774 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
775 src6, "SBE command timeout");
776
777 try
778 {
George Liuf3a4a692021-12-28 13:59:51 +0800779 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
780 constexpr auto function = "CreateDump";
781
Patrick Williamsd7542c82024-08-16 15:20:28 -0400782 std::string service =
783 utils::getService(OP_DUMP_OBJ_PATH, interface);
Dhruvaraj Subhashchandran1173b2b2024-06-01 11:12:13 -0500784 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
785 interface, function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500786
787 std::map<std::string, std::variant<std::string, uint64_t>>
788 createParams{
789 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
790 uint64_t(logId)},
791 {"com.ibm.Dump.Create.CreateParameters.DumpType",
792 "com.ibm.Dump.Create.DumpType.SBE"},
793 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
794 uint64_t(instance)},
795 };
796
797 method.append(createParams);
798
799 auto response = bus.call(method);
800 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500801 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500802 {
803 constexpr auto ERROR_DUMP_DISABLED =
804 "xyz.openbmc_project.Dump.Create.Error.Disabled";
805 if (e.name() == ERROR_DUMP_DISABLED)
806 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500807 lg2::info("Dump is disabled, skipping");
Eddie Jamescbad2192021-10-07 09:39:39 -0500808 }
809 else
810 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500811 lg2::error("Dump failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500812 }
813 }
814 }
Chris Cain720a3842025-01-09 10:23:36 -0600815#endif
Chris Cainf0295f52024-09-12 15:41:14 -0500816
817 // SBE Reset failed, try PM Complex reset
Chris Cain37abe9b2024-10-31 17:20:31 -0500818 lg2::error("sbeHRESETResult: Forcing PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500819 resetOccRequest(instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500820}
821
Chris Cain720a3842025-01-09 10:23:36 -0600822#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500823bool Manager::sbeCanDump(unsigned int instance)
824{
825 struct pdbg_target* proc = getPdbgTarget(instance);
826
827 if (!proc)
828 {
829 // allow the dump in the error case
830 return true;
831 }
832
833 try
834 {
835 if (!openpower::phal::sbe::isDumpAllowed(proc))
836 {
837 return false;
838 }
839
840 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
841 {
842 return false;
843 }
844 }
845 catch (openpower::phal::exception::SbeError& e)
846 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500847 lg2::info("Failed to query SBE state");
Eddie Jamescbad2192021-10-07 09:39:39 -0500848 }
849
850 // allow the dump in the error case
851 return true;
852}
853
854void Manager::setSBEState(unsigned int instance, enum sbe_state state)
855{
856 struct pdbg_target* proc = getPdbgTarget(instance);
857
858 if (!proc)
859 {
860 return;
861 }
862
863 try
864 {
865 openpower::phal::sbe::setState(proc, state);
866 }
867 catch (const openpower::phal::exception::SbeError& e)
868 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500869 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what());
Eddie Jamescbad2192021-10-07 09:39:39 -0500870 }
871}
872
873struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
874{
875 if (!pdbgInitialized)
876 {
877 try
878 {
879 openpower::phal::pdbg::init();
880 pdbgInitialized = true;
881 }
882 catch (const openpower::phal::exception::PdbgError& e)
883 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500884 lg2::error("pdbg initialization failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500885 return nullptr;
886 }
887 }
888
889 struct pdbg_target* proc = nullptr;
890 pdbg_for_each_class_target("proc", proc)
891 {
892 if (pdbg_target_index(proc) == instance)
893 {
894 return proc;
895 }
896 }
897
Chris Cain37abe9b2024-10-31 17:20:31 -0500898 lg2::error("Failed to get pdbg target");
Eddie Jamescbad2192021-10-07 09:39:39 -0500899 return nullptr;
900}
Tom Joseph815f9f52020-07-27 12:12:13 +0530901#endif
Chris Cain720a3842025-01-09 10:23:36 -0600902#endif
Tom Joseph815f9f52020-07-27 12:12:13 +0530903
Chris Caina8857c52021-01-27 11:53:05 -0600904void Manager::pollerTimerExpired()
905{
Chris Caina8857c52021-01-27 11:53:05 -0600906 if (!_pollTimer)
907 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500908 lg2::error("pollerTimerExpired() ERROR: Timer not defined");
Chris Caina8857c52021-01-27 11:53:05 -0600909 return;
910 }
911
Chris Cainf0295f52024-09-12 15:41:14 -0500912#ifdef POWER10
913 if (resetRequired)
914 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500915 lg2::error("pollerTimerExpired() - Initiating PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500916 initiateOccRequest(resetInstance);
917
918 if (!waitForAllOccsTimer->isEnabled())
919 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500920 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -0500921 // restart occ wait timer
922 waitForAllOccsTimer->restartOnce(60s);
923 }
924 return;
925 }
926#endif
927
Chris Caina8857c52021-01-27 11:53:05 -0600928 for (auto& obj : statusObjects)
929 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600930 if (!obj->occActive())
931 {
932 // OCC is not running yet
933#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600934 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500935 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600936#endif
937 continue;
938 }
939
Chris Caina8857c52021-01-27 11:53:05 -0600940 // Read sysfs to force kernel to poll OCC
941 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800942
943#ifdef READ_OCC_SENSORS
944 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600945 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800946#endif
Chris Caina8857c52021-01-27 11:53:05 -0600947 }
948
Chris Caina7b74dc2021-11-10 17:03:43 -0600949 if (activeCount > 0)
950 {
951 // Restart OCC poll timer
952 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
953 }
954 else
955 {
956 // No OCCs running, so poll timer will not be restarted
Chris Cain37abe9b2024-10-31 17:20:31 -0500957 lg2::info(
958 "Manager::pollerTimerExpired: poll timer will not be restarted");
Chris Caina7b74dc2021-11-10 17:03:43 -0600959 }
Chris Caina8857c52021-01-27 11:53:05 -0600960}
961
Chicago Duanbb895cb2021-06-18 19:37:16 +0800962#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600963void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800964{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500965 // There may be more than one sensor with the same FRU type
966 // and label so make two passes: the first to read the temps
967 // from sysfs, and the second to put them on D-Bus after
968 // resolving any conflicts.
969 std::map<std::string, double> sensorData;
970
Chicago Duanbb895cb2021-06-18 19:37:16 +0800971 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
972 for (auto& file : fs::directory_iterator(path))
973 {
974 if (!std::regex_search(file.path().string(), expr))
975 {
976 continue;
977 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800978
Matt Spinlera26f1522021-08-25 15:50:20 -0500979 uint32_t labelValue{0};
980
981 try
982 {
983 labelValue = readFile<uint32_t>(file.path());
984 }
985 catch (const std::system_error& e)
986 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500987 lg2::debug(
988 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
989 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800990 continue;
991 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800992
993 const std::string& tempLabel = "label";
994 const std::string filePathString = file.path().string().substr(
995 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500996
997 uint32_t fruTypeValue{0};
998 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800999 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001000 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
1001 }
1002 catch (const std::system_error& e)
1003 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001004 lg2::debug(
1005 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1006 "PATH", filePathString + fruTypeSuffix, "ERROR",
1007 e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +08001008 continue;
1009 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001010
Patrick Williamsd7542c82024-08-16 15:20:28 -04001011 std::string sensorPath =
1012 OCC_SENSORS_ROOT + std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +08001013
Matt Spinlerace67d82021-10-18 13:41:57 -05001014 std::string dvfsTempPath;
1015
Chicago Duanbb895cb2021-06-18 19:37:16 +08001016 if (fruTypeValue == VRMVdd)
1017 {
Patrick Williamsd7542c82024-08-16 15:20:28 -04001018 sensorPath.append(
1019 "vrm_vdd" + std::to_string(occInstance) + "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +08001020 }
Matt Spinlerace67d82021-10-18 13:41:57 -05001021 else if (fruTypeValue == processorIoRing)
1022 {
Patrick Williamsd7542c82024-08-16 15:20:28 -04001023 sensorPath.append(
1024 "proc" + std::to_string(occInstance) + "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -05001025 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -06001026 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -05001027 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001028 else
1029 {
Matt Spinler14d14022021-08-25 15:38:29 -05001030 uint16_t type = (labelValue & 0xFF000000) >> 24;
1031 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001032
1033 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
1034 {
Matt Spinler8b8abee2021-08-25 15:18:21 -05001035 if (fruTypeValue == fruTypeNotAvailable)
1036 {
1037 // Not all DIMM related temps are available to read
1038 // (no _input file in this case)
1039 continue;
1040 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001041 auto iter = dimmTempSensorName.find(fruTypeValue);
1042 if (iter == dimmTempSensorName.end())
1043 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001044 lg2::error(
1045 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ",
1046 "FRU", fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001047 continue;
1048 }
1049
Patrick Williamsd7542c82024-08-16 15:20:28 -04001050 sensorPath.append(
1051 "dimm" + std::to_string(instanceID) + iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -05001052
1053 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
1054 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001055 }
1056 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
1057 {
Matt Spinlerace67d82021-10-18 13:41:57 -05001058 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001059 {
Matt Spinlerace67d82021-10-18 13:41:57 -05001060 // The OCC reports small core temps, of which there are
1061 // two per big core. All current P10 systems are in big
1062 // core mode, so use a big core name.
1063 uint16_t coreNum = instanceID / 2;
1064 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -06001065 sensorPath.append("proc" + std::to_string(occInstance) +
1066 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -05001067 std::to_string(tempNum) + "_temp");
1068
Chris Cainae157b62024-01-23 16:05:12 -06001069 dvfsTempPath =
1070 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
1071 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -05001072 }
1073 else
1074 {
Chicago Duanbb895cb2021-06-18 19:37:16 +08001075 continue;
1076 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001077 }
1078 else
1079 {
1080 continue;
1081 }
1082 }
1083
Matt Spinlerace67d82021-10-18 13:41:57 -05001084 // The dvfs temp file only needs to be read once per chip per type.
1085 if (!dvfsTempPath.empty() &&
1086 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
1087 {
1088 try
1089 {
1090 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
1091
1092 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
1093 dvfsTempPath, dvfsValue * std::pow(10, -3));
1094 }
1095 catch (const std::system_error& e)
1096 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001097 lg2::debug(
1098 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1099 "PATH", filePathString + maxSuffix, "ERROR",
1100 e.code().value());
Matt Spinlerace67d82021-10-18 13:41:57 -05001101 }
1102 }
1103
Matt Spinlera26f1522021-08-25 15:50:20 -05001104 uint32_t faultValue{0};
1105 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001106 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001107 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
1108 }
1109 catch (const std::system_error& e)
1110 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001111 lg2::debug(
1112 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1113 "PATH", filePathString + faultSuffix, "ERROR",
1114 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001115 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001116 }
1117
Chris Cainae157b62024-01-23 16:05:12 -06001118 double tempValue{0};
1119 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -05001120 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001121 {
Chris Cainae157b62024-01-23 16:05:12 -06001122 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001123 }
Chris Cainae157b62024-01-23 16:05:12 -06001124 else
Chicago Duanbb895cb2021-06-18 19:37:16 +08001125 {
Chris Cainae157b62024-01-23 16:05:12 -06001126 // Read the temperature
1127 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001128 {
Chris Cainae157b62024-01-23 16:05:12 -06001129 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001130 }
Chris Cainae157b62024-01-23 16:05:12 -06001131 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001132 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001133 lg2::debug(
1134 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1135 "PATH", filePathString + inputSuffix, "ERROR",
1136 e.code().value());
Chris Cainae157b62024-01-23 16:05:12 -06001137
1138 // if errno == EAGAIN(Resource temporarily unavailable) then set
1139 // temp to 0, to avoid using old temp, and affecting FAN
1140 // Control.
1141 if (e.code().value() == EAGAIN)
1142 {
1143 tempValue = 0;
1144 }
1145 // else the errno would be something like
1146 // EBADF(Bad file descriptor)
1147 // or ENOENT(No such file or directory)
1148 else
1149 {
1150 continue;
1151 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001152 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001153 }
1154
Matt Spinler818cc8d2023-10-23 11:43:39 -05001155 // If this object path already has a value, only overwite
1156 // it if the previous one was an NaN or a smaller value.
1157 auto existing = sensorData.find(sensorPath);
1158 if (existing != sensorData.end())
1159 {
Chris Cainae157b62024-01-23 16:05:12 -06001160 // Multiple sensors found for this FRU type
1161 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1162 ((existing->second == 0) && std::isnan(tempValue)))
1163 {
1164 // One of the redundant sensors has failed (0xFF/nan), and the
1165 // other sensor has no reading (0), so set the FRU to NaN to
1166 // force fan increase
1167 tempValue = std::numeric_limits<double>::quiet_NaN();
1168 existing->second = tempValue;
1169 }
Matt Spinler818cc8d2023-10-23 11:43:39 -05001170 if (std::isnan(existing->second) || (tempValue > existing->second))
1171 {
1172 existing->second = tempValue;
1173 }
1174 }
1175 else
1176 {
Chris Cainae157b62024-01-23 16:05:12 -06001177 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001178 sensorData[sensorPath] = tempValue;
1179 }
1180 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001181
Matt Spinler818cc8d2023-10-23 11:43:39 -05001182 // Now publish the values on D-Bus.
1183 for (const auto& [objectPath, value] : sensorData)
1184 {
1185 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1186 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001187
Matt Spinler818cc8d2023-10-23 11:43:39 -05001188 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1189 objectPath, !std::isnan(value));
1190
1191 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001192 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001193 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001194 objectPath, {"all_sensors"});
Chris Cain6fa848a2022-01-24 14:54:38 -06001195 }
Chris Cainae157b62024-01-23 16:05:12 -06001196 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001197 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001198}
1199
Patrick Williams2d6ec902025-02-01 08:22:13 -05001200std::optional<std::string> Manager::getPowerLabelFunctionID(
1201 const std::string& value)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001202{
1203 // If the value is "system", then the FunctionID is "system".
1204 if (value == "system")
1205 {
1206 return value;
1207 }
1208
1209 // If the value is not "system", then the label value have 3 numbers, of
1210 // which we only care about the middle one:
1211 // <sensor id>_<function id>_<apss channel>
1212 // eg: The value is "0_10_5" , then the FunctionID is "10".
1213 if (value.find("_") == std::string::npos)
1214 {
1215 return std::nullopt;
1216 }
1217
1218 auto powerLabelValue = value.substr((value.find("_") + 1));
1219
1220 if (powerLabelValue.find("_") == std::string::npos)
1221 {
1222 return std::nullopt;
1223 }
1224
1225 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1226}
1227
1228void Manager::readPowerSensors(const fs::path& path, uint32_t id)
1229{
Chicago Duanbb895cb2021-06-18 19:37:16 +08001230 std::regex expr{"power\\d+_label$"}; // Example: power5_label
1231 for (auto& file : fs::directory_iterator(path))
1232 {
1233 if (!std::regex_search(file.path().string(), expr))
1234 {
1235 continue;
1236 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001237
Matt Spinlera26f1522021-08-25 15:50:20 -05001238 std::string labelValue;
1239 try
1240 {
1241 labelValue = readFile<std::string>(file.path());
1242 }
1243 catch (const std::system_error& e)
1244 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001245 lg2::debug(
1246 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1247 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +08001248 continue;
1249 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001250
1251 auto functionID = getPowerLabelFunctionID(labelValue);
1252 if (functionID == std::nullopt)
1253 {
1254 continue;
1255 }
1256
1257 const std::string& tempLabel = "label";
1258 const std::string filePathString = file.path().string().substr(
1259 0, file.path().string().length() - tempLabel.length());
1260
1261 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1262
1263 auto iter = powerSensorName.find(*functionID);
1264 if (iter == powerSensorName.end())
1265 {
1266 continue;
1267 }
1268 sensorPath.append(iter->second);
1269
Matt Spinlera26f1522021-08-25 15:50:20 -05001270 double tempValue{0};
1271
1272 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001273 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001274 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001275 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001276 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001277 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001278 lg2::debug(
1279 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1280 "PATH", filePathString + inputSuffix, "ERROR",
1281 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001282 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001283 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001284
Chris Cain5d66a0a2022-02-09 08:52:10 -06001285 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -06001286 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1287
Chris Cain5d66a0a2022-02-09 08:52:10 -06001288 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -05001289 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
1290
Patrick Williamsd7542c82024-08-16 15:20:28 -04001291 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1292 sensorPath, true);
Matt Spinlera26f1522021-08-25 15:50:20 -05001293
Matt Spinler5901abd2021-09-23 13:50:03 -05001294 if (existingSensors.find(sensorPath) == existingSensors.end())
1295 {
Chris Cain3523cc02024-10-30 17:19:09 -05001296 std::vector<std::string> fTypeList = {"all_sensors"};
1297 if (iter->second == "total_power")
1298 {
Chris Cainff0ce402025-01-17 10:54:55 -06001299 // Set sensor purpose as TotalPower
1300 dbus::OccDBusSensors::getOccDBus().setPurpose(
1301 sensorPath,
1302 "xyz.openbmc_project.Sensor.Purpose.SensorPurpose.TotalPower");
Chris Cain3523cc02024-10-30 17:19:09 -05001303 }
Chris Cain5d66a0a2022-02-09 08:52:10 -06001304 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001305 sensorPath, fTypeList);
Matt Spinler5901abd2021-09-23 13:50:03 -05001306 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001307 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001308 }
1309 return;
1310}
1311
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001312void Manager::readExtnSensors(const fs::path& path, uint32_t id)
1313{
1314 std::regex expr{"extn\\d+_label$"}; // Example: extn5_label
1315 for (auto& file : fs::directory_iterator(path))
1316 {
1317 if (!std::regex_search(file.path().string(), expr))
1318 {
1319 continue;
1320 }
1321
1322 // Read in Label value of the sensor from file.
1323 std::string labelValue;
1324 try
1325 {
1326 labelValue = readFile<std::string>(file.path());
1327 }
1328 catch (const std::system_error& e)
1329 {
1330 lg2::debug(
1331 "readExtnSensors:label Failed reading {PATH}, errno = {ERROR}",
1332 "PATH", file.path().string(), "ERROR", e.code().value());
1333 continue;
1334 }
1335 const std::string& tempLabel = "label";
1336 const std::string filePathString = file.path().string().substr(
1337 0, file.path().string().length() - tempLabel.length());
1338
1339 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1340
1341 // Labels of EXTN sections from OCC interface Document
1342 // have different formats.
1343 // 0x464d494e : FMIN 0x46444953 : FDIS
1344 // 0x46424153 : FBAS 0x46555400 : FUT
1345 // 0x464d4158 : FMAX 0x434c4950 : CLIP
1346 // 0x4d4f4445 : MODE 0x574f4643 : WOFC
1347 // 0x574f4649 : WOFI 0x5057524d : PWRM
1348 // 0x50575250 : PWRP 0x45525248 : ERRH
1349 // Label indicating byte 5 and 6 is the current (mem,proc) power in
1350 // Watts.
1351 if ((labelValue == EXTN_LABEL_PWRM_MEMORY_POWER) ||
1352 (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER))
1353 {
1354 // Build the dbus String for this chiplet power asset.
1355 if (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER)
1356 {
1357 labelValue = "_power";
1358 }
1359 else // else EXTN_LABEL_PWRM_MEMORY_POWER
1360 {
1361 labelValue = "_mem_power";
1362 }
1363 sensorPath.append("chiplet" + std::to_string(id) + labelValue);
1364
1365 // Read in data value of the sensor from file.
1366 // Read in as string due to different format of data in sensors.
1367 std::string extnValue;
1368 try
1369 {
1370 extnValue = readFile<std::string>(filePathString + inputSuffix);
1371 }
1372 catch (const std::system_error& e)
1373 {
1374 lg2::debug(
1375 "readExtnSensors:value Failed reading {PATH}, errno = {ERROR}",
1376 "PATH", filePathString + inputSuffix, "ERROR",
1377 e.code().value());
1378 continue;
1379 }
1380
1381 // For Power field, Convert last 4 bytes of hex string into number
1382 // value.
1383 std::stringstream ssData;
1384 ssData << std::hex << extnValue.substr(extnValue.length() - 4);
1385 uint16_t MyHexNumber;
1386 ssData >> MyHexNumber;
1387
1388 // Convert output/DC power to input/AC power in Watts (round up)
1389 MyHexNumber =
1390 std::round(((MyHexNumber / (PS_DERATING_FACTOR / 100.0))));
1391
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001392 dbus::OccDBusSensors::getOccDBus().setUnit(
1393 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1394
1395 dbus::OccDBusSensors::getOccDBus().setValue(sensorPath,
1396 MyHexNumber);
1397
1398 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1399 sensorPath, true);
1400
1401 if (existingSensors.find(sensorPath) == existingSensors.end())
1402 {
1403 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
1404 sensorPath, {"all_sensors"});
1405 }
1406
Sheldon Baileyb89d6192025-03-05 09:33:19 -06001407 existingSensors[sensorPath] = id;
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001408 } // End Extended Power Sensors.
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001409 } // End For loop on files for Extended Sensors.
1410 return;
1411}
1412
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001413void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001414{
1415 for (const auto& [sensorPath, occId] : existingSensors)
1416 {
1417 if (occId == id)
1418 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001419 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001420 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001421
Patrick Williamsd7542c82024-08-16 15:20:28 -04001422 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1423 sensorPath, true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001424 }
1425 }
1426 return;
1427}
1428
Sheldon Bailey373af752022-02-21 15:14:00 -06001429void Manager::setSensorValueToNonFunctional(uint32_t id) const
1430{
1431 for (const auto& [sensorPath, occId] : existingSensors)
1432 {
1433 if (occId == id)
1434 {
1435 dbus::OccDBusSensors::getOccDBus().setValue(
1436 sensorPath, std::numeric_limits<double>::quiet_NaN());
1437
Patrick Williamsd7542c82024-08-16 15:20:28 -04001438 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1439 sensorPath, false);
Sheldon Bailey373af752022-02-21 15:14:00 -06001440 }
1441 }
1442 return;
1443}
1444
Chris Cain5d66a0a2022-02-09 08:52:10 -06001445void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001446{
Chris Caine2d0a432022-03-28 11:08:49 -05001447 static bool tracedError[8] = {0};
1448 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -06001449 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001450
Chris Caine2d0a432022-03-28 11:08:49 -05001451 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +08001452 {
Chris Caine2d0a432022-03-28 11:08:49 -05001453 // Read temperature sensors
1454 readTempSensors(sensorPath, id);
Sheldon Baileyb89d6192025-03-05 09:33:19 -06001455 // Read Extended sensors
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001456 readExtnSensors(sensorPath, id);
Chris Caine2d0a432022-03-28 11:08:49 -05001457
1458 if (occ->isMasterOcc())
1459 {
1460 // Read power sensors
1461 readPowerSensors(sensorPath, id);
1462 }
1463 tracedError[id] = false;
1464 }
1465 else
1466 {
1467 if (!tracedError[id])
1468 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001469 lg2::error(
1470 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}",
1471 "INST", id, "PATH", sensorPath);
Chris Caine2d0a432022-03-28 11:08:49 -05001472 tracedError[id] = true;
1473 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001474 }
1475
1476 return;
1477}
1478#endif
Chris Cain17257672021-10-22 13:41:03 -05001479
1480// Read the altitude from DBus
1481void Manager::readAltitude()
1482{
1483 static bool traceAltitudeErr = true;
1484
1485 utils::PropertyValue altitudeProperty{};
1486 try
1487 {
1488 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1489 ALTITUDE_PROP);
1490 auto sensorVal = std::get<double>(altitudeProperty);
1491 if (sensorVal < 0xFFFF)
1492 {
1493 if (sensorVal < 0)
1494 {
1495 altitude = 0;
1496 }
1497 else
1498 {
1499 // Round to nearest meter
1500 altitude = uint16_t(sensorVal + 0.5);
1501 }
Chris Cain37abe9b2024-10-31 17:20:31 -05001502 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE",
1503 sensorVal, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001504 traceAltitudeErr = true;
1505 }
1506 else
1507 {
1508 if (traceAltitudeErr)
1509 {
1510 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001511 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal);
Chris Cain17257672021-10-22 13:41:03 -05001512 }
1513 }
1514 }
Patrick Williamsaf408082022-07-22 19:26:54 -05001515 catch (const sdbusplus::exception_t& e)
Chris Cain17257672021-10-22 13:41:03 -05001516 {
1517 if (traceAltitudeErr)
1518 {
1519 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001520 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what());
Chris Cain17257672021-10-22 13:41:03 -05001521 }
1522 altitude = 0xFFFF; // not available
1523 }
1524}
1525
1526// Callback function when ambient temperature changes
Patrick Williamsaf408082022-07-22 19:26:54 -05001527void Manager::ambientCallback(sdbusplus::message_t& msg)
Chris Cain17257672021-10-22 13:41:03 -05001528{
1529 double currentTemp = 0;
1530 uint8_t truncatedTemp = 0xFF;
1531 std::string msgSensor;
1532 std::map<std::string, std::variant<double>> msgData;
1533 msg.read(msgSensor, msgData);
1534
1535 auto valPropMap = msgData.find(AMBIENT_PROP);
1536 if (valPropMap == msgData.end())
1537 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001538 lg2::debug("ambientCallback: Unknown ambient property changed");
Chris Cain17257672021-10-22 13:41:03 -05001539 return;
1540 }
1541 currentTemp = std::get<double>(valPropMap->second);
1542 if (std::isnan(currentTemp))
1543 {
1544 truncatedTemp = 0xFF;
1545 }
1546 else
1547 {
1548 if (currentTemp < 0)
1549 {
1550 truncatedTemp = 0;
1551 }
1552 else
1553 {
1554 // Round to nearest degree C
1555 truncatedTemp = uint8_t(currentTemp + 0.5);
1556 }
1557 }
1558
1559 // If ambient changes, notify OCCs
1560 if (truncatedTemp != ambient)
1561 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001562 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C",
1563 "OLD", ambient, "NEW", currentTemp);
Chris Cain17257672021-10-22 13:41:03 -05001564
1565 ambient = truncatedTemp;
1566 if (altitude == 0xFFFF)
1567 {
1568 // No altitude yet, try reading again
1569 readAltitude();
1570 }
1571
Chris Cain37abe9b2024-10-31 17:20:31 -05001572 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m",
1573 "TEMP", ambient, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001574#ifdef POWER10
1575 // Send ambient and altitude to all OCCs
1576 for (auto& obj : statusObjects)
1577 {
1578 if (obj->occActive())
1579 {
1580 obj->sendAmbient(ambient, altitude);
1581 }
1582 }
1583#endif // POWER10
1584 }
1585}
1586
1587// return the current ambient and altitude readings
1588void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1589 uint16_t& altitudeValue) const
1590{
1591 ambientValid = true;
1592 ambientTemp = ambient;
1593 altitudeValue = altitude;
1594
1595 if (ambient == 0xFF)
1596 {
1597 ambientValid = false;
1598 }
1599}
1600
Chris Caina7b74dc2021-11-10 17:03:43 -06001601#ifdef POWER10
Chris Cain7f89e4d2022-05-09 13:27:45 -05001602// Called when waitForAllOccsTimer expires
1603// After the first OCC goes active, this timer will be started (60 seconds)
Chris Caina7b74dc2021-11-10 17:03:43 -06001604void Manager::occsNotAllRunning()
1605{
Chris Cainf0295f52024-09-12 15:41:14 -05001606 if (resetInProgress)
1607 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001608 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -05001609 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
1610 return;
1611 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001612 if (activeCount != statusObjects.size())
1613 {
1614 // Not all OCCs went active
Chris Cain37abe9b2024-10-31 17:20:31 -05001615 lg2::warning(
1616 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})",
1617 "COUNT", activeCount, "EXP", statusObjects.size());
Chris Cain7f89e4d2022-05-09 13:27:45 -05001618 // Procs may be garded, so may be expected
Chris Caina7b74dc2021-11-10 17:03:43 -06001619 }
1620
Chris Cainf0295f52024-09-12 15:41:14 -05001621 if (resetRequired)
1622 {
1623 initiateOccRequest(resetInstance);
1624
1625 if (!waitForAllOccsTimer->isEnabled())
1626 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001627 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -05001628 // restart occ wait timer
1629 waitForAllOccsTimer->restartOnce(60s);
1630 }
1631 }
1632 else
1633 {
1634 validateOccMaster();
1635 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001636}
Chris Cain755af102024-02-27 16:09:51 -06001637
1638#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -05001639// Called when throttlePldmTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001640// If this timer expires, that indicates there are no OCC active sensor PDRs
Chris Cainc33171b2024-05-24 16:14:50 -05001641// found which will trigger pldm traces to be throttled.
1642// The second time this timer expires, a PEL will get created.
1643void Manager::throttlePldmTraceExpired()
Chris Cain755af102024-02-27 16:09:51 -06001644{
Chris Cain7651c062024-05-02 14:14:06 -05001645 if (utils::isHostRunning())
1646 {
Chris Cainc33171b2024-05-24 16:14:50 -05001647 if (!onPldmTimeoutCreatePel)
1648 {
1649 // Throttle traces
1650 pldmHandle->setTraceThrottle(true);
1651 // Restart timer to log a PEL when timer expires
1652 onPldmTimeoutCreatePel = true;
1653 throttlePldmTraceTimer->restartOnce(40min);
1654 }
1655 else
1656 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001657 lg2::error(
Chris Cainc33171b2024-05-24 16:14:50 -05001658 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1659 // Create PEL
1660 createPldmSensorPEL();
1661 }
Chris Cain7651c062024-05-02 14:14:06 -05001662 }
1663 else
1664 {
1665 // Make sure traces are not throttled
1666 pldmHandle->setTraceThrottle(false);
Chris Cain37abe9b2024-10-31 17:20:31 -05001667 lg2::info(
Chris Cainc33171b2024-05-24 16:14:50 -05001668 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
Chris Cain7651c062024-05-02 14:14:06 -05001669 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001670}
1671
1672void Manager::createPldmSensorPEL()
1673{
1674 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
1675 std::map<std::string, std::string> additionalData;
1676
1677 additionalData.emplace("_PID", std::to_string(getpid()));
1678
Chris Cain37abe9b2024-10-31 17:20:31 -05001679 lg2::info(
1680 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001681
1682 auto& bus = utils::getBus();
1683
1684 try
1685 {
1686 FFDCFiles ffdc;
1687 // Add occ-control journal traces to PEL FFDC
1688 auto occJournalFile =
1689 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);
1690
1691 static constexpr auto loggingObjectPath =
1692 "/xyz/openbmc_project/logging";
1693 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Patrick Williamsd7542c82024-08-16 15:20:28 -04001694 std::string service =
1695 utils::getService(loggingObjectPath, opLoggingInterface);
1696 auto method =
1697 bus.new_method_call(service.c_str(), loggingObjectPath,
1698 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001699
Chris Cain1c3349e2024-04-24 14:14:11 -05001700 // Set level to Warning (Predictive).
Chris Cain4b82f3e2024-04-22 14:44:29 -05001701 auto level =
1702 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
1703 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cain1c3349e2024-04-24 14:14:11 -05001704 Warning);
Chris Cain4b82f3e2024-04-22 14:44:29 -05001705
1706 method.append(d.path, level, additionalData, ffdc);
1707 bus.call(method);
1708 }
1709 catch (const sdbusplus::exception_t& e)
1710 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001711 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR",
1712 e.what());
Chris Cain4b82f3e2024-04-22 14:44:29 -05001713 }
Chris Cain755af102024-02-27 16:09:51 -06001714}
1715#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001716#endif // POWER10
1717
1718// Verify single master OCC and start presence monitor
1719void Manager::validateOccMaster()
1720{
1721 int masterInstance = -1;
1722 for (auto& obj : statusObjects)
1723 {
Chris Cainbd551de2022-04-26 13:41:16 -05001724 auto instance = obj->getOccInstanceID();
Chris Cainbae4d072022-02-28 09:46:50 -06001725#ifdef POWER10
1726 if (!obj->occActive())
1727 {
1728 if (utils::isHostRunning())
1729 {
Chris Cainbd551de2022-04-26 13:41:16 -05001730 // Check if sensor was queued while waiting for discovery
1731 auto match = queuedActiveState.find(instance);
1732 if (match != queuedActiveState.end())
Chris Cainbae4d072022-02-28 09:46:50 -06001733 {
Chris Cain7f89e4d2022-05-09 13:27:45 -05001734 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -05001735 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)",
1736 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001737 obj->occActive(true);
1738 }
1739 else
1740 {
1741 // OCC does not appear to be active yet, check active sensor
Patrick Williamsfb0a5c32024-02-28 11:27:00 -06001742#ifdef PLDM
Chris Cainbd551de2022-04-26 13:41:16 -05001743 pldmHandle->checkActiveSensor(instance);
Patrick Williamsfb0a5c32024-02-28 11:27:00 -06001744#endif
Chris Cainbd551de2022-04-26 13:41:16 -05001745 if (obj->occActive())
1746 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001747 lg2::info(
1748 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor",
1749 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001750 }
Chris Cainbae4d072022-02-28 09:46:50 -06001751 }
1752 }
1753 else
1754 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001755 lg2::warning(
1756 "validateOccMaster: HOST is not running (OCC{INST})",
1757 "INST", instance);
Chris Cainbae4d072022-02-28 09:46:50 -06001758 return;
1759 }
1760 }
1761#endif // POWER10
1762
Chris Caina7b74dc2021-11-10 17:03:43 -06001763 if (obj->isMasterOcc())
1764 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001765 obj->addPresenceWatchMaster();
1766
Chris Caina7b74dc2021-11-10 17:03:43 -06001767 if (masterInstance == -1)
1768 {
Chris Cainbd551de2022-04-26 13:41:16 -05001769 masterInstance = instance;
Chris Caina7b74dc2021-11-10 17:03:43 -06001770 }
1771 else
1772 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001773 lg2::error(
1774 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})",
1775 "MAST1", masterInstance, "MAST2", instance);
Chris Caina7b74dc2021-11-10 17:03:43 -06001776 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001777 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001778 }
1779 }
1780 }
Chris Cainbae4d072022-02-28 09:46:50 -06001781
Chris Caina7b74dc2021-11-10 17:03:43 -06001782 if (masterInstance < 0)
1783 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001784 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)",
1785 "NUM", statusObjects.size());
Chris Caina7b74dc2021-11-10 17:03:43 -06001786 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001787 statusObjects.front()->deviceError(
1788 Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001789 }
1790 else
1791 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001792 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs",
1793 "INST", masterInstance, "COUNT", activeCount);
Sheldon Bailey31a2f132022-05-20 11:31:52 -05001794#ifdef POWER10
1795 pmode->updateDbusSafeMode(false);
1796#endif
Chris Caina7b74dc2021-11-10 17:03:43 -06001797 }
1798}
1799
Chris Cain40501a22022-03-14 17:33:27 -05001800void Manager::updatePcapBounds() const
1801{
1802 if (pcap)
1803 {
1804 pcap->updatePcapBounds();
1805 }
1806}
1807
Chris Cainc488bac2025-03-17 09:01:15 -05001808// Clean up any variables since the OCC is no longer running.
1809// Called when pldm receives an event indicating host is powered off.
1810void Manager::hostPoweredOff()
1811{
1812 if (resetRequired)
1813 {
1814 lg2::info("hostPoweredOff: Clearing resetRequired for OCC{INST}",
1815 "INST", resetInstance);
1816 resetRequired = false;
1817 }
1818 if (resetInProgress)
1819 {
1820 lg2::info("hostPoweredOff: Clearing resetInProgress for OCC{INST}",
1821 "INST", resetInstance);
1822 resetInProgress = false;
1823 }
1824 resetInstance = 255;
1825}
1826
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301827} // namespace occ
1828} // namespace open_power