blob: 302a73327d925c339c78aebcd3ac5451dea52ebc [file] [log] [blame]
#include "config.h"

#include "occ_manager.hpp"

#include "occ_dbus.hpp"
#include "occ_errors.hpp"
#include "utils.hpp"

#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <xyz/openbmc_project/Common/error.hpp>

#include <cerrno>
#include <chrono>
#include <cmath>
#include <filesystem>
#include <fstream>
#include <regex>
#include <string_view>
#include <system_error>

namespace open_power
{
namespace occ
{

// FRU type value that, per its name, means "not available".
constexpr uint32_t fruTypeNotAvailable = 0xFF;
// File-name suffixes (their users are outside this part of the file).
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// Marker file tested by Manager::findAndCreateObjects(): OCC discovery only
// proceeds while this file does NOT exist.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";

using namespace phosphor::logging;
using namespace std::literals::chrono_literals;

/**
 * @brief Read one value of type T from the start of a file using the
 *        stream extraction operator.
 *
 * @tparam T     Type to extract (anything supporting `ifstream >> T`).
 * @param[in] path  File to read.
 *
 * @return The extracted value.
 *
 * @throws std::system_error (generic category) if the file cannot be
 *         opened, the value cannot be extracted, or the stream goes bad.
 *         The code is errno when it is set, otherwise EIO.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // eofbit is deliberately NOT part of the exception mask: extracting the
    // last token of a file that has no trailing newline sets eofbit on an
    // otherwise successful read. Empty/short files still fail via failbit.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data;

    // Start from a clean errno so a stale value from an unrelated earlier
    // call is never reported as the cause of this failure.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Parse failures do not set errno; never throw system_error(0).
        const auto err = errno;
        throw std::system_error((err != 0) ? err : EIO,
                                std::generic_category());
    }

    return data;
}

Chris Cain720a3842025-01-09 10:23:36 -060058void Manager::createPldmHandle()
59{
Chris Cain720a3842025-01-09 10:23:36 -060060 pldmHandle = std::make_unique<pldm::Interface>(
61 std::bind(std::mem_fn(&Manager::updateOCCActive), this,
62 std::placeholders::_1, std::placeholders::_2),
63 std::bind(std::mem_fn(&Manager::sbeHRESETResult), this,
64 std::placeholders::_1, std::placeholders::_2),
65 std::bind(std::mem_fn(&Manager::updateOccSafeMode), this,
66 std::placeholders::_1),
Chris Cainc488bac2025-03-17 09:01:15 -050067 std::bind(std::mem_fn(&Manager::hostPoweredOff), this), event);
Chris Cain720a3842025-01-09 10:23:36 -060068}
69
Chris Cainc33171b2024-05-24 16:14:50 -050070// findAndCreateObjects():
71// Takes care of getting the required objects created and
72// finds the available devices/processors.
73// (function is called everytime the discoverTimer expires)
74// - create the PowerMode object to control OCC modes
75// - create statusObjects for each OCC device found
76// - waits for OCC Active sensors PDRs to become available
77// - restart discoverTimer if all data is not available yet
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053078void Manager::findAndCreateObjects()
79{
Chris Cain613dc902022-04-08 09:56:22 -050080 if (!pmode)
81 {
82 // Create the power mode object
83 pmode = std::make_unique<powermode::PowerMode>(
84 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
85 }
86
Chris Cain1718fd82022-02-16 16:39:50 -060087 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050088 {
Chris Cainbae4d072022-02-28 09:46:50 -060089 static bool statusObjCreated = false;
90 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060091 {
Chris Cainbae4d072022-02-28 09:46:50 -060092 // Create the OCCs based on on the /dev/occX devices
93 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060094
Chris Cainbae4d072022-02-28 09:46:50 -060095 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060096 {
Chris Cainbae4d072022-02-28 09:46:50 -060097 // Something changed or no OCCs yet, try again in 10s.
98 // Note on the first pass prevOCCSearch will be empty,
99 // so there will be at least one delay to give things
100 // a chance to settle.
101 prevOCCSearch = occs;
102
Chris Cain37abe9b2024-10-31 17:20:31 -0500103 lg2::info(
104 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})",
105 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600106
107 discoverTimer->restartOnce(10s);
108 }
109 else
110 {
111 // All OCCs appear to be available, create status objects
112
113 // createObjects requires OCC0 first.
114 std::sort(occs.begin(), occs.end());
115
Chris Cain37abe9b2024-10-31 17:20:31 -0500116 lg2::info(
117 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects",
118 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600119 for (auto id : occs)
120 {
121 createObjects(std::string(OCC_NAME) + std::to_string(id));
122 }
123 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500124 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500125
126 // Find/update the processor path associated with each OCC
127 for (auto& obj : statusObjects)
128 {
129 obj->updateProcAssociation();
130 }
Chris Cainbae4d072022-02-28 09:46:50 -0600131 }
132 }
133
Chris Cain6d8f37a2022-04-29 13:46:01 -0500134 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600135 {
136 static bool tracedHostWait = false;
137 if (utils::isHostRunning())
138 {
139 if (tracedHostWait)
140 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500141 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600142 "Manager::findAndCreateObjects(): Host is running");
143 tracedHostWait = false;
144 }
Chris Cainbae4d072022-02-28 09:46:50 -0600145 checkAllActiveSensors();
146 }
147 else
148 {
149 if (!tracedHostWait)
150 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500151 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600152 "Manager::findAndCreateObjects(): Waiting for host to start");
153 tracedHostWait = true;
154 }
155 discoverTimer->restartOnce(30s);
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500156
Chris Cainc33171b2024-05-24 16:14:50 -0500157 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500158 {
159 // Host is no longer running, disable throttle timer and
160 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500161 lg2::info("findAndCreateObjects(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500162 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500163 pldmHandle->setTraceThrottle(false);
164 }
Chris Cain1718fd82022-02-16 16:39:50 -0600165 }
166 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500167 }
168 else
169 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500170 lg2::info(
171 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...",
172 "FILE", HOST_ON_FILE);
Chris Cain1718fd82022-02-16 16:39:50 -0600173 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500174 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500175}
176
Chris Cainbae4d072022-02-28 09:46:50 -0600177// Check if all occActive sensors are available
178void Manager::checkAllActiveSensors()
179{
180 static bool allActiveSensorAvailable = false;
181 static bool tracedSensorWait = false;
Chris Cain082a6ca2023-03-21 10:27:26 -0500182 static bool waitingForHost = false;
Chris Cainbae4d072022-02-28 09:46:50 -0600183
Chris Cain082a6ca2023-03-21 10:27:26 -0500184 if (open_power::occ::utils::isHostRunning())
Chris Cainbae4d072022-02-28 09:46:50 -0600185 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500186 if (waitingForHost)
Chris Cainbae4d072022-02-28 09:46:50 -0600187 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500188 waitingForHost = false;
Chris Cain37abe9b2024-10-31 17:20:31 -0500189 lg2::info("checkAllActiveSensors(): Host is now running");
Chris Cain082a6ca2023-03-21 10:27:26 -0500190 }
191
192 // Start with the assumption that all are available
193 allActiveSensorAvailable = true;
194 for (auto& obj : statusObjects)
195 {
196 if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
Chris Cainbae4d072022-02-28 09:46:50 -0600197 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500198 auto instance = obj->getOccInstanceID();
199 // Check if sensor was queued while waiting for discovery
200 auto match = queuedActiveState.find(instance);
201 if (match != queuedActiveState.end())
Chris Cainbd551de2022-04-26 13:41:16 -0500202 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500203 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -0500204 lg2::info(
205 "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)",
206 "INST", instance);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500207 obj->occActive(true);
Chris Cainbd551de2022-04-26 13:41:16 -0500208 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500209 else
210 {
211 allActiveSensorAvailable = false;
212 if (!tracedSensorWait)
213 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500214 lg2::info(
215 "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor",
216 "INST", instance);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500217 tracedSensorWait = true;
Chris Cainc33171b2024-05-24 16:14:50 -0500218 // Make sure PLDM traces are not throttled
Chris Cain755af102024-02-27 16:09:51 -0600219 pldmHandle->setTraceThrottle(false);
Chris Cainc33171b2024-05-24 16:14:50 -0500220 // Start timer to throttle PLDM traces when timer
Chris Cain755af102024-02-27 16:09:51 -0600221 // expires
Chris Cainc33171b2024-05-24 16:14:50 -0500222 onPldmTimeoutCreatePel = false;
223 throttlePldmTraceTimer->restartOnce(5min);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500224 }
Chris Cainf0295f52024-09-12 15:41:14 -0500225 // Ignore active sensor check if the OCCs are being reset
226 if (!resetInProgress)
227 {
228 pldmHandle->checkActiveSensor(obj->getOccInstanceID());
229 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500230 break;
231 }
Chris Cainbd551de2022-04-26 13:41:16 -0500232 }
Chris Cainbae4d072022-02-28 09:46:50 -0600233 }
234 }
Chris Cain082a6ca2023-03-21 10:27:26 -0500235 else
236 {
237 if (!waitingForHost)
238 {
239 waitingForHost = true;
Chris Cain37abe9b2024-10-31 17:20:31 -0500240 lg2::info("checkAllActiveSensors(): Waiting for host to start");
Chris Cainc33171b2024-05-24 16:14:50 -0500241 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500242 {
243 // Host is no longer running, disable throttle timer and
244 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500245 lg2::info("checkAllActiveSensors(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500246 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500247 pldmHandle->setTraceThrottle(false);
248 }
Chris Cain082a6ca2023-03-21 10:27:26 -0500249 }
250 }
Chris Cainbae4d072022-02-28 09:46:50 -0600251
252 if (allActiveSensorAvailable)
253 {
254 // All sensors were found, disable the discovery timer
Chris Cain7f89e4d2022-05-09 13:27:45 -0500255 if (discoverTimer->isEnabled())
256 {
Chris Cainf55f91a2022-05-27 13:40:15 -0500257 discoverTimer->setEnabled(false);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500258 }
Chris Cainc33171b2024-05-24 16:14:50 -0500259 if (throttlePldmTraceTimer->isEnabled())
Chris Cain755af102024-02-27 16:09:51 -0600260 {
261 // Disable throttle timer and make sure traces are not throttled
Chris Cainc33171b2024-05-24 16:14:50 -0500262 throttlePldmTraceTimer->setEnabled(false);
Chris Cain755af102024-02-27 16:09:51 -0600263 pldmHandle->setTraceThrottle(false);
264 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500265 if (waitingForAllOccActiveSensors)
266 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500267 lg2::info(
Chris Cain7f89e4d2022-05-09 13:27:45 -0500268 "checkAllActiveSensors(): OCC Active sensors are available");
269 waitingForAllOccActiveSensors = false;
Chris Cainf0295f52024-09-12 15:41:14 -0500270
271 if (resetRequired)
272 {
273 initiateOccRequest(resetInstance);
274
275 if (!waitForAllOccsTimer->isEnabled())
276 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500277 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500278 "occsNotAllRunning: Restarting waitForAllOccTimer");
279 // restart occ wait timer to check status after reset
280 // completes
281 waitForAllOccsTimer->restartOnce(60s);
282 }
283 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500284 }
285 queuedActiveState.clear();
Chris Cainbae4d072022-02-28 09:46:50 -0600286 tracedSensorWait = false;
287 }
288 else
289 {
290 // Not all sensors were available, so keep waiting
291 if (!tracedSensorWait)
292 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500293 lg2::info(
Chris Cainbd551de2022-04-26 13:41:16 -0500294 "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
Chris Cainbae4d072022-02-28 09:46:50 -0600295 tracedSensorWait = true;
296 }
Chris Cainf55f91a2022-05-27 13:40:15 -0500297 discoverTimer->restartOnce(10s);
Chris Cainbae4d072022-02-28 09:46:50 -0600298 }
299}
Chris Cainbae4d072022-02-28 09:46:50 -0600300
Matt Spinlerd267cec2021-09-01 14:49:19 -0500301std::vector<int> Manager::findOCCsInDev()
302{
303 std::vector<int> occs;
304 std::regex expr{R"(occ(\d+)$)"};
305
306 for (auto& file : fs::directory_iterator("/dev"))
307 {
308 std::smatch match;
309 std::string path{file.path().string()};
310 if (std::regex_search(path, match, expr))
311 {
312 auto num = std::stoi(match[1].str());
313
314 // /dev numbering starts at 1, ours starts at 0.
315 occs.push_back(num - 1);
316 }
317 }
318
319 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530320}
321
Patrick Williamsaf408082022-07-22 19:26:54 -0500322int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530323{
George Liubcef3b42021-09-10 12:39:02 +0800324 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530325
326 sdbusplus::message::object_path o;
327 msg.read(o);
328 fs::path cpuPath(std::string(std::move(o)));
329
330 auto name = cpuPath.filename().string();
331 auto index = name.find(CPU_NAME);
332 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
333
334 createObjects(name);
335
336 return 0;
337}
338
339void Manager::createObjects(const std::string& occ)
340{
341 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
342
Gunnar Mills94df8c92018-09-14 14:50:03 -0500343 statusObjects.emplace_back(std::make_unique<Status>(
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500344 event, path.c_str(), *this, pmode,
Gunnar Mills94df8c92018-09-14 14:50:03 -0500345 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500346 std::placeholders::_1, std::placeholders::_2),
Chris Cainf0295f52024-09-12 15:41:14 -0500347 // Callback will set flag indicating reset needs to be done
348 // instead of immediately issuing a reset via PLDM.
349 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500350 std::placeholders::_1)));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530351
Chris Cain40501a22022-03-14 17:33:27 -0500352 // Create the power cap monitor object
353 if (!pcap)
354 {
355 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
356 *statusObjects.back());
357 }
358
Chris Cain36f9cde2021-11-22 11:18:21 -0600359 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530360 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500361 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST",
362 statusObjects.back()->getOccInstanceID());
Chris Cain36f9cde2021-11-22 11:18:21 -0600363 _pollTimer->setEnabled(false);
364
Chris Cain6fa848a2022-01-24 14:54:38 -0600365 // Set the master OCC on the PowerMode object
366 pmode->setMasterOcc(path);
Chris Cain36f9cde2021-11-22 11:18:21 -0600367 }
368
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500369 passThroughObjects.emplace_back(
370 std::make_unique<PassThrough>(path.c_str(), pmode));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530371}
372
Chris Cainf0295f52024-09-12 15:41:14 -0500373// If a reset is not already outstanding, set a flag to indicate that a reset is
374// needed.
375void Manager::resetOccRequest(instanceID instance)
376{
377 if (!resetRequired)
378 {
379 resetRequired = true;
380 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500381 lg2::error(
382 "resetOccRequest: PM Complex reset was requested due to OCC{INST}",
383 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500384 }
385 else if (instance != resetInstance)
386 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500387 lg2::warning(
388 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}",
389 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500390 }
391}
392
393// If a reset has not been started, initiate an OCC reset via PLDM
394void Manager::initiateOccRequest(instanceID instance)
395{
396 if (!resetInProgress)
397 {
398 resetInProgress = true;
399 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500400 lg2::error(
401 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}",
402 "INST", instance);
Chris Cainf7881502025-04-16 14:48:30 -0500403
404 // Make sure ALL OCC comm stops to all OCCs before the reset
405 for (auto& obj : statusObjects)
406 {
407 if (obj->occActive())
408 {
409 obj->occActive(false);
410 }
411 }
412
Chris Cainf0295f52024-09-12 15:41:14 -0500413 pldmHandle->resetOCC(instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500414 resetRequired = false;
415 }
416 else
417 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500418 lg2::warning(
419 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}",
420 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500421 }
422}
423
Sheldon Bailey373af752022-02-21 15:14:00 -0600424void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530425{
Chris Caina7b74dc2021-11-10 17:03:43 -0600426 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600427 {
Chris Cainf0295f52024-09-12 15:41:14 -0500428 if (resetInProgress)
429 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500430 lg2::info(
Chris Cain92dfb272025-02-13 12:20:27 -0600431 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{RINST}",
Chris Cain37abe9b2024-10-31 17:20:31 -0500432 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500433 return;
434 }
435
Chris Caina7b74dc2021-11-10 17:03:43 -0600436 // OCC went active
437 ++activeCount;
438
Chris Caina7b74dc2021-11-10 17:03:43 -0600439 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600440 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600441 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500442 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500443 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600444
445 if (activeCount == statusObjects.size())
446 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600447 // All OCCs are now running
448 if (waitForAllOccsTimer->isEnabled())
449 {
450 // stop occ wait timer
451 waitForAllOccsTimer->setEnabled(false);
452 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600453
Chris Cainf0295f52024-09-12 15:41:14 -0500454 // All OCCs have been found, check if we need a reset
455 if (resetRequired)
456 {
457 initiateOccRequest(resetInstance);
458
459 if (!waitForAllOccsTimer->isEnabled())
460 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500461 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500462 "occsNotAllRunning: Restarting waitForAllOccTimer");
463 // restart occ wait timer
464 waitForAllOccsTimer->restartOnce(60s);
465 }
466 }
467 else
468 {
469 // Verify master OCC and start presence monitor
470 validateOccMaster();
471 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600472 }
473
Chris Cainf7881502025-04-16 14:48:30 -0500474 // Start poll timer if not already started (since at least one OCC is
475 // running)
Chris Caina7b74dc2021-11-10 17:03:43 -0600476 if (!_pollTimer->isEnabled())
477 {
Chris Cainf7881502025-04-16 14:48:30 -0500478 // An OCC just went active, PM Complex is just coming online so
479 // clear any outstanding reset requests
480 if (resetRequired)
481 {
482 resetRequired = false;
483 lg2::error(
484 "statusCallBack: clearing resetRequired (since OCC{INST} went active, resetInProgress={RIP})",
485 "INST", instance, "RIP", resetInProgress);
486 }
487
Chris Cain37abe9b2024-10-31 17:20:31 -0500488 lg2::info("Manager: OCCs will be polled every {TIME} seconds",
489 "TIME", pollInterval);
Chris Caina7b74dc2021-11-10 17:03:43 -0600490
491 // Send poll and start OCC poll timer
492 pollerTimerExpired();
493 }
494 }
495 else
496 {
497 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500498 if (activeCount > 0)
499 {
500 --activeCount;
501 }
502 else
503 {
Sheldon Baileyb89d6192025-03-05 09:33:19 -0600504 lg2::info("OCC{INST} disabled, and no other OCCs are active",
Chris Cain37abe9b2024-10-31 17:20:31 -0500505 "INST", instance);
Chris Cain082a6ca2023-03-21 10:27:26 -0500506 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600507
508 if (activeCount == 0)
509 {
510 // No OCCs are running
511
Chris Cainf0295f52024-09-12 15:41:14 -0500512 if (resetInProgress)
513 {
514 // All OCC active sensors are clear (reset should be in
515 // progress)
Chris Cain37abe9b2024-10-31 17:20:31 -0500516 lg2::info(
517 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
518 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500519 resetInProgress = false;
520 resetInstance = 255;
521 }
522
Chris Caina7b74dc2021-11-10 17:03:43 -0600523 // Stop OCC poll timer
524 if (_pollTimer->isEnabled())
525 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500526 lg2::info(
Chris Caina7b74dc2021-11-10 17:03:43 -0600527 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
528 _pollTimer->setEnabled(false);
529 }
530
Chris Caina7b74dc2021-11-10 17:03:43 -0600531 // stop wait timer
532 if (waitForAllOccsTimer->isEnabled())
533 {
534 waitForAllOccsTimer->setEnabled(false);
535 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600536 }
Chris Cainf0295f52024-09-12 15:41:14 -0500537 else if (resetInProgress)
538 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500539 lg2::info(
540 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
541 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500542 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600543 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500544 setSensorValueToNaN(instance);
Chris Caina8857c52021-01-27 11:53:05 -0600545 }
Chris Cainbae4d072022-02-28 09:46:50 -0600546
Chris Cainbae4d072022-02-28 09:46:50 -0600547 if (waitingForAllOccActiveSensors)
548 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500549 if (utils::isHostRunning())
550 {
551 checkAllActiveSensors();
552 }
Chris Cainbae4d072022-02-28 09:46:50 -0600553 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530554}
555
Eddie Jamescbad2192021-10-07 09:39:39 -0500556void Manager::sbeTimeout(unsigned int instance)
557{
Eddie James2a751d72022-03-04 09:16:12 -0600558 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
559 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400560 return instance == obj->getOccInstanceID();
561 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500562
Eddie Jamescb018da2022-03-05 11:49:37 -0600563 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600564 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500565 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST",
566 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500567
Chris Cain720a3842025-01-09 10:23:36 -0600568#ifdef PHAL_SUPPORT
Eddie James2a751d72022-03-04 09:16:12 -0600569 setSBEState(instance, SBE_STATE_NOT_USABLE);
Chris Cain720a3842025-01-09 10:23:36 -0600570#endif
Eddie James2a751d72022-03-04 09:16:12 -0600571
Chris Cain92dfb272025-02-13 12:20:27 -0600572 // Stop communication with this OCC
573 (*obj)->occActive(false);
574
Eddie James2a751d72022-03-04 09:16:12 -0600575 pldmHandle->sendHRESET(instance);
576 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500577}
578
Tom Joseph815f9f52020-07-27 12:12:13 +0530579bool Manager::updateOCCActive(instanceID instance, bool status)
580{
Chris Cain7e374fb2022-04-07 09:47:23 -0500581 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
582 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400583 return instance == obj->getOccInstanceID();
584 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500585
Chris Cain082a6ca2023-03-21 10:27:26 -0500586 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500587 if (obj != statusObjects.end())
588 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500589 if (!hostRunning && (status == true))
590 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500591 lg2::warning(
592 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received",
593 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500594 (*obj)->setPldmSensorReceived(false);
595 if (!waitingForAllOccActiveSensors)
596 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500597 lg2::info(
Chris Cain082a6ca2023-03-21 10:27:26 -0500598 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
599 waitingForAllOccActiveSensors = true;
600 }
601 discoverTimer->restartOnce(30s);
602 return false;
603 }
604 else
605 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500606 (*obj)->setPldmSensorReceived(true);
607 return (*obj)->occActive(status);
608 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500609 }
610 else
611 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500612 if (hostRunning)
613 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500614 lg2::warning(
615 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})",
616 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500617 }
618 else
619 {
620 if (status == true)
621 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500622 lg2::warning(
623 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})",
624 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500625 }
626 }
Chris Cainbd551de2022-04-26 13:41:16 -0500627 if (status == true)
628 {
629 // OCC went active
630 queuedActiveState.insert(instance);
631 }
632 else
633 {
634 auto match = queuedActiveState.find(instance);
635 if (match != queuedActiveState.end())
636 {
637 // OCC was disabled
638 queuedActiveState.erase(match);
639 }
640 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500641 return false;
642 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530643}
Eddie Jamescbad2192021-10-07 09:39:39 -0500644
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500645// Called upon pldm event To set powermode Safe Mode State for system.
646void Manager::updateOccSafeMode(bool safeMode)
647{
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500648 pmode->updateDbusSafeMode(safeMode);
Chris Cainc86d80f2023-05-04 15:49:18 -0500649 // Update the processor throttle status on dbus
650 for (auto& obj : statusObjects)
651 {
652 obj->updateThrottle(safeMode, THROTTLED_SAFE);
653 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500654}
655
Eddie Jamescbad2192021-10-07 09:39:39 -0500656void Manager::sbeHRESETResult(instanceID instance, bool success)
657{
658 if (success)
659 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500660 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500661
Chris Cain720a3842025-01-09 10:23:36 -0600662#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500663 setSBEState(instance, SBE_STATE_BOOTED);
Chris Cain720a3842025-01-09 10:23:36 -0600664#endif
Eddie Jamescbad2192021-10-07 09:39:39 -0500665
Chris Cain92dfb272025-02-13 12:20:27 -0600666 // Re-enable communication with this OCC
667 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
668 [instance](const auto& obj) {
669 return instance == obj->getOccInstanceID();
670 });
671 if (obj != statusObjects.end() && (!(*obj)->occActive()))
672 {
673 (*obj)->occActive(true);
674 }
675
Eddie Jamescbad2192021-10-07 09:39:39 -0500676 return;
677 }
678
Chris Cain720a3842025-01-09 10:23:36 -0600679#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500680 setSBEState(instance, SBE_STATE_FAILED);
681
682 if (sbeCanDump(instance))
683 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500684 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST",
685 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500686
687 auto& bus = utils::getBus();
688 uint32_t src6 = instance << 16;
689 uint32_t logId =
690 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
691 src6, "SBE command timeout");
692
693 try
694 {
George Liuf3a4a692021-12-28 13:59:51 +0800695 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
696 constexpr auto function = "CreateDump";
697
Patrick Williamsd7542c82024-08-16 15:20:28 -0400698 std::string service =
699 utils::getService(OP_DUMP_OBJ_PATH, interface);
Dhruvaraj Subhashchandran1173b2b2024-06-01 11:12:13 -0500700 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
701 interface, function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500702
703 std::map<std::string, std::variant<std::string, uint64_t>>
704 createParams{
705 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
706 uint64_t(logId)},
707 {"com.ibm.Dump.Create.CreateParameters.DumpType",
708 "com.ibm.Dump.Create.DumpType.SBE"},
709 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
710 uint64_t(instance)},
711 };
712
713 method.append(createParams);
714
715 auto response = bus.call(method);
716 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500717 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500718 {
719 constexpr auto ERROR_DUMP_DISABLED =
720 "xyz.openbmc_project.Dump.Create.Error.Disabled";
721 if (e.name() == ERROR_DUMP_DISABLED)
722 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500723 lg2::info("Dump is disabled, skipping");
Eddie Jamescbad2192021-10-07 09:39:39 -0500724 }
725 else
726 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500727 lg2::error("Dump failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500728 }
729 }
730 }
Chris Cain720a3842025-01-09 10:23:36 -0600731#endif
Chris Cainf0295f52024-09-12 15:41:14 -0500732
733 // SBE Reset failed, try PM Complex reset
Chris Cain37abe9b2024-10-31 17:20:31 -0500734 lg2::error("sbeHRESETResult: Forcing PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500735 resetOccRequest(instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500736}
737
#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500739bool Manager::sbeCanDump(unsigned int instance)
740{
741 struct pdbg_target* proc = getPdbgTarget(instance);
742
743 if (!proc)
744 {
745 // allow the dump in the error case
746 return true;
747 }
748
749 try
750 {
751 if (!openpower::phal::sbe::isDumpAllowed(proc))
752 {
753 return false;
754 }
755
756 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
757 {
758 return false;
759 }
760 }
761 catch (openpower::phal::exception::SbeError& e)
762 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500763 lg2::info("Failed to query SBE state");
Eddie Jamescbad2192021-10-07 09:39:39 -0500764 }
765
766 // allow the dump in the error case
767 return true;
768}
769
770void Manager::setSBEState(unsigned int instance, enum sbe_state state)
771{
772 struct pdbg_target* proc = getPdbgTarget(instance);
773
774 if (!proc)
775 {
776 return;
777 }
778
779 try
780 {
781 openpower::phal::sbe::setState(proc, state);
782 }
783 catch (const openpower::phal::exception::SbeError& e)
784 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500785 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what());
Eddie Jamescbad2192021-10-07 09:39:39 -0500786 }
787}
788
789struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
790{
791 if (!pdbgInitialized)
792 {
793 try
794 {
795 openpower::phal::pdbg::init();
796 pdbgInitialized = true;
797 }
798 catch (const openpower::phal::exception::PdbgError& e)
799 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500800 lg2::error("pdbg initialization failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500801 return nullptr;
802 }
803 }
804
805 struct pdbg_target* proc = nullptr;
806 pdbg_for_each_class_target("proc", proc)
807 {
808 if (pdbg_target_index(proc) == instance)
809 {
810 return proc;
811 }
812 }
813
Chris Cain37abe9b2024-10-31 17:20:31 -0500814 lg2::error("Failed to get pdbg target");
Eddie Jamescbad2192021-10-07 09:39:39 -0500815 return nullptr;
816}
Tom Joseph815f9f52020-07-27 12:12:13 +0530817#endif
818
Chris Caina8857c52021-01-27 11:53:05 -0600819void Manager::pollerTimerExpired()
820{
Chris Caina8857c52021-01-27 11:53:05 -0600821 if (!_pollTimer)
822 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500823 lg2::error("pollerTimerExpired() ERROR: Timer not defined");
Chris Caina8857c52021-01-27 11:53:05 -0600824 return;
825 }
826
Chris Cainf0295f52024-09-12 15:41:14 -0500827 if (resetRequired)
828 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500829 lg2::error("pollerTimerExpired() - Initiating PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500830 initiateOccRequest(resetInstance);
831
832 if (!waitForAllOccsTimer->isEnabled())
833 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500834 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -0500835 // restart occ wait timer
836 waitForAllOccsTimer->restartOnce(60s);
837 }
838 return;
839 }
Chris Cainf0295f52024-09-12 15:41:14 -0500840
Chris Caina8857c52021-01-27 11:53:05 -0600841 for (auto& obj : statusObjects)
842 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600843 if (!obj->occActive())
844 {
845 // OCC is not running yet
Chris Cain5d66a0a2022-02-09 08:52:10 -0600846 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500847 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600848 continue;
849 }
850
Chris Caina8857c52021-01-27 11:53:05 -0600851 // Read sysfs to force kernel to poll OCC
852 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800853
Chicago Duanbb895cb2021-06-18 19:37:16 +0800854 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600855 getSensorValues(obj);
Chris Caina8857c52021-01-27 11:53:05 -0600856 }
857
Chris Caina7b74dc2021-11-10 17:03:43 -0600858 if (activeCount > 0)
859 {
860 // Restart OCC poll timer
861 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
862 }
863 else
864 {
865 // No OCCs running, so poll timer will not be restarted
Chris Cain37abe9b2024-10-31 17:20:31 -0500866 lg2::info(
867 "Manager::pollerTimerExpired: poll timer will not be restarted");
Chris Caina7b74dc2021-11-10 17:03:43 -0600868 }
Chris Caina8857c52021-01-27 11:53:05 -0600869}
870
Chris Cainae157b62024-01-23 16:05:12 -0600871void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800872{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500873 // There may be more than one sensor with the same FRU type
874 // and label so make two passes: the first to read the temps
875 // from sysfs, and the second to put them on D-Bus after
876 // resolving any conflicts.
877 std::map<std::string, double> sensorData;
878
Chicago Duanbb895cb2021-06-18 19:37:16 +0800879 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
880 for (auto& file : fs::directory_iterator(path))
881 {
882 if (!std::regex_search(file.path().string(), expr))
883 {
884 continue;
885 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800886
Matt Spinlera26f1522021-08-25 15:50:20 -0500887 uint32_t labelValue{0};
888
889 try
890 {
891 labelValue = readFile<uint32_t>(file.path());
892 }
893 catch (const std::system_error& e)
894 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500895 lg2::debug(
896 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
897 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800898 continue;
899 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800900
901 const std::string& tempLabel = "label";
902 const std::string filePathString = file.path().string().substr(
903 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500904
905 uint32_t fruTypeValue{0};
906 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800907 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500908 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
909 }
910 catch (const std::system_error& e)
911 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500912 lg2::debug(
913 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
914 "PATH", filePathString + fruTypeSuffix, "ERROR",
915 e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800916 continue;
917 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800918
Patrick Williamsd7542c82024-08-16 15:20:28 -0400919 std::string sensorPath =
920 OCC_SENSORS_ROOT + std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800921
Matt Spinlerace67d82021-10-18 13:41:57 -0500922 std::string dvfsTempPath;
923
Chicago Duanbb895cb2021-06-18 19:37:16 +0800924 if (fruTypeValue == VRMVdd)
925 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400926 sensorPath.append(
927 "vrm_vdd" + std::to_string(occInstance) + "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800928 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500929 else if (fruTypeValue == processorIoRing)
930 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400931 sensorPath.append(
932 "proc" + std::to_string(occInstance) + "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500933 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600934 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500935 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800936 else
937 {
Matt Spinler14d14022021-08-25 15:38:29 -0500938 uint16_t type = (labelValue & 0xFF000000) >> 24;
939 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800940
941 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
942 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500943 if (fruTypeValue == fruTypeNotAvailable)
944 {
945 // Not all DIMM related temps are available to read
946 // (no _input file in this case)
947 continue;
948 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800949 auto iter = dimmTempSensorName.find(fruTypeValue);
950 if (iter == dimmTempSensorName.end())
951 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500952 lg2::error(
953 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ",
954 "FRU", fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800955 continue;
956 }
957
Patrick Williamsd7542c82024-08-16 15:20:28 -0400958 sensorPath.append(
959 "dimm" + std::to_string(instanceID) + iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500960
961 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
962 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800963 }
964 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
965 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500966 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800967 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500968 // The OCC reports small core temps, of which there are
969 // two per big core. All current P10 systems are in big
970 // core mode, so use a big core name.
971 uint16_t coreNum = instanceID / 2;
972 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600973 sensorPath.append("proc" + std::to_string(occInstance) +
974 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500975 std::to_string(tempNum) + "_temp");
976
Chris Cainae157b62024-01-23 16:05:12 -0600977 dvfsTempPath =
978 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
979 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500980 }
981 else
982 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800983 continue;
984 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800985 }
986 else
987 {
988 continue;
989 }
990 }
991
Matt Spinlerace67d82021-10-18 13:41:57 -0500992 // The dvfs temp file only needs to be read once per chip per type.
993 if (!dvfsTempPath.empty() &&
994 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
995 {
996 try
997 {
998 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
999
1000 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
1001 dvfsTempPath, dvfsValue * std::pow(10, -3));
1002 }
1003 catch (const std::system_error& e)
1004 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001005 lg2::debug(
1006 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1007 "PATH", filePathString + maxSuffix, "ERROR",
1008 e.code().value());
Matt Spinlerace67d82021-10-18 13:41:57 -05001009 }
1010 }
1011
Matt Spinlera26f1522021-08-25 15:50:20 -05001012 uint32_t faultValue{0};
1013 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001014 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001015 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
1016 }
1017 catch (const std::system_error& e)
1018 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001019 lg2::debug(
1020 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1021 "PATH", filePathString + faultSuffix, "ERROR",
1022 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001023 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001024 }
1025
Chris Cainae157b62024-01-23 16:05:12 -06001026 double tempValue{0};
1027 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -05001028 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001029 {
Chris Cainae157b62024-01-23 16:05:12 -06001030 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001031 }
Chris Cainae157b62024-01-23 16:05:12 -06001032 else
Chicago Duanbb895cb2021-06-18 19:37:16 +08001033 {
Chris Cainae157b62024-01-23 16:05:12 -06001034 // Read the temperature
1035 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001036 {
Chris Cainae157b62024-01-23 16:05:12 -06001037 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001038 }
Chris Cainae157b62024-01-23 16:05:12 -06001039 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001040 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001041 lg2::debug(
1042 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1043 "PATH", filePathString + inputSuffix, "ERROR",
1044 e.code().value());
Chris Cainae157b62024-01-23 16:05:12 -06001045
1046 // if errno == EAGAIN(Resource temporarily unavailable) then set
1047 // temp to 0, to avoid using old temp, and affecting FAN
1048 // Control.
1049 if (e.code().value() == EAGAIN)
1050 {
1051 tempValue = 0;
1052 }
1053 // else the errno would be something like
1054 // EBADF(Bad file descriptor)
1055 // or ENOENT(No such file or directory)
1056 else
1057 {
1058 continue;
1059 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001060 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001061 }
1062
Matt Spinler818cc8d2023-10-23 11:43:39 -05001063 // If this object path already has a value, only overwite
1064 // it if the previous one was an NaN or a smaller value.
1065 auto existing = sensorData.find(sensorPath);
1066 if (existing != sensorData.end())
1067 {
Chris Cainae157b62024-01-23 16:05:12 -06001068 // Multiple sensors found for this FRU type
1069 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1070 ((existing->second == 0) && std::isnan(tempValue)))
1071 {
1072 // One of the redundant sensors has failed (0xFF/nan), and the
1073 // other sensor has no reading (0), so set the FRU to NaN to
1074 // force fan increase
1075 tempValue = std::numeric_limits<double>::quiet_NaN();
1076 existing->second = tempValue;
1077 }
Matt Spinler818cc8d2023-10-23 11:43:39 -05001078 if (std::isnan(existing->second) || (tempValue > existing->second))
1079 {
1080 existing->second = tempValue;
1081 }
1082 }
1083 else
1084 {
Chris Cainae157b62024-01-23 16:05:12 -06001085 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001086 sensorData[sensorPath] = tempValue;
1087 }
1088 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001089
Matt Spinler818cc8d2023-10-23 11:43:39 -05001090 // Now publish the values on D-Bus.
1091 for (const auto& [objectPath, value] : sensorData)
1092 {
1093 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1094 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001095
Matt Spinler818cc8d2023-10-23 11:43:39 -05001096 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1097 objectPath, !std::isnan(value));
1098
1099 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001100 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001101 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001102 objectPath, {"all_sensors"});
Chris Cain6fa848a2022-01-24 14:54:38 -06001103 }
Chris Cainae157b62024-01-23 16:05:12 -06001104 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001105 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001106}
1107
Patrick Williams2d6ec902025-02-01 08:22:13 -05001108std::optional<std::string> Manager::getPowerLabelFunctionID(
1109 const std::string& value)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001110{
1111 // If the value is "system", then the FunctionID is "system".
1112 if (value == "system")
1113 {
1114 return value;
1115 }
1116
1117 // If the value is not "system", then the label value have 3 numbers, of
1118 // which we only care about the middle one:
1119 // <sensor id>_<function id>_<apss channel>
1120 // eg: The value is "0_10_5" , then the FunctionID is "10".
1121 if (value.find("_") == std::string::npos)
1122 {
1123 return std::nullopt;
1124 }
1125
1126 auto powerLabelValue = value.substr((value.find("_") + 1));
1127
1128 if (powerLabelValue.find("_") == std::string::npos)
1129 {
1130 return std::nullopt;
1131 }
1132
1133 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1134}
1135
1136void Manager::readPowerSensors(const fs::path& path, uint32_t id)
1137{
Chicago Duanbb895cb2021-06-18 19:37:16 +08001138 std::regex expr{"power\\d+_label$"}; // Example: power5_label
1139 for (auto& file : fs::directory_iterator(path))
1140 {
1141 if (!std::regex_search(file.path().string(), expr))
1142 {
1143 continue;
1144 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001145
Matt Spinlera26f1522021-08-25 15:50:20 -05001146 std::string labelValue;
1147 try
1148 {
1149 labelValue = readFile<std::string>(file.path());
1150 }
1151 catch (const std::system_error& e)
1152 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001153 lg2::debug(
1154 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1155 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +08001156 continue;
1157 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001158
1159 auto functionID = getPowerLabelFunctionID(labelValue);
1160 if (functionID == std::nullopt)
1161 {
1162 continue;
1163 }
1164
1165 const std::string& tempLabel = "label";
1166 const std::string filePathString = file.path().string().substr(
1167 0, file.path().string().length() - tempLabel.length());
1168
1169 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1170
1171 auto iter = powerSensorName.find(*functionID);
1172 if (iter == powerSensorName.end())
1173 {
1174 continue;
1175 }
1176 sensorPath.append(iter->second);
1177
Matt Spinlera26f1522021-08-25 15:50:20 -05001178 double tempValue{0};
1179
1180 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001181 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001182 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001183 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001184 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001185 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001186 lg2::debug(
1187 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1188 "PATH", filePathString + inputSuffix, "ERROR",
1189 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001190 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001191 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001192
Chris Cain5d66a0a2022-02-09 08:52:10 -06001193 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -06001194 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1195
Chris Cain5d66a0a2022-02-09 08:52:10 -06001196 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -05001197 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
1198
Patrick Williamsd7542c82024-08-16 15:20:28 -04001199 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1200 sensorPath, true);
Matt Spinlera26f1522021-08-25 15:50:20 -05001201
Matt Spinler5901abd2021-09-23 13:50:03 -05001202 if (existingSensors.find(sensorPath) == existingSensors.end())
1203 {
Chris Cain3523cc02024-10-30 17:19:09 -05001204 std::vector<std::string> fTypeList = {"all_sensors"};
1205 if (iter->second == "total_power")
1206 {
Chris Cainff0ce402025-01-17 10:54:55 -06001207 // Set sensor purpose as TotalPower
1208 dbus::OccDBusSensors::getOccDBus().setPurpose(
1209 sensorPath,
1210 "xyz.openbmc_project.Sensor.Purpose.SensorPurpose.TotalPower");
Chris Cain3523cc02024-10-30 17:19:09 -05001211 }
Chris Cain5d66a0a2022-02-09 08:52:10 -06001212 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001213 sensorPath, fTypeList);
Matt Spinler5901abd2021-09-23 13:50:03 -05001214 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001215 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001216 }
1217 return;
1218}
1219
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001220void Manager::readExtnSensors(const fs::path& path, uint32_t id)
1221{
1222 std::regex expr{"extn\\d+_label$"}; // Example: extn5_label
1223 for (auto& file : fs::directory_iterator(path))
1224 {
1225 if (!std::regex_search(file.path().string(), expr))
1226 {
1227 continue;
1228 }
1229
1230 // Read in Label value of the sensor from file.
1231 std::string labelValue;
1232 try
1233 {
1234 labelValue = readFile<std::string>(file.path());
1235 }
1236 catch (const std::system_error& e)
1237 {
1238 lg2::debug(
1239 "readExtnSensors:label Failed reading {PATH}, errno = {ERROR}",
1240 "PATH", file.path().string(), "ERROR", e.code().value());
1241 continue;
1242 }
1243 const std::string& tempLabel = "label";
1244 const std::string filePathString = file.path().string().substr(
1245 0, file.path().string().length() - tempLabel.length());
1246
1247 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1248
1249 // Labels of EXTN sections from OCC interface Document
1250 // have different formats.
1251 // 0x464d494e : FMIN 0x46444953 : FDIS
1252 // 0x46424153 : FBAS 0x46555400 : FUT
1253 // 0x464d4158 : FMAX 0x434c4950 : CLIP
1254 // 0x4d4f4445 : MODE 0x574f4643 : WOFC
1255 // 0x574f4649 : WOFI 0x5057524d : PWRM
1256 // 0x50575250 : PWRP 0x45525248 : ERRH
1257 // Label indicating byte 5 and 6 is the current (mem,proc) power in
1258 // Watts.
1259 if ((labelValue == EXTN_LABEL_PWRM_MEMORY_POWER) ||
1260 (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER))
1261 {
1262 // Build the dbus String for this chiplet power asset.
1263 if (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER)
1264 {
1265 labelValue = "_power";
1266 }
1267 else // else EXTN_LABEL_PWRM_MEMORY_POWER
1268 {
1269 labelValue = "_mem_power";
1270 }
1271 sensorPath.append("chiplet" + std::to_string(id) + labelValue);
1272
1273 // Read in data value of the sensor from file.
1274 // Read in as string due to different format of data in sensors.
1275 std::string extnValue;
1276 try
1277 {
1278 extnValue = readFile<std::string>(filePathString + inputSuffix);
1279 }
1280 catch (const std::system_error& e)
1281 {
1282 lg2::debug(
1283 "readExtnSensors:value Failed reading {PATH}, errno = {ERROR}",
1284 "PATH", filePathString + inputSuffix, "ERROR",
1285 e.code().value());
1286 continue;
1287 }
1288
1289 // For Power field, Convert last 4 bytes of hex string into number
1290 // value.
1291 std::stringstream ssData;
1292 ssData << std::hex << extnValue.substr(extnValue.length() - 4);
1293 uint16_t MyHexNumber;
1294 ssData >> MyHexNumber;
1295
1296 // Convert output/DC power to input/AC power in Watts (round up)
1297 MyHexNumber =
1298 std::round(((MyHexNumber / (PS_DERATING_FACTOR / 100.0))));
1299
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001300 dbus::OccDBusSensors::getOccDBus().setUnit(
1301 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1302
1303 dbus::OccDBusSensors::getOccDBus().setValue(sensorPath,
1304 MyHexNumber);
1305
1306 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1307 sensorPath, true);
1308
1309 if (existingSensors.find(sensorPath) == existingSensors.end())
1310 {
1311 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
1312 sensorPath, {"all_sensors"});
1313 }
1314
Sheldon Baileyb89d6192025-03-05 09:33:19 -06001315 existingSensors[sensorPath] = id;
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001316 } // End Extended Power Sensors.
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001317 } // End For loop on files for Extended Sensors.
1318 return;
1319}
1320
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001321void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001322{
1323 for (const auto& [sensorPath, occId] : existingSensors)
1324 {
1325 if (occId == id)
1326 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001327 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001328 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001329
Patrick Williamsd7542c82024-08-16 15:20:28 -04001330 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1331 sensorPath, true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001332 }
1333 }
1334 return;
1335}
1336
Sheldon Bailey373af752022-02-21 15:14:00 -06001337void Manager::setSensorValueToNonFunctional(uint32_t id) const
1338{
1339 for (const auto& [sensorPath, occId] : existingSensors)
1340 {
1341 if (occId == id)
1342 {
1343 dbus::OccDBusSensors::getOccDBus().setValue(
1344 sensorPath, std::numeric_limits<double>::quiet_NaN());
1345
Patrick Williamsd7542c82024-08-16 15:20:28 -04001346 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1347 sensorPath, false);
Sheldon Bailey373af752022-02-21 15:14:00 -06001348 }
1349 }
1350 return;
1351}
1352
Chris Cain5d66a0a2022-02-09 08:52:10 -06001353void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001354{
Chris Caine2d0a432022-03-28 11:08:49 -05001355 static bool tracedError[8] = {0};
1356 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -06001357 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001358
Chris Caine2d0a432022-03-28 11:08:49 -05001359 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +08001360 {
Chris Caine2d0a432022-03-28 11:08:49 -05001361 // Read temperature sensors
1362 readTempSensors(sensorPath, id);
Sheldon Baileyb89d6192025-03-05 09:33:19 -06001363 // Read Extended sensors
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001364 readExtnSensors(sensorPath, id);
Chris Caine2d0a432022-03-28 11:08:49 -05001365
1366 if (occ->isMasterOcc())
1367 {
1368 // Read power sensors
1369 readPowerSensors(sensorPath, id);
1370 }
1371 tracedError[id] = false;
1372 }
1373 else
1374 {
1375 if (!tracedError[id])
1376 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001377 lg2::error(
1378 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}",
1379 "INST", id, "PATH", sensorPath);
Chris Caine2d0a432022-03-28 11:08:49 -05001380 tracedError[id] = true;
1381 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001382 }
1383
1384 return;
1385}
Chris Cain17257672021-10-22 13:41:03 -05001386
1387// Read the altitude from DBus
1388void Manager::readAltitude()
1389{
1390 static bool traceAltitudeErr = true;
1391
1392 utils::PropertyValue altitudeProperty{};
1393 try
1394 {
1395 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1396 ALTITUDE_PROP);
1397 auto sensorVal = std::get<double>(altitudeProperty);
1398 if (sensorVal < 0xFFFF)
1399 {
1400 if (sensorVal < 0)
1401 {
1402 altitude = 0;
1403 }
1404 else
1405 {
1406 // Round to nearest meter
1407 altitude = uint16_t(sensorVal + 0.5);
1408 }
Chris Cain37abe9b2024-10-31 17:20:31 -05001409 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE",
1410 sensorVal, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001411 traceAltitudeErr = true;
1412 }
1413 else
1414 {
1415 if (traceAltitudeErr)
1416 {
1417 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001418 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal);
Chris Cain17257672021-10-22 13:41:03 -05001419 }
1420 }
1421 }
Patrick Williamsaf408082022-07-22 19:26:54 -05001422 catch (const sdbusplus::exception_t& e)
Chris Cain17257672021-10-22 13:41:03 -05001423 {
1424 if (traceAltitudeErr)
1425 {
1426 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001427 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what());
Chris Cain17257672021-10-22 13:41:03 -05001428 }
1429 altitude = 0xFFFF; // not available
1430 }
1431}
1432
1433// Callback function when ambient temperature changes
Patrick Williamsaf408082022-07-22 19:26:54 -05001434void Manager::ambientCallback(sdbusplus::message_t& msg)
Chris Cain17257672021-10-22 13:41:03 -05001435{
1436 double currentTemp = 0;
1437 uint8_t truncatedTemp = 0xFF;
1438 std::string msgSensor;
1439 std::map<std::string, std::variant<double>> msgData;
1440 msg.read(msgSensor, msgData);
1441
1442 auto valPropMap = msgData.find(AMBIENT_PROP);
1443 if (valPropMap == msgData.end())
1444 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001445 lg2::debug("ambientCallback: Unknown ambient property changed");
Chris Cain17257672021-10-22 13:41:03 -05001446 return;
1447 }
1448 currentTemp = std::get<double>(valPropMap->second);
1449 if (std::isnan(currentTemp))
1450 {
1451 truncatedTemp = 0xFF;
1452 }
1453 else
1454 {
1455 if (currentTemp < 0)
1456 {
1457 truncatedTemp = 0;
1458 }
1459 else
1460 {
1461 // Round to nearest degree C
1462 truncatedTemp = uint8_t(currentTemp + 0.5);
1463 }
1464 }
1465
1466 // If ambient changes, notify OCCs
1467 if (truncatedTemp != ambient)
1468 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001469 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C",
1470 "OLD", ambient, "NEW", currentTemp);
Chris Cain17257672021-10-22 13:41:03 -05001471
1472 ambient = truncatedTemp;
1473 if (altitude == 0xFFFF)
1474 {
1475 // No altitude yet, try reading again
1476 readAltitude();
1477 }
1478
Chris Cain37abe9b2024-10-31 17:20:31 -05001479 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m",
1480 "TEMP", ambient, "ALT", altitude);
Sheldon Bailey16a5adb2025-06-10 14:10:06 -05001481
Chris Cain17257672021-10-22 13:41:03 -05001482 // Send ambient and altitude to all OCCs
1483 for (auto& obj : statusObjects)
1484 {
1485 if (obj->occActive())
1486 {
1487 obj->sendAmbient(ambient, altitude);
1488 }
1489 }
Chris Cain17257672021-10-22 13:41:03 -05001490 }
1491}
1492
1493// return the current ambient and altitude readings
1494void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1495 uint16_t& altitudeValue) const
1496{
1497 ambientValid = true;
1498 ambientTemp = ambient;
1499 altitudeValue = altitude;
1500
1501 if (ambient == 0xFF)
1502 {
1503 ambientValid = false;
1504 }
1505}
1506
Chris Cain7f89e4d2022-05-09 13:27:45 -05001507// Called when waitForAllOccsTimer expires
1508// After the first OCC goes active, this timer will be started (60 seconds)
Chris Caina7b74dc2021-11-10 17:03:43 -06001509void Manager::occsNotAllRunning()
1510{
Chris Cainf0295f52024-09-12 15:41:14 -05001511 if (resetInProgress)
1512 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001513 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -05001514 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
1515 return;
1516 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001517 if (activeCount != statusObjects.size())
1518 {
1519 // Not all OCCs went active
Chris Cain37abe9b2024-10-31 17:20:31 -05001520 lg2::warning(
1521 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})",
1522 "COUNT", activeCount, "EXP", statusObjects.size());
Chris Cain7f89e4d2022-05-09 13:27:45 -05001523 // Procs may be garded, so may be expected
Chris Caina7b74dc2021-11-10 17:03:43 -06001524 }
1525
Chris Cainf0295f52024-09-12 15:41:14 -05001526 if (resetRequired)
1527 {
1528 initiateOccRequest(resetInstance);
1529
1530 if (!waitForAllOccsTimer->isEnabled())
1531 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001532 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -05001533 // restart occ wait timer
1534 waitForAllOccsTimer->restartOnce(60s);
1535 }
1536 }
1537 else
1538 {
1539 validateOccMaster();
1540 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001541}
Chris Cain755af102024-02-27 16:09:51 -06001542
Chris Cainc33171b2024-05-24 16:14:50 -05001543// Called when throttlePldmTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001544// If this timer expires, that indicates there are no OCC active sensor PDRs
Chris Cainc33171b2024-05-24 16:14:50 -05001545// found which will trigger pldm traces to be throttled.
1546// The second time this timer expires, a PEL will get created.
1547void Manager::throttlePldmTraceExpired()
Chris Cain755af102024-02-27 16:09:51 -06001548{
Chris Cain7651c062024-05-02 14:14:06 -05001549 if (utils::isHostRunning())
1550 {
Chris Cainc33171b2024-05-24 16:14:50 -05001551 if (!onPldmTimeoutCreatePel)
1552 {
1553 // Throttle traces
1554 pldmHandle->setTraceThrottle(true);
1555 // Restart timer to log a PEL when timer expires
1556 onPldmTimeoutCreatePel = true;
1557 throttlePldmTraceTimer->restartOnce(40min);
1558 }
1559 else
1560 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001561 lg2::error(
Chris Cainc33171b2024-05-24 16:14:50 -05001562 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1563 // Create PEL
1564 createPldmSensorPEL();
1565 }
Chris Cain7651c062024-05-02 14:14:06 -05001566 }
1567 else
1568 {
1569 // Make sure traces are not throttled
1570 pldmHandle->setTraceThrottle(false);
Chris Cain37abe9b2024-10-31 17:20:31 -05001571 lg2::info(
Chris Cainc33171b2024-05-24 16:14:50 -05001572 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
Chris Cain7651c062024-05-02 14:14:06 -05001573 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001574}
1575
1576void Manager::createPldmSensorPEL()
1577{
1578 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
1579 std::map<std::string, std::string> additionalData;
1580
1581 additionalData.emplace("_PID", std::to_string(getpid()));
1582
Chris Cain37abe9b2024-10-31 17:20:31 -05001583 lg2::info(
1584 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001585
1586 auto& bus = utils::getBus();
1587
1588 try
1589 {
1590 FFDCFiles ffdc;
1591 // Add occ-control journal traces to PEL FFDC
1592 auto occJournalFile =
1593 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);
1594
1595 static constexpr auto loggingObjectPath =
1596 "/xyz/openbmc_project/logging";
1597 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Patrick Williamsd7542c82024-08-16 15:20:28 -04001598 std::string service =
1599 utils::getService(loggingObjectPath, opLoggingInterface);
1600 auto method =
1601 bus.new_method_call(service.c_str(), loggingObjectPath,
1602 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001603
Chris Cain1c3349e2024-04-24 14:14:11 -05001604 // Set level to Warning (Predictive).
Chris Cain4b82f3e2024-04-22 14:44:29 -05001605 auto level =
1606 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
1607 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cain1c3349e2024-04-24 14:14:11 -05001608 Warning);
Chris Cain4b82f3e2024-04-22 14:44:29 -05001609
1610 method.append(d.path, level, additionalData, ffdc);
1611 bus.call(method);
1612 }
1613 catch (const sdbusplus::exception_t& e)
1614 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001615 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR",
1616 e.what());
Chris Cain4b82f3e2024-04-22 14:44:29 -05001617 }
Chris Cain755af102024-02-27 16:09:51 -06001618}
Chris Caina7b74dc2021-11-10 17:03:43 -06001619
1620// Verify single master OCC and start presence monitor
1621void Manager::validateOccMaster()
1622{
1623 int masterInstance = -1;
1624 for (auto& obj : statusObjects)
1625 {
Chris Cainbd551de2022-04-26 13:41:16 -05001626 auto instance = obj->getOccInstanceID();
Sheldon Bailey16a5adb2025-06-10 14:10:06 -05001627
Chris Cainbae4d072022-02-28 09:46:50 -06001628 if (!obj->occActive())
1629 {
1630 if (utils::isHostRunning())
1631 {
Chris Cainbd551de2022-04-26 13:41:16 -05001632 // Check if sensor was queued while waiting for discovery
1633 auto match = queuedActiveState.find(instance);
1634 if (match != queuedActiveState.end())
Chris Cainbae4d072022-02-28 09:46:50 -06001635 {
Chris Cain7f89e4d2022-05-09 13:27:45 -05001636 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -05001637 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)",
1638 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001639 obj->occActive(true);
1640 }
1641 else
1642 {
1643 // OCC does not appear to be active yet, check active sensor
1644 pldmHandle->checkActiveSensor(instance);
1645 if (obj->occActive())
1646 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001647 lg2::info(
1648 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor",
1649 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001650 }
Chris Cainbae4d072022-02-28 09:46:50 -06001651 }
1652 }
1653 else
1654 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001655 lg2::warning(
1656 "validateOccMaster: HOST is not running (OCC{INST})",
1657 "INST", instance);
Chris Cainbae4d072022-02-28 09:46:50 -06001658 return;
1659 }
1660 }
Chris Cainbae4d072022-02-28 09:46:50 -06001661
Chris Caina7b74dc2021-11-10 17:03:43 -06001662 if (obj->isMasterOcc())
1663 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001664 obj->addPresenceWatchMaster();
1665
Chris Caina7b74dc2021-11-10 17:03:43 -06001666 if (masterInstance == -1)
1667 {
Chris Cainbd551de2022-04-26 13:41:16 -05001668 masterInstance = instance;
Chris Caina7b74dc2021-11-10 17:03:43 -06001669 }
1670 else
1671 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001672 lg2::error(
1673 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})",
1674 "MAST1", masterInstance, "MAST2", instance);
Chris Caina7b74dc2021-11-10 17:03:43 -06001675 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001676 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001677 }
1678 }
1679 }
Chris Cainbae4d072022-02-28 09:46:50 -06001680
Chris Caina7b74dc2021-11-10 17:03:43 -06001681 if (masterInstance < 0)
1682 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001683 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)",
1684 "NUM", statusObjects.size());
Chris Caina7b74dc2021-11-10 17:03:43 -06001685 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001686 statusObjects.front()->deviceError(
1687 Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001688 }
1689 else
1690 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001691 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs",
1692 "INST", masterInstance, "COUNT", activeCount);
Sheldon Bailey16a5adb2025-06-10 14:10:06 -05001693
Sheldon Bailey31a2f132022-05-20 11:31:52 -05001694 pmode->updateDbusSafeMode(false);
Chris Caina7b74dc2021-11-10 17:03:43 -06001695 }
1696}
1697
Chris Cain40501a22022-03-14 17:33:27 -05001698void Manager::updatePcapBounds() const
1699{
1700 if (pcap)
1701 {
1702 pcap->updatePcapBounds();
1703 }
1704}
1705
Chris Cainc488bac2025-03-17 09:01:15 -05001706// Clean up any variables since the OCC is no longer running.
1707// Called when pldm receives an event indicating host is powered off.
1708void Manager::hostPoweredOff()
1709{
1710 if (resetRequired)
1711 {
1712 lg2::info("hostPoweredOff: Clearing resetRequired for OCC{INST}",
1713 "INST", resetInstance);
1714 resetRequired = false;
1715 }
1716 if (resetInProgress)
1717 {
1718 lg2::info("hostPoweredOff: Clearing resetInProgress for OCC{INST}",
1719 "INST", resetInstance);
1720 resetInProgress = false;
1721 }
1722 resetInstance = 255;
1723}
1724
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301725} // namespace occ
1726} // namespace open_power