blob: ed6453c55db20199c0fee1a3e4a6469765639fac [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
Chicago Duanbb895cb2021-06-18 19:37:16 +08005#include "occ_dbus.hpp"
Chris Cain4b82f3e2024-04-22 14:44:29 -05006#include "occ_errors.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
Chris Cainffb63212025-08-01 14:39:38 -05009#include <nlohmann/json.hpp>
George Liub5ca1012021-09-10 12:53:11 +080010#include <phosphor-logging/elog-errors.hpp>
Chris Cain37abe9b2024-10-31 17:20:31 -050011#include <phosphor-logging/lg2.hpp>
George Liub5ca1012021-09-10 12:53:11 +080012#include <xyz/openbmc_project/Common/error.hpp>
13
Matt Spinlerd267cec2021-09-01 14:49:19 -050014#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080015#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080016#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060017#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080018#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050019
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// FRU type value 0xFF (name indicates "not available").
constexpr uint32_t fruTypeNotAvailable{0xFF};

// File-name suffixes (fru_type / fault / input / max).
constexpr auto fruTypeSuffix{"fru_type"};
constexpr auto faultSuffix{"fault"};
constexpr auto inputSuffix{"input"};
constexpr auto maxSuffix{"max"};

// File whose existence is checked by Manager::findAndCreateObjects().
const auto HOST_ON_FILE{"/run/openbmc/host@0-on"};
Chris Cainffb63212025-08-01 14:39:38 -050032const std::string Manager::dumpFile = "/tmp/occ_control_dump.json";
Chris Cain1718fd82022-02-16 16:39:50 -060033
Chris Caina8857c52021-01-27 11:53:05 -060034using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060035using namespace std::literals::chrono_literals;
Chris Cainffb63212025-08-01 14:39:38 -050036using json = nlohmann::json;
Chris Caina8857c52021-01-27 11:53:05 -060037
// Read one value of type T from the start of a file using operator>>.
//
// @param path  File to read.
// @return      The value extracted from the file.
// @throws std::system_error (generic category) if the file cannot be opened
//         or no value can be extracted. The error code is errno when it was
//         set by the failing operation, otherwise EIO, so the code is never 0.
//
// Reaching end-of-file right after a successfully extracted value (a file
// with no trailing newline) is NOT an error.
template <typename T>
T readFile(const std::string& path)
{
    // Start from a clean errno so a stale value left behind by an unrelated
    // call is not reported as the cause of a failure here.
    errno = 0;

    std::ifstream ifs{path};
    T data{};

    // A failed open leaves the stream in a failed state, so the extraction
    // check alone would be enough; the explicit is_open() documents intent.
    if (!ifs.is_open() || !(ifs >> data))
    {
        // Parse failures do not set errno; fall back to a generic I/O error.
        const int err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
60
Chris Cain720a3842025-01-09 10:23:36 -060061void Manager::createPldmHandle()
62{
Chris Cain720a3842025-01-09 10:23:36 -060063 pldmHandle = std::make_unique<pldm::Interface>(
64 std::bind(std::mem_fn(&Manager::updateOCCActive), this,
65 std::placeholders::_1, std::placeholders::_2),
66 std::bind(std::mem_fn(&Manager::sbeHRESETResult), this,
67 std::placeholders::_1, std::placeholders::_2),
68 std::bind(std::mem_fn(&Manager::updateOccSafeMode), this,
69 std::placeholders::_1),
Chris Cainc488bac2025-03-17 09:01:15 -050070 std::bind(std::mem_fn(&Manager::hostPoweredOff), this), event);
Chris Cain720a3842025-01-09 10:23:36 -060071}
72
Chris Cainc33171b2024-05-24 16:14:50 -050073// findAndCreateObjects():
74// Takes care of getting the required objects created and
75// finds the available devices/processors.
76// (function is called everytime the discoverTimer expires)
77// - create the PowerMode object to control OCC modes
78// - create statusObjects for each OCC device found
79// - waits for OCC Active sensors PDRs to become available
80// - restart discoverTimer if all data is not available yet
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053081void Manager::findAndCreateObjects()
82{
Chris Cain613dc902022-04-08 09:56:22 -050083 if (!pmode)
84 {
85 // Create the power mode object
86 pmode = std::make_unique<powermode::PowerMode>(
87 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
88 }
89
Chris Cain1718fd82022-02-16 16:39:50 -060090 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050091 {
Chris Cainbae4d072022-02-28 09:46:50 -060092 static bool statusObjCreated = false;
93 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060094 {
Chris Cainbae4d072022-02-28 09:46:50 -060095 // Create the OCCs based on on the /dev/occX devices
96 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060097
Chris Cainbae4d072022-02-28 09:46:50 -060098 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060099 {
Chris Cainbae4d072022-02-28 09:46:50 -0600100 // Something changed or no OCCs yet, try again in 10s.
101 // Note on the first pass prevOCCSearch will be empty,
102 // so there will be at least one delay to give things
103 // a chance to settle.
104 prevOCCSearch = occs;
105
Chris Cain37abe9b2024-10-31 17:20:31 -0500106 lg2::info(
107 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})",
108 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600109
110 discoverTimer->restartOnce(10s);
111 }
112 else
113 {
114 // All OCCs appear to be available, create status objects
115
116 // createObjects requires OCC0 first.
117 std::sort(occs.begin(), occs.end());
118
Chris Cain37abe9b2024-10-31 17:20:31 -0500119 lg2::info(
120 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects",
121 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600122 for (auto id : occs)
123 {
124 createObjects(std::string(OCC_NAME) + std::to_string(id));
125 }
126 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500127 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500128
129 // Find/update the processor path associated with each OCC
130 for (auto& obj : statusObjects)
131 {
132 obj->updateProcAssociation();
133 }
Chris Cainbae4d072022-02-28 09:46:50 -0600134 }
135 }
136
Chris Cain6d8f37a2022-04-29 13:46:01 -0500137 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600138 {
139 static bool tracedHostWait = false;
140 if (utils::isHostRunning())
141 {
142 if (tracedHostWait)
143 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500144 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600145 "Manager::findAndCreateObjects(): Host is running");
146 tracedHostWait = false;
147 }
Chris Cainbae4d072022-02-28 09:46:50 -0600148 checkAllActiveSensors();
149 }
150 else
151 {
152 if (!tracedHostWait)
153 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500154 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600155 "Manager::findAndCreateObjects(): Waiting for host to start");
156 tracedHostWait = true;
157 }
158 discoverTimer->restartOnce(30s);
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500159
Chris Cainc33171b2024-05-24 16:14:50 -0500160 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500161 {
162 // Host is no longer running, disable throttle timer and
163 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500164 lg2::info("findAndCreateObjects(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500165 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500166 pldmHandle->setTraceThrottle(false);
167 }
Chris Cain1718fd82022-02-16 16:39:50 -0600168 }
169 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500170 }
171 else
172 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500173 lg2::info(
174 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...",
175 "FILE", HOST_ON_FILE);
Chris Cain1718fd82022-02-16 16:39:50 -0600176 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500177 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500178}
179
Chris Cainbae4d072022-02-28 09:46:50 -0600180// Check if all occActive sensors are available
181void Manager::checkAllActiveSensors()
182{
183 static bool allActiveSensorAvailable = false;
184 static bool tracedSensorWait = false;
Chris Cain082a6ca2023-03-21 10:27:26 -0500185 static bool waitingForHost = false;
Chris Cainbae4d072022-02-28 09:46:50 -0600186
Chris Cain082a6ca2023-03-21 10:27:26 -0500187 if (open_power::occ::utils::isHostRunning())
Chris Cainbae4d072022-02-28 09:46:50 -0600188 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500189 if (waitingForHost)
Chris Cainbae4d072022-02-28 09:46:50 -0600190 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500191 waitingForHost = false;
Chris Cain37abe9b2024-10-31 17:20:31 -0500192 lg2::info("checkAllActiveSensors(): Host is now running");
Chris Cain082a6ca2023-03-21 10:27:26 -0500193 }
194
195 // Start with the assumption that all are available
196 allActiveSensorAvailable = true;
197 for (auto& obj : statusObjects)
198 {
199 if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
Chris Cainbae4d072022-02-28 09:46:50 -0600200 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500201 auto instance = obj->getOccInstanceID();
202 // Check if sensor was queued while waiting for discovery
203 auto match = queuedActiveState.find(instance);
204 if (match != queuedActiveState.end())
Chris Cainbd551de2022-04-26 13:41:16 -0500205 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500206 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -0500207 lg2::info(
208 "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)",
209 "INST", instance);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500210 obj->occActive(true);
Chris Cainbd551de2022-04-26 13:41:16 -0500211 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500212 else
213 {
214 allActiveSensorAvailable = false;
215 if (!tracedSensorWait)
216 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500217 lg2::info(
218 "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor",
219 "INST", instance);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500220 tracedSensorWait = true;
Chris Cainc33171b2024-05-24 16:14:50 -0500221 // Make sure PLDM traces are not throttled
Chris Cain755af102024-02-27 16:09:51 -0600222 pldmHandle->setTraceThrottle(false);
Chris Cainc33171b2024-05-24 16:14:50 -0500223 // Start timer to throttle PLDM traces when timer
Chris Cain755af102024-02-27 16:09:51 -0600224 // expires
Chris Cainc33171b2024-05-24 16:14:50 -0500225 onPldmTimeoutCreatePel = false;
226 throttlePldmTraceTimer->restartOnce(5min);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500227 }
Chris Cainf0295f52024-09-12 15:41:14 -0500228 // Ignore active sensor check if the OCCs are being reset
229 if (!resetInProgress)
230 {
231 pldmHandle->checkActiveSensor(obj->getOccInstanceID());
232 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500233 break;
234 }
Chris Cainbd551de2022-04-26 13:41:16 -0500235 }
Chris Cainbae4d072022-02-28 09:46:50 -0600236 }
237 }
Chris Cain082a6ca2023-03-21 10:27:26 -0500238 else
239 {
240 if (!waitingForHost)
241 {
242 waitingForHost = true;
Chris Cain37abe9b2024-10-31 17:20:31 -0500243 lg2::info("checkAllActiveSensors(): Waiting for host to start");
Chris Cainc33171b2024-05-24 16:14:50 -0500244 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500245 {
246 // Host is no longer running, disable throttle timer and
247 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500248 lg2::info("checkAllActiveSensors(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500249 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500250 pldmHandle->setTraceThrottle(false);
251 }
Chris Cain082a6ca2023-03-21 10:27:26 -0500252 }
253 }
Chris Cainbae4d072022-02-28 09:46:50 -0600254
255 if (allActiveSensorAvailable)
256 {
257 // All sensors were found, disable the discovery timer
Chris Cain7f89e4d2022-05-09 13:27:45 -0500258 if (discoverTimer->isEnabled())
259 {
Chris Cainf55f91a2022-05-27 13:40:15 -0500260 discoverTimer->setEnabled(false);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500261 }
Chris Cainc33171b2024-05-24 16:14:50 -0500262 if (throttlePldmTraceTimer->isEnabled())
Chris Cain755af102024-02-27 16:09:51 -0600263 {
264 // Disable throttle timer and make sure traces are not throttled
Chris Cainc33171b2024-05-24 16:14:50 -0500265 throttlePldmTraceTimer->setEnabled(false);
Chris Cain755af102024-02-27 16:09:51 -0600266 pldmHandle->setTraceThrottle(false);
267 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500268 if (waitingForAllOccActiveSensors)
269 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500270 lg2::info(
Chris Cain7f89e4d2022-05-09 13:27:45 -0500271 "checkAllActiveSensors(): OCC Active sensors are available");
272 waitingForAllOccActiveSensors = false;
Chris Cainf0295f52024-09-12 15:41:14 -0500273
274 if (resetRequired)
275 {
276 initiateOccRequest(resetInstance);
277
278 if (!waitForAllOccsTimer->isEnabled())
279 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500280 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500281 "occsNotAllRunning: Restarting waitForAllOccTimer");
282 // restart occ wait timer to check status after reset
283 // completes
284 waitForAllOccsTimer->restartOnce(60s);
285 }
286 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500287 }
288 queuedActiveState.clear();
Chris Cainbae4d072022-02-28 09:46:50 -0600289 tracedSensorWait = false;
290 }
291 else
292 {
293 // Not all sensors were available, so keep waiting
294 if (!tracedSensorWait)
295 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500296 lg2::info(
Chris Cainbd551de2022-04-26 13:41:16 -0500297 "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
Chris Cainbae4d072022-02-28 09:46:50 -0600298 tracedSensorWait = true;
299 }
Chris Cainf55f91a2022-05-27 13:40:15 -0500300 discoverTimer->restartOnce(10s);
Chris Cainbae4d072022-02-28 09:46:50 -0600301 }
302}
Chris Cainbae4d072022-02-28 09:46:50 -0600303
Matt Spinlerd267cec2021-09-01 14:49:19 -0500304std::vector<int> Manager::findOCCsInDev()
305{
306 std::vector<int> occs;
307 std::regex expr{R"(occ(\d+)$)"};
308
309 for (auto& file : fs::directory_iterator("/dev"))
310 {
311 std::smatch match;
312 std::string path{file.path().string()};
313 if (std::regex_search(path, match, expr))
314 {
315 auto num = std::stoi(match[1].str());
316
317 // /dev numbering starts at 1, ours starts at 0.
318 occs.push_back(num - 1);
319 }
320 }
321
322 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530323}
324
Patrick Williamsaf408082022-07-22 19:26:54 -0500325int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530326{
George Liubcef3b42021-09-10 12:39:02 +0800327 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530328
Patrick Williams764d3592025-11-05 00:11:16 -0500329 auto o = msg.unpack<sdbusplus::message::object_path>();
330
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530331 fs::path cpuPath(std::string(std::move(o)));
332
333 auto name = cpuPath.filename().string();
334 auto index = name.find(CPU_NAME);
335 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
336
337 createObjects(name);
338
339 return 0;
340}
341
342void Manager::createObjects(const std::string& occ)
343{
344 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
345
Gunnar Mills94df8c92018-09-14 14:50:03 -0500346 statusObjects.emplace_back(std::make_unique<Status>(
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500347 event, path.c_str(), *this, pmode,
Gunnar Mills94df8c92018-09-14 14:50:03 -0500348 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500349 std::placeholders::_1, std::placeholders::_2),
Chris Cainf0295f52024-09-12 15:41:14 -0500350 // Callback will set flag indicating reset needs to be done
351 // instead of immediately issuing a reset via PLDM.
352 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500353 std::placeholders::_1)));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530354
Chris Cain40501a22022-03-14 17:33:27 -0500355 // Create the power cap monitor object
356 if (!pcap)
357 {
358 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
359 *statusObjects.back());
360 }
361
Chris Cain36f9cde2021-11-22 11:18:21 -0600362 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530363 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500364 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST",
365 statusObjects.back()->getOccInstanceID());
Chris Cain36f9cde2021-11-22 11:18:21 -0600366 _pollTimer->setEnabled(false);
367
Chris Cain6fa848a2022-01-24 14:54:38 -0600368 // Set the master OCC on the PowerMode object
369 pmode->setMasterOcc(path);
Chris Cain36f9cde2021-11-22 11:18:21 -0600370 }
371
Sheldon Bailey16a5adb2025-06-10 14:10:06 -0500372 passThroughObjects.emplace_back(
373 std::make_unique<PassThrough>(path.c_str(), pmode));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530374}
375
Chris Cainf0295f52024-09-12 15:41:14 -0500376// If a reset is not already outstanding, set a flag to indicate that a reset is
377// needed.
378void Manager::resetOccRequest(instanceID instance)
379{
380 if (!resetRequired)
381 {
382 resetRequired = true;
383 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500384 lg2::error(
385 "resetOccRequest: PM Complex reset was requested due to OCC{INST}",
386 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500387 }
388 else if (instance != resetInstance)
389 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500390 lg2::warning(
391 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}",
392 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500393 }
394}
395
396// If a reset has not been started, initiate an OCC reset via PLDM
397void Manager::initiateOccRequest(instanceID instance)
398{
399 if (!resetInProgress)
400 {
401 resetInProgress = true;
402 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500403 lg2::error(
404 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}",
405 "INST", instance);
Chris Cainf7881502025-04-16 14:48:30 -0500406
407 // Make sure ALL OCC comm stops to all OCCs before the reset
408 for (auto& obj : statusObjects)
409 {
410 if (obj->occActive())
411 {
412 obj->occActive(false);
413 }
414 }
415
Chris Cainf0295f52024-09-12 15:41:14 -0500416 pldmHandle->resetOCC(instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500417 resetRequired = false;
418 }
419 else
420 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500421 lg2::warning(
422 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}",
423 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500424 }
425}
426
Sheldon Bailey373af752022-02-21 15:14:00 -0600427void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530428{
Chris Caina7b74dc2021-11-10 17:03:43 -0600429 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600430 {
Chris Cainf0295f52024-09-12 15:41:14 -0500431 if (resetInProgress)
432 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500433 lg2::info(
Chris Cain92dfb272025-02-13 12:20:27 -0600434 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{RINST}",
Chris Cain37abe9b2024-10-31 17:20:31 -0500435 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500436 return;
437 }
438
Chris Caina7b74dc2021-11-10 17:03:43 -0600439 // OCC went active
440 ++activeCount;
441
Chris Caina7b74dc2021-11-10 17:03:43 -0600442 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600443 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600444 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500445 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500446 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600447
448 if (activeCount == statusObjects.size())
449 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600450 // All OCCs are now running
451 if (waitForAllOccsTimer->isEnabled())
452 {
453 // stop occ wait timer
454 waitForAllOccsTimer->setEnabled(false);
455 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600456
Chris Cainf0295f52024-09-12 15:41:14 -0500457 // All OCCs have been found, check if we need a reset
458 if (resetRequired)
459 {
460 initiateOccRequest(resetInstance);
461
462 if (!waitForAllOccsTimer->isEnabled())
463 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500464 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500465 "occsNotAllRunning: Restarting waitForAllOccTimer");
466 // restart occ wait timer
467 waitForAllOccsTimer->restartOnce(60s);
468 }
469 }
470 else
471 {
472 // Verify master OCC and start presence monitor
473 validateOccMaster();
474 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600475 }
476
Chris Cainf7881502025-04-16 14:48:30 -0500477 // Start poll timer if not already started (since at least one OCC is
478 // running)
Chris Caina7b74dc2021-11-10 17:03:43 -0600479 if (!_pollTimer->isEnabled())
480 {
Chris Cainf7881502025-04-16 14:48:30 -0500481 // An OCC just went active, PM Complex is just coming online so
482 // clear any outstanding reset requests
483 if (resetRequired)
484 {
485 resetRequired = false;
486 lg2::error(
487 "statusCallBack: clearing resetRequired (since OCC{INST} went active, resetInProgress={RIP})",
488 "INST", instance, "RIP", resetInProgress);
489 }
490
Chris Cain37abe9b2024-10-31 17:20:31 -0500491 lg2::info("Manager: OCCs will be polled every {TIME} seconds",
492 "TIME", pollInterval);
Chris Caina7b74dc2021-11-10 17:03:43 -0600493
494 // Send poll and start OCC poll timer
495 pollerTimerExpired();
496 }
497 }
498 else
499 {
500 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500501 if (activeCount > 0)
502 {
503 --activeCount;
504 }
505 else
506 {
Sheldon Baileyb89d6192025-03-05 09:33:19 -0600507 lg2::info("OCC{INST} disabled, and no other OCCs are active",
Chris Cain37abe9b2024-10-31 17:20:31 -0500508 "INST", instance);
Chris Cain082a6ca2023-03-21 10:27:26 -0500509 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600510
511 if (activeCount == 0)
512 {
513 // No OCCs are running
514
Chris Cainf0295f52024-09-12 15:41:14 -0500515 if (resetInProgress)
516 {
517 // All OCC active sensors are clear (reset should be in
518 // progress)
Chris Cain37abe9b2024-10-31 17:20:31 -0500519 lg2::info(
520 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
521 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500522 resetInProgress = false;
523 resetInstance = 255;
524 }
525
Chris Caina7b74dc2021-11-10 17:03:43 -0600526 // Stop OCC poll timer
527 if (_pollTimer->isEnabled())
528 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500529 lg2::info(
Chris Caina7b74dc2021-11-10 17:03:43 -0600530 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
531 _pollTimer->setEnabled(false);
532 }
533
Chris Caina7b74dc2021-11-10 17:03:43 -0600534 // stop wait timer
535 if (waitForAllOccsTimer->isEnabled())
536 {
537 waitForAllOccsTimer->setEnabled(false);
538 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600539 }
Chris Cainf0295f52024-09-12 15:41:14 -0500540 else if (resetInProgress)
541 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500542 lg2::info(
543 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
544 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500545 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600546 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500547 setSensorValueToNaN(instance);
Chris Caina8857c52021-01-27 11:53:05 -0600548 }
Chris Cainbae4d072022-02-28 09:46:50 -0600549
Chris Cainbae4d072022-02-28 09:46:50 -0600550 if (waitingForAllOccActiveSensors)
551 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500552 if (utils::isHostRunning())
553 {
554 checkAllActiveSensors();
555 }
Chris Cainbae4d072022-02-28 09:46:50 -0600556 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530557}
558
Eddie Jamescbad2192021-10-07 09:39:39 -0500559void Manager::sbeTimeout(unsigned int instance)
560{
Eddie James2a751d72022-03-04 09:16:12 -0600561 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
562 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400563 return instance == obj->getOccInstanceID();
564 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500565
Eddie Jamescb018da2022-03-05 11:49:37 -0600566 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600567 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500568 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST",
569 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500570
Chris Cain720a3842025-01-09 10:23:36 -0600571#ifdef PHAL_SUPPORT
Eddie James2a751d72022-03-04 09:16:12 -0600572 setSBEState(instance, SBE_STATE_NOT_USABLE);
Chris Cain720a3842025-01-09 10:23:36 -0600573#endif
Eddie James2a751d72022-03-04 09:16:12 -0600574
Chris Cain92dfb272025-02-13 12:20:27 -0600575 // Stop communication with this OCC
576 (*obj)->occActive(false);
577
Eddie James2a751d72022-03-04 09:16:12 -0600578 pldmHandle->sendHRESET(instance);
579 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500580}
581
Tom Joseph815f9f52020-07-27 12:12:13 +0530582bool Manager::updateOCCActive(instanceID instance, bool status)
583{
Chris Cain7e374fb2022-04-07 09:47:23 -0500584 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
585 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400586 return instance == obj->getOccInstanceID();
587 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500588
Chris Cain082a6ca2023-03-21 10:27:26 -0500589 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500590 if (obj != statusObjects.end())
591 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500592 if (!hostRunning && (status == true))
593 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500594 lg2::warning(
595 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received",
596 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500597 (*obj)->setPldmSensorReceived(false);
598 if (!waitingForAllOccActiveSensors)
599 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500600 lg2::info(
Chris Cain082a6ca2023-03-21 10:27:26 -0500601 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
602 waitingForAllOccActiveSensors = true;
603 }
604 discoverTimer->restartOnce(30s);
605 return false;
606 }
607 else
608 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500609 (*obj)->setPldmSensorReceived(true);
610 return (*obj)->occActive(status);
611 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500612 }
613 else
614 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500615 if (hostRunning)
616 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500617 lg2::warning(
618 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})",
619 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500620 }
621 else
622 {
623 if (status == true)
624 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500625 lg2::warning(
626 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})",
627 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500628 }
629 }
Chris Cainbd551de2022-04-26 13:41:16 -0500630 if (status == true)
631 {
632 // OCC went active
633 queuedActiveState.insert(instance);
634 }
635 else
636 {
637 auto match = queuedActiveState.find(instance);
638 if (match != queuedActiveState.end())
639 {
640 // OCC was disabled
641 queuedActiveState.erase(match);
642 }
643 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500644 return false;
645 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530646}
Eddie Jamescbad2192021-10-07 09:39:39 -0500647
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500648// Called upon pldm event To set powermode Safe Mode State for system.
649void Manager::updateOccSafeMode(bool safeMode)
650{
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500651 pmode->updateDbusSafeMode(safeMode);
Chris Cainc86d80f2023-05-04 15:49:18 -0500652 // Update the processor throttle status on dbus
653 for (auto& obj : statusObjects)
654 {
655 obj->updateThrottle(safeMode, THROTTLED_SAFE);
656 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500657}
658
Eddie Jamescbad2192021-10-07 09:39:39 -0500659void Manager::sbeHRESETResult(instanceID instance, bool success)
660{
661 if (success)
662 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500663 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500664
Chris Cain720a3842025-01-09 10:23:36 -0600665#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500666 setSBEState(instance, SBE_STATE_BOOTED);
Chris Cain720a3842025-01-09 10:23:36 -0600667#endif
Eddie Jamescbad2192021-10-07 09:39:39 -0500668
Chris Cain92dfb272025-02-13 12:20:27 -0600669 // Re-enable communication with this OCC
670 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
671 [instance](const auto& obj) {
672 return instance == obj->getOccInstanceID();
673 });
674 if (obj != statusObjects.end() && (!(*obj)->occActive()))
675 {
676 (*obj)->occActive(true);
677 }
678
Eddie Jamescbad2192021-10-07 09:39:39 -0500679 return;
680 }
681
Chris Cain720a3842025-01-09 10:23:36 -0600682#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500683 setSBEState(instance, SBE_STATE_FAILED);
684
685 if (sbeCanDump(instance))
686 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500687 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST",
688 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500689
690 auto& bus = utils::getBus();
691 uint32_t src6 = instance << 16;
692 uint32_t logId =
693 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
694 src6, "SBE command timeout");
695
696 try
697 {
George Liuf3a4a692021-12-28 13:59:51 +0800698 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
699 constexpr auto function = "CreateDump";
700
Patrick Williamsd7542c82024-08-16 15:20:28 -0400701 std::string service =
702 utils::getService(OP_DUMP_OBJ_PATH, interface);
Dhruvaraj Subhashchandran1173b2b2024-06-01 11:12:13 -0500703 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
704 interface, function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500705
706 std::map<std::string, std::variant<std::string, uint64_t>>
707 createParams{
708 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
709 uint64_t(logId)},
710 {"com.ibm.Dump.Create.CreateParameters.DumpType",
711 "com.ibm.Dump.Create.DumpType.SBE"},
712 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
713 uint64_t(instance)},
714 };
715
716 method.append(createParams);
717
718 auto response = bus.call(method);
719 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500720 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500721 {
722 constexpr auto ERROR_DUMP_DISABLED =
723 "xyz.openbmc_project.Dump.Create.Error.Disabled";
724 if (e.name() == ERROR_DUMP_DISABLED)
725 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500726 lg2::info("Dump is disabled, skipping");
Eddie Jamescbad2192021-10-07 09:39:39 -0500727 }
728 else
729 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500730 lg2::error("Dump failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500731 }
732 }
733 }
Chris Cain720a3842025-01-09 10:23:36 -0600734#endif
Chris Cainf0295f52024-09-12 15:41:14 -0500735
736 // SBE Reset failed, try PM Complex reset
Chris Cain37abe9b2024-10-31 17:20:31 -0500737 lg2::error("sbeHRESETResult: Forcing PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500738 resetOccRequest(instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500739}
740
Chris Cain720a3842025-01-09 10:23:36 -0600741#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500742bool Manager::sbeCanDump(unsigned int instance)
743{
744 struct pdbg_target* proc = getPdbgTarget(instance);
745
746 if (!proc)
747 {
748 // allow the dump in the error case
749 return true;
750 }
751
752 try
753 {
754 if (!openpower::phal::sbe::isDumpAllowed(proc))
755 {
756 return false;
757 }
758
759 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
760 {
761 return false;
762 }
763 }
764 catch (openpower::phal::exception::SbeError& e)
765 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500766 lg2::info("Failed to query SBE state");
Eddie Jamescbad2192021-10-07 09:39:39 -0500767 }
768
769 // allow the dump in the error case
770 return true;
771}
772
773void Manager::setSBEState(unsigned int instance, enum sbe_state state)
774{
775 struct pdbg_target* proc = getPdbgTarget(instance);
776
777 if (!proc)
778 {
779 return;
780 }
781
782 try
783 {
784 openpower::phal::sbe::setState(proc, state);
785 }
786 catch (const openpower::phal::exception::SbeError& e)
787 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500788 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what());
Eddie Jamescbad2192021-10-07 09:39:39 -0500789 }
790}
791
792struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
793{
794 if (!pdbgInitialized)
795 {
796 try
797 {
798 openpower::phal::pdbg::init();
799 pdbgInitialized = true;
800 }
801 catch (const openpower::phal::exception::PdbgError& e)
802 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500803 lg2::error("pdbg initialization failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500804 return nullptr;
805 }
806 }
807
808 struct pdbg_target* proc = nullptr;
809 pdbg_for_each_class_target("proc", proc)
810 {
811 if (pdbg_target_index(proc) == instance)
812 {
813 return proc;
814 }
815 }
816
Chris Cain37abe9b2024-10-31 17:20:31 -0500817 lg2::error("Failed to get pdbg target");
Eddie Jamescbad2192021-10-07 09:39:39 -0500818 return nullptr;
819}
Tom Joseph815f9f52020-07-27 12:12:13 +0530820#endif
821
Chris Caina8857c52021-01-27 11:53:05 -0600822void Manager::pollerTimerExpired()
823{
Chris Caina8857c52021-01-27 11:53:05 -0600824 if (!_pollTimer)
825 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500826 lg2::error("pollerTimerExpired() ERROR: Timer not defined");
Chris Caina8857c52021-01-27 11:53:05 -0600827 return;
828 }
829
Chris Cainf0295f52024-09-12 15:41:14 -0500830 if (resetRequired)
831 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500832 lg2::error("pollerTimerExpired() - Initiating PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500833 initiateOccRequest(resetInstance);
834
835 if (!waitForAllOccsTimer->isEnabled())
836 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500837 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -0500838 // restart occ wait timer
839 waitForAllOccsTimer->restartOnce(60s);
840 }
841 return;
842 }
Chris Cainf0295f52024-09-12 15:41:14 -0500843
Chris Caina8857c52021-01-27 11:53:05 -0600844 for (auto& obj : statusObjects)
845 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600846 if (!obj->occActive())
847 {
848 // OCC is not running yet
Chris Cain5d66a0a2022-02-09 08:52:10 -0600849 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500850 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600851 continue;
852 }
853
Chris Caina8857c52021-01-27 11:53:05 -0600854 // Read sysfs to force kernel to poll OCC
855 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800856
Chicago Duanbb895cb2021-06-18 19:37:16 +0800857 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600858 getSensorValues(obj);
Chris Caina8857c52021-01-27 11:53:05 -0600859 }
860
Chris Caina7b74dc2021-11-10 17:03:43 -0600861 if (activeCount > 0)
862 {
863 // Restart OCC poll timer
864 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
865 }
866 else
867 {
868 // No OCCs running, so poll timer will not be restarted
Chris Cain37abe9b2024-10-31 17:20:31 -0500869 lg2::info(
870 "Manager::pollerTimerExpired: poll timer will not be restarted");
Chris Caina7b74dc2021-11-10 17:03:43 -0600871 }
Chris Caina8857c52021-01-27 11:53:05 -0600872}
873
Chris Cainae157b62024-01-23 16:05:12 -0600874void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800875{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500876 // There may be more than one sensor with the same FRU type
877 // and label so make two passes: the first to read the temps
878 // from sysfs, and the second to put them on D-Bus after
879 // resolving any conflicts.
880 std::map<std::string, double> sensorData;
881
Chicago Duanbb895cb2021-06-18 19:37:16 +0800882 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
883 for (auto& file : fs::directory_iterator(path))
884 {
885 if (!std::regex_search(file.path().string(), expr))
886 {
887 continue;
888 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800889
Matt Spinlera26f1522021-08-25 15:50:20 -0500890 uint32_t labelValue{0};
891
892 try
893 {
894 labelValue = readFile<uint32_t>(file.path());
895 }
896 catch (const std::system_error& e)
897 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500898 lg2::debug(
899 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
900 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800901 continue;
902 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800903
904 const std::string& tempLabel = "label";
905 const std::string filePathString = file.path().string().substr(
906 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500907
908 uint32_t fruTypeValue{0};
909 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800910 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500911 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
912 }
913 catch (const std::system_error& e)
914 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500915 lg2::debug(
916 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
917 "PATH", filePathString + fruTypeSuffix, "ERROR",
918 e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800919 continue;
920 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800921
Patrick Williamsd7542c82024-08-16 15:20:28 -0400922 std::string sensorPath =
923 OCC_SENSORS_ROOT + std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800924
Matt Spinlerace67d82021-10-18 13:41:57 -0500925 std::string dvfsTempPath;
926
Chicago Duanbb895cb2021-06-18 19:37:16 +0800927 if (fruTypeValue == VRMVdd)
928 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400929 sensorPath.append(
930 "vrm_vdd" + std::to_string(occInstance) + "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800931 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500932 else if (fruTypeValue == processorIoRing)
933 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400934 sensorPath.append(
935 "proc" + std::to_string(occInstance) + "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500936 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600937 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500938 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800939 else
940 {
Matt Spinler14d14022021-08-25 15:38:29 -0500941 uint16_t type = (labelValue & 0xFF000000) >> 24;
942 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800943
944 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
945 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500946 if (fruTypeValue == fruTypeNotAvailable)
947 {
948 // Not all DIMM related temps are available to read
949 // (no _input file in this case)
950 continue;
951 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800952 auto iter = dimmTempSensorName.find(fruTypeValue);
953 if (iter == dimmTempSensorName.end())
954 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500955 lg2::error(
956 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ",
957 "FRU", fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800958 continue;
959 }
960
Patrick Williamsd7542c82024-08-16 15:20:28 -0400961 sensorPath.append(
962 "dimm" + std::to_string(instanceID) + iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500963
964 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
965 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800966 }
967 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
968 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500969 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800970 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500971 // The OCC reports small core temps, of which there are
972 // two per big core. All current P10 systems are in big
973 // core mode, so use a big core name.
974 uint16_t coreNum = instanceID / 2;
975 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600976 sensorPath.append("proc" + std::to_string(occInstance) +
977 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500978 std::to_string(tempNum) + "_temp");
979
Chris Cainae157b62024-01-23 16:05:12 -0600980 dvfsTempPath =
981 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
982 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500983 }
984 else
985 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800986 continue;
987 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800988 }
989 else
990 {
991 continue;
992 }
993 }
994
Matt Spinlerace67d82021-10-18 13:41:57 -0500995 // The dvfs temp file only needs to be read once per chip per type.
996 if (!dvfsTempPath.empty() &&
997 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
998 {
999 try
1000 {
1001 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
1002
1003 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
1004 dvfsTempPath, dvfsValue * std::pow(10, -3));
1005 }
1006 catch (const std::system_error& e)
1007 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001008 lg2::debug(
1009 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1010 "PATH", filePathString + maxSuffix, "ERROR",
1011 e.code().value());
Matt Spinlerace67d82021-10-18 13:41:57 -05001012 }
1013 }
1014
Matt Spinlera26f1522021-08-25 15:50:20 -05001015 uint32_t faultValue{0};
1016 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001017 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001018 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
1019 }
1020 catch (const std::system_error& e)
1021 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001022 lg2::debug(
1023 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1024 "PATH", filePathString + faultSuffix, "ERROR",
1025 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001026 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001027 }
1028
Chris Cainae157b62024-01-23 16:05:12 -06001029 double tempValue{0};
1030 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -05001031 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001032 {
Chris Cainae157b62024-01-23 16:05:12 -06001033 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001034 }
Chris Cainae157b62024-01-23 16:05:12 -06001035 else
Chicago Duanbb895cb2021-06-18 19:37:16 +08001036 {
Chris Cainae157b62024-01-23 16:05:12 -06001037 // Read the temperature
1038 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001039 {
Chris Cainae157b62024-01-23 16:05:12 -06001040 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001041 }
Chris Cainae157b62024-01-23 16:05:12 -06001042 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001043 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001044 lg2::debug(
1045 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1046 "PATH", filePathString + inputSuffix, "ERROR",
1047 e.code().value());
Chris Cainae157b62024-01-23 16:05:12 -06001048
1049 // if errno == EAGAIN(Resource temporarily unavailable) then set
1050 // temp to 0, to avoid using old temp, and affecting FAN
1051 // Control.
1052 if (e.code().value() == EAGAIN)
1053 {
1054 tempValue = 0;
1055 }
1056 // else the errno would be something like
1057 // EBADF(Bad file descriptor)
1058 // or ENOENT(No such file or directory)
1059 else
1060 {
1061 continue;
1062 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001063 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001064 }
1065
Matt Spinler818cc8d2023-10-23 11:43:39 -05001066 // If this object path already has a value, only overwite
1067 // it if the previous one was an NaN or a smaller value.
1068 auto existing = sensorData.find(sensorPath);
1069 if (existing != sensorData.end())
1070 {
Chris Cainae157b62024-01-23 16:05:12 -06001071 // Multiple sensors found for this FRU type
1072 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1073 ((existing->second == 0) && std::isnan(tempValue)))
1074 {
1075 // One of the redundant sensors has failed (0xFF/nan), and the
1076 // other sensor has no reading (0), so set the FRU to NaN to
1077 // force fan increase
1078 tempValue = std::numeric_limits<double>::quiet_NaN();
1079 existing->second = tempValue;
1080 }
Matt Spinler818cc8d2023-10-23 11:43:39 -05001081 if (std::isnan(existing->second) || (tempValue > existing->second))
1082 {
1083 existing->second = tempValue;
1084 }
1085 }
1086 else
1087 {
Chris Cainae157b62024-01-23 16:05:12 -06001088 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001089 sensorData[sensorPath] = tempValue;
1090 }
1091 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001092
Matt Spinler818cc8d2023-10-23 11:43:39 -05001093 // Now publish the values on D-Bus.
1094 for (const auto& [objectPath, value] : sensorData)
1095 {
1096 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1097 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001098
Matt Spinler818cc8d2023-10-23 11:43:39 -05001099 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1100 objectPath, !std::isnan(value));
1101
1102 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001103 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001104 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001105 objectPath, {"all_sensors"});
Chris Cain6fa848a2022-01-24 14:54:38 -06001106 }
Chris Cainae157b62024-01-23 16:05:12 -06001107 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001108 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001109}
1110
Patrick Williams2d6ec902025-02-01 08:22:13 -05001111std::optional<std::string> Manager::getPowerLabelFunctionID(
1112 const std::string& value)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001113{
1114 // If the value is "system", then the FunctionID is "system".
1115 if (value == "system")
1116 {
1117 return value;
1118 }
1119
1120 // If the value is not "system", then the label value have 3 numbers, of
1121 // which we only care about the middle one:
1122 // <sensor id>_<function id>_<apss channel>
1123 // eg: The value is "0_10_5" , then the FunctionID is "10".
1124 if (value.find("_") == std::string::npos)
1125 {
1126 return std::nullopt;
1127 }
1128
1129 auto powerLabelValue = value.substr((value.find("_") + 1));
1130
1131 if (powerLabelValue.find("_") == std::string::npos)
1132 {
1133 return std::nullopt;
1134 }
1135
1136 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1137}
1138
1139void Manager::readPowerSensors(const fs::path& path, uint32_t id)
1140{
Chicago Duanbb895cb2021-06-18 19:37:16 +08001141 std::regex expr{"power\\d+_label$"}; // Example: power5_label
1142 for (auto& file : fs::directory_iterator(path))
1143 {
1144 if (!std::regex_search(file.path().string(), expr))
1145 {
1146 continue;
1147 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001148
Matt Spinlera26f1522021-08-25 15:50:20 -05001149 std::string labelValue;
1150 try
1151 {
1152 labelValue = readFile<std::string>(file.path());
1153 }
1154 catch (const std::system_error& e)
1155 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001156 lg2::debug(
1157 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1158 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +08001159 continue;
1160 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001161
1162 auto functionID = getPowerLabelFunctionID(labelValue);
1163 if (functionID == std::nullopt)
1164 {
1165 continue;
1166 }
1167
1168 const std::string& tempLabel = "label";
1169 const std::string filePathString = file.path().string().substr(
1170 0, file.path().string().length() - tempLabel.length());
1171
1172 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1173
1174 auto iter = powerSensorName.find(*functionID);
1175 if (iter == powerSensorName.end())
1176 {
1177 continue;
1178 }
1179 sensorPath.append(iter->second);
1180
Matt Spinlera26f1522021-08-25 15:50:20 -05001181 double tempValue{0};
1182
1183 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001184 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001185 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001186 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001187 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001188 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001189 lg2::debug(
1190 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1191 "PATH", filePathString + inputSuffix, "ERROR",
1192 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001193 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001194 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001195
Chris Cain5d66a0a2022-02-09 08:52:10 -06001196 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -06001197 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1198
Chris Cain5d66a0a2022-02-09 08:52:10 -06001199 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -05001200 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
1201
Patrick Williamsd7542c82024-08-16 15:20:28 -04001202 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1203 sensorPath, true);
Matt Spinlera26f1522021-08-25 15:50:20 -05001204
Matt Spinler5901abd2021-09-23 13:50:03 -05001205 if (existingSensors.find(sensorPath) == existingSensors.end())
1206 {
Chris Cain3523cc02024-10-30 17:19:09 -05001207 std::vector<std::string> fTypeList = {"all_sensors"};
1208 if (iter->second == "total_power")
1209 {
Chris Cainff0ce402025-01-17 10:54:55 -06001210 // Set sensor purpose as TotalPower
1211 dbus::OccDBusSensors::getOccDBus().setPurpose(
1212 sensorPath,
1213 "xyz.openbmc_project.Sensor.Purpose.SensorPurpose.TotalPower");
Chris Cain3523cc02024-10-30 17:19:09 -05001214 }
Chris Cain5d66a0a2022-02-09 08:52:10 -06001215 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001216 sensorPath, fTypeList);
Matt Spinler5901abd2021-09-23 13:50:03 -05001217 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001218 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001219 }
1220 return;
1221}
1222
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001223void Manager::readExtnSensors(const fs::path& path, uint32_t id)
1224{
1225 std::regex expr{"extn\\d+_label$"}; // Example: extn5_label
1226 for (auto& file : fs::directory_iterator(path))
1227 {
1228 if (!std::regex_search(file.path().string(), expr))
1229 {
1230 continue;
1231 }
1232
1233 // Read in Label value of the sensor from file.
1234 std::string labelValue;
1235 try
1236 {
1237 labelValue = readFile<std::string>(file.path());
1238 }
1239 catch (const std::system_error& e)
1240 {
1241 lg2::debug(
1242 "readExtnSensors:label Failed reading {PATH}, errno = {ERROR}",
1243 "PATH", file.path().string(), "ERROR", e.code().value());
1244 continue;
1245 }
1246 const std::string& tempLabel = "label";
1247 const std::string filePathString = file.path().string().substr(
1248 0, file.path().string().length() - tempLabel.length());
1249
1250 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1251
1252 // Labels of EXTN sections from OCC interface Document
1253 // have different formats.
1254 // 0x464d494e : FMIN 0x46444953 : FDIS
1255 // 0x46424153 : FBAS 0x46555400 : FUT
1256 // 0x464d4158 : FMAX 0x434c4950 : CLIP
1257 // 0x4d4f4445 : MODE 0x574f4643 : WOFC
1258 // 0x574f4649 : WOFI 0x5057524d : PWRM
1259 // 0x50575250 : PWRP 0x45525248 : ERRH
1260 // Label indicating byte 5 and 6 is the current (mem,proc) power in
1261 // Watts.
1262 if ((labelValue == EXTN_LABEL_PWRM_MEMORY_POWER) ||
1263 (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER))
1264 {
1265 // Build the dbus String for this chiplet power asset.
1266 if (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER)
1267 {
1268 labelValue = "_power";
1269 }
1270 else // else EXTN_LABEL_PWRM_MEMORY_POWER
1271 {
1272 labelValue = "_mem_power";
1273 }
1274 sensorPath.append("chiplet" + std::to_string(id) + labelValue);
1275
1276 // Read in data value of the sensor from file.
1277 // Read in as string due to different format of data in sensors.
1278 std::string extnValue;
1279 try
1280 {
1281 extnValue = readFile<std::string>(filePathString + inputSuffix);
1282 }
1283 catch (const std::system_error& e)
1284 {
1285 lg2::debug(
1286 "readExtnSensors:value Failed reading {PATH}, errno = {ERROR}",
1287 "PATH", filePathString + inputSuffix, "ERROR",
1288 e.code().value());
1289 continue;
1290 }
1291
1292 // For Power field, Convert last 4 bytes of hex string into number
1293 // value.
1294 std::stringstream ssData;
1295 ssData << std::hex << extnValue.substr(extnValue.length() - 4);
1296 uint16_t MyHexNumber;
1297 ssData >> MyHexNumber;
1298
1299 // Convert output/DC power to input/AC power in Watts (round up)
1300 MyHexNumber =
1301 std::round(((MyHexNumber / (PS_DERATING_FACTOR / 100.0))));
1302
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001303 dbus::OccDBusSensors::getOccDBus().setUnit(
1304 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1305
1306 dbus::OccDBusSensors::getOccDBus().setValue(sensorPath,
1307 MyHexNumber);
1308
1309 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1310 sensorPath, true);
1311
1312 if (existingSensors.find(sensorPath) == existingSensors.end())
1313 {
1314 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
1315 sensorPath, {"all_sensors"});
1316 }
1317
Sheldon Baileyb89d6192025-03-05 09:33:19 -06001318 existingSensors[sensorPath] = id;
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001319 } // End Extended Power Sensors.
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001320 } // End For loop on files for Extended Sensors.
1321 return;
1322}
1323
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001324void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001325{
1326 for (const auto& [sensorPath, occId] : existingSensors)
1327 {
1328 if (occId == id)
1329 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001330 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001331 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001332
Patrick Williamsd7542c82024-08-16 15:20:28 -04001333 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1334 sensorPath, true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001335 }
1336 }
1337 return;
1338}
1339
Sheldon Bailey373af752022-02-21 15:14:00 -06001340void Manager::setSensorValueToNonFunctional(uint32_t id) const
1341{
1342 for (const auto& [sensorPath, occId] : existingSensors)
1343 {
1344 if (occId == id)
1345 {
1346 dbus::OccDBusSensors::getOccDBus().setValue(
1347 sensorPath, std::numeric_limits<double>::quiet_NaN());
1348
Patrick Williamsd7542c82024-08-16 15:20:28 -04001349 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1350 sensorPath, false);
Sheldon Bailey373af752022-02-21 15:14:00 -06001351 }
1352 }
1353 return;
1354}
1355
Chris Cain5d66a0a2022-02-09 08:52:10 -06001356void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001357{
Chris Caine2d0a432022-03-28 11:08:49 -05001358 static bool tracedError[8] = {0};
1359 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -06001360 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001361
Chris Caine2d0a432022-03-28 11:08:49 -05001362 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +08001363 {
Chris Caine2d0a432022-03-28 11:08:49 -05001364 // Read temperature sensors
1365 readTempSensors(sensorPath, id);
Sheldon Baileyb89d6192025-03-05 09:33:19 -06001366 // Read Extended sensors
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001367 readExtnSensors(sensorPath, id);
Chris Caine2d0a432022-03-28 11:08:49 -05001368
1369 if (occ->isMasterOcc())
1370 {
1371 // Read power sensors
1372 readPowerSensors(sensorPath, id);
1373 }
1374 tracedError[id] = false;
1375 }
1376 else
1377 {
1378 if (!tracedError[id])
1379 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001380 lg2::error(
1381 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}",
1382 "INST", id, "PATH", sensorPath);
Chris Caine2d0a432022-03-28 11:08:49 -05001383 tracedError[id] = true;
1384 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001385 }
1386
1387 return;
1388}
Chris Cain17257672021-10-22 13:41:03 -05001389
1390// Read the altitude from DBus
1391void Manager::readAltitude()
1392{
1393 static bool traceAltitudeErr = true;
1394
1395 utils::PropertyValue altitudeProperty{};
1396 try
1397 {
1398 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1399 ALTITUDE_PROP);
1400 auto sensorVal = std::get<double>(altitudeProperty);
1401 if (sensorVal < 0xFFFF)
1402 {
1403 if (sensorVal < 0)
1404 {
1405 altitude = 0;
1406 }
1407 else
1408 {
1409 // Round to nearest meter
1410 altitude = uint16_t(sensorVal + 0.5);
1411 }
Chris Cain37abe9b2024-10-31 17:20:31 -05001412 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE",
1413 sensorVal, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001414 traceAltitudeErr = true;
1415 }
1416 else
1417 {
1418 if (traceAltitudeErr)
1419 {
1420 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001421 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal);
Chris Cain17257672021-10-22 13:41:03 -05001422 }
1423 }
1424 }
Patrick Williamsaf408082022-07-22 19:26:54 -05001425 catch (const sdbusplus::exception_t& e)
Chris Cain17257672021-10-22 13:41:03 -05001426 {
1427 if (traceAltitudeErr)
1428 {
1429 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001430 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what());
Chris Cain17257672021-10-22 13:41:03 -05001431 }
1432 altitude = 0xFFFF; // not available
1433 }
1434}
1435
1436// Callback function when ambient temperature changes
Patrick Williamsaf408082022-07-22 19:26:54 -05001437void Manager::ambientCallback(sdbusplus::message_t& msg)
Chris Cain17257672021-10-22 13:41:03 -05001438{
1439 double currentTemp = 0;
1440 uint8_t truncatedTemp = 0xFF;
1441 std::string msgSensor;
1442 std::map<std::string, std::variant<double>> msgData;
1443 msg.read(msgSensor, msgData);
1444
1445 auto valPropMap = msgData.find(AMBIENT_PROP);
1446 if (valPropMap == msgData.end())
1447 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001448 lg2::debug("ambientCallback: Unknown ambient property changed");
Chris Cain17257672021-10-22 13:41:03 -05001449 return;
1450 }
1451 currentTemp = std::get<double>(valPropMap->second);
1452 if (std::isnan(currentTemp))
1453 {
1454 truncatedTemp = 0xFF;
1455 }
1456 else
1457 {
1458 if (currentTemp < 0)
1459 {
1460 truncatedTemp = 0;
1461 }
1462 else
1463 {
1464 // Round to nearest degree C
1465 truncatedTemp = uint8_t(currentTemp + 0.5);
1466 }
1467 }
1468
1469 // If ambient changes, notify OCCs
1470 if (truncatedTemp != ambient)
1471 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001472 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C",
1473 "OLD", ambient, "NEW", currentTemp);
Chris Cain17257672021-10-22 13:41:03 -05001474
1475 ambient = truncatedTemp;
1476 if (altitude == 0xFFFF)
1477 {
1478 // No altitude yet, try reading again
1479 readAltitude();
1480 }
1481
Chris Cain37abe9b2024-10-31 17:20:31 -05001482 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m",
1483 "TEMP", ambient, "ALT", altitude);
Sheldon Bailey16a5adb2025-06-10 14:10:06 -05001484
Chris Cain17257672021-10-22 13:41:03 -05001485 // Send ambient and altitude to all OCCs
1486 for (auto& obj : statusObjects)
1487 {
1488 if (obj->occActive())
1489 {
1490 obj->sendAmbient(ambient, altitude);
1491 }
1492 }
Chris Cain17257672021-10-22 13:41:03 -05001493 }
1494}
1495
1496// return the current ambient and altitude readings
1497void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1498 uint16_t& altitudeValue) const
1499{
1500 ambientValid = true;
1501 ambientTemp = ambient;
1502 altitudeValue = altitude;
1503
1504 if (ambient == 0xFF)
1505 {
1506 ambientValid = false;
1507 }
1508}
1509
Chris Cain7f89e4d2022-05-09 13:27:45 -05001510// Called when waitForAllOccsTimer expires
1511// After the first OCC goes active, this timer will be started (60 seconds)
Chris Caina7b74dc2021-11-10 17:03:43 -06001512void Manager::occsNotAllRunning()
1513{
Chris Cainf0295f52024-09-12 15:41:14 -05001514 if (resetInProgress)
1515 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001516 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -05001517 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
1518 return;
1519 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001520 if (activeCount != statusObjects.size())
1521 {
1522 // Not all OCCs went active
Chris Cain37abe9b2024-10-31 17:20:31 -05001523 lg2::warning(
1524 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})",
1525 "COUNT", activeCount, "EXP", statusObjects.size());
Chris Cain7f89e4d2022-05-09 13:27:45 -05001526 // Procs may be garded, so may be expected
Chris Caina7b74dc2021-11-10 17:03:43 -06001527 }
1528
Chris Cainf0295f52024-09-12 15:41:14 -05001529 if (resetRequired)
1530 {
1531 initiateOccRequest(resetInstance);
1532
1533 if (!waitForAllOccsTimer->isEnabled())
1534 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001535 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -05001536 // restart occ wait timer
1537 waitForAllOccsTimer->restartOnce(60s);
1538 }
1539 }
1540 else
1541 {
1542 validateOccMaster();
1543 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001544}
Chris Cain755af102024-02-27 16:09:51 -06001545
Chris Cainc33171b2024-05-24 16:14:50 -05001546// Called when throttlePldmTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001547// If this timer expires, that indicates there are no OCC active sensor PDRs
Chris Cainc33171b2024-05-24 16:14:50 -05001548// found which will trigger pldm traces to be throttled.
1549// The second time this timer expires, a PEL will get created.
1550void Manager::throttlePldmTraceExpired()
Chris Cain755af102024-02-27 16:09:51 -06001551{
Chris Cain7651c062024-05-02 14:14:06 -05001552 if (utils::isHostRunning())
1553 {
Chris Cainc33171b2024-05-24 16:14:50 -05001554 if (!onPldmTimeoutCreatePel)
1555 {
1556 // Throttle traces
1557 pldmHandle->setTraceThrottle(true);
1558 // Restart timer to log a PEL when timer expires
1559 onPldmTimeoutCreatePel = true;
1560 throttlePldmTraceTimer->restartOnce(40min);
1561 }
1562 else
1563 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001564 lg2::error(
Chris Cainc33171b2024-05-24 16:14:50 -05001565 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1566 // Create PEL
1567 createPldmSensorPEL();
1568 }
Chris Cain7651c062024-05-02 14:14:06 -05001569 }
1570 else
1571 {
1572 // Make sure traces are not throttled
1573 pldmHandle->setTraceThrottle(false);
Chris Cain37abe9b2024-10-31 17:20:31 -05001574 lg2::info(
Chris Cainc33171b2024-05-24 16:14:50 -05001575 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
Chris Cain7651c062024-05-02 14:14:06 -05001576 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001577}
1578
1579void Manager::createPldmSensorPEL()
1580{
1581 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
1582 std::map<std::string, std::string> additionalData;
1583
1584 additionalData.emplace("_PID", std::to_string(getpid()));
1585
Chris Cain37abe9b2024-10-31 17:20:31 -05001586 lg2::info(
1587 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001588
1589 auto& bus = utils::getBus();
1590
1591 try
1592 {
1593 FFDCFiles ffdc;
1594 // Add occ-control journal traces to PEL FFDC
1595 auto occJournalFile =
1596 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);
1597
1598 static constexpr auto loggingObjectPath =
1599 "/xyz/openbmc_project/logging";
1600 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Patrick Williamsd7542c82024-08-16 15:20:28 -04001601 std::string service =
1602 utils::getService(loggingObjectPath, opLoggingInterface);
1603 auto method =
1604 bus.new_method_call(service.c_str(), loggingObjectPath,
1605 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001606
Chris Cain1c3349e2024-04-24 14:14:11 -05001607 // Set level to Warning (Predictive).
Chris Cain4b82f3e2024-04-22 14:44:29 -05001608 auto level =
1609 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
1610 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cain1c3349e2024-04-24 14:14:11 -05001611 Warning);
Chris Cain4b82f3e2024-04-22 14:44:29 -05001612
1613 method.append(d.path, level, additionalData, ffdc);
1614 bus.call(method);
1615 }
1616 catch (const sdbusplus::exception_t& e)
1617 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001618 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR",
1619 e.what());
Chris Cain4b82f3e2024-04-22 14:44:29 -05001620 }
Chris Cain755af102024-02-27 16:09:51 -06001621}
Chris Caina7b74dc2021-11-10 17:03:43 -06001622
1623// Verify single master OCC and start presence monitor
1624void Manager::validateOccMaster()
1625{
1626 int masterInstance = -1;
1627 for (auto& obj : statusObjects)
1628 {
Chris Cainbd551de2022-04-26 13:41:16 -05001629 auto instance = obj->getOccInstanceID();
Sheldon Bailey16a5adb2025-06-10 14:10:06 -05001630
Chris Cainbae4d072022-02-28 09:46:50 -06001631 if (!obj->occActive())
1632 {
1633 if (utils::isHostRunning())
1634 {
Chris Cainbd551de2022-04-26 13:41:16 -05001635 // Check if sensor was queued while waiting for discovery
1636 auto match = queuedActiveState.find(instance);
1637 if (match != queuedActiveState.end())
Chris Cainbae4d072022-02-28 09:46:50 -06001638 {
Chris Cain7f89e4d2022-05-09 13:27:45 -05001639 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -05001640 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)",
1641 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001642 obj->occActive(true);
1643 }
1644 else
1645 {
1646 // OCC does not appear to be active yet, check active sensor
1647 pldmHandle->checkActiveSensor(instance);
1648 if (obj->occActive())
1649 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001650 lg2::info(
1651 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor",
1652 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001653 }
Chris Cainbae4d072022-02-28 09:46:50 -06001654 }
1655 }
1656 else
1657 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001658 lg2::warning(
1659 "validateOccMaster: HOST is not running (OCC{INST})",
1660 "INST", instance);
Chris Cainbae4d072022-02-28 09:46:50 -06001661 return;
1662 }
1663 }
Chris Cainbae4d072022-02-28 09:46:50 -06001664
Chris Caina7b74dc2021-11-10 17:03:43 -06001665 if (obj->isMasterOcc())
1666 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001667 obj->addPresenceWatchMaster();
1668
Chris Caina7b74dc2021-11-10 17:03:43 -06001669 if (masterInstance == -1)
1670 {
Chris Cainbd551de2022-04-26 13:41:16 -05001671 masterInstance = instance;
Chris Caina7b74dc2021-11-10 17:03:43 -06001672 }
1673 else
1674 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001675 lg2::error(
1676 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})",
1677 "MAST1", masterInstance, "MAST2", instance);
Chris Caina7b74dc2021-11-10 17:03:43 -06001678 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001679 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001680 }
1681 }
1682 }
Chris Cainbae4d072022-02-28 09:46:50 -06001683
Chris Caina7b74dc2021-11-10 17:03:43 -06001684 if (masterInstance < 0)
1685 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001686 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)",
1687 "NUM", statusObjects.size());
Chris Caina7b74dc2021-11-10 17:03:43 -06001688 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001689 statusObjects.front()->deviceError(
1690 Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001691 }
1692 else
1693 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001694 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs",
1695 "INST", masterInstance, "COUNT", activeCount);
Sheldon Bailey16a5adb2025-06-10 14:10:06 -05001696
Sheldon Bailey31a2f132022-05-20 11:31:52 -05001697 pmode->updateDbusSafeMode(false);
Chris Caina7b74dc2021-11-10 17:03:43 -06001698 }
1699}
1700
Chris Cain40501a22022-03-14 17:33:27 -05001701void Manager::updatePcapBounds() const
1702{
1703 if (pcap)
1704 {
1705 pcap->updatePcapBounds();
1706 }
1707}
1708
Chris Cainc488bac2025-03-17 09:01:15 -05001709// Clean up any variables since the OCC is no longer running.
1710// Called when pldm receives an event indicating host is powered off.
1711void Manager::hostPoweredOff()
1712{
1713 if (resetRequired)
1714 {
1715 lg2::info("hostPoweredOff: Clearing resetRequired for OCC{INST}",
1716 "INST", resetInstance);
1717 resetRequired = false;
1718 }
1719 if (resetInProgress)
1720 {
1721 lg2::info("hostPoweredOff: Clearing resetInProgress for OCC{INST}",
1722 "INST", resetInstance);
1723 resetInProgress = false;
1724 }
1725 resetInstance = 255;
1726}
1727
Chris Cainffb63212025-08-01 14:39:38 -05001728void Manager::collectDumpData(sdeventplus::source::Signal&,
1729 const struct signalfd_siginfo*)
1730{
1731 json data;
1732 lg2::info("collectDumpData()");
1733 data["objectCount"] = std::to_string(statusObjects.size()) + " OCC objects";
1734 if (statusObjects.size() > 0)
1735 {
1736 try
1737 {
1738 for (auto& occ : statusObjects)
1739 {
1740 json occData;
1741 auto instance = occ->getOccInstanceID();
1742 std::string occName = "occ" + std::to_string(instance);
1743
1744 if (occ->occActive())
1745 {
1746 // OCC General Info
1747 occData["occState"] = "ACTIVE";
1748 occData["occRole"] =
1749 occ->isMasterOcc() ? "MASTER" : "SECONDARY";
1750 occData["occHwmonPath"] =
1751 occ->getHwmonPath().generic_string();
1752
1753 // OCC Poll Response
1754 std::vector<std::uint8_t> cmd = {0x00, 0x00, 0x01, 0x20};
1755 std::vector<std::uint8_t> rsp;
1756 std::vector<std::string> rspHex;
1757 rsp = passThroughObjects[instance]->send(cmd);
1758 if (rsp.size() > 5)
1759 {
1760 rsp.erase(rsp.begin(),
1761 rsp.begin() + 5); // Strip rsp header
1762 rspHex = utils::hex_dump(rsp);
1763 occData["pollResponse"] = rspHex;
1764 }
1765
1766 // Debug Data: WOF Dynamic Data
1767 cmd = {0x40, 0x00, 0x01, 0x01};
1768 rsp = passThroughObjects[instance]->send(cmd);
1769 if (rsp.size() > 5)
1770 {
1771 rsp.erase(rsp.begin(),
1772 rsp.begin() + 5); // Strip rsp header
1773 rspHex = utils::hex_dump(rsp);
1774 occData["wofDataDynamic"] = rspHex;
1775 }
1776
1777 // Debug Data: WOF Dynamic Data
1778 cmd = {0x40, 0x00, 0x01, 0x0A};
1779 rsp = passThroughObjects[instance]->send(cmd);
1780 if (rsp.size() > 5)
1781 {
1782 rsp.erase(rsp.begin(),
1783 rsp.begin() + 5); // Strip rsp header
1784 rspHex = utils::hex_dump(rsp);
1785 occData["wofDataStatic"] = rspHex;
1786 }
1787 }
1788 else
1789 {
1790 occData["occState"] = "NOT ACTIVE";
1791 }
1792
1793 data[occName] = occData;
1794 }
1795 }
1796 catch (const std::exception& e)
1797 {
1798 lg2::error("Failed to collect OCC dump data: {ERR}", "ERR",
1799 e.what());
1800 }
1801 }
1802
1803 std::ofstream file{Manager::dumpFile};
1804 if (!file)
1805 {
1806 lg2::error("Failed to open {FILE} for occ-control data", "FILE",
1807 Manager::dumpFile);
1808 return;
1809 }
1810
1811 file << std::setw(4) << data;
1812}
1813
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301814} // namespace occ
1815} // namespace open_power