blob: 0a00c2e051e35ec23047541831ad064434e5e83d [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Chris Cain4b82f3e2024-04-22 14:44:29 -05007#include "occ_errors.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05008#include "utils.hpp"
9
George Liub5ca1012021-09-10 12:53:11 +080010#include <phosphor-logging/elog-errors.hpp>
Chris Cain37abe9b2024-10-31 17:20:31 -050011#include <phosphor-logging/lg2.hpp>
George Liub5ca1012021-09-10 12:53:11 +080012#include <xyz/openbmc_project/Common/error.hpp>
13
Matt Spinlerd267cec2021-09-01 14:49:19 -050014#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080015#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080016#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060017#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080018#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050019
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// FRU type value meaning "not available" (name-derived; confirm against the
// sensor-reading code that consumes it).
constexpr uint32_t fruTypeNotAvailable{0xFF};

// File-name suffixes (presumably of the OCC hwmon sensor files -- TODO confirm
// against the sensor-reading code later in this file).
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";

// While this file exists, findAndCreateObjects() postpones OCC discovery.
const char* const HOST_ON_FILE = "/run/openbmc/host@0-on";
32
Chris Caina8857c52021-01-27 11:53:05 -060033using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060034using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060035
/**
 * @brief Read one formatted value of type T from a file.
 *
 * The value is extracted with operator>>. The stream is configured to throw
 * on failbit, badbit and eofbit, so any open or extraction problem is
 * reported as an exception.
 *
 * @tparam T - Type to extract (must support operator>> and default init)
 * @param[in] path - File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error carrying the errno of the failed operation
 *         (generic category). When the failure did not set errno (e.g. the
 *         file content could not be parsed as T) std::errc::io_error is
 *         reported instead of a zero ("Success") or stale error code.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data{};

    // Start from a clean errno so that a value left behind by an unrelated
    // earlier failure is never attributed to this read.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Capture errno immediately, before anything else can modify it
        const auto err = errno;
        if (err == 0)
        {
            // Stream-level failure (parse error / unexpected EOF) that did
            // not originate from a failing system call
            throw std::system_error(std::make_error_code(std::errc::io_error));
        }
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
58
Chris Cain720a3842025-01-09 10:23:36 -060059void Manager::createPldmHandle()
60{
61#ifdef PLDM
62 pldmHandle = std::make_unique<pldm::Interface>(
63 std::bind(std::mem_fn(&Manager::updateOCCActive), this,
64 std::placeholders::_1, std::placeholders::_2),
65 std::bind(std::mem_fn(&Manager::sbeHRESETResult), this,
66 std::placeholders::_1, std::placeholders::_2),
67 std::bind(std::mem_fn(&Manager::updateOccSafeMode), this,
68 std::placeholders::_1),
69 event);
70#endif
71}
72
Chris Cainc33171b2024-05-24 16:14:50 -050073// findAndCreateObjects():
74// Takes care of getting the required objects created and
75// finds the available devices/processors.
76// (function is called everytime the discoverTimer expires)
77// - create the PowerMode object to control OCC modes
78// - create statusObjects for each OCC device found
79// - waits for OCC Active sensors PDRs to become available
80// - restart discoverTimer if all data is not available yet
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053081void Manager::findAndCreateObjects()
82{
Matt Spinlerd267cec2021-09-01 14:49:19 -050083#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050084 for (auto id = 0; id < MAX_CPUS; ++id)
85 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060086 // Create one occ per cpu
87 auto occ = std::string(OCC_NAME) + std::to_string(id);
88 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053089 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050090#else
Chris Cain613dc902022-04-08 09:56:22 -050091 if (!pmode)
92 {
93 // Create the power mode object
94 pmode = std::make_unique<powermode::PowerMode>(
95 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
96 }
97
Chris Cain1718fd82022-02-16 16:39:50 -060098 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050099 {
Chris Cainbae4d072022-02-28 09:46:50 -0600100 static bool statusObjCreated = false;
101 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -0600102 {
Chris Cainbae4d072022-02-28 09:46:50 -0600103 // Create the OCCs based on on the /dev/occX devices
104 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -0600105
Chris Cainbae4d072022-02-28 09:46:50 -0600106 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -0600107 {
Chris Cainbae4d072022-02-28 09:46:50 -0600108 // Something changed or no OCCs yet, try again in 10s.
109 // Note on the first pass prevOCCSearch will be empty,
110 // so there will be at least one delay to give things
111 // a chance to settle.
112 prevOCCSearch = occs;
113
Chris Cain37abe9b2024-10-31 17:20:31 -0500114 lg2::info(
115 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})",
116 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600117
118 discoverTimer->restartOnce(10s);
119 }
120 else
121 {
122 // All OCCs appear to be available, create status objects
123
124 // createObjects requires OCC0 first.
125 std::sort(occs.begin(), occs.end());
126
Chris Cain37abe9b2024-10-31 17:20:31 -0500127 lg2::info(
128 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects",
129 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600130 for (auto id : occs)
131 {
132 createObjects(std::string(OCC_NAME) + std::to_string(id));
133 }
134 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500135 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500136
137 // Find/update the processor path associated with each OCC
138 for (auto& obj : statusObjects)
139 {
140 obj->updateProcAssociation();
141 }
Chris Cainbae4d072022-02-28 09:46:50 -0600142 }
143 }
144
Chris Cain6d8f37a2022-04-29 13:46:01 -0500145 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600146 {
147 static bool tracedHostWait = false;
148 if (utils::isHostRunning())
149 {
150 if (tracedHostWait)
151 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500152 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600153 "Manager::findAndCreateObjects(): Host is running");
154 tracedHostWait = false;
155 }
Chris Cainbae4d072022-02-28 09:46:50 -0600156 checkAllActiveSensors();
157 }
158 else
159 {
160 if (!tracedHostWait)
161 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500162 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600163 "Manager::findAndCreateObjects(): Waiting for host to start");
164 tracedHostWait = true;
165 }
166 discoverTimer->restartOnce(30s);
Chris Cain7651c062024-05-02 14:14:06 -0500167#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -0500168 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500169 {
170 // Host is no longer running, disable throttle timer and
171 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500172 lg2::info("findAndCreateObjects(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500173 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500174 pldmHandle->setTraceThrottle(false);
175 }
176#endif
Chris Cain1718fd82022-02-16 16:39:50 -0600177 }
178 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500179 }
180 else
181 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500182 lg2::info(
183 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...",
184 "FILE", HOST_ON_FILE);
Chris Cain1718fd82022-02-16 16:39:50 -0600185 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500186 }
187#endif
188}
189
#ifdef POWER10
// Check whether the occActive sensor of every OCC is available.
// While the host is running, each status object that is neither active nor
// has received its PLDM sensor is examined: a queued active state is applied,
// otherwise the sensor is reported as missing and the scan stops.
// When everything is available the discovery timer is stopped (and a pending
// reset is initiated); otherwise the discovery timer is restarted.
void Manager::checkAllActiveSensors()
{
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (!open_power::occ::utils::isHostRunning())
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            lg2::info("checkAllActiveSensors(): Waiting for host to start");
#ifdef PLDM
            if (throttlePldmTraceTimer->isEnabled())
            {
                // Host is no longer running, disable throttle timer and
                // make sure traces are not throttled
                lg2::info("checkAllActiveSensors(): disabling sensor timer");
                throttlePldmTraceTimer->setEnabled(false);
                pldmHandle->setTraceThrottle(false);
            }
#endif
        }
    }
    else
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            lg2::info("checkAllActiveSensors(): Host is now running");
        }

        // Start with the assumption that all are available
        allActiveSensorAvailable = true;
        for (auto& statusObj : statusObjects)
        {
            if (statusObj->occActive() || statusObj->getPldmSensorReceived())
            {
                // This OCC is already accounted for
                continue;
            }

            auto instance = statusObj->getOccInstanceID();

            // Check if sensor was queued while waiting for discovery
            auto queued = queuedActiveState.find(instance);
            if (queued != queuedActiveState.end())
            {
                queuedActiveState.erase(queued);
                lg2::info(
                    "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)",
                    "INST", instance);
                statusObj->occActive(true);
                continue;
            }

            // Sensor is still missing for this OCC
            allActiveSensorAvailable = false;
            if (!tracedSensorWait)
            {
                lg2::info(
                    "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor",
                    "INST", instance);
                tracedSensorWait = true;
#ifdef PLDM
                // Make sure PLDM traces are not throttled
                pldmHandle->setTraceThrottle(false);
                // Start timer to throttle PLDM traces when timer
                // expires
                onPldmTimeoutCreatePel = false;
                throttlePldmTraceTimer->restartOnce(5min);
#endif
            }
#ifdef PLDM
            // Ignore active sensor check if the OCCs are being reset
            if (!resetInProgress)
            {
                pldmHandle->checkActiveSensor(statusObj->getOccInstanceID());
            }
#endif
            // Stop at the first OCC whose sensor is not available
            break;
        }
    }

    if (!allActiveSensorAvailable)
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            lg2::info(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
        return;
    }

    // All sensors were found, disable the discovery timer
    if (discoverTimer->isEnabled())
    {
        discoverTimer->setEnabled(false);
    }
#ifdef PLDM
    if (throttlePldmTraceTimer->isEnabled())
    {
        // Disable throttle timer and make sure traces are not throttled
        throttlePldmTraceTimer->setEnabled(false);
        pldmHandle->setTraceThrottle(false);
    }
#endif
    if (waitingForAllOccActiveSensors)
    {
        lg2::info(
            "checkAllActiveSensors(): OCC Active sensors are available");
        waitingForAllOccActiveSensors = false;

        if (resetRequired)
        {
            initiateOccRequest(resetInstance);

            if (!waitForAllOccsTimer->isEnabled())
            {
                lg2::warning(
                    "occsNotAllRunning: Restarting waitForAllOccTimer");
                // restart occ wait timer to check status after reset
                // completes
                waitForAllOccsTimer->restartOnce(60s);
            }
        }
    }
    queuedActiveState.clear();
    tracedSensorWait = false;
}
#endif
323
Matt Spinlerd267cec2021-09-01 14:49:19 -0500324std::vector<int> Manager::findOCCsInDev()
325{
326 std::vector<int> occs;
327 std::regex expr{R"(occ(\d+)$)"};
328
329 for (auto& file : fs::directory_iterator("/dev"))
330 {
331 std::smatch match;
332 std::string path{file.path().string()};
333 if (std::regex_search(path, match, expr))
334 {
335 auto num = std::stoi(match[1].str());
336
337 // /dev numbering starts at 1, ours starts at 0.
338 occs.push_back(num - 1);
339 }
340 }
341
342 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530343}
344
Patrick Williamsaf408082022-07-22 19:26:54 -0500345int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530346{
George Liubcef3b42021-09-10 12:39:02 +0800347 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530348
349 sdbusplus::message::object_path o;
350 msg.read(o);
351 fs::path cpuPath(std::string(std::move(o)));
352
353 auto name = cpuPath.filename().string();
354 auto index = name.find(CPU_NAME);
355 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
356
357 createObjects(name);
358
359 return 0;
360}
361
362void Manager::createObjects(const std::string& occ)
363{
364 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
365
Gunnar Mills94df8c92018-09-14 14:50:03 -0500366 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800367 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600368#ifdef POWER10
369 pmode,
370#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500371 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600372 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530373#ifdef PLDM
374 ,
Chris Cainf0295f52024-09-12 15:41:14 -0500375 // Callback will set flag indicating reset needs to be done
376 // instead of immediately issuing a reset via PLDM.
377 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
Tom Joseph00325232020-07-29 17:51:48 +0530378 std::placeholders::_1)
379#endif
380 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530381
Chris Cain40501a22022-03-14 17:33:27 -0500382 // Create the power cap monitor object
383 if (!pcap)
384 {
385 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
386 *statusObjects.back());
387 }
388
Chris Cain36f9cde2021-11-22 11:18:21 -0600389 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530390 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500391 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST",
392 statusObjects.back()->getOccInstanceID());
Chris Cain36f9cde2021-11-22 11:18:21 -0600393 _pollTimer->setEnabled(false);
394
Chris Cain78e86012021-03-04 16:15:31 -0600395#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600396 // Set the master OCC on the PowerMode object
397 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600398#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600399 }
400
Patrick Williamsd7542c82024-08-16 15:20:28 -0400401 passThroughObjects.emplace_back(std::make_unique<PassThrough>(
402 path.c_str()
Chris Cain36f9cde2021-11-22 11:18:21 -0600403#ifdef POWER10
Patrick Williamsd7542c82024-08-16 15:20:28 -0400404 ,
405 pmode
Chris Cain36f9cde2021-11-22 11:18:21 -0600406#endif
Patrick Williamsd7542c82024-08-16 15:20:28 -0400407 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530408}
409
Chris Cainf0295f52024-09-12 15:41:14 -0500410// If a reset is not already outstanding, set a flag to indicate that a reset is
411// needed.
412void Manager::resetOccRequest(instanceID instance)
413{
414 if (!resetRequired)
415 {
416 resetRequired = true;
417 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500418 lg2::error(
419 "resetOccRequest: PM Complex reset was requested due to OCC{INST}",
420 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500421 }
422 else if (instance != resetInstance)
423 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500424 lg2::warning(
425 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}",
426 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500427 }
428}
429
430// If a reset has not been started, initiate an OCC reset via PLDM
431void Manager::initiateOccRequest(instanceID instance)
432{
433 if (!resetInProgress)
434 {
435 resetInProgress = true;
436 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500437 lg2::error(
438 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}",
439 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500440#ifdef PLDM
441 pldmHandle->resetOCC(instance);
442#endif
443 resetRequired = false;
444 }
445 else
446 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500447 lg2::warning(
448 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}",
449 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500450 }
451}
452
Sheldon Bailey373af752022-02-21 15:14:00 -0600453void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530454{
Chris Caina7b74dc2021-11-10 17:03:43 -0600455 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600456 {
Chris Cainf0295f52024-09-12 15:41:14 -0500457 if (resetInProgress)
458 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500459 lg2::info(
Chris Cain92dfb272025-02-13 12:20:27 -0600460 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{RINST}",
Chris Cain37abe9b2024-10-31 17:20:31 -0500461 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500462 return;
463 }
464
Chris Caina7b74dc2021-11-10 17:03:43 -0600465 // OCC went active
466 ++activeCount;
467
468#ifdef POWER10
469 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600470 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600471 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500472 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500473 }
474#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600475
476 if (activeCount == statusObjects.size())
477 {
478#ifdef POWER10
479 // All OCCs are now running
480 if (waitForAllOccsTimer->isEnabled())
481 {
482 // stop occ wait timer
483 waitForAllOccsTimer->setEnabled(false);
484 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600485
Chris Cainf0295f52024-09-12 15:41:14 -0500486 // All OCCs have been found, check if we need a reset
487 if (resetRequired)
488 {
489 initiateOccRequest(resetInstance);
490
491 if (!waitForAllOccsTimer->isEnabled())
492 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500493 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500494 "occsNotAllRunning: Restarting waitForAllOccTimer");
495 // restart occ wait timer
496 waitForAllOccsTimer->restartOnce(60s);
497 }
498 }
499 else
500 {
501 // Verify master OCC and start presence monitor
502 validateOccMaster();
503 }
504#else
Chris Caina7b74dc2021-11-10 17:03:43 -0600505 // Verify master OCC and start presence monitor
506 validateOccMaster();
Chris Cainf0295f52024-09-12 15:41:14 -0500507#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600508 }
509
510 // Start poll timer if not already started
511 if (!_pollTimer->isEnabled())
512 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500513 lg2::info("Manager: OCCs will be polled every {TIME} seconds",
514 "TIME", pollInterval);
Chris Caina7b74dc2021-11-10 17:03:43 -0600515
516 // Send poll and start OCC poll timer
517 pollerTimerExpired();
518 }
519 }
520 else
521 {
522 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500523 if (activeCount > 0)
524 {
525 --activeCount;
526 }
527 else
528 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500529 lg2::info("OCC{INST} disabled, but currently no active OCCs",
530 "INST", instance);
Chris Cain082a6ca2023-03-21 10:27:26 -0500531 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600532
533 if (activeCount == 0)
534 {
535 // No OCCs are running
536
Chris Cainf0295f52024-09-12 15:41:14 -0500537 if (resetInProgress)
538 {
539 // All OCC active sensors are clear (reset should be in
540 // progress)
Chris Cain37abe9b2024-10-31 17:20:31 -0500541 lg2::info(
542 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
543 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500544 resetInProgress = false;
545 resetInstance = 255;
546 }
547
Chris Caina7b74dc2021-11-10 17:03:43 -0600548 // Stop OCC poll timer
549 if (_pollTimer->isEnabled())
550 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500551 lg2::info(
Chris Caina7b74dc2021-11-10 17:03:43 -0600552 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
553 _pollTimer->setEnabled(false);
554 }
555
556#ifdef POWER10
557 // stop wait timer
558 if (waitForAllOccsTimer->isEnabled())
559 {
560 waitForAllOccsTimer->setEnabled(false);
561 }
562#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600563 }
Chris Cainf0295f52024-09-12 15:41:14 -0500564 else if (resetInProgress)
565 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500566 lg2::info(
567 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
568 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500569 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600570#ifdef READ_OCC_SENSORS
571 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500572 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600573#endif
Chris Caina8857c52021-01-27 11:53:05 -0600574 }
Chris Cainbae4d072022-02-28 09:46:50 -0600575
576#ifdef POWER10
577 if (waitingForAllOccActiveSensors)
578 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500579 if (utils::isHostRunning())
580 {
581 checkAllActiveSensors();
582 }
Chris Cainbae4d072022-02-28 09:46:50 -0600583 }
584#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530585}
586
#ifdef I2C_OCC
// Create a Status object for every OCC hwmon device found under DEV_PATH,
// then create the power cap monitor (and, on POWER10, the PowerMode object)
// for the first device, which is treated as the master OCC.
// Nothing is created when no device is found.
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    if (deviceNames.empty())
    {
        // Without any device there is no master OCC: statusObjects.front()
        // below would be undefined behavior.
        lg2::error(
            "Manager::initStatusObjects(): No OCC hwmon devices found in {PATH}",
            "PATH", DEV_PATH);
        return;
    }

    // D-Bus path of the first device; it has to outlive the loop because the
    // PowerMode object needs it afterwards.
    [[maybe_unused]] fs::path masterPath;

    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        if (masterPath.empty())
        {
            masterPath = path;
        }
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }
    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front());
#ifdef POWER10
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    // Set the master OCC on the PowerMode object
    pmode->setMasterOcc(masterPath);
#endif
}
#endif
613
Tom Joseph815f9f52020-07-27 12:12:13 +0530614#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500615void Manager::sbeTimeout(unsigned int instance)
616{
Eddie James2a751d72022-03-04 09:16:12 -0600617 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
618 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400619 return instance == obj->getOccInstanceID();
620 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500621
Eddie Jamescb018da2022-03-05 11:49:37 -0600622 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600623 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500624 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST",
625 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500626
Chris Cain720a3842025-01-09 10:23:36 -0600627#ifdef PHAL_SUPPORT
Eddie James2a751d72022-03-04 09:16:12 -0600628 setSBEState(instance, SBE_STATE_NOT_USABLE);
Chris Cain720a3842025-01-09 10:23:36 -0600629#endif
Eddie James2a751d72022-03-04 09:16:12 -0600630
Chris Cain92dfb272025-02-13 12:20:27 -0600631 // Stop communication with this OCC
632 (*obj)->occActive(false);
633
Eddie James2a751d72022-03-04 09:16:12 -0600634 pldmHandle->sendHRESET(instance);
635 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500636}
637
Tom Joseph815f9f52020-07-27 12:12:13 +0530638bool Manager::updateOCCActive(instanceID instance, bool status)
639{
Chris Cain7e374fb2022-04-07 09:47:23 -0500640 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
641 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400642 return instance == obj->getOccInstanceID();
643 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500644
Chris Cain082a6ca2023-03-21 10:27:26 -0500645 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500646 if (obj != statusObjects.end())
647 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500648 if (!hostRunning && (status == true))
649 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500650 lg2::warning(
651 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received",
652 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500653 (*obj)->setPldmSensorReceived(false);
654 if (!waitingForAllOccActiveSensors)
655 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500656 lg2::info(
Chris Cain082a6ca2023-03-21 10:27:26 -0500657 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
658 waitingForAllOccActiveSensors = true;
659 }
Chris Cain755af102024-02-27 16:09:51 -0600660#ifdef POWER10
Chris Cain082a6ca2023-03-21 10:27:26 -0500661 discoverTimer->restartOnce(30s);
Chris Cain755af102024-02-27 16:09:51 -0600662#endif
Chris Cain082a6ca2023-03-21 10:27:26 -0500663 return false;
664 }
665 else
666 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500667 (*obj)->setPldmSensorReceived(true);
668 return (*obj)->occActive(status);
669 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500670 }
671 else
672 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500673 if (hostRunning)
674 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500675 lg2::warning(
676 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})",
677 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500678 }
679 else
680 {
681 if (status == true)
682 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500683 lg2::warning(
684 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})",
685 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500686 }
687 }
Chris Cainbd551de2022-04-26 13:41:16 -0500688 if (status == true)
689 {
690 // OCC went active
691 queuedActiveState.insert(instance);
692 }
693 else
694 {
695 auto match = queuedActiveState.find(instance);
696 if (match != queuedActiveState.end())
697 {
698 // OCC was disabled
699 queuedActiveState.erase(match);
700 }
701 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500702 return false;
703 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530704}
Eddie Jamescbad2192021-10-07 09:39:39 -0500705
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500706// Called upon pldm event To set powermode Safe Mode State for system.
707void Manager::updateOccSafeMode(bool safeMode)
708{
709#ifdef POWER10
710 pmode->updateDbusSafeMode(safeMode);
711#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500712 // Update the processor throttle status on dbus
713 for (auto& obj : statusObjects)
714 {
715 obj->updateThrottle(safeMode, THROTTLED_SAFE);
716 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500717}
718
Eddie Jamescbad2192021-10-07 09:39:39 -0500719void Manager::sbeHRESETResult(instanceID instance, bool success)
720{
721 if (success)
722 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500723 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500724
Chris Cain720a3842025-01-09 10:23:36 -0600725#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500726 setSBEState(instance, SBE_STATE_BOOTED);
Chris Cain720a3842025-01-09 10:23:36 -0600727#endif
Eddie Jamescbad2192021-10-07 09:39:39 -0500728
Chris Cain92dfb272025-02-13 12:20:27 -0600729 // Re-enable communication with this OCC
730 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
731 [instance](const auto& obj) {
732 return instance == obj->getOccInstanceID();
733 });
734 if (obj != statusObjects.end() && (!(*obj)->occActive()))
735 {
736 (*obj)->occActive(true);
737 }
738
Eddie Jamescbad2192021-10-07 09:39:39 -0500739 return;
740 }
741
Chris Cain720a3842025-01-09 10:23:36 -0600742#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500743 setSBEState(instance, SBE_STATE_FAILED);
744
745 if (sbeCanDump(instance))
746 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500747 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST",
748 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500749
750 auto& bus = utils::getBus();
751 uint32_t src6 = instance << 16;
752 uint32_t logId =
753 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
754 src6, "SBE command timeout");
755
756 try
757 {
George Liuf3a4a692021-12-28 13:59:51 +0800758 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
759 constexpr auto function = "CreateDump";
760
Patrick Williamsd7542c82024-08-16 15:20:28 -0400761 std::string service =
762 utils::getService(OP_DUMP_OBJ_PATH, interface);
Dhruvaraj Subhashchandran1173b2b2024-06-01 11:12:13 -0500763 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
764 interface, function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500765
766 std::map<std::string, std::variant<std::string, uint64_t>>
767 createParams{
768 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
769 uint64_t(logId)},
770 {"com.ibm.Dump.Create.CreateParameters.DumpType",
771 "com.ibm.Dump.Create.DumpType.SBE"},
772 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
773 uint64_t(instance)},
774 };
775
776 method.append(createParams);
777
778 auto response = bus.call(method);
779 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500780 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500781 {
782 constexpr auto ERROR_DUMP_DISABLED =
783 "xyz.openbmc_project.Dump.Create.Error.Disabled";
784 if (e.name() == ERROR_DUMP_DISABLED)
785 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500786 lg2::info("Dump is disabled, skipping");
Eddie Jamescbad2192021-10-07 09:39:39 -0500787 }
788 else
789 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500790 lg2::error("Dump failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500791 }
792 }
793 }
Chris Cain720a3842025-01-09 10:23:36 -0600794#endif
Chris Cainf0295f52024-09-12 15:41:14 -0500795
796 // SBE Reset failed, try PM Complex reset
Chris Cain37abe9b2024-10-31 17:20:31 -0500797 lg2::error("sbeHRESETResult: Forcing PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500798 resetOccRequest(instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500799}
800
Chris Cain720a3842025-01-09 10:23:36 -0600801#ifdef PHAL_SUPPORT
Eddie Jamescbad2192021-10-07 09:39:39 -0500802bool Manager::sbeCanDump(unsigned int instance)
803{
804 struct pdbg_target* proc = getPdbgTarget(instance);
805
806 if (!proc)
807 {
808 // allow the dump in the error case
809 return true;
810 }
811
812 try
813 {
814 if (!openpower::phal::sbe::isDumpAllowed(proc))
815 {
816 return false;
817 }
818
819 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
820 {
821 return false;
822 }
823 }
824 catch (openpower::phal::exception::SbeError& e)
825 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500826 lg2::info("Failed to query SBE state");
Eddie Jamescbad2192021-10-07 09:39:39 -0500827 }
828
829 // allow the dump in the error case
830 return true;
831}
832
833void Manager::setSBEState(unsigned int instance, enum sbe_state state)
834{
835 struct pdbg_target* proc = getPdbgTarget(instance);
836
837 if (!proc)
838 {
839 return;
840 }
841
842 try
843 {
844 openpower::phal::sbe::setState(proc, state);
845 }
846 catch (const openpower::phal::exception::SbeError& e)
847 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500848 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what());
Eddie Jamescbad2192021-10-07 09:39:39 -0500849 }
850}
851
852struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
853{
854 if (!pdbgInitialized)
855 {
856 try
857 {
858 openpower::phal::pdbg::init();
859 pdbgInitialized = true;
860 }
861 catch (const openpower::phal::exception::PdbgError& e)
862 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500863 lg2::error("pdbg initialization failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500864 return nullptr;
865 }
866 }
867
868 struct pdbg_target* proc = nullptr;
869 pdbg_for_each_class_target("proc", proc)
870 {
871 if (pdbg_target_index(proc) == instance)
872 {
873 return proc;
874 }
875 }
876
Chris Cain37abe9b2024-10-31 17:20:31 -0500877 lg2::error("Failed to get pdbg target");
Eddie Jamescbad2192021-10-07 09:39:39 -0500878 return nullptr;
879}
Tom Joseph815f9f52020-07-27 12:12:13 +0530880#endif
Chris Cain720a3842025-01-09 10:23:36 -0600881#endif
Tom Joseph815f9f52020-07-27 12:12:13 +0530882
Chris Caina8857c52021-01-27 11:53:05 -0600883void Manager::pollerTimerExpired()
884{
Chris Caina8857c52021-01-27 11:53:05 -0600885 if (!_pollTimer)
886 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500887 lg2::error("pollerTimerExpired() ERROR: Timer not defined");
Chris Caina8857c52021-01-27 11:53:05 -0600888 return;
889 }
890
Chris Cainf0295f52024-09-12 15:41:14 -0500891#ifdef POWER10
892 if (resetRequired)
893 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500894 lg2::error("pollerTimerExpired() - Initiating PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500895 initiateOccRequest(resetInstance);
896
897 if (!waitForAllOccsTimer->isEnabled())
898 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500899 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -0500900 // restart occ wait timer
901 waitForAllOccsTimer->restartOnce(60s);
902 }
903 return;
904 }
905#endif
906
Chris Caina8857c52021-01-27 11:53:05 -0600907 for (auto& obj : statusObjects)
908 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600909 if (!obj->occActive())
910 {
911 // OCC is not running yet
912#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600913 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500914 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600915#endif
916 continue;
917 }
918
Chris Caina8857c52021-01-27 11:53:05 -0600919 // Read sysfs to force kernel to poll OCC
920 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800921
922#ifdef READ_OCC_SENSORS
923 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600924 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800925#endif
Chris Caina8857c52021-01-27 11:53:05 -0600926 }
927
Chris Caina7b74dc2021-11-10 17:03:43 -0600928 if (activeCount > 0)
929 {
930 // Restart OCC poll timer
931 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
932 }
933 else
934 {
935 // No OCCs running, so poll timer will not be restarted
Chris Cain37abe9b2024-10-31 17:20:31 -0500936 lg2::info(
937 "Manager::pollerTimerExpired: poll timer will not be restarted");
Chris Caina7b74dc2021-11-10 17:03:43 -0600938 }
Chris Caina8857c52021-01-27 11:53:05 -0600939}
940
Chicago Duanbb895cb2021-06-18 19:37:16 +0800941#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600942void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800943{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500944 // There may be more than one sensor with the same FRU type
945 // and label so make two passes: the first to read the temps
946 // from sysfs, and the second to put them on D-Bus after
947 // resolving any conflicts.
948 std::map<std::string, double> sensorData;
949
Chicago Duanbb895cb2021-06-18 19:37:16 +0800950 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
951 for (auto& file : fs::directory_iterator(path))
952 {
953 if (!std::regex_search(file.path().string(), expr))
954 {
955 continue;
956 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800957
Matt Spinlera26f1522021-08-25 15:50:20 -0500958 uint32_t labelValue{0};
959
960 try
961 {
962 labelValue = readFile<uint32_t>(file.path());
963 }
964 catch (const std::system_error& e)
965 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500966 lg2::debug(
967 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
968 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800969 continue;
970 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800971
972 const std::string& tempLabel = "label";
973 const std::string filePathString = file.path().string().substr(
974 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500975
976 uint32_t fruTypeValue{0};
977 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800978 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500979 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
980 }
981 catch (const std::system_error& e)
982 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500983 lg2::debug(
984 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
985 "PATH", filePathString + fruTypeSuffix, "ERROR",
986 e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800987 continue;
988 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800989
Patrick Williamsd7542c82024-08-16 15:20:28 -0400990 std::string sensorPath =
991 OCC_SENSORS_ROOT + std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800992
Matt Spinlerace67d82021-10-18 13:41:57 -0500993 std::string dvfsTempPath;
994
Chicago Duanbb895cb2021-06-18 19:37:16 +0800995 if (fruTypeValue == VRMVdd)
996 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400997 sensorPath.append(
998 "vrm_vdd" + std::to_string(occInstance) + "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800999 }
Matt Spinlerace67d82021-10-18 13:41:57 -05001000 else if (fruTypeValue == processorIoRing)
1001 {
Patrick Williamsd7542c82024-08-16 15:20:28 -04001002 sensorPath.append(
1003 "proc" + std::to_string(occInstance) + "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -05001004 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -06001005 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -05001006 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001007 else
1008 {
Matt Spinler14d14022021-08-25 15:38:29 -05001009 uint16_t type = (labelValue & 0xFF000000) >> 24;
1010 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001011
1012 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
1013 {
Matt Spinler8b8abee2021-08-25 15:18:21 -05001014 if (fruTypeValue == fruTypeNotAvailable)
1015 {
1016 // Not all DIMM related temps are available to read
1017 // (no _input file in this case)
1018 continue;
1019 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001020 auto iter = dimmTempSensorName.find(fruTypeValue);
1021 if (iter == dimmTempSensorName.end())
1022 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001023 lg2::error(
1024 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ",
1025 "FRU", fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001026 continue;
1027 }
1028
Patrick Williamsd7542c82024-08-16 15:20:28 -04001029 sensorPath.append(
1030 "dimm" + std::to_string(instanceID) + iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -05001031
1032 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
1033 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001034 }
1035 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
1036 {
Matt Spinlerace67d82021-10-18 13:41:57 -05001037 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001038 {
Matt Spinlerace67d82021-10-18 13:41:57 -05001039 // The OCC reports small core temps, of which there are
1040 // two per big core. All current P10 systems are in big
1041 // core mode, so use a big core name.
1042 uint16_t coreNum = instanceID / 2;
1043 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -06001044 sensorPath.append("proc" + std::to_string(occInstance) +
1045 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -05001046 std::to_string(tempNum) + "_temp");
1047
Chris Cainae157b62024-01-23 16:05:12 -06001048 dvfsTempPath =
1049 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
1050 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -05001051 }
1052 else
1053 {
Chicago Duanbb895cb2021-06-18 19:37:16 +08001054 continue;
1055 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001056 }
1057 else
1058 {
1059 continue;
1060 }
1061 }
1062
Matt Spinlerace67d82021-10-18 13:41:57 -05001063 // The dvfs temp file only needs to be read once per chip per type.
1064 if (!dvfsTempPath.empty() &&
1065 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
1066 {
1067 try
1068 {
1069 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
1070
1071 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
1072 dvfsTempPath, dvfsValue * std::pow(10, -3));
1073 }
1074 catch (const std::system_error& e)
1075 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001076 lg2::debug(
1077 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1078 "PATH", filePathString + maxSuffix, "ERROR",
1079 e.code().value());
Matt Spinlerace67d82021-10-18 13:41:57 -05001080 }
1081 }
1082
Matt Spinlera26f1522021-08-25 15:50:20 -05001083 uint32_t faultValue{0};
1084 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001085 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001086 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
1087 }
1088 catch (const std::system_error& e)
1089 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001090 lg2::debug(
1091 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1092 "PATH", filePathString + faultSuffix, "ERROR",
1093 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001094 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001095 }
1096
Chris Cainae157b62024-01-23 16:05:12 -06001097 double tempValue{0};
1098 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -05001099 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001100 {
Chris Cainae157b62024-01-23 16:05:12 -06001101 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001102 }
Chris Cainae157b62024-01-23 16:05:12 -06001103 else
Chicago Duanbb895cb2021-06-18 19:37:16 +08001104 {
Chris Cainae157b62024-01-23 16:05:12 -06001105 // Read the temperature
1106 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001107 {
Chris Cainae157b62024-01-23 16:05:12 -06001108 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001109 }
Chris Cainae157b62024-01-23 16:05:12 -06001110 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001111 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001112 lg2::debug(
1113 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1114 "PATH", filePathString + inputSuffix, "ERROR",
1115 e.code().value());
Chris Cainae157b62024-01-23 16:05:12 -06001116
1117 // if errno == EAGAIN(Resource temporarily unavailable) then set
1118 // temp to 0, to avoid using old temp, and affecting FAN
1119 // Control.
1120 if (e.code().value() == EAGAIN)
1121 {
1122 tempValue = 0;
1123 }
1124 // else the errno would be something like
1125 // EBADF(Bad file descriptor)
1126 // or ENOENT(No such file or directory)
1127 else
1128 {
1129 continue;
1130 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001131 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001132 }
1133
Matt Spinler818cc8d2023-10-23 11:43:39 -05001134 // If this object path already has a value, only overwite
1135 // it if the previous one was an NaN or a smaller value.
1136 auto existing = sensorData.find(sensorPath);
1137 if (existing != sensorData.end())
1138 {
Chris Cainae157b62024-01-23 16:05:12 -06001139 // Multiple sensors found for this FRU type
1140 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1141 ((existing->second == 0) && std::isnan(tempValue)))
1142 {
1143 // One of the redundant sensors has failed (0xFF/nan), and the
1144 // other sensor has no reading (0), so set the FRU to NaN to
1145 // force fan increase
1146 tempValue = std::numeric_limits<double>::quiet_NaN();
1147 existing->second = tempValue;
1148 }
Matt Spinler818cc8d2023-10-23 11:43:39 -05001149 if (std::isnan(existing->second) || (tempValue > existing->second))
1150 {
1151 existing->second = tempValue;
1152 }
1153 }
1154 else
1155 {
Chris Cainae157b62024-01-23 16:05:12 -06001156 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001157 sensorData[sensorPath] = tempValue;
1158 }
1159 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001160
Matt Spinler818cc8d2023-10-23 11:43:39 -05001161 // Now publish the values on D-Bus.
1162 for (const auto& [objectPath, value] : sensorData)
1163 {
1164 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1165 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001166
Matt Spinler818cc8d2023-10-23 11:43:39 -05001167 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1168 objectPath, !std::isnan(value));
1169
1170 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001171 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001172 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001173 objectPath, {"all_sensors"});
Chris Cain6fa848a2022-01-24 14:54:38 -06001174 }
1175
Chris Cainae157b62024-01-23 16:05:12 -06001176 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001177 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001178}
1179
Patrick Williams2d6ec902025-02-01 08:22:13 -05001180std::optional<std::string> Manager::getPowerLabelFunctionID(
1181 const std::string& value)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001182{
1183 // If the value is "system", then the FunctionID is "system".
1184 if (value == "system")
1185 {
1186 return value;
1187 }
1188
1189 // If the value is not "system", then the label value have 3 numbers, of
1190 // which we only care about the middle one:
1191 // <sensor id>_<function id>_<apss channel>
1192 // eg: The value is "0_10_5" , then the FunctionID is "10".
1193 if (value.find("_") == std::string::npos)
1194 {
1195 return std::nullopt;
1196 }
1197
1198 auto powerLabelValue = value.substr((value.find("_") + 1));
1199
1200 if (powerLabelValue.find("_") == std::string::npos)
1201 {
1202 return std::nullopt;
1203 }
1204
1205 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1206}
1207
1208void Manager::readPowerSensors(const fs::path& path, uint32_t id)
1209{
Chicago Duanbb895cb2021-06-18 19:37:16 +08001210 std::regex expr{"power\\d+_label$"}; // Example: power5_label
1211 for (auto& file : fs::directory_iterator(path))
1212 {
1213 if (!std::regex_search(file.path().string(), expr))
1214 {
1215 continue;
1216 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001217
Matt Spinlera26f1522021-08-25 15:50:20 -05001218 std::string labelValue;
1219 try
1220 {
1221 labelValue = readFile<std::string>(file.path());
1222 }
1223 catch (const std::system_error& e)
1224 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001225 lg2::debug(
1226 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1227 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +08001228 continue;
1229 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001230
1231 auto functionID = getPowerLabelFunctionID(labelValue);
1232 if (functionID == std::nullopt)
1233 {
1234 continue;
1235 }
1236
1237 const std::string& tempLabel = "label";
1238 const std::string filePathString = file.path().string().substr(
1239 0, file.path().string().length() - tempLabel.length());
1240
1241 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1242
1243 auto iter = powerSensorName.find(*functionID);
1244 if (iter == powerSensorName.end())
1245 {
1246 continue;
1247 }
1248 sensorPath.append(iter->second);
1249
Matt Spinlera26f1522021-08-25 15:50:20 -05001250 double tempValue{0};
1251
1252 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001253 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001254 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001255 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001256 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001257 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001258 lg2::debug(
1259 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1260 "PATH", filePathString + inputSuffix, "ERROR",
1261 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001262 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001263 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001264
Chris Cain5d66a0a2022-02-09 08:52:10 -06001265 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -06001266 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1267
Chris Cain5d66a0a2022-02-09 08:52:10 -06001268 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -05001269 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
1270
Patrick Williamsd7542c82024-08-16 15:20:28 -04001271 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1272 sensorPath, true);
Matt Spinlera26f1522021-08-25 15:50:20 -05001273
Matt Spinler5901abd2021-09-23 13:50:03 -05001274 if (existingSensors.find(sensorPath) == existingSensors.end())
1275 {
Chris Cain3523cc02024-10-30 17:19:09 -05001276 std::vector<std::string> fTypeList = {"all_sensors"};
1277 if (iter->second == "total_power")
1278 {
Chris Cainff0ce402025-01-17 10:54:55 -06001279 // Set sensor purpose as TotalPower
1280 dbus::OccDBusSensors::getOccDBus().setPurpose(
1281 sensorPath,
1282 "xyz.openbmc_project.Sensor.Purpose.SensorPurpose.TotalPower");
Chris Cain3523cc02024-10-30 17:19:09 -05001283 }
Chris Cain5d66a0a2022-02-09 08:52:10 -06001284 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001285 sensorPath, fTypeList);
Matt Spinler5901abd2021-09-23 13:50:03 -05001286 }
1287
Matt Spinlera26f1522021-08-25 15:50:20 -05001288 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001289 }
1290 return;
1291}
1292
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001293void Manager::readExtnSensors(const fs::path& path, uint32_t id)
1294{
1295 std::regex expr{"extn\\d+_label$"}; // Example: extn5_label
1296 for (auto& file : fs::directory_iterator(path))
1297 {
1298 if (!std::regex_search(file.path().string(), expr))
1299 {
1300 continue;
1301 }
1302
1303 // Read in Label value of the sensor from file.
1304 std::string labelValue;
1305 try
1306 {
1307 labelValue = readFile<std::string>(file.path());
1308 }
1309 catch (const std::system_error& e)
1310 {
1311 lg2::debug(
1312 "readExtnSensors:label Failed reading {PATH}, errno = {ERROR}",
1313 "PATH", file.path().string(), "ERROR", e.code().value());
1314 continue;
1315 }
1316 const std::string& tempLabel = "label";
1317 const std::string filePathString = file.path().string().substr(
1318 0, file.path().string().length() - tempLabel.length());
1319
1320 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1321
1322 // Labels of EXTN sections from OCC interface Document
1323 // have different formats.
1324 // 0x464d494e : FMIN 0x46444953 : FDIS
1325 // 0x46424153 : FBAS 0x46555400 : FUT
1326 // 0x464d4158 : FMAX 0x434c4950 : CLIP
1327 // 0x4d4f4445 : MODE 0x574f4643 : WOFC
1328 // 0x574f4649 : WOFI 0x5057524d : PWRM
1329 // 0x50575250 : PWRP 0x45525248 : ERRH
1330 // Label indicating byte 5 and 6 is the current (mem,proc) power in
1331 // Watts.
1332 if ((labelValue == EXTN_LABEL_PWRM_MEMORY_POWER) ||
1333 (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER))
1334 {
1335 // Build the dbus String for this chiplet power asset.
1336 if (labelValue == EXTN_LABEL_PWRP_PROCESSOR_POWER)
1337 {
1338 labelValue = "_power";
1339 }
1340 else // else EXTN_LABEL_PWRM_MEMORY_POWER
1341 {
1342 labelValue = "_mem_power";
1343 }
1344 sensorPath.append("chiplet" + std::to_string(id) + labelValue);
1345
1346 // Read in data value of the sensor from file.
1347 // Read in as string due to different format of data in sensors.
1348 std::string extnValue;
1349 try
1350 {
1351 extnValue = readFile<std::string>(filePathString + inputSuffix);
1352 }
1353 catch (const std::system_error& e)
1354 {
1355 lg2::debug(
1356 "readExtnSensors:value Failed reading {PATH}, errno = {ERROR}",
1357 "PATH", filePathString + inputSuffix, "ERROR",
1358 e.code().value());
1359 continue;
1360 }
1361
1362 // For Power field, Convert last 4 bytes of hex string into number
1363 // value.
1364 std::stringstream ssData;
1365 ssData << std::hex << extnValue.substr(extnValue.length() - 4);
1366 uint16_t MyHexNumber;
1367 ssData >> MyHexNumber;
1368
1369 // Convert output/DC power to input/AC power in Watts (round up)
1370 MyHexNumber =
1371 std::round(((MyHexNumber / (PS_DERATING_FACTOR / 100.0))));
1372
1373 lg2::debug("OCC{ID}: FILE:{FILE} -- {ACWATTS} AC Watts", "ID", id,
1374 "FILE", filePathString + inputSuffix, "ACWATTS",
1375 MyHexNumber);
1376
1377 dbus::OccDBusSensors::getOccDBus().setUnit(
1378 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1379
1380 dbus::OccDBusSensors::getOccDBus().setValue(sensorPath,
1381 MyHexNumber);
1382
1383 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1384 sensorPath, true);
1385
1386 if (existingSensors.find(sensorPath) == existingSensors.end())
1387 {
1388 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
1389 sensorPath, {"all_sensors"});
1390 }
1391
1392 } // End Extended Power Sensors.
1393 // else put in other label formats here to dbus.
1394
1395 existingSensors[sensorPath] = id;
1396
1397 } // End For loop on files for Extended Sensors.
1398 return;
1399}
1400
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001401void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001402{
1403 for (const auto& [sensorPath, occId] : existingSensors)
1404 {
1405 if (occId == id)
1406 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001407 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001408 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001409
Patrick Williamsd7542c82024-08-16 15:20:28 -04001410 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1411 sensorPath, true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001412 }
1413 }
1414 return;
1415}
1416
Sheldon Bailey373af752022-02-21 15:14:00 -06001417void Manager::setSensorValueToNonFunctional(uint32_t id) const
1418{
1419 for (const auto& [sensorPath, occId] : existingSensors)
1420 {
1421 if (occId == id)
1422 {
1423 dbus::OccDBusSensors::getOccDBus().setValue(
1424 sensorPath, std::numeric_limits<double>::quiet_NaN());
1425
Patrick Williamsd7542c82024-08-16 15:20:28 -04001426 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1427 sensorPath, false);
Sheldon Bailey373af752022-02-21 15:14:00 -06001428 }
1429 }
1430 return;
1431}
1432
Chris Cain5d66a0a2022-02-09 08:52:10 -06001433void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001434{
Chris Caine2d0a432022-03-28 11:08:49 -05001435 static bool tracedError[8] = {0};
1436 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -06001437 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001438
Chris Caine2d0a432022-03-28 11:08:49 -05001439 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +08001440 {
Chris Caine2d0a432022-03-28 11:08:49 -05001441 // Read temperature sensors
1442 readTempSensors(sensorPath, id);
Sheldon Baileyd2b044f2025-02-12 11:50:24 -06001443 readExtnSensors(sensorPath, id);
Chris Caine2d0a432022-03-28 11:08:49 -05001444
1445 if (occ->isMasterOcc())
1446 {
1447 // Read power sensors
1448 readPowerSensors(sensorPath, id);
1449 }
1450 tracedError[id] = false;
1451 }
1452 else
1453 {
1454 if (!tracedError[id])
1455 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001456 lg2::error(
1457 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}",
1458 "INST", id, "PATH", sensorPath);
Chris Caine2d0a432022-03-28 11:08:49 -05001459 tracedError[id] = true;
1460 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001461 }
1462
1463 return;
1464}
1465#endif
Chris Cain17257672021-10-22 13:41:03 -05001466
1467// Read the altitude from DBus
1468void Manager::readAltitude()
1469{
1470 static bool traceAltitudeErr = true;
1471
1472 utils::PropertyValue altitudeProperty{};
1473 try
1474 {
1475 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1476 ALTITUDE_PROP);
1477 auto sensorVal = std::get<double>(altitudeProperty);
1478 if (sensorVal < 0xFFFF)
1479 {
1480 if (sensorVal < 0)
1481 {
1482 altitude = 0;
1483 }
1484 else
1485 {
1486 // Round to nearest meter
1487 altitude = uint16_t(sensorVal + 0.5);
1488 }
Chris Cain37abe9b2024-10-31 17:20:31 -05001489 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE",
1490 sensorVal, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001491 traceAltitudeErr = true;
1492 }
1493 else
1494 {
1495 if (traceAltitudeErr)
1496 {
1497 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001498 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal);
Chris Cain17257672021-10-22 13:41:03 -05001499 }
1500 }
1501 }
Patrick Williamsaf408082022-07-22 19:26:54 -05001502 catch (const sdbusplus::exception_t& e)
Chris Cain17257672021-10-22 13:41:03 -05001503 {
1504 if (traceAltitudeErr)
1505 {
1506 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001507 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what());
Chris Cain17257672021-10-22 13:41:03 -05001508 }
1509 altitude = 0xFFFF; // not available
1510 }
1511}
1512
1513// Callback function when ambient temperature changes
Patrick Williamsaf408082022-07-22 19:26:54 -05001514void Manager::ambientCallback(sdbusplus::message_t& msg)
Chris Cain17257672021-10-22 13:41:03 -05001515{
1516 double currentTemp = 0;
1517 uint8_t truncatedTemp = 0xFF;
1518 std::string msgSensor;
1519 std::map<std::string, std::variant<double>> msgData;
1520 msg.read(msgSensor, msgData);
1521
1522 auto valPropMap = msgData.find(AMBIENT_PROP);
1523 if (valPropMap == msgData.end())
1524 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001525 lg2::debug("ambientCallback: Unknown ambient property changed");
Chris Cain17257672021-10-22 13:41:03 -05001526 return;
1527 }
1528 currentTemp = std::get<double>(valPropMap->second);
1529 if (std::isnan(currentTemp))
1530 {
1531 truncatedTemp = 0xFF;
1532 }
1533 else
1534 {
1535 if (currentTemp < 0)
1536 {
1537 truncatedTemp = 0;
1538 }
1539 else
1540 {
1541 // Round to nearest degree C
1542 truncatedTemp = uint8_t(currentTemp + 0.5);
1543 }
1544 }
1545
1546 // If ambient changes, notify OCCs
1547 if (truncatedTemp != ambient)
1548 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001549 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C",
1550 "OLD", ambient, "NEW", currentTemp);
Chris Cain17257672021-10-22 13:41:03 -05001551
1552 ambient = truncatedTemp;
1553 if (altitude == 0xFFFF)
1554 {
1555 // No altitude yet, try reading again
1556 readAltitude();
1557 }
1558
Chris Cain37abe9b2024-10-31 17:20:31 -05001559 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m",
1560 "TEMP", ambient, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001561#ifdef POWER10
1562 // Send ambient and altitude to all OCCs
1563 for (auto& obj : statusObjects)
1564 {
1565 if (obj->occActive())
1566 {
1567 obj->sendAmbient(ambient, altitude);
1568 }
1569 }
1570#endif // POWER10
1571 }
1572}
1573
1574// return the current ambient and altitude readings
1575void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1576 uint16_t& altitudeValue) const
1577{
1578 ambientValid = true;
1579 ambientTemp = ambient;
1580 altitudeValue = altitude;
1581
1582 if (ambient == 0xFF)
1583 {
1584 ambientValid = false;
1585 }
1586}
1587
Chris Caina7b74dc2021-11-10 17:03:43 -06001588#ifdef POWER10
Chris Cain7f89e4d2022-05-09 13:27:45 -05001589// Called when waitForAllOccsTimer expires
1590// After the first OCC goes active, this timer will be started (60 seconds)
Chris Caina7b74dc2021-11-10 17:03:43 -06001591void Manager::occsNotAllRunning()
1592{
Chris Cainf0295f52024-09-12 15:41:14 -05001593 if (resetInProgress)
1594 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001595 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -05001596 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
1597 return;
1598 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001599 if (activeCount != statusObjects.size())
1600 {
1601 // Not all OCCs went active
Chris Cain37abe9b2024-10-31 17:20:31 -05001602 lg2::warning(
1603 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})",
1604 "COUNT", activeCount, "EXP", statusObjects.size());
Chris Cain7f89e4d2022-05-09 13:27:45 -05001605 // Procs may be garded, so may be expected
Chris Caina7b74dc2021-11-10 17:03:43 -06001606 }
1607
Chris Cainf0295f52024-09-12 15:41:14 -05001608 if (resetRequired)
1609 {
1610 initiateOccRequest(resetInstance);
1611
1612 if (!waitForAllOccsTimer->isEnabled())
1613 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001614 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -05001615 // restart occ wait timer
1616 waitForAllOccsTimer->restartOnce(60s);
1617 }
1618 }
1619 else
1620 {
1621 validateOccMaster();
1622 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001623}
Chris Cain755af102024-02-27 16:09:51 -06001624
1625#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -05001626// Called when throttlePldmTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001627// If this timer expires, that indicates there are no OCC active sensor PDRs
Chris Cainc33171b2024-05-24 16:14:50 -05001628// found which will trigger pldm traces to be throttled.
1629// The second time this timer expires, a PEL will get created.
1630void Manager::throttlePldmTraceExpired()
Chris Cain755af102024-02-27 16:09:51 -06001631{
Chris Cain7651c062024-05-02 14:14:06 -05001632 if (utils::isHostRunning())
1633 {
Chris Cainc33171b2024-05-24 16:14:50 -05001634 if (!onPldmTimeoutCreatePel)
1635 {
1636 // Throttle traces
1637 pldmHandle->setTraceThrottle(true);
1638 // Restart timer to log a PEL when timer expires
1639 onPldmTimeoutCreatePel = true;
1640 throttlePldmTraceTimer->restartOnce(40min);
1641 }
1642 else
1643 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001644 lg2::error(
Chris Cainc33171b2024-05-24 16:14:50 -05001645 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1646 // Create PEL
1647 createPldmSensorPEL();
1648 }
Chris Cain7651c062024-05-02 14:14:06 -05001649 }
1650 else
1651 {
1652 // Make sure traces are not throttled
1653 pldmHandle->setTraceThrottle(false);
Chris Cain37abe9b2024-10-31 17:20:31 -05001654 lg2::info(
Chris Cainc33171b2024-05-24 16:14:50 -05001655 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
Chris Cain7651c062024-05-02 14:14:06 -05001656 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001657}
1658
1659void Manager::createPldmSensorPEL()
1660{
1661 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
1662 std::map<std::string, std::string> additionalData;
1663
1664 additionalData.emplace("_PID", std::to_string(getpid()));
1665
Chris Cain37abe9b2024-10-31 17:20:31 -05001666 lg2::info(
1667 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001668
1669 auto& bus = utils::getBus();
1670
1671 try
1672 {
1673 FFDCFiles ffdc;
1674 // Add occ-control journal traces to PEL FFDC
1675 auto occJournalFile =
1676 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);
1677
1678 static constexpr auto loggingObjectPath =
1679 "/xyz/openbmc_project/logging";
1680 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Patrick Williamsd7542c82024-08-16 15:20:28 -04001681 std::string service =
1682 utils::getService(loggingObjectPath, opLoggingInterface);
1683 auto method =
1684 bus.new_method_call(service.c_str(), loggingObjectPath,
1685 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001686
Chris Cain1c3349e2024-04-24 14:14:11 -05001687 // Set level to Warning (Predictive).
Chris Cain4b82f3e2024-04-22 14:44:29 -05001688 auto level =
1689 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
1690 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cain1c3349e2024-04-24 14:14:11 -05001691 Warning);
Chris Cain4b82f3e2024-04-22 14:44:29 -05001692
1693 method.append(d.path, level, additionalData, ffdc);
1694 bus.call(method);
1695 }
1696 catch (const sdbusplus::exception_t& e)
1697 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001698 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR",
1699 e.what());
Chris Cain4b82f3e2024-04-22 14:44:29 -05001700 }
Chris Cain755af102024-02-27 16:09:51 -06001701}
1702#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001703#endif // POWER10
1704
1705// Verify single master OCC and start presence monitor
1706void Manager::validateOccMaster()
1707{
1708 int masterInstance = -1;
1709 for (auto& obj : statusObjects)
1710 {
Chris Cainbd551de2022-04-26 13:41:16 -05001711 auto instance = obj->getOccInstanceID();
Chris Cainbae4d072022-02-28 09:46:50 -06001712#ifdef POWER10
1713 if (!obj->occActive())
1714 {
1715 if (utils::isHostRunning())
1716 {
Chris Cainbd551de2022-04-26 13:41:16 -05001717 // Check if sensor was queued while waiting for discovery
1718 auto match = queuedActiveState.find(instance);
1719 if (match != queuedActiveState.end())
Chris Cainbae4d072022-02-28 09:46:50 -06001720 {
Chris Cain7f89e4d2022-05-09 13:27:45 -05001721 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -05001722 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)",
1723 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001724 obj->occActive(true);
1725 }
1726 else
1727 {
1728 // OCC does not appear to be active yet, check active sensor
Patrick Williamsfb0a5c32024-02-28 11:27:00 -06001729#ifdef PLDM
Chris Cainbd551de2022-04-26 13:41:16 -05001730 pldmHandle->checkActiveSensor(instance);
Patrick Williamsfb0a5c32024-02-28 11:27:00 -06001731#endif
Chris Cainbd551de2022-04-26 13:41:16 -05001732 if (obj->occActive())
1733 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001734 lg2::info(
1735 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor",
1736 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001737 }
Chris Cainbae4d072022-02-28 09:46:50 -06001738 }
1739 }
1740 else
1741 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001742 lg2::warning(
1743 "validateOccMaster: HOST is not running (OCC{INST})",
1744 "INST", instance);
Chris Cainbae4d072022-02-28 09:46:50 -06001745 return;
1746 }
1747 }
1748#endif // POWER10
1749
Chris Caina7b74dc2021-11-10 17:03:43 -06001750 if (obj->isMasterOcc())
1751 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001752 obj->addPresenceWatchMaster();
1753
Chris Caina7b74dc2021-11-10 17:03:43 -06001754 if (masterInstance == -1)
1755 {
Chris Cainbd551de2022-04-26 13:41:16 -05001756 masterInstance = instance;
Chris Caina7b74dc2021-11-10 17:03:43 -06001757 }
1758 else
1759 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001760 lg2::error(
1761 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})",
1762 "MAST1", masterInstance, "MAST2", instance);
Chris Caina7b74dc2021-11-10 17:03:43 -06001763 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001764 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001765 }
1766 }
1767 }
Chris Cainbae4d072022-02-28 09:46:50 -06001768
Chris Caina7b74dc2021-11-10 17:03:43 -06001769 if (masterInstance < 0)
1770 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001771 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)",
1772 "NUM", statusObjects.size());
Chris Caina7b74dc2021-11-10 17:03:43 -06001773 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001774 statusObjects.front()->deviceError(
1775 Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001776 }
1777 else
1778 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001779 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs",
1780 "INST", masterInstance, "COUNT", activeCount);
Sheldon Bailey31a2f132022-05-20 11:31:52 -05001781#ifdef POWER10
1782 pmode->updateDbusSafeMode(false);
1783#endif
Chris Caina7b74dc2021-11-10 17:03:43 -06001784 }
1785}
1786
Chris Cain40501a22022-03-14 17:33:27 -05001787void Manager::updatePcapBounds() const
1788{
1789 if (pcap)
1790 {
1791 pcap->updatePcapBounds();
1792 }
1793}
1794
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301795} // namespace occ
1796} // namespace open_power