blob: 62a2b388ea4cef93af10228a337549686f98a488 [file] [log] [blame]
#include "config.h"

#include "occ_manager.hpp"

#include "i2c_occ.hpp"
#include "occ_dbus.hpp"
#include "occ_errors.hpp"
#include "utils.hpp"

#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <xyz/openbmc_project/Common/error.hpp>

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cmath>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <functional>
#include <map>
#include <memory>
#include <regex>
#include <string>
#include <system_error>
#include <variant>
#include <vector>

Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053020namespace open_power
21{
22namespace occ
23{
24
// FRU type sentinel and sensor-file name suffixes.
// NOTE(review): none of these are referenced in the part of the file visible
// here; presumably they are used by the sensor-reading code further down.
constexpr uint32_t fruTypeNotAvailable = 0xFF;
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// While this file exists, findAndCreateObjects() postpones OCC discovery.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";
32
Chris Caina8857c52021-01-27 11:53:05 -060033using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060034using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060035
// Read one value of type T from the file at `path` using operator>>.
//
// @param path  File to read.
// @return      The extracted value.
// @throws std::system_error (generic category) if the file cannot be opened,
//         the value cannot be parsed, or an I/O error occurs. The code is
//         errno when it is set, otherwise EIO.
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // eofbit is intentionally not part of the exception mask: a successful
    // extraction that simply runs into end-of-file (file without a trailing
    // newline) sets eofbit only and must not be reported as a failure.
    // An empty or unparsable file still sets failbit and throws.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data;

    // Clear errno so a stale value from an earlier call is never reported.
    errno = 0;
    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Parse failures do not set errno; avoid throwing a "Success" error.
        const auto err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
58
Chris Cainc33171b2024-05-24 16:14:50 -050059// findAndCreateObjects():
60// Takes care of getting the required objects created and
61// finds the available devices/processors.
62// (function is called everytime the discoverTimer expires)
63// - create the PowerMode object to control OCC modes
64// - create statusObjects for each OCC device found
65// - waits for OCC Active sensors PDRs to become available
66// - restart discoverTimer if all data is not available yet
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053067void Manager::findAndCreateObjects()
68{
Matt Spinlerd267cec2021-09-01 14:49:19 -050069#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050070 for (auto id = 0; id < MAX_CPUS; ++id)
71 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060072 // Create one occ per cpu
73 auto occ = std::string(OCC_NAME) + std::to_string(id);
74 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053075 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050076#else
Chris Cain613dc902022-04-08 09:56:22 -050077 if (!pmode)
78 {
79 // Create the power mode object
80 pmode = std::make_unique<powermode::PowerMode>(
81 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
82 }
83
Chris Cain1718fd82022-02-16 16:39:50 -060084 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050085 {
Chris Cainbae4d072022-02-28 09:46:50 -060086 static bool statusObjCreated = false;
87 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060088 {
Chris Cainbae4d072022-02-28 09:46:50 -060089 // Create the OCCs based on on the /dev/occX devices
90 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060091
Chris Cainbae4d072022-02-28 09:46:50 -060092 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060093 {
Chris Cainbae4d072022-02-28 09:46:50 -060094 // Something changed or no OCCs yet, try again in 10s.
95 // Note on the first pass prevOCCSearch will be empty,
96 // so there will be at least one delay to give things
97 // a chance to settle.
98 prevOCCSearch = occs;
99
Chris Cain37abe9b2024-10-31 17:20:31 -0500100 lg2::info(
101 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})",
102 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600103
104 discoverTimer->restartOnce(10s);
105 }
106 else
107 {
108 // All OCCs appear to be available, create status objects
109
110 // createObjects requires OCC0 first.
111 std::sort(occs.begin(), occs.end());
112
Chris Cain37abe9b2024-10-31 17:20:31 -0500113 lg2::info(
114 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects",
115 "QTY", occs.size());
Chris Cainbae4d072022-02-28 09:46:50 -0600116 for (auto id : occs)
117 {
118 createObjects(std::string(OCC_NAME) + std::to_string(id));
119 }
120 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500121 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500122
123 // Find/update the processor path associated with each OCC
124 for (auto& obj : statusObjects)
125 {
126 obj->updateProcAssociation();
127 }
Chris Cainbae4d072022-02-28 09:46:50 -0600128 }
129 }
130
Chris Cain6d8f37a2022-04-29 13:46:01 -0500131 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600132 {
133 static bool tracedHostWait = false;
134 if (utils::isHostRunning())
135 {
136 if (tracedHostWait)
137 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500138 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600139 "Manager::findAndCreateObjects(): Host is running");
140 tracedHostWait = false;
141 }
Chris Cainbae4d072022-02-28 09:46:50 -0600142 checkAllActiveSensors();
143 }
144 else
145 {
146 if (!tracedHostWait)
147 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500148 lg2::info(
Chris Cainbae4d072022-02-28 09:46:50 -0600149 "Manager::findAndCreateObjects(): Waiting for host to start");
150 tracedHostWait = true;
151 }
152 discoverTimer->restartOnce(30s);
Chris Cain7651c062024-05-02 14:14:06 -0500153#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -0500154 if (throttlePldmTraceTimer->isEnabled())
Chris Cain7651c062024-05-02 14:14:06 -0500155 {
156 // Host is no longer running, disable throttle timer and
157 // make sure traces are not throttled
Chris Cain37abe9b2024-10-31 17:20:31 -0500158 lg2::info("findAndCreateObjects(): disabling sensor timer");
Chris Cainc33171b2024-05-24 16:14:50 -0500159 throttlePldmTraceTimer->setEnabled(false);
Chris Cain7651c062024-05-02 14:14:06 -0500160 pldmHandle->setTraceThrottle(false);
161 }
162#endif
Chris Cain1718fd82022-02-16 16:39:50 -0600163 }
164 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500165 }
166 else
167 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500168 lg2::info(
169 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...",
170 "FILE", HOST_ON_FILE);
Chris Cain1718fd82022-02-16 16:39:50 -0600171 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500172 }
173#endif
174}
175
#ifdef POWER10
// Check if all occActive sensors are available.
//
// While the host is running, every status object must either report active
// or have received its PLDM sensor; an OCC whose active state was queued
// before discovery finished is activated here. If any OCC is still missing,
// the discoverTimer is restarted (10s) to try again; once all are available
// the discovery/throttle timers are stopped and a pending reset request (if
// any) is initiated.
void Manager::checkAllActiveSensors()
{
    // Function-local statics: state persists across timer-driven calls.
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (open_power::occ::utils::isHostRunning())
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            lg2::info("checkAllActiveSensors(): Host is now running");
        }

        // Start with the assumption that all are available
        allActiveSensorAvailable = true;
        for (auto& obj : statusObjects)
        {
            if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
            {
                auto instance = obj->getOccInstanceID();
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    lg2::info(
                        "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)",
                        "INST", instance);
                    obj->occActive(true);
                }
                else
                {
                    allActiveSensorAvailable = false;
                    if (!tracedSensorWait)
                    {
                        lg2::info(
                            "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor",
                            "INST", instance);
                        tracedSensorWait = true;
#ifdef PLDM
                        // Make sure PLDM traces are not throttled
                        pldmHandle->setTraceThrottle(false);
                        // Start timer to throttle PLDM traces when timer
                        // expires
                        onPldmTimeoutCreatePel = false;
                        throttlePldmTraceTimer->restartOnce(5min);
#endif
                    }
#ifdef PLDM
                    // Ignore active sensor check if the OCCs are being reset
                    if (!resetInProgress)
                    {
                        pldmHandle->checkActiveSensor(obj->getOccInstanceID());
                    }
#endif
                    // Stop at the first OCC that is not yet available.
                    break;
                }
            }
        }
    }
    else
    {
        // Host not running: allActiveSensorAvailable keeps its value from
        // the previous call.
        if (!waitingForHost)
        {
            waitingForHost = true;
            lg2::info("checkAllActiveSensors(): Waiting for host to start");
#ifdef PLDM
            if (throttlePldmTraceTimer->isEnabled())
            {
                // Host is no longer running, disable throttle timer and
                // make sure traces are not throttled
                lg2::info("checkAllActiveSensors(): disabling sensor timer");
                throttlePldmTraceTimer->setEnabled(false);
                pldmHandle->setTraceThrottle(false);
            }
#endif
        }
    }

    if (allActiveSensorAvailable)
    {
        // All sensors were found, disable the discovery timer
        if (discoverTimer->isEnabled())
        {
            discoverTimer->setEnabled(false);
        }
#ifdef PLDM
        if (throttlePldmTraceTimer->isEnabled())
        {
            // Disable throttle timer and make sure traces are not throttled
            throttlePldmTraceTimer->setEnabled(false);
            pldmHandle->setTraceThrottle(false);
        }
#endif
        if (waitingForAllOccActiveSensors)
        {
            lg2::info(
                "checkAllActiveSensors(): OCC Active sensors are available");
            waitingForAllOccActiveSensors = false;

            if (resetRequired)
            {
                initiateOccRequest(resetInstance);

                if (!waitForAllOccsTimer->isEnabled())
                {
                    // NOTE(review): the "occsNotAllRunning:" prefix looks
                    // copied from another function - confirm before changing.
                    lg2::warning(
                        "occsNotAllRunning: Restarting waitForAllOccTimer");
                    // restart occ wait timer to check status after reset
                    // completes
                    waitForAllOccsTimer->restartOnce(60s);
                }
            }
        }
        queuedActiveState.clear();
        tracedSensorWait = false;
    }
    else
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            lg2::info(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
    }
}
#endif
309
Matt Spinlerd267cec2021-09-01 14:49:19 -0500310std::vector<int> Manager::findOCCsInDev()
311{
312 std::vector<int> occs;
313 std::regex expr{R"(occ(\d+)$)"};
314
315 for (auto& file : fs::directory_iterator("/dev"))
316 {
317 std::smatch match;
318 std::string path{file.path().string()};
319 if (std::regex_search(path, match, expr))
320 {
321 auto num = std::stoi(match[1].str());
322
323 // /dev numbering starts at 1, ours starts at 0.
324 occs.push_back(num - 1);
325 }
326 }
327
328 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530329}
330
Patrick Williamsaf408082022-07-22 19:26:54 -0500331int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530332{
George Liubcef3b42021-09-10 12:39:02 +0800333 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530334
335 sdbusplus::message::object_path o;
336 msg.read(o);
337 fs::path cpuPath(std::string(std::move(o)));
338
339 auto name = cpuPath.filename().string();
340 auto index = name.find(CPU_NAME);
341 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
342
343 createObjects(name);
344
345 return 0;
346}
347
348void Manager::createObjects(const std::string& occ)
349{
350 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
351
Gunnar Mills94df8c92018-09-14 14:50:03 -0500352 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800353 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600354#ifdef POWER10
355 pmode,
356#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500357 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600358 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530359#ifdef PLDM
360 ,
Chris Cainf0295f52024-09-12 15:41:14 -0500361 // Callback will set flag indicating reset needs to be done
362 // instead of immediately issuing a reset via PLDM.
363 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
Tom Joseph00325232020-07-29 17:51:48 +0530364 std::placeholders::_1)
365#endif
366 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530367
Chris Cain40501a22022-03-14 17:33:27 -0500368 // Create the power cap monitor object
369 if (!pcap)
370 {
371 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
372 *statusObjects.back());
373 }
374
Chris Cain36f9cde2021-11-22 11:18:21 -0600375 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530376 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500377 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST",
378 statusObjects.back()->getOccInstanceID());
Chris Cain36f9cde2021-11-22 11:18:21 -0600379 _pollTimer->setEnabled(false);
380
Chris Cain78e86012021-03-04 16:15:31 -0600381#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600382 // Set the master OCC on the PowerMode object
383 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600384#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600385 }
386
Patrick Williamsd7542c82024-08-16 15:20:28 -0400387 passThroughObjects.emplace_back(std::make_unique<PassThrough>(
388 path.c_str()
Chris Cain36f9cde2021-11-22 11:18:21 -0600389#ifdef POWER10
Patrick Williamsd7542c82024-08-16 15:20:28 -0400390 ,
391 pmode
Chris Cain36f9cde2021-11-22 11:18:21 -0600392#endif
Patrick Williamsd7542c82024-08-16 15:20:28 -0400393 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530394}
395
Chris Cainf0295f52024-09-12 15:41:14 -0500396// If a reset is not already outstanding, set a flag to indicate that a reset is
397// needed.
398void Manager::resetOccRequest(instanceID instance)
399{
400 if (!resetRequired)
401 {
402 resetRequired = true;
403 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500404 lg2::error(
405 "resetOccRequest: PM Complex reset was requested due to OCC{INST}",
406 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500407 }
408 else if (instance != resetInstance)
409 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500410 lg2::warning(
411 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}",
412 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500413 }
414}
415
416// If a reset has not been started, initiate an OCC reset via PLDM
417void Manager::initiateOccRequest(instanceID instance)
418{
419 if (!resetInProgress)
420 {
421 resetInProgress = true;
422 resetInstance = instance;
Chris Cain37abe9b2024-10-31 17:20:31 -0500423 lg2::error(
424 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}",
425 "INST", instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500426#ifdef PLDM
427 pldmHandle->resetOCC(instance);
428#endif
429 resetRequired = false;
430 }
431 else
432 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500433 lg2::warning(
434 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}",
435 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500436 }
437}
438
Sheldon Bailey373af752022-02-21 15:14:00 -0600439void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530440{
Chris Caina7b74dc2021-11-10 17:03:43 -0600441 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600442 {
Chris Cainf0295f52024-09-12 15:41:14 -0500443 if (resetInProgress)
444 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500445 lg2::info(
446 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{INST}",
447 "INST", instance, "RINST", resetInstance);
Chris Cainf0295f52024-09-12 15:41:14 -0500448 return;
449 }
450
Chris Caina7b74dc2021-11-10 17:03:43 -0600451 // OCC went active
452 ++activeCount;
453
454#ifdef POWER10
455 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600456 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600457 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500458 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500459 }
460#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600461
462 if (activeCount == statusObjects.size())
463 {
464#ifdef POWER10
465 // All OCCs are now running
466 if (waitForAllOccsTimer->isEnabled())
467 {
468 // stop occ wait timer
469 waitForAllOccsTimer->setEnabled(false);
470 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600471
Chris Cainf0295f52024-09-12 15:41:14 -0500472 // All OCCs have been found, check if we need a reset
473 if (resetRequired)
474 {
475 initiateOccRequest(resetInstance);
476
477 if (!waitForAllOccsTimer->isEnabled())
478 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500479 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -0500480 "occsNotAllRunning: Restarting waitForAllOccTimer");
481 // restart occ wait timer
482 waitForAllOccsTimer->restartOnce(60s);
483 }
484 }
485 else
486 {
487 // Verify master OCC and start presence monitor
488 validateOccMaster();
489 }
490#else
Chris Caina7b74dc2021-11-10 17:03:43 -0600491 // Verify master OCC and start presence monitor
492 validateOccMaster();
Chris Cainf0295f52024-09-12 15:41:14 -0500493#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600494 }
495
496 // Start poll timer if not already started
497 if (!_pollTimer->isEnabled())
498 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500499 lg2::info("Manager: OCCs will be polled every {TIME} seconds",
500 "TIME", pollInterval);
Chris Caina7b74dc2021-11-10 17:03:43 -0600501
502 // Send poll and start OCC poll timer
503 pollerTimerExpired();
504 }
505 }
506 else
507 {
508 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500509 if (activeCount > 0)
510 {
511 --activeCount;
512 }
513 else
514 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500515 lg2::info("OCC{INST} disabled, but currently no active OCCs",
516 "INST", instance);
Chris Cain082a6ca2023-03-21 10:27:26 -0500517 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600518
519 if (activeCount == 0)
520 {
521 // No OCCs are running
522
Chris Cainf0295f52024-09-12 15:41:14 -0500523 if (resetInProgress)
524 {
525 // All OCC active sensors are clear (reset should be in
526 // progress)
Chris Cain37abe9b2024-10-31 17:20:31 -0500527 lg2::info(
528 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
529 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500530 resetInProgress = false;
531 resetInstance = 255;
532 }
533
Chris Caina7b74dc2021-11-10 17:03:43 -0600534 // Stop OCC poll timer
535 if (_pollTimer->isEnabled())
536 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500537 lg2::info(
Chris Caina7b74dc2021-11-10 17:03:43 -0600538 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
539 _pollTimer->setEnabled(false);
540 }
541
542#ifdef POWER10
543 // stop wait timer
544 if (waitForAllOccsTimer->isEnabled())
545 {
546 waitForAllOccsTimer->setEnabled(false);
547 }
548#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600549 }
Chris Cainf0295f52024-09-12 15:41:14 -0500550 else if (resetInProgress)
551 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500552 lg2::info(
553 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
554 "COUNT", activeCount, "INST", instance, "STATUS", status);
Chris Cainf0295f52024-09-12 15:41:14 -0500555 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600556#ifdef READ_OCC_SENSORS
557 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500558 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600559#endif
Chris Caina8857c52021-01-27 11:53:05 -0600560 }
Chris Cainbae4d072022-02-28 09:46:50 -0600561
562#ifdef POWER10
563 if (waitingForAllOccActiveSensors)
564 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500565 if (utils::isHostRunning())
566 {
567 checkAllActiveSensors();
568 }
Chris Cainbae4d072022-02-28 09:46:50 -0600569 }
570#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530571}
572
#ifdef I2C_OCC
// Create a Status object for every OCC hwmon device found under DEV_PATH,
// then create the PowerCap (and, for POWER10, PowerMode) object bound to the
// first device, which is treated as the master OCC.
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    // Object path of the first (master) status object. Recorded inside the
    // loop because `path` is scoped to a single iteration.
    [[maybe_unused]] fs::path masterPath;

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        if (statusObjects.empty())
        {
            masterPath = path;
        }
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }

    if (statusObjects.empty())
    {
        // front() on an empty container is undefined behaviour.
        lg2::error("initStatusObjects: No OCC hwmon devices found");
        return;
    }

    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front());
#ifdef POWER10
    // NOTE(review): findAndCreateObjects() also passes `event` to the
    // PowerMode constructor - confirm the 3-argument form is intended here.
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    // Set the master OCC on the PowerMode object
    pmode->setMasterOcc(masterPath);
#endif
}
#endif
599
Tom Joseph815f9f52020-07-27 12:12:13 +0530600#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500601void Manager::sbeTimeout(unsigned int instance)
602{
Eddie James2a751d72022-03-04 09:16:12 -0600603 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
604 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400605 return instance == obj->getOccInstanceID();
606 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500607
Eddie Jamescb018da2022-03-05 11:49:37 -0600608 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600609 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500610 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST",
611 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500612
Eddie James2a751d72022-03-04 09:16:12 -0600613 setSBEState(instance, SBE_STATE_NOT_USABLE);
614
615 pldmHandle->sendHRESET(instance);
616 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500617}
618
Tom Joseph815f9f52020-07-27 12:12:13 +0530619bool Manager::updateOCCActive(instanceID instance, bool status)
620{
Chris Cain7e374fb2022-04-07 09:47:23 -0500621 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
622 [instance](const auto& obj) {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400623 return instance == obj->getOccInstanceID();
624 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500625
Chris Cain082a6ca2023-03-21 10:27:26 -0500626 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500627 if (obj != statusObjects.end())
628 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500629 if (!hostRunning && (status == true))
630 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500631 lg2::warning(
632 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received",
633 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500634 (*obj)->setPldmSensorReceived(false);
635 if (!waitingForAllOccActiveSensors)
636 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500637 lg2::info(
Chris Cain082a6ca2023-03-21 10:27:26 -0500638 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
639 waitingForAllOccActiveSensors = true;
640 }
Chris Cain755af102024-02-27 16:09:51 -0600641#ifdef POWER10
Chris Cain082a6ca2023-03-21 10:27:26 -0500642 discoverTimer->restartOnce(30s);
Chris Cain755af102024-02-27 16:09:51 -0600643#endif
Chris Cain082a6ca2023-03-21 10:27:26 -0500644 return false;
645 }
646 else
647 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500648 (*obj)->setPldmSensorReceived(true);
649 return (*obj)->occActive(status);
650 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500651 }
652 else
653 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500654 if (hostRunning)
655 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500656 lg2::warning(
657 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})",
658 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500659 }
660 else
661 {
662 if (status == true)
663 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500664 lg2::warning(
665 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})",
666 "INST", instance, "STAT", status);
Chris Cain082a6ca2023-03-21 10:27:26 -0500667 }
668 }
Chris Cainbd551de2022-04-26 13:41:16 -0500669 if (status == true)
670 {
671 // OCC went active
672 queuedActiveState.insert(instance);
673 }
674 else
675 {
676 auto match = queuedActiveState.find(instance);
677 if (match != queuedActiveState.end())
678 {
679 // OCC was disabled
680 queuedActiveState.erase(match);
681 }
682 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500683 return false;
684 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530685}
Eddie Jamescbad2192021-10-07 09:39:39 -0500686
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500687// Called upon pldm event To set powermode Safe Mode State for system.
688void Manager::updateOccSafeMode(bool safeMode)
689{
690#ifdef POWER10
691 pmode->updateDbusSafeMode(safeMode);
692#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500693 // Update the processor throttle status on dbus
694 for (auto& obj : statusObjects)
695 {
696 obj->updateThrottle(safeMode, THROTTLED_SAFE);
697 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500698}
699
Eddie Jamescbad2192021-10-07 09:39:39 -0500700void Manager::sbeHRESETResult(instanceID instance, bool success)
701{
702 if (success)
703 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500704 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500705
706 setSBEState(instance, SBE_STATE_BOOTED);
707
708 return;
709 }
710
711 setSBEState(instance, SBE_STATE_FAILED);
712
713 if (sbeCanDump(instance))
714 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500715 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST",
716 instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500717
718 auto& bus = utils::getBus();
719 uint32_t src6 = instance << 16;
720 uint32_t logId =
721 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
722 src6, "SBE command timeout");
723
724 try
725 {
George Liuf3a4a692021-12-28 13:59:51 +0800726 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
727 constexpr auto function = "CreateDump";
728
Patrick Williamsd7542c82024-08-16 15:20:28 -0400729 std::string service =
730 utils::getService(OP_DUMP_OBJ_PATH, interface);
Dhruvaraj Subhashchandran1173b2b2024-06-01 11:12:13 -0500731 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
732 interface, function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500733
734 std::map<std::string, std::variant<std::string, uint64_t>>
735 createParams{
736 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
737 uint64_t(logId)},
738 {"com.ibm.Dump.Create.CreateParameters.DumpType",
739 "com.ibm.Dump.Create.DumpType.SBE"},
740 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
741 uint64_t(instance)},
742 };
743
744 method.append(createParams);
745
746 auto response = bus.call(method);
747 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500748 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500749 {
750 constexpr auto ERROR_DUMP_DISABLED =
751 "xyz.openbmc_project.Dump.Create.Error.Disabled";
752 if (e.name() == ERROR_DUMP_DISABLED)
753 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500754 lg2::info("Dump is disabled, skipping");
Eddie Jamescbad2192021-10-07 09:39:39 -0500755 }
756 else
757 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500758 lg2::error("Dump failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500759 }
760 }
761 }
Chris Cainf0295f52024-09-12 15:41:14 -0500762
763 // SBE Reset failed, try PM Complex reset
Chris Cain37abe9b2024-10-31 17:20:31 -0500764 lg2::error("sbeHRESETResult: Forcing PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500765 resetOccRequest(instance);
Eddie Jamescbad2192021-10-07 09:39:39 -0500766}
767
768bool Manager::sbeCanDump(unsigned int instance)
769{
770 struct pdbg_target* proc = getPdbgTarget(instance);
771
772 if (!proc)
773 {
774 // allow the dump in the error case
775 return true;
776 }
777
778 try
779 {
780 if (!openpower::phal::sbe::isDumpAllowed(proc))
781 {
782 return false;
783 }
784
785 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
786 {
787 return false;
788 }
789 }
790 catch (openpower::phal::exception::SbeError& e)
791 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500792 lg2::info("Failed to query SBE state");
Eddie Jamescbad2192021-10-07 09:39:39 -0500793 }
794
795 // allow the dump in the error case
796 return true;
797}
798
799void Manager::setSBEState(unsigned int instance, enum sbe_state state)
800{
801 struct pdbg_target* proc = getPdbgTarget(instance);
802
803 if (!proc)
804 {
805 return;
806 }
807
808 try
809 {
810 openpower::phal::sbe::setState(proc, state);
811 }
812 catch (const openpower::phal::exception::SbeError& e)
813 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500814 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what());
Eddie Jamescbad2192021-10-07 09:39:39 -0500815 }
816}
817
818struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
819{
820 if (!pdbgInitialized)
821 {
822 try
823 {
824 openpower::phal::pdbg::init();
825 pdbgInitialized = true;
826 }
827 catch (const openpower::phal::exception::PdbgError& e)
828 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500829 lg2::error("pdbg initialization failed");
Eddie Jamescbad2192021-10-07 09:39:39 -0500830 return nullptr;
831 }
832 }
833
834 struct pdbg_target* proc = nullptr;
835 pdbg_for_each_class_target("proc", proc)
836 {
837 if (pdbg_target_index(proc) == instance)
838 {
839 return proc;
840 }
841 }
842
Chris Cain37abe9b2024-10-31 17:20:31 -0500843 lg2::error("Failed to get pdbg target");
Eddie Jamescbad2192021-10-07 09:39:39 -0500844 return nullptr;
845}
Tom Joseph815f9f52020-07-27 12:12:13 +0530846#endif
847
Chris Caina8857c52021-01-27 11:53:05 -0600848void Manager::pollerTimerExpired()
849{
Chris Caina8857c52021-01-27 11:53:05 -0600850 if (!_pollTimer)
851 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500852 lg2::error("pollerTimerExpired() ERROR: Timer not defined");
Chris Caina8857c52021-01-27 11:53:05 -0600853 return;
854 }
855
Chris Cainf0295f52024-09-12 15:41:14 -0500856#ifdef POWER10
857 if (resetRequired)
858 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500859 lg2::error("pollerTimerExpired() - Initiating PM Complex reset");
Chris Cainf0295f52024-09-12 15:41:14 -0500860 initiateOccRequest(resetInstance);
861
862 if (!waitForAllOccsTimer->isEnabled())
863 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500864 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -0500865 // restart occ wait timer
866 waitForAllOccsTimer->restartOnce(60s);
867 }
868 return;
869 }
870#endif
871
Chris Caina8857c52021-01-27 11:53:05 -0600872 for (auto& obj : statusObjects)
873 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600874 if (!obj->occActive())
875 {
876 // OCC is not running yet
877#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600878 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500879 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600880#endif
881 continue;
882 }
883
Chris Caina8857c52021-01-27 11:53:05 -0600884 // Read sysfs to force kernel to poll OCC
885 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800886
887#ifdef READ_OCC_SENSORS
888 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600889 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800890#endif
Chris Caina8857c52021-01-27 11:53:05 -0600891 }
892
Chris Caina7b74dc2021-11-10 17:03:43 -0600893 if (activeCount > 0)
894 {
895 // Restart OCC poll timer
896 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
897 }
898 else
899 {
900 // No OCCs running, so poll timer will not be restarted
Chris Cain37abe9b2024-10-31 17:20:31 -0500901 lg2::info(
902 "Manager::pollerTimerExpired: poll timer will not be restarted");
Chris Caina7b74dc2021-11-10 17:03:43 -0600903 }
Chris Caina8857c52021-01-27 11:53:05 -0600904}
905
Chicago Duanbb895cb2021-06-18 19:37:16 +0800906#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600907void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800908{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500909 // There may be more than one sensor with the same FRU type
910 // and label so make two passes: the first to read the temps
911 // from sysfs, and the second to put them on D-Bus after
912 // resolving any conflicts.
913 std::map<std::string, double> sensorData;
914
Chicago Duanbb895cb2021-06-18 19:37:16 +0800915 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
916 for (auto& file : fs::directory_iterator(path))
917 {
918 if (!std::regex_search(file.path().string(), expr))
919 {
920 continue;
921 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800922
Matt Spinlera26f1522021-08-25 15:50:20 -0500923 uint32_t labelValue{0};
924
925 try
926 {
927 labelValue = readFile<uint32_t>(file.path());
928 }
929 catch (const std::system_error& e)
930 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500931 lg2::debug(
932 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
933 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800934 continue;
935 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800936
937 const std::string& tempLabel = "label";
938 const std::string filePathString = file.path().string().substr(
939 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500940
941 uint32_t fruTypeValue{0};
942 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800943 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500944 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
945 }
946 catch (const std::system_error& e)
947 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500948 lg2::debug(
949 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
950 "PATH", filePathString + fruTypeSuffix, "ERROR",
951 e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800952 continue;
953 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800954
Patrick Williamsd7542c82024-08-16 15:20:28 -0400955 std::string sensorPath =
956 OCC_SENSORS_ROOT + std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800957
Matt Spinlerace67d82021-10-18 13:41:57 -0500958 std::string dvfsTempPath;
959
Chicago Duanbb895cb2021-06-18 19:37:16 +0800960 if (fruTypeValue == VRMVdd)
961 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400962 sensorPath.append(
963 "vrm_vdd" + std::to_string(occInstance) + "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800964 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500965 else if (fruTypeValue == processorIoRing)
966 {
Patrick Williamsd7542c82024-08-16 15:20:28 -0400967 sensorPath.append(
968 "proc" + std::to_string(occInstance) + "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500969 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600970 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500971 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800972 else
973 {
Matt Spinler14d14022021-08-25 15:38:29 -0500974 uint16_t type = (labelValue & 0xFF000000) >> 24;
975 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800976
977 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
978 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500979 if (fruTypeValue == fruTypeNotAvailable)
980 {
981 // Not all DIMM related temps are available to read
982 // (no _input file in this case)
983 continue;
984 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800985 auto iter = dimmTempSensorName.find(fruTypeValue);
986 if (iter == dimmTempSensorName.end())
987 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500988 lg2::error(
989 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ",
990 "FRU", fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800991 continue;
992 }
993
Patrick Williamsd7542c82024-08-16 15:20:28 -0400994 sensorPath.append(
995 "dimm" + std::to_string(instanceID) + iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500996
997 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
998 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800999 }
1000 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
1001 {
Matt Spinlerace67d82021-10-18 13:41:57 -05001002 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001003 {
Matt Spinlerace67d82021-10-18 13:41:57 -05001004 // The OCC reports small core temps, of which there are
1005 // two per big core. All current P10 systems are in big
1006 // core mode, so use a big core name.
1007 uint16_t coreNum = instanceID / 2;
1008 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -06001009 sensorPath.append("proc" + std::to_string(occInstance) +
1010 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -05001011 std::to_string(tempNum) + "_temp");
1012
Chris Cainae157b62024-01-23 16:05:12 -06001013 dvfsTempPath =
1014 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
1015 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -05001016 }
1017 else
1018 {
Chicago Duanbb895cb2021-06-18 19:37:16 +08001019 continue;
1020 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001021 }
1022 else
1023 {
1024 continue;
1025 }
1026 }
1027
Matt Spinlerace67d82021-10-18 13:41:57 -05001028 // The dvfs temp file only needs to be read once per chip per type.
1029 if (!dvfsTempPath.empty() &&
1030 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
1031 {
1032 try
1033 {
1034 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
1035
1036 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
1037 dvfsTempPath, dvfsValue * std::pow(10, -3));
1038 }
1039 catch (const std::system_error& e)
1040 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001041 lg2::debug(
1042 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1043 "PATH", filePathString + maxSuffix, "ERROR",
1044 e.code().value());
Matt Spinlerace67d82021-10-18 13:41:57 -05001045 }
1046 }
1047
Matt Spinlera26f1522021-08-25 15:50:20 -05001048 uint32_t faultValue{0};
1049 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001050 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001051 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
1052 }
1053 catch (const std::system_error& e)
1054 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001055 lg2::debug(
1056 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1057 "PATH", filePathString + faultSuffix, "ERROR",
1058 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001059 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001060 }
1061
Chris Cainae157b62024-01-23 16:05:12 -06001062 double tempValue{0};
1063 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -05001064 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001065 {
Chris Cainae157b62024-01-23 16:05:12 -06001066 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001067 }
Chris Cainae157b62024-01-23 16:05:12 -06001068 else
Chicago Duanbb895cb2021-06-18 19:37:16 +08001069 {
Chris Cainae157b62024-01-23 16:05:12 -06001070 // Read the temperature
1071 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001072 {
Chris Cainae157b62024-01-23 16:05:12 -06001073 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001074 }
Chris Cainae157b62024-01-23 16:05:12 -06001075 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001076 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001077 lg2::debug(
1078 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1079 "PATH", filePathString + inputSuffix, "ERROR",
1080 e.code().value());
Chris Cainae157b62024-01-23 16:05:12 -06001081
1082 // if errno == EAGAIN(Resource temporarily unavailable) then set
1083 // temp to 0, to avoid using old temp, and affecting FAN
1084 // Control.
1085 if (e.code().value() == EAGAIN)
1086 {
1087 tempValue = 0;
1088 }
1089 // else the errno would be something like
1090 // EBADF(Bad file descriptor)
1091 // or ENOENT(No such file or directory)
1092 else
1093 {
1094 continue;
1095 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -05001096 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001097 }
1098
Matt Spinler818cc8d2023-10-23 11:43:39 -05001099 // If this object path already has a value, only overwite
1100 // it if the previous one was an NaN or a smaller value.
1101 auto existing = sensorData.find(sensorPath);
1102 if (existing != sensorData.end())
1103 {
Chris Cainae157b62024-01-23 16:05:12 -06001104 // Multiple sensors found for this FRU type
1105 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1106 ((existing->second == 0) && std::isnan(tempValue)))
1107 {
1108 // One of the redundant sensors has failed (0xFF/nan), and the
1109 // other sensor has no reading (0), so set the FRU to NaN to
1110 // force fan increase
1111 tempValue = std::numeric_limits<double>::quiet_NaN();
1112 existing->second = tempValue;
1113 }
Matt Spinler818cc8d2023-10-23 11:43:39 -05001114 if (std::isnan(existing->second) || (tempValue > existing->second))
1115 {
1116 existing->second = tempValue;
1117 }
1118 }
1119 else
1120 {
Chris Cainae157b62024-01-23 16:05:12 -06001121 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -05001122 sensorData[sensorPath] = tempValue;
1123 }
1124 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001125
Matt Spinler818cc8d2023-10-23 11:43:39 -05001126 // Now publish the values on D-Bus.
1127 for (const auto& [objectPath, value] : sensorData)
1128 {
1129 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1130 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -05001131
Matt Spinler818cc8d2023-10-23 11:43:39 -05001132 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1133 objectPath, !std::isnan(value));
1134
1135 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -06001136 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001137 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001138 objectPath, {"all_sensors"});
Chris Cain6fa848a2022-01-24 14:54:38 -06001139 }
1140
Chris Cainae157b62024-01-23 16:05:12 -06001141 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001142 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001143}
1144
1145std::optional<std::string>
1146 Manager::getPowerLabelFunctionID(const std::string& value)
1147{
1148 // If the value is "system", then the FunctionID is "system".
1149 if (value == "system")
1150 {
1151 return value;
1152 }
1153
1154 // If the value is not "system", then the label value have 3 numbers, of
1155 // which we only care about the middle one:
1156 // <sensor id>_<function id>_<apss channel>
1157 // eg: The value is "0_10_5" , then the FunctionID is "10".
1158 if (value.find("_") == std::string::npos)
1159 {
1160 return std::nullopt;
1161 }
1162
1163 auto powerLabelValue = value.substr((value.find("_") + 1));
1164
1165 if (powerLabelValue.find("_") == std::string::npos)
1166 {
1167 return std::nullopt;
1168 }
1169
1170 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1171}
1172
1173void Manager::readPowerSensors(const fs::path& path, uint32_t id)
1174{
Chicago Duanbb895cb2021-06-18 19:37:16 +08001175 std::regex expr{"power\\d+_label$"}; // Example: power5_label
1176 for (auto& file : fs::directory_iterator(path))
1177 {
1178 if (!std::regex_search(file.path().string(), expr))
1179 {
1180 continue;
1181 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001182
Matt Spinlera26f1522021-08-25 15:50:20 -05001183 std::string labelValue;
1184 try
1185 {
1186 labelValue = readFile<std::string>(file.path());
1187 }
1188 catch (const std::system_error& e)
1189 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001190 lg2::debug(
1191 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1192 "PATH", file.path().string(), "ERROR", e.code().value());
Chicago Duanbb895cb2021-06-18 19:37:16 +08001193 continue;
1194 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001195
1196 auto functionID = getPowerLabelFunctionID(labelValue);
1197 if (functionID == std::nullopt)
1198 {
1199 continue;
1200 }
1201
1202 const std::string& tempLabel = "label";
1203 const std::string filePathString = file.path().string().substr(
1204 0, file.path().string().length() - tempLabel.length());
1205
1206 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1207
1208 auto iter = powerSensorName.find(*functionID);
1209 if (iter == powerSensorName.end())
1210 {
1211 continue;
1212 }
1213 sensorPath.append(iter->second);
1214
Matt Spinlera26f1522021-08-25 15:50:20 -05001215 double tempValue{0};
1216
1217 try
Chicago Duanbb895cb2021-06-18 19:37:16 +08001218 {
Matt Spinlera26f1522021-08-25 15:50:20 -05001219 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001220 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001221 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001222 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001223 lg2::debug(
1224 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1225 "PATH", filePathString + inputSuffix, "ERROR",
1226 e.code().value());
Matt Spinlera26f1522021-08-25 15:50:20 -05001227 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001228 }
Matt Spinlera26f1522021-08-25 15:50:20 -05001229
Chris Cain5d66a0a2022-02-09 08:52:10 -06001230 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -06001231 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1232
Chris Cain5d66a0a2022-02-09 08:52:10 -06001233 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -05001234 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
1235
Patrick Williamsd7542c82024-08-16 15:20:28 -04001236 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1237 sensorPath, true);
Matt Spinlera26f1522021-08-25 15:50:20 -05001238
Matt Spinler5901abd2021-09-23 13:50:03 -05001239 if (existingSensors.find(sensorPath) == existingSensors.end())
1240 {
Chris Cain3523cc02024-10-30 17:19:09 -05001241 std::vector<int> occs;
1242 std::vector<std::string> fTypeList = {"all_sensors"};
1243 if (iter->second == "total_power")
1244 {
1245 // Total system power has its own chassis association
1246 fTypeList.push_back("total_power");
1247 }
Chris Cain5d66a0a2022-02-09 08:52:10 -06001248 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Chris Cain3523cc02024-10-30 17:19:09 -05001249 sensorPath, fTypeList);
Matt Spinler5901abd2021-09-23 13:50:03 -05001250 }
1251
Matt Spinlera26f1522021-08-25 15:50:20 -05001252 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001253 }
1254 return;
1255}
1256
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001257void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001258{
1259 for (const auto& [sensorPath, occId] : existingSensors)
1260 {
1261 if (occId == id)
1262 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001263 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001264 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001265
Patrick Williamsd7542c82024-08-16 15:20:28 -04001266 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1267 sensorPath, true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001268 }
1269 }
1270 return;
1271}
1272
Sheldon Bailey373af752022-02-21 15:14:00 -06001273void Manager::setSensorValueToNonFunctional(uint32_t id) const
1274{
1275 for (const auto& [sensorPath, occId] : existingSensors)
1276 {
1277 if (occId == id)
1278 {
1279 dbus::OccDBusSensors::getOccDBus().setValue(
1280 sensorPath, std::numeric_limits<double>::quiet_NaN());
1281
Patrick Williamsd7542c82024-08-16 15:20:28 -04001282 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1283 sensorPath, false);
Sheldon Bailey373af752022-02-21 15:14:00 -06001284 }
1285 }
1286 return;
1287}
1288
Chris Cain5d66a0a2022-02-09 08:52:10 -06001289void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001290{
Chris Caine2d0a432022-03-28 11:08:49 -05001291 static bool tracedError[8] = {0};
1292 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -06001293 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001294
Chris Caine2d0a432022-03-28 11:08:49 -05001295 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +08001296 {
Chris Caine2d0a432022-03-28 11:08:49 -05001297 // Read temperature sensors
1298 readTempSensors(sensorPath, id);
1299
1300 if (occ->isMasterOcc())
1301 {
1302 // Read power sensors
1303 readPowerSensors(sensorPath, id);
1304 }
1305 tracedError[id] = false;
1306 }
1307 else
1308 {
1309 if (!tracedError[id])
1310 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001311 lg2::error(
1312 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}",
1313 "INST", id, "PATH", sensorPath);
Chris Caine2d0a432022-03-28 11:08:49 -05001314 tracedError[id] = true;
1315 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001316 }
1317
1318 return;
1319}
1320#endif
Chris Cain17257672021-10-22 13:41:03 -05001321
1322// Read the altitude from DBus
1323void Manager::readAltitude()
1324{
1325 static bool traceAltitudeErr = true;
1326
1327 utils::PropertyValue altitudeProperty{};
1328 try
1329 {
1330 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1331 ALTITUDE_PROP);
1332 auto sensorVal = std::get<double>(altitudeProperty);
1333 if (sensorVal < 0xFFFF)
1334 {
1335 if (sensorVal < 0)
1336 {
1337 altitude = 0;
1338 }
1339 else
1340 {
1341 // Round to nearest meter
1342 altitude = uint16_t(sensorVal + 0.5);
1343 }
Chris Cain37abe9b2024-10-31 17:20:31 -05001344 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE",
1345 sensorVal, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001346 traceAltitudeErr = true;
1347 }
1348 else
1349 {
1350 if (traceAltitudeErr)
1351 {
1352 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001353 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal);
Chris Cain17257672021-10-22 13:41:03 -05001354 }
1355 }
1356 }
Patrick Williamsaf408082022-07-22 19:26:54 -05001357 catch (const sdbusplus::exception_t& e)
Chris Cain17257672021-10-22 13:41:03 -05001358 {
1359 if (traceAltitudeErr)
1360 {
1361 traceAltitudeErr = false;
Chris Cain37abe9b2024-10-31 17:20:31 -05001362 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what());
Chris Cain17257672021-10-22 13:41:03 -05001363 }
1364 altitude = 0xFFFF; // not available
1365 }
1366}
1367
1368// Callback function when ambient temperature changes
Patrick Williamsaf408082022-07-22 19:26:54 -05001369void Manager::ambientCallback(sdbusplus::message_t& msg)
Chris Cain17257672021-10-22 13:41:03 -05001370{
1371 double currentTemp = 0;
1372 uint8_t truncatedTemp = 0xFF;
1373 std::string msgSensor;
1374 std::map<std::string, std::variant<double>> msgData;
1375 msg.read(msgSensor, msgData);
1376
1377 auto valPropMap = msgData.find(AMBIENT_PROP);
1378 if (valPropMap == msgData.end())
1379 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001380 lg2::debug("ambientCallback: Unknown ambient property changed");
Chris Cain17257672021-10-22 13:41:03 -05001381 return;
1382 }
1383 currentTemp = std::get<double>(valPropMap->second);
1384 if (std::isnan(currentTemp))
1385 {
1386 truncatedTemp = 0xFF;
1387 }
1388 else
1389 {
1390 if (currentTemp < 0)
1391 {
1392 truncatedTemp = 0;
1393 }
1394 else
1395 {
1396 // Round to nearest degree C
1397 truncatedTemp = uint8_t(currentTemp + 0.5);
1398 }
1399 }
1400
1401 // If ambient changes, notify OCCs
1402 if (truncatedTemp != ambient)
1403 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001404 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C",
1405 "OLD", ambient, "NEW", currentTemp);
Chris Cain17257672021-10-22 13:41:03 -05001406
1407 ambient = truncatedTemp;
1408 if (altitude == 0xFFFF)
1409 {
1410 // No altitude yet, try reading again
1411 readAltitude();
1412 }
1413
Chris Cain37abe9b2024-10-31 17:20:31 -05001414 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m",
1415 "TEMP", ambient, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -05001416#ifdef POWER10
1417 // Send ambient and altitude to all OCCs
1418 for (auto& obj : statusObjects)
1419 {
1420 if (obj->occActive())
1421 {
1422 obj->sendAmbient(ambient, altitude);
1423 }
1424 }
1425#endif // POWER10
1426 }
1427}
1428
1429// return the current ambient and altitude readings
1430void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1431 uint16_t& altitudeValue) const
1432{
1433 ambientValid = true;
1434 ambientTemp = ambient;
1435 altitudeValue = altitude;
1436
1437 if (ambient == 0xFF)
1438 {
1439 ambientValid = false;
1440 }
1441}
1442
Chris Caina7b74dc2021-11-10 17:03:43 -06001443#ifdef POWER10
Chris Cain7f89e4d2022-05-09 13:27:45 -05001444// Called when waitForAllOccsTimer expires
1445// After the first OCC goes active, this timer will be started (60 seconds)
Chris Caina7b74dc2021-11-10 17:03:43 -06001446void Manager::occsNotAllRunning()
1447{
Chris Cainf0295f52024-09-12 15:41:14 -05001448 if (resetInProgress)
1449 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001450 lg2::warning(
Chris Cainf0295f52024-09-12 15:41:14 -05001451 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
1452 return;
1453 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001454 if (activeCount != statusObjects.size())
1455 {
1456 // Not all OCCs went active
Chris Cain37abe9b2024-10-31 17:20:31 -05001457 lg2::warning(
1458 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})",
1459 "COUNT", activeCount, "EXP", statusObjects.size());
Chris Cain7f89e4d2022-05-09 13:27:45 -05001460 // Procs may be garded, so may be expected
Chris Caina7b74dc2021-11-10 17:03:43 -06001461 }
1462
Chris Cainf0295f52024-09-12 15:41:14 -05001463 if (resetRequired)
1464 {
1465 initiateOccRequest(resetInstance);
1466
1467 if (!waitForAllOccsTimer->isEnabled())
1468 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001469 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer");
Chris Cainf0295f52024-09-12 15:41:14 -05001470 // restart occ wait timer
1471 waitForAllOccsTimer->restartOnce(60s);
1472 }
1473 }
1474 else
1475 {
1476 validateOccMaster();
1477 }
Chris Caina7b74dc2021-11-10 17:03:43 -06001478}
Chris Cain755af102024-02-27 16:09:51 -06001479
1480#ifdef PLDM
Chris Cainc33171b2024-05-24 16:14:50 -05001481// Called when throttlePldmTraceTimer expires.
Chris Caina19bd422024-05-24 16:39:01 -05001482// If this timer expires, that indicates there are no OCC active sensor PDRs
Chris Cainc33171b2024-05-24 16:14:50 -05001483// found which will trigger pldm traces to be throttled.
1484// The second time this timer expires, a PEL will get created.
1485void Manager::throttlePldmTraceExpired()
Chris Cain755af102024-02-27 16:09:51 -06001486{
Chris Cain7651c062024-05-02 14:14:06 -05001487 if (utils::isHostRunning())
1488 {
Chris Cainc33171b2024-05-24 16:14:50 -05001489 if (!onPldmTimeoutCreatePel)
1490 {
1491 // Throttle traces
1492 pldmHandle->setTraceThrottle(true);
1493 // Restart timer to log a PEL when timer expires
1494 onPldmTimeoutCreatePel = true;
1495 throttlePldmTraceTimer->restartOnce(40min);
1496 }
1497 else
1498 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001499 lg2::error(
Chris Cainc33171b2024-05-24 16:14:50 -05001500 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1501 // Create PEL
1502 createPldmSensorPEL();
1503 }
Chris Cain7651c062024-05-02 14:14:06 -05001504 }
1505 else
1506 {
1507 // Make sure traces are not throttled
1508 pldmHandle->setTraceThrottle(false);
Chris Cain37abe9b2024-10-31 17:20:31 -05001509 lg2::info(
Chris Cainc33171b2024-05-24 16:14:50 -05001510 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
Chris Cain7651c062024-05-02 14:14:06 -05001511 }
Chris Cain4b82f3e2024-04-22 14:44:29 -05001512}
1513
1514void Manager::createPldmSensorPEL()
1515{
1516 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
1517 std::map<std::string, std::string> additionalData;
1518
1519 additionalData.emplace("_PID", std::to_string(getpid()));
1520
Chris Cain37abe9b2024-10-31 17:20:31 -05001521 lg2::info(
1522 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001523
1524 auto& bus = utils::getBus();
1525
1526 try
1527 {
1528 FFDCFiles ffdc;
1529 // Add occ-control journal traces to PEL FFDC
1530 auto occJournalFile =
1531 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);
1532
1533 static constexpr auto loggingObjectPath =
1534 "/xyz/openbmc_project/logging";
1535 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
Patrick Williamsd7542c82024-08-16 15:20:28 -04001536 std::string service =
1537 utils::getService(loggingObjectPath, opLoggingInterface);
1538 auto method =
1539 bus.new_method_call(service.c_str(), loggingObjectPath,
1540 opLoggingInterface, "CreatePELWithFFDCFiles");
Chris Cain4b82f3e2024-04-22 14:44:29 -05001541
Chris Cain1c3349e2024-04-24 14:14:11 -05001542 // Set level to Warning (Predictive).
Chris Cain4b82f3e2024-04-22 14:44:29 -05001543 auto level =
1544 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
1545 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
Chris Cain1c3349e2024-04-24 14:14:11 -05001546 Warning);
Chris Cain4b82f3e2024-04-22 14:44:29 -05001547
1548 method.append(d.path, level, additionalData, ffdc);
1549 bus.call(method);
1550 }
1551 catch (const sdbusplus::exception_t& e)
1552 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001553 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR",
1554 e.what());
Chris Cain4b82f3e2024-04-22 14:44:29 -05001555 }
Chris Cain755af102024-02-27 16:09:51 -06001556}
1557#endif // PLDM
Chris Caina7b74dc2021-11-10 17:03:43 -06001558#endif // POWER10
1559
1560// Verify single master OCC and start presence monitor
1561void Manager::validateOccMaster()
1562{
1563 int masterInstance = -1;
1564 for (auto& obj : statusObjects)
1565 {
Chris Cainbd551de2022-04-26 13:41:16 -05001566 auto instance = obj->getOccInstanceID();
Chris Cainbae4d072022-02-28 09:46:50 -06001567#ifdef POWER10
1568 if (!obj->occActive())
1569 {
1570 if (utils::isHostRunning())
1571 {
Chris Cainbd551de2022-04-26 13:41:16 -05001572 // Check if sensor was queued while waiting for discovery
1573 auto match = queuedActiveState.find(instance);
1574 if (match != queuedActiveState.end())
Chris Cainbae4d072022-02-28 09:46:50 -06001575 {
Chris Cain7f89e4d2022-05-09 13:27:45 -05001576 queuedActiveState.erase(match);
Chris Cain37abe9b2024-10-31 17:20:31 -05001577 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)",
1578 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001579 obj->occActive(true);
1580 }
1581 else
1582 {
1583 // OCC does not appear to be active yet, check active sensor
Patrick Williamsfb0a5c32024-02-28 11:27:00 -06001584#ifdef PLDM
Chris Cainbd551de2022-04-26 13:41:16 -05001585 pldmHandle->checkActiveSensor(instance);
Patrick Williamsfb0a5c32024-02-28 11:27:00 -06001586#endif
Chris Cainbd551de2022-04-26 13:41:16 -05001587 if (obj->occActive())
1588 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001589 lg2::info(
1590 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor",
1591 "INST", instance);
Chris Cainbd551de2022-04-26 13:41:16 -05001592 }
Chris Cainbae4d072022-02-28 09:46:50 -06001593 }
1594 }
1595 else
1596 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001597 lg2::warning(
1598 "validateOccMaster: HOST is not running (OCC{INST})",
1599 "INST", instance);
Chris Cainbae4d072022-02-28 09:46:50 -06001600 return;
1601 }
1602 }
1603#endif // POWER10
1604
Chris Caina7b74dc2021-11-10 17:03:43 -06001605 if (obj->isMasterOcc())
1606 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001607 obj->addPresenceWatchMaster();
1608
Chris Caina7b74dc2021-11-10 17:03:43 -06001609 if (masterInstance == -1)
1610 {
Chris Cainbd551de2022-04-26 13:41:16 -05001611 masterInstance = instance;
Chris Caina7b74dc2021-11-10 17:03:43 -06001612 }
1613 else
1614 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001615 lg2::error(
1616 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})",
1617 "MAST1", masterInstance, "MAST2", instance);
Chris Caina7b74dc2021-11-10 17:03:43 -06001618 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001619 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001620 }
1621 }
1622 }
Chris Cainbae4d072022-02-28 09:46:50 -06001623
Chris Caina7b74dc2021-11-10 17:03:43 -06001624 if (masterInstance < 0)
1625 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001626 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)",
1627 "NUM", statusObjects.size());
Chris Caina7b74dc2021-11-10 17:03:43 -06001628 // request reset
Eddie James9789e712022-05-25 15:43:40 -05001629 statusObjects.front()->deviceError(
1630 Error::Descriptor(PRESENCE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -06001631 }
1632 else
1633 {
Chris Cain37abe9b2024-10-31 17:20:31 -05001634 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs",
1635 "INST", masterInstance, "COUNT", activeCount);
Sheldon Bailey31a2f132022-05-20 11:31:52 -05001636#ifdef POWER10
1637 pmode->updateDbusSafeMode(false);
1638#endif
Chris Caina7b74dc2021-11-10 17:03:43 -06001639 }
1640}
1641
Chris Cain40501a22022-03-14 17:33:27 -05001642void Manager::updatePcapBounds() const
1643{
1644 if (pcap)
1645 {
1646 pcap->updatePcapBounds();
1647 }
1648}
1649
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301650} // namespace occ
1651} // namespace open_power