blob: 5386c43f3f765df32338148b8968f2bad742c76d [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature
// sensor (no _input file exists in that case).
constexpr uint32_t fruTypeNotAvailable{0xFF};

// File name suffixes appended to a sensor's sysfs path prefix.
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";

// While this file exists, findAndCreateObjects() postpones OCC discovery.
const char* const HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
// Read one value of type T from the start of the file at 'path' using
// formatted stream extraction.
//
// @param path  File to read.
// @return      The extracted value.
// @throws std::system_error (generic category) if the file cannot be
//         opened or the value cannot be extracted. The error code is
//         errno when it was set, otherwise EIO, so it is never 0.
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;

    // eofbit is intentionally not part of the exception mask: extracting
    // the last token of a file that has no trailing newline sets eofbit
    // even though the value was read successfully. An empty file still
    // fails because the extraction also sets failbit.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data{};

    // Make sure a stale errno from an unrelated call is not reported.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
    }
    catch (const std::exception&)
    {
        // A parse failure does not set errno; fall back to EIO so the
        // caller never sees a "success" error code.
        const auto err = errno;
        throw std::system_error((err != 0) ? err : EIO,
                                std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain613dc902022-04-08 09:56:22 -050068 if (!pmode)
69 {
70 // Create the power mode object
71 pmode = std::make_unique<powermode::PowerMode>(
72 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
73 }
74
Chris Cain1718fd82022-02-16 16:39:50 -060075 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050076 {
Chris Cainbae4d072022-02-28 09:46:50 -060077 static bool statusObjCreated = false;
78 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060079 {
Chris Cainbae4d072022-02-28 09:46:50 -060080 // Create the OCCs based on on the /dev/occX devices
81 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060082
Chris Cainbae4d072022-02-28 09:46:50 -060083 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060084 {
Chris Cainbae4d072022-02-28 09:46:50 -060085 // Something changed or no OCCs yet, try again in 10s.
86 // Note on the first pass prevOCCSearch will be empty,
87 // so there will be at least one delay to give things
88 // a chance to settle.
89 prevOCCSearch = occs;
90
91 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -060092 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -060093 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
94 occs.size())
95 .c_str());
96
97 discoverTimer->restartOnce(10s);
98 }
99 else
100 {
101 // All OCCs appear to be available, create status objects
102
103 // createObjects requires OCC0 first.
104 std::sort(occs.begin(), occs.end());
105
106 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600107 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -0600108 "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
109 occs.size())
110 .c_str());
111 for (auto id : occs)
112 {
113 createObjects(std::string(OCC_NAME) + std::to_string(id));
114 }
115 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500116 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500117
118 // Find/update the processor path associated with each OCC
119 for (auto& obj : statusObjects)
120 {
121 obj->updateProcAssociation();
122 }
Chris Cainbae4d072022-02-28 09:46:50 -0600123 }
124 }
125
Chris Cain6d8f37a2022-04-29 13:46:01 -0500126 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600127 {
128 static bool tracedHostWait = false;
129 if (utils::isHostRunning())
130 {
131 if (tracedHostWait)
132 {
133 log<level::INFO>(
134 "Manager::findAndCreateObjects(): Host is running");
135 tracedHostWait = false;
136 }
Chris Cainbae4d072022-02-28 09:46:50 -0600137 checkAllActiveSensors();
138 }
139 else
140 {
141 if (!tracedHostWait)
142 {
143 log<level::INFO>(
144 "Manager::findAndCreateObjects(): Waiting for host to start");
145 tracedHostWait = true;
146 }
147 discoverTimer->restartOnce(30s);
Chris Cain1718fd82022-02-16 16:39:50 -0600148 }
149 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500150 }
151 else
152 {
Chris Cain1718fd82022-02-16 16:39:50 -0600153 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600154 std::format(
Chris Cain1718fd82022-02-16 16:39:50 -0600155 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
156 HOST_ON_FILE)
157 .c_str());
158 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500159 }
160#endif
161}
162
Chris Cainbae4d072022-02-28 09:46:50 -0600163#ifdef POWER10
164// Check if all occActive sensors are available
165void Manager::checkAllActiveSensors()
166{
167 static bool allActiveSensorAvailable = false;
168 static bool tracedSensorWait = false;
Chris Cain082a6ca2023-03-21 10:27:26 -0500169 static bool waitingForHost = false;
Chris Cainbae4d072022-02-28 09:46:50 -0600170
Chris Cain082a6ca2023-03-21 10:27:26 -0500171 if (open_power::occ::utils::isHostRunning())
Chris Cainbae4d072022-02-28 09:46:50 -0600172 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500173 if (waitingForHost)
Chris Cainbae4d072022-02-28 09:46:50 -0600174 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500175 waitingForHost = false;
176 log<level::INFO>("checkAllActiveSensors(): Host is now running");
177 }
178
179 // Start with the assumption that all are available
180 allActiveSensorAvailable = true;
181 for (auto& obj : statusObjects)
182 {
183 if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
Chris Cainbae4d072022-02-28 09:46:50 -0600184 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500185 auto instance = obj->getOccInstanceID();
186 // Check if sensor was queued while waiting for discovery
187 auto match = queuedActiveState.find(instance);
188 if (match != queuedActiveState.end())
Chris Cainbd551de2022-04-26 13:41:16 -0500189 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500190 queuedActiveState.erase(match);
Chris Cainbd551de2022-04-26 13:41:16 -0500191 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600192 std::format(
Chris Cain7f89e4d2022-05-09 13:27:45 -0500193 "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
Chris Cainbd551de2022-04-26 13:41:16 -0500194 instance)
195 .c_str());
Chris Cain7f89e4d2022-05-09 13:27:45 -0500196 obj->occActive(true);
Chris Cainbd551de2022-04-26 13:41:16 -0500197 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500198 else
199 {
200 allActiveSensorAvailable = false;
201 if (!tracedSensorWait)
202 {
203 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600204 std::format(
Chris Cain7f89e4d2022-05-09 13:27:45 -0500205 "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
206 instance)
207 .c_str());
208 tracedSensorWait = true;
209 }
Patrick Williamsfb0a5c32024-02-28 11:27:00 -0600210#ifdef PLDM
Chris Cain7f89e4d2022-05-09 13:27:45 -0500211 pldmHandle->checkActiveSensor(obj->getOccInstanceID());
Patrick Williamsfb0a5c32024-02-28 11:27:00 -0600212#endif
Chris Cain7f89e4d2022-05-09 13:27:45 -0500213 break;
214 }
Chris Cainbd551de2022-04-26 13:41:16 -0500215 }
Chris Cainbae4d072022-02-28 09:46:50 -0600216 }
217 }
Chris Cain082a6ca2023-03-21 10:27:26 -0500218 else
219 {
220 if (!waitingForHost)
221 {
222 waitingForHost = true;
223 log<level::INFO>(
224 "checkAllActiveSensors(): Waiting for host to start");
225 }
226 }
Chris Cainbae4d072022-02-28 09:46:50 -0600227
228 if (allActiveSensorAvailable)
229 {
230 // All sensors were found, disable the discovery timer
Chris Cain7f89e4d2022-05-09 13:27:45 -0500231 if (discoverTimer->isEnabled())
232 {
Chris Cainf55f91a2022-05-27 13:40:15 -0500233 discoverTimer->setEnabled(false);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500234 }
Chris Cainbae4d072022-02-28 09:46:50 -0600235
Chris Cain7f89e4d2022-05-09 13:27:45 -0500236 if (waitingForAllOccActiveSensors)
237 {
238 log<level::INFO>(
239 "checkAllActiveSensors(): OCC Active sensors are available");
240 waitingForAllOccActiveSensors = false;
241 }
242 queuedActiveState.clear();
Chris Cainbae4d072022-02-28 09:46:50 -0600243 tracedSensorWait = false;
244 }
245 else
246 {
247 // Not all sensors were available, so keep waiting
248 if (!tracedSensorWait)
249 {
250 log<level::INFO>(
Chris Cainbd551de2022-04-26 13:41:16 -0500251 "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
Chris Cainbae4d072022-02-28 09:46:50 -0600252 tracedSensorWait = true;
253 }
Chris Cainf55f91a2022-05-27 13:40:15 -0500254 discoverTimer->restartOnce(10s);
Chris Cainbae4d072022-02-28 09:46:50 -0600255 }
256}
257#endif
258
Matt Spinlerd267cec2021-09-01 14:49:19 -0500259std::vector<int> Manager::findOCCsInDev()
260{
261 std::vector<int> occs;
262 std::regex expr{R"(occ(\d+)$)"};
263
264 for (auto& file : fs::directory_iterator("/dev"))
265 {
266 std::smatch match;
267 std::string path{file.path().string()};
268 if (std::regex_search(path, match, expr))
269 {
270 auto num = std::stoi(match[1].str());
271
272 // /dev numbering starts at 1, ours starts at 0.
273 occs.push_back(num - 1);
274 }
275 }
276
277 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530278}
279
Patrick Williamsaf408082022-07-22 19:26:54 -0500280int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530281{
George Liubcef3b42021-09-10 12:39:02 +0800282 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530283
284 sdbusplus::message::object_path o;
285 msg.read(o);
286 fs::path cpuPath(std::string(std::move(o)));
287
288 auto name = cpuPath.filename().string();
289 auto index = name.find(CPU_NAME);
290 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
291
292 createObjects(name);
293
294 return 0;
295}
296
297void Manager::createObjects(const std::string& occ)
298{
299 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
300
Gunnar Mills94df8c92018-09-14 14:50:03 -0500301 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800302 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600303#ifdef POWER10
304 pmode,
305#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500306 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600307 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530308#ifdef PLDM
309 ,
310 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
311 std::placeholders::_1)
312#endif
313 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530314
Chris Cain40501a22022-03-14 17:33:27 -0500315 // Create the power cap monitor object
316 if (!pcap)
317 {
318 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
319 *statusObjects.back());
320 }
321
Chris Cain36f9cde2021-11-22 11:18:21 -0600322 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530323 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600324 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600325 std::format("Manager::createObjects(): OCC{} is the master",
Chris Cain36f9cde2021-11-22 11:18:21 -0600326 statusObjects.back()->getOccInstanceID())
327 .c_str());
328 _pollTimer->setEnabled(false);
329
Chris Cain78e86012021-03-04 16:15:31 -0600330#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600331 // Set the master OCC on the PowerMode object
332 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600333#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600334 }
335
336 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
337#ifdef POWER10
338 ,
339 pmode
340#endif
341 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530342}
343
Sheldon Bailey373af752022-02-21 15:14:00 -0600344void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530345{
Chris Caina7b74dc2021-11-10 17:03:43 -0600346 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600347 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600348 // OCC went active
349 ++activeCount;
350
351#ifdef POWER10
352 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600353 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600354 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500355 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500356 }
357#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600358
359 if (activeCount == statusObjects.size())
360 {
361#ifdef POWER10
362 // All OCCs are now running
363 if (waitForAllOccsTimer->isEnabled())
364 {
365 // stop occ wait timer
366 waitForAllOccsTimer->setEnabled(false);
367 }
368#endif
369
370 // Verify master OCC and start presence monitor
371 validateOccMaster();
372 }
373
374 // Start poll timer if not already started
375 if (!_pollTimer->isEnabled())
376 {
377 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600378 std::format("Manager: OCCs will be polled every {} seconds",
Chris Cain36f9cde2021-11-22 11:18:21 -0600379 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600380 .c_str());
381
382 // Send poll and start OCC poll timer
383 pollerTimerExpired();
384 }
385 }
386 else
387 {
388 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500389 if (activeCount > 0)
390 {
391 --activeCount;
392 }
393 else
394 {
395 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600396 std::format("OCC{} disabled, but currently no active OCCs",
Chris Cain082a6ca2023-03-21 10:27:26 -0500397 instance)
398 .c_str());
399 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600400
401 if (activeCount == 0)
402 {
403 // No OCCs are running
404
405 // Stop OCC poll timer
406 if (_pollTimer->isEnabled())
407 {
408 log<level::INFO>(
409 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
410 _pollTimer->setEnabled(false);
411 }
412
413#ifdef POWER10
414 // stop wait timer
415 if (waitForAllOccsTimer->isEnabled())
416 {
417 waitForAllOccsTimer->setEnabled(false);
418 }
419#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600420 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600421#ifdef READ_OCC_SENSORS
422 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500423 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600424#endif
Chris Caina8857c52021-01-27 11:53:05 -0600425 }
Chris Cainbae4d072022-02-28 09:46:50 -0600426
427#ifdef POWER10
428 if (waitingForAllOccActiveSensors)
429 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500430 if (utils::isHostRunning())
431 {
432 checkAllActiveSensors();
433 }
Chris Cainbae4d072022-02-28 09:46:50 -0600434 }
435#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530436}
437
438#ifdef I2C_OCC
439void Manager::initStatusObjects()
440{
441 // Make sure we have a valid path string
442 static_assert(sizeof(DEV_PATH) != 0);
443
444 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
445 for (auto& name : deviceNames)
446 {
447 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800448 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530449 auto path = fs::path(OCC_CONTROL_ROOT) / name;
450 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800451 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530452 }
Chris Cain40501a22022-03-14 17:33:27 -0500453 // The first device is master occ
454 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
455 *statusObjects.front());
Chris Cain78e86012021-03-04 16:15:31 -0600456#ifdef POWER10
Chris Cain5d66a0a2022-02-09 08:52:10 -0600457 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
458 powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600459 // Set the master OCC on the PowerMode object
460 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600461#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530462}
463#endif
464
Tom Joseph815f9f52020-07-27 12:12:13 +0530465#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500466void Manager::sbeTimeout(unsigned int instance)
467{
Eddie James2a751d72022-03-04 09:16:12 -0600468 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
469 [instance](const auto& obj) {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500470 return instance == obj->getOccInstanceID();
471 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500472
Eddie Jamescb018da2022-03-05 11:49:37 -0600473 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600474 {
Chris Cainbae4d072022-02-28 09:46:50 -0600475 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600476 std::format("SBE timeout, requesting HRESET (OCC{})", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600477 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500478
Eddie James2a751d72022-03-04 09:16:12 -0600479 setSBEState(instance, SBE_STATE_NOT_USABLE);
480
481 pldmHandle->sendHRESET(instance);
482 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500483}
484
Tom Joseph815f9f52020-07-27 12:12:13 +0530485bool Manager::updateOCCActive(instanceID instance, bool status)
486{
Chris Cain7e374fb2022-04-07 09:47:23 -0500487 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
488 [instance](const auto& obj) {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500489 return instance == obj->getOccInstanceID();
490 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500491
Chris Cain082a6ca2023-03-21 10:27:26 -0500492 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500493 if (obj != statusObjects.end())
494 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500495 if (!hostRunning && (status == true))
496 {
497 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600498 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500499 "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
500 instance, status)
501 .c_str());
502 (*obj)->setPldmSensorReceived(false);
503 if (!waitingForAllOccActiveSensors)
504 {
505 log<level::INFO>(
506 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
507 waitingForAllOccActiveSensors = true;
508 }
509 discoverTimer->restartOnce(30s);
510 return false;
511 }
512 else
513 {
Patrick Williams48002492024-02-13 21:43:32 -0600514 log<level::INFO>(std::format("updateOCCActive: OCC{} active={}",
Chris Cain082a6ca2023-03-21 10:27:26 -0500515 instance, status)
516 .c_str());
517 (*obj)->setPldmSensorReceived(true);
518 return (*obj)->occActive(status);
519 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500520 }
521 else
522 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500523 if (hostRunning)
524 {
525 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600526 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500527 "updateOCCActive: No status object to update for OCC{} (active={})",
528 instance, status)
529 .c_str());
530 }
531 else
532 {
533 if (status == true)
534 {
535 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600536 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500537 "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
538 instance, status)
539 .c_str());
540 }
541 }
Chris Cainbd551de2022-04-26 13:41:16 -0500542 if (status == true)
543 {
544 // OCC went active
545 queuedActiveState.insert(instance);
546 }
547 else
548 {
549 auto match = queuedActiveState.find(instance);
550 if (match != queuedActiveState.end())
551 {
552 // OCC was disabled
553 queuedActiveState.erase(match);
554 }
555 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500556 return false;
557 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530558}
Eddie Jamescbad2192021-10-07 09:39:39 -0500559
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500560// Called upon pldm event To set powermode Safe Mode State for system.
561void Manager::updateOccSafeMode(bool safeMode)
562{
563#ifdef POWER10
564 pmode->updateDbusSafeMode(safeMode);
565#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500566 // Update the processor throttle status on dbus
567 for (auto& obj : statusObjects)
568 {
569 obj->updateThrottle(safeMode, THROTTLED_SAFE);
570 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500571}
572
Eddie Jamescbad2192021-10-07 09:39:39 -0500573void Manager::sbeHRESETResult(instanceID instance, bool success)
574{
575 if (success)
576 {
Chris Cainbae4d072022-02-28 09:46:50 -0600577 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600578 std::format("HRESET succeeded (OCC{})", instance).c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500579
580 setSBEState(instance, SBE_STATE_BOOTED);
581
582 return;
583 }
584
585 setSBEState(instance, SBE_STATE_FAILED);
586
587 if (sbeCanDump(instance))
588 {
Chris Cainbae4d072022-02-28 09:46:50 -0600589 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600590 std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600591 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500592
593 auto& bus = utils::getBus();
594 uint32_t src6 = instance << 16;
595 uint32_t logId =
596 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
597 src6, "SBE command timeout");
598
599 try
600 {
George Liuf3a4a692021-12-28 13:59:51 +0800601 constexpr auto path = "/org/openpower/dump";
602 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
603 constexpr auto function = "CreateDump";
604
Eddie Jamescbad2192021-10-07 09:39:39 -0500605 std::string service = utils::getService(path, interface);
Patrick Williamsa49c9872023-05-10 07:50:35 -0500606 auto method = bus.new_method_call(service.c_str(), path, interface,
607 function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500608
609 std::map<std::string, std::variant<std::string, uint64_t>>
610 createParams{
611 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
612 uint64_t(logId)},
613 {"com.ibm.Dump.Create.CreateParameters.DumpType",
614 "com.ibm.Dump.Create.DumpType.SBE"},
615 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
616 uint64_t(instance)},
617 };
618
619 method.append(createParams);
620
621 auto response = bus.call(method);
622 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500623 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500624 {
625 constexpr auto ERROR_DUMP_DISABLED =
626 "xyz.openbmc_project.Dump.Create.Error.Disabled";
627 if (e.name() == ERROR_DUMP_DISABLED)
628 {
629 log<level::INFO>("Dump is disabled, skipping");
630 }
631 else
632 {
633 log<level::ERR>("Dump failed");
634 }
635 }
636 }
637}
638
639bool Manager::sbeCanDump(unsigned int instance)
640{
641 struct pdbg_target* proc = getPdbgTarget(instance);
642
643 if (!proc)
644 {
645 // allow the dump in the error case
646 return true;
647 }
648
649 try
650 {
651 if (!openpower::phal::sbe::isDumpAllowed(proc))
652 {
653 return false;
654 }
655
656 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
657 {
658 return false;
659 }
660 }
661 catch (openpower::phal::exception::SbeError& e)
662 {
663 log<level::INFO>("Failed to query SBE state");
664 }
665
666 // allow the dump in the error case
667 return true;
668}
669
670void Manager::setSBEState(unsigned int instance, enum sbe_state state)
671{
672 struct pdbg_target* proc = getPdbgTarget(instance);
673
674 if (!proc)
675 {
676 return;
677 }
678
679 try
680 {
681 openpower::phal::sbe::setState(proc, state);
682 }
683 catch (const openpower::phal::exception::SbeError& e)
684 {
685 log<level::ERR>("Failed to set SBE state");
686 }
687}
688
689struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
690{
691 if (!pdbgInitialized)
692 {
693 try
694 {
695 openpower::phal::pdbg::init();
696 pdbgInitialized = true;
697 }
698 catch (const openpower::phal::exception::PdbgError& e)
699 {
700 log<level::ERR>("pdbg initialization failed");
701 return nullptr;
702 }
703 }
704
705 struct pdbg_target* proc = nullptr;
706 pdbg_for_each_class_target("proc", proc)
707 {
708 if (pdbg_target_index(proc) == instance)
709 {
710 return proc;
711 }
712 }
713
714 log<level::ERR>("Failed to get pdbg target");
715 return nullptr;
716}
Tom Joseph815f9f52020-07-27 12:12:13 +0530717#endif
718
Chris Caina8857c52021-01-27 11:53:05 -0600719void Manager::pollerTimerExpired()
720{
Chris Caina8857c52021-01-27 11:53:05 -0600721 if (!_pollTimer)
722 {
723 log<level::ERR>(
724 "Manager::pollerTimerExpired() ERROR: Timer not defined");
725 return;
726 }
727
728 for (auto& obj : statusObjects)
729 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600730 if (!obj->occActive())
731 {
732 // OCC is not running yet
733#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600734 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500735 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600736#endif
737 continue;
738 }
739
Chris Caina8857c52021-01-27 11:53:05 -0600740 // Read sysfs to force kernel to poll OCC
741 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800742
743#ifdef READ_OCC_SENSORS
744 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600745 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800746#endif
Chris Caina8857c52021-01-27 11:53:05 -0600747 }
748
Chris Caina7b74dc2021-11-10 17:03:43 -0600749 if (activeCount > 0)
750 {
751 // Restart OCC poll timer
752 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
753 }
754 else
755 {
756 // No OCCs running, so poll timer will not be restarted
757 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600758 std::format(
Chris Caina7b74dc2021-11-10 17:03:43 -0600759 "Manager::pollerTimerExpired: poll timer will not be restarted")
760 .c_str());
761 }
Chris Caina8857c52021-01-27 11:53:05 -0600762}
763
Chicago Duanbb895cb2021-06-18 19:37:16 +0800764#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600765void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800766{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500767 // There may be more than one sensor with the same FRU type
768 // and label so make two passes: the first to read the temps
769 // from sysfs, and the second to put them on D-Bus after
770 // resolving any conflicts.
771 std::map<std::string, double> sensorData;
772
Chicago Duanbb895cb2021-06-18 19:37:16 +0800773 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
774 for (auto& file : fs::directory_iterator(path))
775 {
776 if (!std::regex_search(file.path().string(), expr))
777 {
778 continue;
779 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800780
Matt Spinlera26f1522021-08-25 15:50:20 -0500781 uint32_t labelValue{0};
782
783 try
784 {
785 labelValue = readFile<uint32_t>(file.path());
786 }
787 catch (const std::system_error& e)
788 {
789 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600790 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500791 file.path().string(), e.code().value())
792 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800793 continue;
794 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800795
796 const std::string& tempLabel = "label";
797 const std::string filePathString = file.path().string().substr(
798 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500799
800 uint32_t fruTypeValue{0};
801 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800802 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500803 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
804 }
805 catch (const std::system_error& e)
806 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800807 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600808 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500809 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800810 .c_str());
811 continue;
812 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800813
Patrick Williamsa49c9872023-05-10 07:50:35 -0500814 std::string sensorPath = OCC_SENSORS_ROOT +
815 std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800816
Matt Spinlerace67d82021-10-18 13:41:57 -0500817 std::string dvfsTempPath;
818
Chicago Duanbb895cb2021-06-18 19:37:16 +0800819 if (fruTypeValue == VRMVdd)
820 {
Chris Cainae157b62024-01-23 16:05:12 -0600821 sensorPath.append("vrm_vdd" + std::to_string(occInstance) +
822 "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800823 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500824 else if (fruTypeValue == processorIoRing)
825 {
Chris Cainae157b62024-01-23 16:05:12 -0600826 sensorPath.append("proc" + std::to_string(occInstance) +
827 "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500828 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600829 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500830 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800831 else
832 {
Matt Spinler14d14022021-08-25 15:38:29 -0500833 uint16_t type = (labelValue & 0xFF000000) >> 24;
834 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800835
836 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
837 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500838 if (fruTypeValue == fruTypeNotAvailable)
839 {
840 // Not all DIMM related temps are available to read
841 // (no _input file in this case)
842 continue;
843 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800844 auto iter = dimmTempSensorName.find(fruTypeValue);
845 if (iter == dimmTempSensorName.end())
846 {
George Liub5ca1012021-09-10 12:53:11 +0800847 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600848 std::format(
George Liub5ca1012021-09-10 12:53:11 +0800849 "readTempSensors: Fru type error! fruTypeValue = {}) ",
850 fruTypeValue)
851 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800852 continue;
853 }
854
855 sensorPath.append("dimm" + std::to_string(instanceID) +
856 iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500857
858 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
859 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800860 }
861 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
862 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500863 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800864 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500865 // The OCC reports small core temps, of which there are
866 // two per big core. All current P10 systems are in big
867 // core mode, so use a big core name.
868 uint16_t coreNum = instanceID / 2;
869 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600870 sensorPath.append("proc" + std::to_string(occInstance) +
871 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500872 std::to_string(tempNum) + "_temp");
873
Chris Cainae157b62024-01-23 16:05:12 -0600874 dvfsTempPath =
875 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
876 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500877 }
878 else
879 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800880 continue;
881 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800882 }
883 else
884 {
885 continue;
886 }
887 }
888
Matt Spinlerace67d82021-10-18 13:41:57 -0500889 // The dvfs temp file only needs to be read once per chip per type.
890 if (!dvfsTempPath.empty() &&
891 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
892 {
893 try
894 {
895 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
896
897 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
898 dvfsTempPath, dvfsValue * std::pow(10, -3));
899 }
900 catch (const std::system_error& e)
901 {
902 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600903 std::format(
Matt Spinlerace67d82021-10-18 13:41:57 -0500904 "readTempSensors: Failed reading {}, errno = {}",
905 filePathString + maxSuffix, e.code().value())
906 .c_str());
907 }
908 }
909
Matt Spinlera26f1522021-08-25 15:50:20 -0500910 uint32_t faultValue{0};
911 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800912 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500913 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
914 }
915 catch (const std::system_error& e)
916 {
917 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600918 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500919 filePathString + faultSuffix, e.code().value())
920 .c_str());
921 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800922 }
923
Chris Cainae157b62024-01-23 16:05:12 -0600924 double tempValue{0};
925 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -0500926 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800927 {
Chris Cainae157b62024-01-23 16:05:12 -0600928 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800929 }
Chris Cainae157b62024-01-23 16:05:12 -0600930 else
Chicago Duanbb895cb2021-06-18 19:37:16 +0800931 {
Chris Cainae157b62024-01-23 16:05:12 -0600932 // Read the temperature
933 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500934 {
Chris Cainae157b62024-01-23 16:05:12 -0600935 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500936 }
Chris Cainae157b62024-01-23 16:05:12 -0600937 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500938 {
Chris Cainae157b62024-01-23 16:05:12 -0600939 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600940 std::format(
Chris Cainae157b62024-01-23 16:05:12 -0600941 "readTempSensors: Failed reading {}, errno = {}",
942 filePathString + inputSuffix, e.code().value())
943 .c_str());
944
945 // if errno == EAGAIN(Resource temporarily unavailable) then set
946 // temp to 0, to avoid using old temp, and affecting FAN
947 // Control.
948 if (e.code().value() == EAGAIN)
949 {
950 tempValue = 0;
951 }
952 // else the errno would be something like
953 // EBADF(Bad file descriptor)
954 // or ENOENT(No such file or directory)
955 else
956 {
957 continue;
958 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500959 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500960 }
961
Matt Spinler818cc8d2023-10-23 11:43:39 -0500962 // If this object path already has a value, only overwite
963 // it if the previous one was an NaN or a smaller value.
964 auto existing = sensorData.find(sensorPath);
965 if (existing != sensorData.end())
966 {
Chris Cainae157b62024-01-23 16:05:12 -0600967 // Multiple sensors found for this FRU type
968 if ((std::isnan(existing->second) && (tempValue == 0)) ||
969 ((existing->second == 0) && std::isnan(tempValue)))
970 {
971 // One of the redundant sensors has failed (0xFF/nan), and the
972 // other sensor has no reading (0), so set the FRU to NaN to
973 // force fan increase
974 tempValue = std::numeric_limits<double>::quiet_NaN();
975 existing->second = tempValue;
976 }
Matt Spinler818cc8d2023-10-23 11:43:39 -0500977 if (std::isnan(existing->second) || (tempValue > existing->second))
978 {
979 existing->second = tempValue;
980 }
981 }
982 else
983 {
Chris Cainae157b62024-01-23 16:05:12 -0600984 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -0500985 sensorData[sensorPath] = tempValue;
986 }
987 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500988
Matt Spinler818cc8d2023-10-23 11:43:39 -0500989 // Now publish the values on D-Bus.
990 for (const auto& [objectPath, value] : sensorData)
991 {
992 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
993 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -0500994
Matt Spinler818cc8d2023-10-23 11:43:39 -0500995 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
996 objectPath, !std::isnan(value));
997
998 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -0600999 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001000 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Matt Spinler818cc8d2023-10-23 11:43:39 -05001001 objectPath);
Chris Cain6fa848a2022-01-24 14:54:38 -06001002 }
1003
Chris Cainae157b62024-01-23 16:05:12 -06001004 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001005 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001006}
1007
1008std::optional<std::string>
1009 Manager::getPowerLabelFunctionID(const std::string& value)
1010{
1011 // If the value is "system", then the FunctionID is "system".
1012 if (value == "system")
1013 {
1014 return value;
1015 }
1016
1017 // If the value is not "system", then the label value have 3 numbers, of
1018 // which we only care about the middle one:
1019 // <sensor id>_<function id>_<apss channel>
1020 // eg: The value is "0_10_5" , then the FunctionID is "10".
1021 if (value.find("_") == std::string::npos)
1022 {
1023 return std::nullopt;
1024 }
1025
1026 auto powerLabelValue = value.substr((value.find("_") + 1));
1027
1028 if (powerLabelValue.find("_") == std::string::npos)
1029 {
1030 return std::nullopt;
1031 }
1032
1033 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1034}
1035
// Publish the power sensors found in an OCC sensor directory on D-Bus.
//
// Each file in 'path' whose name ends in powerN_label is read. When the
// function ID in the label is present in powerSensorName, the matching
// powerN_input file is read and its value, unit and operational status are
// set on the object OCC_SENSORS_ROOT/power/<sensor name>. Sensors seen for
// the first time also get a chassis association, and every published sensor
// is recorded in existingSensors as belonging to OCC 'id'.
//
// Files that cannot be read, and labels that cannot be parsed or are not in
// powerSensorName, are skipped.
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    const std::regex labelPattern{"power\\d+_label$"}; // Example: power5_label
    const std::string labelSuffix{"label"};

    for (auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelPattern))
        {
            continue;
        }

        std::string labelValue;
        try
        {
            labelValue = readFile<std::string>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        const auto functionID = getPowerLabelFunctionID(labelValue);
        if (!functionID.has_value())
        {
            continue;
        }

        const auto nameEntry = powerSensorName.find(*functionID);
        if (nameEntry == powerSensorName.end())
        {
            continue;
        }

        std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
        sensorPath.append(nameEntry->second);

        // Strip the trailing "label" so the other per-sensor suffixes can be
        // appended to the same "powerN_" stem.
        const std::string sensorFileStem =
            labelFile.substr(0, labelFile.length() - labelSuffix.length());
        const std::string inputFile = sensorFileStem + inputSuffix;

        double rawValue{0};
        try
        {
            rawValue = readFile<double>(inputFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            inputFile, e.code().value())
                    .c_str());
            continue;
        }

        auto&& occDBus = dbus::OccDBusSensors::getOccDBus();

        occDBus.setUnit(sensorPath,
                        "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        // The raw reading is scaled down by 10^-6 before being published.
        occDBus.setValue(sensorPath,
                         rawValue * std::pow(10, -3) * std::pow(10, -3));

        occDBus.setOperationalStatus(sensorPath, true);

        // Only sensors not yet known need their chassis association created.
        if (existingSensors.count(sensorPath) == 0)
        {
            occDBus.setChassisAssociation(sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}
1113
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001114void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001115{
1116 for (const auto& [sensorPath, occId] : existingSensors)
1117 {
1118 if (occId == id)
1119 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001120 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001121 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001122
1123 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1124 true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001125 }
1126 }
1127 return;
1128}
1129
Sheldon Bailey373af752022-02-21 15:14:00 -06001130void Manager::setSensorValueToNonFunctional(uint32_t id) const
1131{
1132 for (const auto& [sensorPath, occId] : existingSensors)
1133 {
1134 if (occId == id)
1135 {
1136 dbus::OccDBusSensors::getOccDBus().setValue(
1137 sensorPath, std::numeric_limits<double>::quiet_NaN());
1138
1139 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1140 false);
1141 }
1142 }
1143 return;
1144}
1145
// Read the sensors of one OCC and publish them.
//
// When the OCC's hwmon path exists, the temperature sensors are read, and
// the power sensors as well if this OCC is the master. When the path is
// missing, an error is logged once per OCC instance; it is logged again only
// after the path has been seen to exist in between.
//
// 'occ' is dereferenced unconditionally and must not be null.
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    // "Missing path already reported" flag per OCC instance. Keyed by the
    // instance ID rather than stored in a fixed-size array so that any ID
    // value is handled without indexing out of bounds. operator[] creates a
    // new entry as false, i.e. "not reported yet".
    static std::map<uint32_t, bool> tracedError;

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();

    if (fs::exists(sensorPath))
    {
        // Read temperature sensors
        readTempSensors(sensorPath, id);

        if (occ->isMasterOcc())
        {
            // Read power sensors
            readPowerSensors(sensorPath, id);
        }
        tracedError[id] = false;
    }
    else if (!tracedError[id])
    {
        log<level::ERR>(
            std::format(
                "Manager::getSensorValues: OCC{} sensor path missing: {}", id,
                sensorPath.c_str())
                .c_str());
        tracedError[id] = true;
    }
}
1179#endif
Chris Cain17257672021-10-22 13:41:03 -05001180
// Read the altitude from DBus
//
// A reading below 0xFFFF is stored in 'altitude' as whole meters, with
// negative readings stored as 0. Any other reading leaves 'altitude'
// untouched. If the D-Bus read throws, 'altitude' is set to 0xFFFF
// (not available).
//
// Failures are traced only once in a row: the trace is re-armed by the next
// successful reading.
void Manager::readAltitude()
{
    static bool traceAltitudeErr = true;

    try
    {
        const utils::PropertyValue altitudeProperty =
            utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
                               ALTITUDE_PROP);
        const auto sensorVal = std::get<double>(altitudeProperty);

        // Written as a negated '<' so that a NaN reading is rejected too.
        if (!(sensorVal < 0xFFFF))
        {
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
            return;
        }

        // Clamp negatives to 0, otherwise round to nearest meter
        altitude = (sensorVal < 0) ? 0
                                   : static_cast<uint16_t>(sensorVal + 0.5);

        log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                      sensorVal, altitude)
                              .c_str());
        traceAltitudeErr = true;
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}
// Callback function when ambient temperature changes
//
// Reads the changed properties from 'msg'. If AMBIENT_PROP is not among
// them, nothing is done. Otherwise the reading is reduced to one byte:
//   - NaN, or a reading that rounds to 0xFF or more, becomes 0xFF, the
//     value getAmbientData() reports as not valid;
//   - a negative reading becomes 0;
//   - anything else is rounded to the nearest degree C.
// When the byte differs from the stored 'ambient', it is stored, the
// altitude is re-read if it is still 0xFFFF, and (POWER10 builds only) the
// ambient and altitude are sent to every active OCC.
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    constexpr uint8_t ambientNotValid = 0xFF;

    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    auto valPropMap = msgData.find(AMBIENT_PROP);
    if (valPropMap == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }

    const double currentTemp = std::get<double>(valPropMap->second);
    // Round to nearest degree C
    const double roundedTemp = currentTemp + 0.5;

    uint8_t truncatedTemp = ambientNotValid;
    if (std::isnan(currentTemp) || (roundedTemp >= ambientNotValid))
    {
        // Converting a double that does not fit into uint8_t is undefined
        // behaviour, so out-of-range readings are rejected before the cast.
        truncatedTemp = ambientNotValid;
    }
    else if (currentTemp < 0)
    {
        truncatedTemp = 0;
    }
    else
    {
        truncatedTemp = static_cast<uint8_t>(roundedTemp);
    }

    // If ambient changes, notify OCCs
    if (truncatedTemp != ambient)
    {
        log<level::DEBUG>(
            std::format("ambientCallback: Ambient change from {} to {}C",
                        ambient, currentTemp)
                .c_str());

        ambient = truncatedTemp;
        if (altitude == 0xFFFF)
        {
            // No altitude yet, try reading again
            readAltitude();
        }

        log<level::DEBUG>(
            std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
                        altitude)
                .c_str());
#ifdef POWER10
        // Send ambient and altitude to all OCCs
        for (auto& obj : statusObjects)
        {
            if (obj->occActive())
            {
                obj->sendAmbient(ambient, altitude);
            }
        }
#endif // POWER10
    }
}
1295
1296// return the current ambient and altitude readings
1297void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1298 uint16_t& altitudeValue) const
1299{
1300 ambientValid = true;
1301 ambientTemp = ambient;
1302 altitudeValue = altitude;
1303
1304 if (ambient == 0xFF)
1305 {
1306 ambientValid = false;
1307 }
1308}
1309
Chris Caina7b74dc2021-11-10 17:03:43 -06001310#ifdef POWER10
// Called when waitForAllOccsTimer expires
// After the first OCC goes active, this timer will be started (60 seconds)
//
// Logs a warning when the number of active OCCs differs from the number of
// status objects, then validates the OCC master in either case.
void Manager::occsNotAllRunning()
{
    const auto expectedCount = statusObjects.size();
    const bool allActive = (activeCount == expectedCount);

    if (!allActive)
    {
        // Not all OCCs went active
        // Procs may be garded, so may be expected
        log<level::WARNING>(
            std::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, statusObjects.size())
                .c_str());
    }

    validateOccMaster();
}
1328#endif // POWER10
1329
// Verify single master OCC and start presence monitor
//
// Walks all status objects. In POWER10 builds, an OCC that is not yet marked
// active is first given a chance to become active: a queued active state is
// applied if one exists, otherwise (PLDM builds) its active sensor is
// checked. If the host is not running the whole validation is abandoned.
//
// Every OCC that reports itself as master gets a presence watch added. A
// second master, or no master at all, is logged as an error and reported
// through deviceError() with PRESENCE_ERROR_PATH. With exactly one master
// the result is logged and (POWER10 builds) D-Bus safe mode is cleared.
void Manager::validateOccMaster()
{
    int masterInstance = -1;
    for (auto& obj : statusObjects)
    {
        auto instance = obj->getOccInstanceID();
#ifdef POWER10
        if (!obj->occActive())
        {
            if (utils::isHostRunning())
            {
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    log<level::INFO>(
                        std::format(
                            "validateOccMaster: OCC{} is ACTIVE (queued)",
                            instance)
                            .c_str());
                    obj->occActive(true);
                }
                else
                {
                    // OCC does not appear to be active yet, check active sensor
#ifdef PLDM
                    pldmHandle->checkActiveSensor(instance);
#endif
                    if (obj->occActive())
                    {
                        log<level::INFO>(
                            std::format(
                                "validateOccMaster: OCC{} is ACTIVE after reading sensor",
                                instance)
                                .c_str());
                    }
                }
            }
            else
            {
                log<level::WARNING>(
                    std::format(
                        "validateOccMaster: HOST is not running (OCC{})",
                        instance)
                        .c_str());
                return;
            }
        }
#endif // POWER10

        if (obj->isMasterOcc())
        {
            obj->addPresenceWatchMaster();

            if (masterInstance == -1)
            {
                masterInstance = instance;
            }
            else
            {
                log<level::ERR>(
                    std::format(
                        "validateOccMaster: Multiple OCC masters! ({} and {})",
                        masterInstance, instance)
                        .c_str());
                // request reset
                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
            }
        }
    }

    if (masterInstance < 0)
    {
        log<level::ERR>(
            std::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
                        statusObjects.size())
                .c_str());
        // request reset
        // front() on an empty container is undefined behaviour, so the error
        // can only be raised when at least one status object exists.
        if (!statusObjects.empty())
        {
            statusObjects.front()->deviceError(
                Error::Descriptor(PRESENCE_ERROR_PATH));
        }
    }
    else
    {
        log<level::INFO>(
            std::format("validateOccMaster: OCC{} is master of {} OCCs",
                        masterInstance, activeCount)
                .c_str());
#ifdef POWER10
        pmode->updateDbusSafeMode(false);
#endif
    }
}
1424
Chris Cain40501a22022-03-14 17:33:27 -05001425void Manager::updatePcapBounds() const
1426{
1427 if (pcap)
1428 {
1429 pcap->updatePcapBounds();
1430 }
1431}
1432
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301433} // namespace occ
1434} // namespace open_power