blob: fa93dade2d169c1607dab002f45457fdf7762814 [file] [log] [blame]
#include "config.h"

#include "occ_manager.hpp"

#include "i2c_occ.hpp"
#include "occ_dbus.hpp"
#include "utils.hpp"

#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/log.hpp>
#include <xyz/openbmc_project/Common/error.hpp>

#include <cerrno>
#include <chrono>
#include <cmath>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <regex>

Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature
// sensor (the sensor has no readable data in that case).
constexpr uint32_t fruTypeNotAvailable{0xFF};

// File-name suffixes appended to a sensor's sysfs prefix (the "tempN_label"
// path with the trailing "label" removed) to reach its sibling files.
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";

// While this file exists, findAndCreateObjects() postpones OCC discovery and
// re-arms the discovery timer instead.
constexpr const char* HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read one formatted value of type T from the start of a file.
 *
 * The stream is configured to throw on failbit, badbit and eofbit, so a
 * missing file, a value that cannot be parsed as T, or hitting end-of-file
 * while extracting are all reported as errors.
 *
 * @tparam T    Default-constructible type extractable with operator>>.
 * @param path  File to read.
 * @return The extracted value.
 * @throws std::system_error (generic_category) carrying the errno observed
 *         when the stream failed, or EIO when the failure did not set errno
 *         (for example a parse error), so the code is never zero.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data{};

    // Start from a clean slate so an errno left behind by an unrelated,
    // earlier call is not reported as the cause of this failure.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Capture errno first, before anything else can overwrite it.
        const auto err = errno;

        // Stream-level failures (e.g. malformed content) do not touch errno;
        // never hand callers a system_error that says "success".
        throw std::system_error((err != 0) ? err : EIO,
                                std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain613dc902022-04-08 09:56:22 -050068 if (!pmode)
69 {
70 // Create the power mode object
71 pmode = std::make_unique<powermode::PowerMode>(
72 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
73 }
74
Chris Cain1718fd82022-02-16 16:39:50 -060075 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050076 {
Chris Cainbae4d072022-02-28 09:46:50 -060077 static bool statusObjCreated = false;
78 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060079 {
Chris Cainbae4d072022-02-28 09:46:50 -060080 // Create the OCCs based on on the /dev/occX devices
81 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060082
Chris Cainbae4d072022-02-28 09:46:50 -060083 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060084 {
Chris Cainbae4d072022-02-28 09:46:50 -060085 // Something changed or no OCCs yet, try again in 10s.
86 // Note on the first pass prevOCCSearch will be empty,
87 // so there will be at least one delay to give things
88 // a chance to settle.
89 prevOCCSearch = occs;
90
91 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -060092 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -060093 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
94 occs.size())
95 .c_str());
96
97 discoverTimer->restartOnce(10s);
98 }
99 else
100 {
101 // All OCCs appear to be available, create status objects
102
103 // createObjects requires OCC0 first.
104 std::sort(occs.begin(), occs.end());
105
106 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600107 std::format(
Chris Cainbae4d072022-02-28 09:46:50 -0600108 "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
109 occs.size())
110 .c_str());
111 for (auto id : occs)
112 {
113 createObjects(std::string(OCC_NAME) + std::to_string(id));
114 }
115 statusObjCreated = true;
Chris Cain6d8f37a2022-04-29 13:46:01 -0500116 waitingForAllOccActiveSensors = true;
Chris Cainc86d80f2023-05-04 15:49:18 -0500117
118 // Find/update the processor path associated with each OCC
119 for (auto& obj : statusObjects)
120 {
121 obj->updateProcAssociation();
122 }
Chris Cainbae4d072022-02-28 09:46:50 -0600123 }
124 }
125
Chris Cain6d8f37a2022-04-29 13:46:01 -0500126 if (statusObjCreated && waitingForAllOccActiveSensors)
Chris Cainbae4d072022-02-28 09:46:50 -0600127 {
128 static bool tracedHostWait = false;
129 if (utils::isHostRunning())
130 {
131 if (tracedHostWait)
132 {
133 log<level::INFO>(
134 "Manager::findAndCreateObjects(): Host is running");
135 tracedHostWait = false;
136 }
Chris Cainbae4d072022-02-28 09:46:50 -0600137 checkAllActiveSensors();
138 }
139 else
140 {
141 if (!tracedHostWait)
142 {
143 log<level::INFO>(
144 "Manager::findAndCreateObjects(): Waiting for host to start");
145 tracedHostWait = true;
146 }
147 discoverTimer->restartOnce(30s);
Chris Cain1718fd82022-02-16 16:39:50 -0600148 }
149 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500150 }
151 else
152 {
Chris Cain1718fd82022-02-16 16:39:50 -0600153 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600154 std::format(
Chris Cain1718fd82022-02-16 16:39:50 -0600155 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
156 HOST_ON_FILE)
157 .c_str());
158 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500159 }
160#endif
161}
162
Chris Cainbae4d072022-02-28 09:46:50 -0600163#ifdef POWER10
164// Check if all occActive sensors are available
165void Manager::checkAllActiveSensors()
166{
167 static bool allActiveSensorAvailable = false;
168 static bool tracedSensorWait = false;
Chris Cain082a6ca2023-03-21 10:27:26 -0500169 static bool waitingForHost = false;
Chris Cainbae4d072022-02-28 09:46:50 -0600170
Chris Cain082a6ca2023-03-21 10:27:26 -0500171 if (open_power::occ::utils::isHostRunning())
Chris Cainbae4d072022-02-28 09:46:50 -0600172 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500173 if (waitingForHost)
Chris Cainbae4d072022-02-28 09:46:50 -0600174 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500175 waitingForHost = false;
176 log<level::INFO>("checkAllActiveSensors(): Host is now running");
177 }
178
179 // Start with the assumption that all are available
180 allActiveSensorAvailable = true;
181 for (auto& obj : statusObjects)
182 {
183 if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
Chris Cainbae4d072022-02-28 09:46:50 -0600184 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500185 auto instance = obj->getOccInstanceID();
186 // Check if sensor was queued while waiting for discovery
187 auto match = queuedActiveState.find(instance);
188 if (match != queuedActiveState.end())
Chris Cainbd551de2022-04-26 13:41:16 -0500189 {
Chris Cain7f89e4d2022-05-09 13:27:45 -0500190 queuedActiveState.erase(match);
Chris Cainbd551de2022-04-26 13:41:16 -0500191 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600192 std::format(
Chris Cain7f89e4d2022-05-09 13:27:45 -0500193 "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
Chris Cainbd551de2022-04-26 13:41:16 -0500194 instance)
195 .c_str());
Chris Cain7f89e4d2022-05-09 13:27:45 -0500196 obj->occActive(true);
Chris Cainbd551de2022-04-26 13:41:16 -0500197 }
Chris Cain7f89e4d2022-05-09 13:27:45 -0500198 else
199 {
200 allActiveSensorAvailable = false;
201 if (!tracedSensorWait)
202 {
203 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600204 std::format(
Chris Cain7f89e4d2022-05-09 13:27:45 -0500205 "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
206 instance)
207 .c_str());
208 tracedSensorWait = true;
209 }
210 pldmHandle->checkActiveSensor(obj->getOccInstanceID());
211 break;
212 }
Chris Cainbd551de2022-04-26 13:41:16 -0500213 }
Chris Cainbae4d072022-02-28 09:46:50 -0600214 }
215 }
Chris Cain082a6ca2023-03-21 10:27:26 -0500216 else
217 {
218 if (!waitingForHost)
219 {
220 waitingForHost = true;
221 log<level::INFO>(
222 "checkAllActiveSensors(): Waiting for host to start");
223 }
224 }
Chris Cainbae4d072022-02-28 09:46:50 -0600225
226 if (allActiveSensorAvailable)
227 {
228 // All sensors were found, disable the discovery timer
Chris Cain7f89e4d2022-05-09 13:27:45 -0500229 if (discoverTimer->isEnabled())
230 {
Chris Cainf55f91a2022-05-27 13:40:15 -0500231 discoverTimer->setEnabled(false);
Chris Cain7f89e4d2022-05-09 13:27:45 -0500232 }
Chris Cainbae4d072022-02-28 09:46:50 -0600233
Chris Cain7f89e4d2022-05-09 13:27:45 -0500234 if (waitingForAllOccActiveSensors)
235 {
236 log<level::INFO>(
237 "checkAllActiveSensors(): OCC Active sensors are available");
238 waitingForAllOccActiveSensors = false;
239 }
240 queuedActiveState.clear();
Chris Cainbae4d072022-02-28 09:46:50 -0600241 tracedSensorWait = false;
242 }
243 else
244 {
245 // Not all sensors were available, so keep waiting
246 if (!tracedSensorWait)
247 {
248 log<level::INFO>(
Chris Cainbd551de2022-04-26 13:41:16 -0500249 "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
Chris Cainbae4d072022-02-28 09:46:50 -0600250 tracedSensorWait = true;
251 }
Chris Cainf55f91a2022-05-27 13:40:15 -0500252 discoverTimer->restartOnce(10s);
Chris Cainbae4d072022-02-28 09:46:50 -0600253 }
254}
255#endif
256
Matt Spinlerd267cec2021-09-01 14:49:19 -0500257std::vector<int> Manager::findOCCsInDev()
258{
259 std::vector<int> occs;
260 std::regex expr{R"(occ(\d+)$)"};
261
262 for (auto& file : fs::directory_iterator("/dev"))
263 {
264 std::smatch match;
265 std::string path{file.path().string()};
266 if (std::regex_search(path, match, expr))
267 {
268 auto num = std::stoi(match[1].str());
269
270 // /dev numbering starts at 1, ours starts at 0.
271 occs.push_back(num - 1);
272 }
273 }
274
275 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530276}
277
Patrick Williamsaf408082022-07-22 19:26:54 -0500278int Manager::cpuCreated(sdbusplus::message_t& msg)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530279{
George Liubcef3b42021-09-10 12:39:02 +0800280 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530281
282 sdbusplus::message::object_path o;
283 msg.read(o);
284 fs::path cpuPath(std::string(std::move(o)));
285
286 auto name = cpuPath.filename().string();
287 auto index = name.find(CPU_NAME);
288 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
289
290 createObjects(name);
291
292 return 0;
293}
294
295void Manager::createObjects(const std::string& occ)
296{
297 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
298
Gunnar Mills94df8c92018-09-14 14:50:03 -0500299 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800300 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600301#ifdef POWER10
302 pmode,
303#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500304 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600305 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530306#ifdef PLDM
307 ,
308 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
309 std::placeholders::_1)
310#endif
311 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530312
Chris Cain40501a22022-03-14 17:33:27 -0500313 // Create the power cap monitor object
314 if (!pcap)
315 {
316 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
317 *statusObjects.back());
318 }
319
Chris Cain36f9cde2021-11-22 11:18:21 -0600320 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530321 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600322 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600323 std::format("Manager::createObjects(): OCC{} is the master",
Chris Cain36f9cde2021-11-22 11:18:21 -0600324 statusObjects.back()->getOccInstanceID())
325 .c_str());
326 _pollTimer->setEnabled(false);
327
Chris Cain78e86012021-03-04 16:15:31 -0600328#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600329 // Set the master OCC on the PowerMode object
330 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600331#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600332 }
333
334 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
335#ifdef POWER10
336 ,
337 pmode
338#endif
339 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530340}
341
Sheldon Bailey373af752022-02-21 15:14:00 -0600342void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530343{
Chris Caina7b74dc2021-11-10 17:03:43 -0600344 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600345 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600346 // OCC went active
347 ++activeCount;
348
349#ifdef POWER10
350 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600351 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600352 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500353 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500354 }
355#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600356
357 if (activeCount == statusObjects.size())
358 {
359#ifdef POWER10
360 // All OCCs are now running
361 if (waitForAllOccsTimer->isEnabled())
362 {
363 // stop occ wait timer
364 waitForAllOccsTimer->setEnabled(false);
365 }
366#endif
367
368 // Verify master OCC and start presence monitor
369 validateOccMaster();
370 }
371
372 // Start poll timer if not already started
373 if (!_pollTimer->isEnabled())
374 {
375 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600376 std::format("Manager: OCCs will be polled every {} seconds",
Chris Cain36f9cde2021-11-22 11:18:21 -0600377 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600378 .c_str());
379
380 // Send poll and start OCC poll timer
381 pollerTimerExpired();
382 }
383 }
384 else
385 {
386 // OCC went away
Chris Cain082a6ca2023-03-21 10:27:26 -0500387 if (activeCount > 0)
388 {
389 --activeCount;
390 }
391 else
392 {
393 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600394 std::format("OCC{} disabled, but currently no active OCCs",
Chris Cain082a6ca2023-03-21 10:27:26 -0500395 instance)
396 .c_str());
397 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600398
399 if (activeCount == 0)
400 {
401 // No OCCs are running
402
403 // Stop OCC poll timer
404 if (_pollTimer->isEnabled())
405 {
406 log<level::INFO>(
407 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
408 _pollTimer->setEnabled(false);
409 }
410
411#ifdef POWER10
412 // stop wait timer
413 if (waitForAllOccsTimer->isEnabled())
414 {
415 waitForAllOccsTimer->setEnabled(false);
416 }
417#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600418 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600419#ifdef READ_OCC_SENSORS
420 // Clear OCC sensors
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500421 setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600422#endif
Chris Caina8857c52021-01-27 11:53:05 -0600423 }
Chris Cainbae4d072022-02-28 09:46:50 -0600424
425#ifdef POWER10
426 if (waitingForAllOccActiveSensors)
427 {
Chris Cain6d8f37a2022-04-29 13:46:01 -0500428 if (utils::isHostRunning())
429 {
430 checkAllActiveSensors();
431 }
Chris Cainbae4d072022-02-28 09:46:50 -0600432 }
433#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530434}
435
436#ifdef I2C_OCC
437void Manager::initStatusObjects()
438{
439 // Make sure we have a valid path string
440 static_assert(sizeof(DEV_PATH) != 0);
441
442 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
443 for (auto& name : deviceNames)
444 {
445 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800446 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530447 auto path = fs::path(OCC_CONTROL_ROOT) / name;
448 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800449 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530450 }
Chris Cain40501a22022-03-14 17:33:27 -0500451 // The first device is master occ
452 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
453 *statusObjects.front());
Chris Cain78e86012021-03-04 16:15:31 -0600454#ifdef POWER10
Chris Cain5d66a0a2022-02-09 08:52:10 -0600455 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
456 powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600457 // Set the master OCC on the PowerMode object
458 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600459#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530460}
461#endif
462
Tom Joseph815f9f52020-07-27 12:12:13 +0530463#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500464void Manager::sbeTimeout(unsigned int instance)
465{
Eddie James2a751d72022-03-04 09:16:12 -0600466 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
467 [instance](const auto& obj) {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500468 return instance == obj->getOccInstanceID();
469 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500470
Eddie Jamescb018da2022-03-05 11:49:37 -0600471 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600472 {
Chris Cainbae4d072022-02-28 09:46:50 -0600473 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600474 std::format("SBE timeout, requesting HRESET (OCC{})", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600475 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500476
Eddie James2a751d72022-03-04 09:16:12 -0600477 setSBEState(instance, SBE_STATE_NOT_USABLE);
478
479 pldmHandle->sendHRESET(instance);
480 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500481}
482
Tom Joseph815f9f52020-07-27 12:12:13 +0530483bool Manager::updateOCCActive(instanceID instance, bool status)
484{
Chris Cain7e374fb2022-04-07 09:47:23 -0500485 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
486 [instance](const auto& obj) {
Patrick Williamsa49c9872023-05-10 07:50:35 -0500487 return instance == obj->getOccInstanceID();
488 });
Chris Cain7e374fb2022-04-07 09:47:23 -0500489
Chris Cain082a6ca2023-03-21 10:27:26 -0500490 const bool hostRunning = open_power::occ::utils::isHostRunning();
Chris Cain7e374fb2022-04-07 09:47:23 -0500491 if (obj != statusObjects.end())
492 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500493 if (!hostRunning && (status == true))
494 {
495 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600496 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500497 "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
498 instance, status)
499 .c_str());
500 (*obj)->setPldmSensorReceived(false);
501 if (!waitingForAllOccActiveSensors)
502 {
503 log<level::INFO>(
504 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
505 waitingForAllOccActiveSensors = true;
506 }
507 discoverTimer->restartOnce(30s);
508 return false;
509 }
510 else
511 {
Patrick Williams48002492024-02-13 21:43:32 -0600512 log<level::INFO>(std::format("updateOCCActive: OCC{} active={}",
Chris Cain082a6ca2023-03-21 10:27:26 -0500513 instance, status)
514 .c_str());
515 (*obj)->setPldmSensorReceived(true);
516 return (*obj)->occActive(status);
517 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500518 }
519 else
520 {
Chris Cain082a6ca2023-03-21 10:27:26 -0500521 if (hostRunning)
522 {
523 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600524 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500525 "updateOCCActive: No status object to update for OCC{} (active={})",
526 instance, status)
527 .c_str());
528 }
529 else
530 {
531 if (status == true)
532 {
533 log<level::WARNING>(
Patrick Williams48002492024-02-13 21:43:32 -0600534 std::format(
Chris Cain082a6ca2023-03-21 10:27:26 -0500535 "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
536 instance, status)
537 .c_str());
538 }
539 }
Chris Cainbd551de2022-04-26 13:41:16 -0500540 if (status == true)
541 {
542 // OCC went active
543 queuedActiveState.insert(instance);
544 }
545 else
546 {
547 auto match = queuedActiveState.find(instance);
548 if (match != queuedActiveState.end())
549 {
550 // OCC was disabled
551 queuedActiveState.erase(match);
552 }
553 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500554 return false;
555 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530556}
Eddie Jamescbad2192021-10-07 09:39:39 -0500557
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500558// Called upon pldm event To set powermode Safe Mode State for system.
559void Manager::updateOccSafeMode(bool safeMode)
560{
561#ifdef POWER10
562 pmode->updateDbusSafeMode(safeMode);
563#endif
Chris Cainc86d80f2023-05-04 15:49:18 -0500564 // Update the processor throttle status on dbus
565 for (auto& obj : statusObjects)
566 {
567 obj->updateThrottle(safeMode, THROTTLED_SAFE);
568 }
Sheldon Bailey31a2f132022-05-20 11:31:52 -0500569}
570
Eddie Jamescbad2192021-10-07 09:39:39 -0500571void Manager::sbeHRESETResult(instanceID instance, bool success)
572{
573 if (success)
574 {
Chris Cainbae4d072022-02-28 09:46:50 -0600575 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600576 std::format("HRESET succeeded (OCC{})", instance).c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500577
578 setSBEState(instance, SBE_STATE_BOOTED);
579
580 return;
581 }
582
583 setSBEState(instance, SBE_STATE_FAILED);
584
585 if (sbeCanDump(instance))
586 {
Chris Cainbae4d072022-02-28 09:46:50 -0600587 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600588 std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
Chris Cainbae4d072022-02-28 09:46:50 -0600589 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500590
591 auto& bus = utils::getBus();
592 uint32_t src6 = instance << 16;
593 uint32_t logId =
594 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
595 src6, "SBE command timeout");
596
597 try
598 {
George Liuf3a4a692021-12-28 13:59:51 +0800599 constexpr auto path = "/org/openpower/dump";
600 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
601 constexpr auto function = "CreateDump";
602
Eddie Jamescbad2192021-10-07 09:39:39 -0500603 std::string service = utils::getService(path, interface);
Patrick Williamsa49c9872023-05-10 07:50:35 -0500604 auto method = bus.new_method_call(service.c_str(), path, interface,
605 function);
Eddie Jamescbad2192021-10-07 09:39:39 -0500606
607 std::map<std::string, std::variant<std::string, uint64_t>>
608 createParams{
609 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
610 uint64_t(logId)},
611 {"com.ibm.Dump.Create.CreateParameters.DumpType",
612 "com.ibm.Dump.Create.DumpType.SBE"},
613 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
614 uint64_t(instance)},
615 };
616
617 method.append(createParams);
618
619 auto response = bus.call(method);
620 }
Patrick Williamsaf408082022-07-22 19:26:54 -0500621 catch (const sdbusplus::exception_t& e)
Eddie Jamescbad2192021-10-07 09:39:39 -0500622 {
623 constexpr auto ERROR_DUMP_DISABLED =
624 "xyz.openbmc_project.Dump.Create.Error.Disabled";
625 if (e.name() == ERROR_DUMP_DISABLED)
626 {
627 log<level::INFO>("Dump is disabled, skipping");
628 }
629 else
630 {
631 log<level::ERR>("Dump failed");
632 }
633 }
634 }
635}
636
637bool Manager::sbeCanDump(unsigned int instance)
638{
639 struct pdbg_target* proc = getPdbgTarget(instance);
640
641 if (!proc)
642 {
643 // allow the dump in the error case
644 return true;
645 }
646
647 try
648 {
649 if (!openpower::phal::sbe::isDumpAllowed(proc))
650 {
651 return false;
652 }
653
654 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
655 {
656 return false;
657 }
658 }
659 catch (openpower::phal::exception::SbeError& e)
660 {
661 log<level::INFO>("Failed to query SBE state");
662 }
663
664 // allow the dump in the error case
665 return true;
666}
667
668void Manager::setSBEState(unsigned int instance, enum sbe_state state)
669{
670 struct pdbg_target* proc = getPdbgTarget(instance);
671
672 if (!proc)
673 {
674 return;
675 }
676
677 try
678 {
679 openpower::phal::sbe::setState(proc, state);
680 }
681 catch (const openpower::phal::exception::SbeError& e)
682 {
683 log<level::ERR>("Failed to set SBE state");
684 }
685}
686
687struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
688{
689 if (!pdbgInitialized)
690 {
691 try
692 {
693 openpower::phal::pdbg::init();
694 pdbgInitialized = true;
695 }
696 catch (const openpower::phal::exception::PdbgError& e)
697 {
698 log<level::ERR>("pdbg initialization failed");
699 return nullptr;
700 }
701 }
702
703 struct pdbg_target* proc = nullptr;
704 pdbg_for_each_class_target("proc", proc)
705 {
706 if (pdbg_target_index(proc) == instance)
707 {
708 return proc;
709 }
710 }
711
712 log<level::ERR>("Failed to get pdbg target");
713 return nullptr;
714}
Tom Joseph815f9f52020-07-27 12:12:13 +0530715#endif
716
Chris Caina8857c52021-01-27 11:53:05 -0600717void Manager::pollerTimerExpired()
718{
Chris Caina8857c52021-01-27 11:53:05 -0600719 if (!_pollTimer)
720 {
721 log<level::ERR>(
722 "Manager::pollerTimerExpired() ERROR: Timer not defined");
723 return;
724 }
725
726 for (auto& obj : statusObjects)
727 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600728 if (!obj->occActive())
729 {
730 // OCC is not running yet
731#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600732 auto id = obj->getOccInstanceID();
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500733 setSensorValueToNaN(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600734#endif
735 continue;
736 }
737
Chris Caina8857c52021-01-27 11:53:05 -0600738 // Read sysfs to force kernel to poll OCC
739 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800740
741#ifdef READ_OCC_SENSORS
742 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600743 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800744#endif
Chris Caina8857c52021-01-27 11:53:05 -0600745 }
746
Chris Caina7b74dc2021-11-10 17:03:43 -0600747 if (activeCount > 0)
748 {
749 // Restart OCC poll timer
750 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
751 }
752 else
753 {
754 // No OCCs running, so poll timer will not be restarted
755 log<level::INFO>(
Patrick Williams48002492024-02-13 21:43:32 -0600756 std::format(
Chris Caina7b74dc2021-11-10 17:03:43 -0600757 "Manager::pollerTimerExpired: poll timer will not be restarted")
758 .c_str());
759 }
Chris Caina8857c52021-01-27 11:53:05 -0600760}
761
Chicago Duanbb895cb2021-06-18 19:37:16 +0800762#ifdef READ_OCC_SENSORS
Chris Cainae157b62024-01-23 16:05:12 -0600763void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800764{
Matt Spinler818cc8d2023-10-23 11:43:39 -0500765 // There may be more than one sensor with the same FRU type
766 // and label so make two passes: the first to read the temps
767 // from sysfs, and the second to put them on D-Bus after
768 // resolving any conflicts.
769 std::map<std::string, double> sensorData;
770
Chicago Duanbb895cb2021-06-18 19:37:16 +0800771 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
772 for (auto& file : fs::directory_iterator(path))
773 {
774 if (!std::regex_search(file.path().string(), expr))
775 {
776 continue;
777 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800778
Matt Spinlera26f1522021-08-25 15:50:20 -0500779 uint32_t labelValue{0};
780
781 try
782 {
783 labelValue = readFile<uint32_t>(file.path());
784 }
785 catch (const std::system_error& e)
786 {
787 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600788 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500789 file.path().string(), e.code().value())
790 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800791 continue;
792 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800793
794 const std::string& tempLabel = "label";
795 const std::string filePathString = file.path().string().substr(
796 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500797
798 uint32_t fruTypeValue{0};
799 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800800 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500801 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
802 }
803 catch (const std::system_error& e)
804 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800805 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600806 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500807 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800808 .c_str());
809 continue;
810 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800811
Patrick Williamsa49c9872023-05-10 07:50:35 -0500812 std::string sensorPath = OCC_SENSORS_ROOT +
813 std::string("/temperature/");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800814
Matt Spinlerace67d82021-10-18 13:41:57 -0500815 std::string dvfsTempPath;
816
Chicago Duanbb895cb2021-06-18 19:37:16 +0800817 if (fruTypeValue == VRMVdd)
818 {
Chris Cainae157b62024-01-23 16:05:12 -0600819 sensorPath.append("vrm_vdd" + std::to_string(occInstance) +
820 "_temp");
Chicago Duanbb895cb2021-06-18 19:37:16 +0800821 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500822 else if (fruTypeValue == processorIoRing)
823 {
Chris Cainae157b62024-01-23 16:05:12 -0600824 sensorPath.append("proc" + std::to_string(occInstance) +
825 "_ioring_temp");
Matt Spinlerace67d82021-10-18 13:41:57 -0500826 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
Chris Cainae157b62024-01-23 16:05:12 -0600827 std::to_string(occInstance) + "_ioring_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500828 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800829 else
830 {
Matt Spinler14d14022021-08-25 15:38:29 -0500831 uint16_t type = (labelValue & 0xFF000000) >> 24;
832 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800833
834 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
835 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500836 if (fruTypeValue == fruTypeNotAvailable)
837 {
838 // Not all DIMM related temps are available to read
839 // (no _input file in this case)
840 continue;
841 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800842 auto iter = dimmTempSensorName.find(fruTypeValue);
843 if (iter == dimmTempSensorName.end())
844 {
George Liub5ca1012021-09-10 12:53:11 +0800845 log<level::ERR>(
Patrick Williams48002492024-02-13 21:43:32 -0600846 std::format(
George Liub5ca1012021-09-10 12:53:11 +0800847 "readTempSensors: Fru type error! fruTypeValue = {}) ",
848 fruTypeValue)
849 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800850 continue;
851 }
852
853 sensorPath.append("dimm" + std::to_string(instanceID) +
854 iter->second);
Matt Spinlerad8f4522023-10-25 11:14:46 -0500855
856 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
857 dimmDVFSSensorName.at(fruTypeValue);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800858 }
859 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
860 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500861 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800862 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500863 // The OCC reports small core temps, of which there are
864 // two per big core. All current P10 systems are in big
865 // core mode, so use a big core name.
866 uint16_t coreNum = instanceID / 2;
867 uint16_t tempNum = instanceID % 2;
Chris Cainae157b62024-01-23 16:05:12 -0600868 sensorPath.append("proc" + std::to_string(occInstance) +
869 "_core" + std::to_string(coreNum) + "_" +
Matt Spinlerace67d82021-10-18 13:41:57 -0500870 std::to_string(tempNum) + "_temp");
871
Chris Cainae157b62024-01-23 16:05:12 -0600872 dvfsTempPath =
873 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
874 std::to_string(occInstance) + "_core_dvfs_temp";
Matt Spinlerace67d82021-10-18 13:41:57 -0500875 }
876 else
877 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800878 continue;
879 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800880 }
881 else
882 {
883 continue;
884 }
885 }
886
Matt Spinlerace67d82021-10-18 13:41:57 -0500887 // The dvfs temp file only needs to be read once per chip per type.
888 if (!dvfsTempPath.empty() &&
889 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
890 {
891 try
892 {
893 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
894
895 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
896 dvfsTempPath, dvfsValue * std::pow(10, -3));
897 }
898 catch (const std::system_error& e)
899 {
900 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600901 std::format(
Matt Spinlerace67d82021-10-18 13:41:57 -0500902 "readTempSensors: Failed reading {}, errno = {}",
903 filePathString + maxSuffix, e.code().value())
904 .c_str());
905 }
906 }
907
Matt Spinlera26f1522021-08-25 15:50:20 -0500908 uint32_t faultValue{0};
909 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800910 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500911 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
912 }
913 catch (const std::system_error& e)
914 {
915 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600916 std::format("readTempSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500917 filePathString + faultSuffix, e.code().value())
918 .c_str());
919 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800920 }
921
Chris Cainae157b62024-01-23 16:05:12 -0600922 double tempValue{0};
923 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
Matt Spinlera26f1522021-08-25 15:50:20 -0500924 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800925 {
Chris Cainae157b62024-01-23 16:05:12 -0600926 tempValue = std::numeric_limits<double>::quiet_NaN();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800927 }
Chris Cainae157b62024-01-23 16:05:12 -0600928 else
Chicago Duanbb895cb2021-06-18 19:37:16 +0800929 {
Chris Cainae157b62024-01-23 16:05:12 -0600930 // Read the temperature
931 try
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500932 {
Chris Cainae157b62024-01-23 16:05:12 -0600933 tempValue = readFile<double>(filePathString + inputSuffix);
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500934 }
Chris Cainae157b62024-01-23 16:05:12 -0600935 catch (const std::system_error& e)
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500936 {
Chris Cainae157b62024-01-23 16:05:12 -0600937 log<level::DEBUG>(
Patrick Williams48002492024-02-13 21:43:32 -0600938 std::format(
Chris Cainae157b62024-01-23 16:05:12 -0600939 "readTempSensors: Failed reading {}, errno = {}",
940 filePathString + inputSuffix, e.code().value())
941 .c_str());
942
943 // if errno == EAGAIN(Resource temporarily unavailable) then set
944 // temp to 0, to avoid using old temp, and affecting FAN
945 // Control.
946 if (e.code().value() == EAGAIN)
947 {
948 tempValue = 0;
949 }
950 // else the errno would be something like
951 // EBADF(Bad file descriptor)
952 // or ENOENT(No such file or directory)
953 else
954 {
955 continue;
956 }
Sheldon Baileycd0940b2022-04-26 14:24:05 -0500957 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500958 }
959
Matt Spinler818cc8d2023-10-23 11:43:39 -0500960 // If this object path already has a value, only overwite
961 // it if the previous one was an NaN or a smaller value.
962 auto existing = sensorData.find(sensorPath);
963 if (existing != sensorData.end())
964 {
Chris Cainae157b62024-01-23 16:05:12 -0600965 // Multiple sensors found for this FRU type
966 if ((std::isnan(existing->second) && (tempValue == 0)) ||
967 ((existing->second == 0) && std::isnan(tempValue)))
968 {
969 // One of the redundant sensors has failed (0xFF/nan), and the
970 // other sensor has no reading (0), so set the FRU to NaN to
971 // force fan increase
972 tempValue = std::numeric_limits<double>::quiet_NaN();
973 existing->second = tempValue;
974 }
Matt Spinler818cc8d2023-10-23 11:43:39 -0500975 if (std::isnan(existing->second) || (tempValue > existing->second))
976 {
977 existing->second = tempValue;
978 }
979 }
980 else
981 {
Chris Cainae157b62024-01-23 16:05:12 -0600982 // First sensor for this FRU type
Matt Spinler818cc8d2023-10-23 11:43:39 -0500983 sensorData[sensorPath] = tempValue;
984 }
985 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500986
Matt Spinler818cc8d2023-10-23 11:43:39 -0500987 // Now publish the values on D-Bus.
988 for (const auto& [objectPath, value] : sensorData)
989 {
990 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
991 value * std::pow(10, -3));
Matt Spinlera26f1522021-08-25 15:50:20 -0500992
Matt Spinler818cc8d2023-10-23 11:43:39 -0500993 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
994 objectPath, !std::isnan(value));
995
996 if (existingSensors.find(objectPath) == existingSensors.end())
Chris Cain6fa848a2022-01-24 14:54:38 -0600997 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600998 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
Matt Spinler818cc8d2023-10-23 11:43:39 -0500999 objectPath);
Chris Cain6fa848a2022-01-24 14:54:38 -06001000 }
1001
Chris Cainae157b62024-01-23 16:05:12 -06001002 existingSensors[objectPath] = occInstance;
Chicago Duanbb895cb2021-06-18 19:37:16 +08001003 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001004}
1005
1006std::optional<std::string>
1007 Manager::getPowerLabelFunctionID(const std::string& value)
1008{
1009 // If the value is "system", then the FunctionID is "system".
1010 if (value == "system")
1011 {
1012 return value;
1013 }
1014
1015 // If the value is not "system", then the label value have 3 numbers, of
1016 // which we only care about the middle one:
1017 // <sensor id>_<function id>_<apss channel>
1018 // eg: The value is "0_10_5" , then the FunctionID is "10".
1019 if (value.find("_") == std::string::npos)
1020 {
1021 return std::nullopt;
1022 }
1023
1024 auto powerLabelValue = value.substr((value.find("_") + 1));
1025
1026 if (powerLabelValue.find("_") == std::string::npos)
1027 {
1028 return std::nullopt;
1029 }
1030
1031 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1032}
1033
// Scan an OCC hwmon directory for powerN_label files and publish the
// matching power readings on D-Bus.
//
// path - hwmon directory to scan
// id   - OCC instance recorded as the owner of every sensor published here
//
// A sensor is skipped when its label or input file cannot be read, when the
// label has no function ID, or when the function ID has no entry in
// powerSensorName.
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    const std::regex labelPattern{"power\\d+_label$"}; // Example: power5_label
    const std::string labelSuffix{"label"};

    for (const auto& entry : fs::directory_iterator(path))
    {
        const std::string labelFile = entry.path().string();
        if (!std::regex_search(labelFile, labelPattern))
        {
            continue;
        }

        std::string labelValue;
        try
        {
            labelValue = readFile<std::string>(labelFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelFile, e.code().value())
                    .c_str());
            continue;
        }

        const auto functionID = getPowerLabelFunctionID(labelValue);
        if (!functionID)
        {
            continue;
        }

        // Only function IDs with a known sensor name are published
        const auto nameEntry = powerSensorName.find(*functionID);
        if (nameEntry == powerSensorName.end())
        {
            continue;
        }
        const std::string sensorPath =
            std::string{OCC_SENSORS_ROOT} + "/power/" + nameEntry->second;

        // Strip the trailing "label" so the other per-sensor file names
        // (e.g. "...input") can be built from the same prefix.
        const std::string filePrefix =
            labelFile.substr(0, labelFile.length() - labelSuffix.length());
        const std::string inputFile = filePrefix + inputSuffix;

        double rawPower{0};
        try
        {
            rawPower = readFile<double>(inputFile);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            inputFile, e.code().value())
                    .c_str());
            continue;
        }

        auto& occDBus = dbus::OccDBusSensors::getOccDBus();

        occDBus.setUnit(sensorPath,
                        "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        // The raw reading is scaled by 10^-6 before being published in Watts
        occDBus.setValue(sensorPath,
                         rawPower * std::pow(10, -3) * std::pow(10, -3));

        occDBus.setOperationalStatus(sensorPath, true);

        // The chassis association is only created the first time a sensor
        // path is seen
        if (existingSensors.find(sensorPath) == existingSensors.end())
        {
            occDBus.setChassisAssociation(sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}
1111
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001112void Manager::setSensorValueToNaN(uint32_t id) const
Chicago Duanbb895cb2021-06-18 19:37:16 +08001113{
1114 for (const auto& [sensorPath, occId] : existingSensors)
1115 {
1116 if (occId == id)
1117 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001118 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +08001119 sensorPath, std::numeric_limits<double>::quiet_NaN());
Sheldon Baileyc8dd4592022-05-12 10:15:14 -05001120
1121 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1122 true);
Chicago Duanbb895cb2021-06-18 19:37:16 +08001123 }
1124 }
1125 return;
1126}
1127
Sheldon Bailey373af752022-02-21 15:14:00 -06001128void Manager::setSensorValueToNonFunctional(uint32_t id) const
1129{
1130 for (const auto& [sensorPath, occId] : existingSensors)
1131 {
1132 if (occId == id)
1133 {
1134 dbus::OccDBusSensors::getOccDBus().setValue(
1135 sensorPath, std::numeric_limits<double>::quiet_NaN());
1136
1137 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1138 false);
1139 }
1140 }
1141 return;
1142}
1143
// Read the sensors of one OCC and publish them on D-Bus.
//
// Temperature sensors are read for every OCC; power sensors are only read
// for the master OCC. When the hwmon path is missing, an error is logged
// once and not again until the path has been seen.
//
// occ - status object of the OCC whose sensors should be read
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    // One "already traced" flag per OCC instance. Instance IDs outside the
    // table share a single fallback flag so that an unexpected ID can never
    // index past the end of the array.
    constexpr uint32_t maxTrackedOccs = 8;
    static bool tracedError[maxTrackedOccs] = {};
    static bool tracedErrorOutOfRange = false;

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();

    bool& alreadyTraced = (id < maxTrackedOccs) ? tracedError[id]
                                                : tracedErrorOutOfRange;

    if (fs::exists(sensorPath))
    {
        // Read temperature sensors
        readTempSensors(sensorPath, id);

        if (occ->isMasterOcc())
        {
            // Read power sensors
            readPowerSensors(sensorPath, id);
        }

        // Path is present again, so re-arm the error trace
        alreadyTraced = false;
        return;
    }

    if (!alreadyTraced)
    {
        log<level::ERR>(
            std::format(
                "Manager::getSensorValues: OCC{} sensor path missing: {}",
                id, sensorPath.c_str())
                .c_str());
        alreadyTraced = true;
    }
}
1177#endif
Chris Cain17257672021-10-22 13:41:03 -05001178
// Read the altitude from DBus and cache it in `altitude`.
//
// Readings below 0xFFFF are rounded to the nearest meter, with negative
// readings stored as 0. Any other reading leaves `altitude` unchanged. A
// D-Bus failure stores 0xFFFF (not available). Failures are only traced once
// until a good reading has been seen again.
void Manager::readAltitude()
{
    static bool traceAltitudeErr = true;

    try
    {
        const utils::PropertyValue altitudeProperty = utils::getProperty(
            ALTITUDE_PATH, ALTITUDE_INTERFACE, ALTITUDE_PROP);
        const auto sensorVal = std::get<double>(altitudeProperty);

        // Written as a negated "<" so a NaN reading is also rejected here
        if (!(sensorVal < 0xFFFF))
        {
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
            return;
        }

        // Round to nearest meter, never below 0
        altitude = (sensorVal < 0) ? static_cast<uint16_t>(0)
                                   : uint16_t(sensorVal + 0.5);

        log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                      sensorVal, altitude)
                              .c_str());
        traceAltitudeErr = true;
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}
1228
// Callback function when ambient temperature changes
//
// Reads the changed properties from the message, converts the ambient
// temperature to a whole number of degrees C and, when that differs from
// the cached value, updates the cache and (on POWER10) sends the ambient
// and altitude data to every active OCC.
//
// msg - properties-changed message: interface name followed by a map of
//       changed property names to values
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    // 0xFF is the value getAmbientData() treats as "ambient not valid"
    constexpr uint8_t ambientNotAvailable = 0xFF;

    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    const auto valPropMap = msgData.find(AMBIENT_PROP);
    if (valPropMap == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }

    const double currentTemp = std::get<double>(valPropMap->second);

    uint8_t truncatedTemp = ambientNotAvailable;
    if (!std::isnan(currentTemp))
    {
        if (currentTemp < 0)
        {
            truncatedTemp = 0;
        }
        else if (currentTemp + 0.5 < ambientNotAvailable)
        {
            // Round to nearest degree C
            truncatedTemp = static_cast<uint8_t>(currentTemp + 0.5);
        }
        // else: the rounded reading does not fit below the reserved 0xFF
        // encoding (this includes +inf). Converting such a value to uint8_t
        // would be undefined behaviour, so it is reported as not available.
    }

    // If ambient changes, notify OCCs
    if (truncatedTemp != ambient)
    {
        log<level::DEBUG>(
            std::format("ambientCallback: Ambient change from {} to {}C",
                        ambient, currentTemp)
                .c_str());

        ambient = truncatedTemp;
        if (altitude == 0xFFFF)
        {
            // No altitude yet, try reading again
            readAltitude();
        }

        log<level::DEBUG>(
            std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
                        altitude)
                .c_str());
#ifdef POWER10
        // Send ambient and altitude to all OCCs
        for (auto& obj : statusObjects)
        {
            if (obj->occActive())
            {
                obj->sendAmbient(ambient, altitude);
            }
        }
#endif // POWER10
    }
}
1293
1294// return the current ambient and altitude readings
1295void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1296 uint16_t& altitudeValue) const
1297{
1298 ambientValid = true;
1299 ambientTemp = ambient;
1300 altitudeValue = altitude;
1301
1302 if (ambient == 0xFF)
1303 {
1304 ambientValid = false;
1305 }
1306}
1307
Chris Caina7b74dc2021-11-10 17:03:43 -06001308#ifdef POWER10
// Called when waitForAllOccsTimer expires
// (the timer is started, for 60 seconds, after the first OCC goes active).
// Warns when the number of active OCCs differs from the number of status
// objects, then validates the master OCC either way.
void Manager::occsNotAllRunning()
{
    const auto expectedCount = statusObjects.size();
    if (activeCount != expectedCount)
    {
        // Not all OCCs went active
        // (procs may be garded, so this may be expected)
        log<level::WARNING>(
            std::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, expectedCount)
                .c_str());
    }

    validateOccMaster();
}
1326#endif // POWER10
1327
// Verify single master OCC and start presence monitor
//
// Walks every status object. On POWER10 an OCC that is not yet active is
// first given a chance to become active (from a queued active state or by
// checking its active sensor); if the host is not running the validation is
// abandoned. Each master OCC gets a presence watch. More than one master, or
// no master at all, is logged as an error and a reset is requested through
// deviceError().
void Manager::validateOccMaster()
{
    int masterInstance = -1;
    for (auto& obj : statusObjects)
    {
        auto instance = obj->getOccInstanceID();
#ifdef POWER10
        if (!obj->occActive())
        {
            if (!utils::isHostRunning())
            {
                log<level::WARNING>(
                    std::format(
                        "validateOccMaster: HOST is not running (OCC{})",
                        instance)
                        .c_str());
                return;
            }

            // Check if sensor was queued while waiting for discovery
            auto match = queuedActiveState.find(instance);
            if (match != queuedActiveState.end())
            {
                queuedActiveState.erase(match);
                log<level::INFO>(
                    std::format(
                        "validateOccMaster: OCC{} is ACTIVE (queued)",
                        instance)
                        .c_str());
                obj->occActive(true);
            }
            else
            {
                // OCC does not appear to be active yet, check active sensor
                pldmHandle->checkActiveSensor(instance);
                if (obj->occActive())
                {
                    log<level::INFO>(
                        std::format(
                            "validateOccMaster: OCC{} is ACTIVE after reading sensor",
                            instance)
                            .c_str());
                }
            }
        }
#endif // POWER10

        if (obj->isMasterOcc())
        {
            obj->addPresenceWatchMaster();

            if (masterInstance == -1)
            {
                masterInstance = instance;
            }
            else
            {
                log<level::ERR>(
                    std::format(
                        "validateOccMaster: Multiple OCC masters! ({} and {})",
                        masterInstance, instance)
                        .c_str());
                // request reset
                obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
            }
        }
    }

    if (masterInstance < 0)
    {
        log<level::ERR>(
            std::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
                        statusObjects.size())
                .c_str());
        // request reset; with no status objects at all there is nothing to
        // request it from, and calling front() on an empty container would
        // be undefined behaviour
        if (!statusObjects.empty())
        {
            statusObjects.front()->deviceError(
                Error::Descriptor(PRESENCE_ERROR_PATH));
        }
    }
    else
    {
        log<level::INFO>(
            std::format("validateOccMaster: OCC{} is master of {} OCCs",
                        masterInstance, activeCount)
                .c_str());
#ifdef POWER10
        pmode->updateDbusSafeMode(false);
#endif
    }
}
1420
Chris Cain40501a22022-03-14 17:33:27 -05001421void Manager::updatePcapBounds() const
1422{
1423 if (pcap)
1424 {
1425 pcap->updatePcapBounds();
1426 }
1427}
1428
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301429} // namespace occ
1430} // namespace open_power