blob: 34a815afc07974299e6408211728ac894721fb3c [file] [log] [blame]
#include "config.h"

#include "occ_manager.hpp"

#include "i2c_occ.hpp"
#include "occ_dbus.hpp"
#include "utils.hpp"

#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/log.hpp>
#include <xyz/openbmc_project/Common/error.hpp>

#include <cerrno>
#include <chrono>
#include <cmath>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <regex>
#include <system_error>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature
// (no matching _input file exists in that case).
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// Attribute name suffixes appended to a sensor's file prefix (the label file
// name with the trailing "label" removed) to locate its sibling files.
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";

// While this file exists, findAndCreateObjects() postpones OCC discovery.
const char* const HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read a single whitespace-delimited value of type T from a file.
 *
 * @tparam T - Type to extract with operator>>
 * @param[in] path - File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error (generic category) if the file cannot be opened
 *         or a value cannot be extracted.  The code is errno when the failure
 *         set one, otherwise EIO, so it is never 0.
 */
template <typename T>
T readFile(const std::string& path)
{
    // Clear any stale value so a failure below is not blamed on an unrelated
    // earlier call.
    errno = 0;

    std::ifstream ifs{path};
    T data{};

    // Reaching end-of-file while extracting is not an error by itself (the
    // file may simply lack a trailing newline); only a failed open or a
    // failed extraction is reported.
    if (!ifs.is_open() || !(ifs >> data))
    {
        // Parse failures (empty file, unexpected text) do not set errno.
        const int err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain613dc902022-04-08 09:56:22 -050068 if (!pmode)
69 {
70 // Create the power mode object
71 pmode = std::make_unique<powermode::PowerMode>(
72 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
73 }
74
Chris Cain1718fd82022-02-16 16:39:50 -060075 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050076 {
Chris Cainbae4d072022-02-28 09:46:50 -060077 static bool statusObjCreated = false;
78 if (!statusObjCreated)
Chris Cain1718fd82022-02-16 16:39:50 -060079 {
Chris Cainbae4d072022-02-28 09:46:50 -060080 // Create the OCCs based on on the /dev/occX devices
81 auto occs = findOCCsInDev();
Chris Cain1718fd82022-02-16 16:39:50 -060082
Chris Cainbae4d072022-02-28 09:46:50 -060083 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
Chris Cain1718fd82022-02-16 16:39:50 -060084 {
Chris Cainbae4d072022-02-28 09:46:50 -060085 // Something changed or no OCCs yet, try again in 10s.
86 // Note on the first pass prevOCCSearch will be empty,
87 // so there will be at least one delay to give things
88 // a chance to settle.
89 prevOCCSearch = occs;
90
91 log<level::INFO>(
92 fmt::format(
93 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
94 occs.size())
95 .c_str());
96
97 discoverTimer->restartOnce(10s);
98 }
99 else
100 {
101 // All OCCs appear to be available, create status objects
102
103 // createObjects requires OCC0 first.
104 std::sort(occs.begin(), occs.end());
105
106 log<level::INFO>(
107 fmt::format(
108 "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
109 occs.size())
110 .c_str());
111 for (auto id : occs)
112 {
113 createObjects(std::string(OCC_NAME) + std::to_string(id));
114 }
115 statusObjCreated = true;
116 }
117 }
118
119 if (statusObjCreated)
120 {
121 static bool tracedHostWait = false;
122 if (utils::isHostRunning())
123 {
124 if (tracedHostWait)
125 {
126 log<level::INFO>(
127 "Manager::findAndCreateObjects(): Host is running");
128 tracedHostWait = false;
129 }
130 waitingForAllOccActiveSensors = true;
131 checkAllActiveSensors();
132 }
133 else
134 {
135 if (!tracedHostWait)
136 {
137 log<level::INFO>(
138 "Manager::findAndCreateObjects(): Waiting for host to start");
139 tracedHostWait = true;
140 }
141 discoverTimer->restartOnce(30s);
Chris Cain1718fd82022-02-16 16:39:50 -0600142 }
143 }
Matt Spinlerd267cec2021-09-01 14:49:19 -0500144 }
145 else
146 {
Chris Cain1718fd82022-02-16 16:39:50 -0600147 log<level::INFO>(
148 fmt::format(
149 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
150 HOST_ON_FILE)
151 .c_str());
152 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500153 }
154#endif
155}
156
#ifdef POWER10
/**
 * @brief Check if all occActive sensors are available.
 *
 * Walks the status objects in order.  An inactive OCC whose instance is in
 * queuedActiveState is marked active; the first inactive OCC that is not
 * queued gets its sensor requested through pldmHandle and ends the walk.
 * When no OCC is left waiting the discovery timer is released, otherwise it
 * is restarted for another 10s.
 */
void Manager::checkAllActiveSensors()
{
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;

    // Start with the assumption that all are available
    allActiveSensorAvailable = true;
    for (auto& status : statusObjects)
    {
        // If active sensor is already true, then no need to query sensor
        if (status->occActive())
        {
            continue;
        }

        auto instance = status->getOccInstanceID();

        // Check if sensor was queued while waiting for discovery
        if (queuedActiveState.find(instance) != queuedActiveState.end())
        {
            const auto queuedMsg = fmt::format(
                "checkAllActiveSensors(): OCC{} is ACTIVE (queued)", instance);
            log<level::INFO>(queuedMsg.c_str());
            status->occActive(true);
            continue;
        }

        allActiveSensorAvailable = false;
        if (!tracedSensorWait)
        {
            const auto waitMsg = fmt::format(
                "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
                instance);
            log<level::INFO>(waitMsg.c_str());
            tracedSensorWait = true;
        }
        pldmHandle->checkActiveSensor(status->getOccInstanceID());
        break;
    }

    if (!allActiveSensorAvailable)
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
        return;
    }

    // All sensors were found, disable the discovery timer
    discoverTimer.reset();
    waitingForAllOccActiveSensors = false;
    queuedActiveState.clear();

    log<level::INFO>(
        "checkAllActiveSensors(): OCC Active sensors are available");
    tracedSensorWait = false;
}
#endif
225
Matt Spinlerd267cec2021-09-01 14:49:19 -0500226std::vector<int> Manager::findOCCsInDev()
227{
228 std::vector<int> occs;
229 std::regex expr{R"(occ(\d+)$)"};
230
231 for (auto& file : fs::directory_iterator("/dev"))
232 {
233 std::smatch match;
234 std::string path{file.path().string()};
235 if (std::regex_search(path, match, expr))
236 {
237 auto num = std::stoi(match[1].str());
238
239 // /dev numbering starts at 1, ours starts at 0.
240 occs.push_back(num - 1);
241 }
242 }
243
244 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530245}
246
247int Manager::cpuCreated(sdbusplus::message::message& msg)
248{
George Liubcef3b42021-09-10 12:39:02 +0800249 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530250
251 sdbusplus::message::object_path o;
252 msg.read(o);
253 fs::path cpuPath(std::string(std::move(o)));
254
255 auto name = cpuPath.filename().string();
256 auto index = name.find(CPU_NAME);
257 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
258
259 createObjects(name);
260
261 return 0;
262}
263
264void Manager::createObjects(const std::string& occ)
265{
266 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
267
Gunnar Mills94df8c92018-09-14 14:50:03 -0500268 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800269 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600270#ifdef POWER10
271 pmode,
272#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500273 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600274 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530275#ifdef PLDM
276 ,
277 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
278 std::placeholders::_1)
279#endif
280 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530281
Chris Cain40501a22022-03-14 17:33:27 -0500282 // Create the power cap monitor object
283 if (!pcap)
284 {
285 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
286 *statusObjects.back());
287 }
288
Chris Cain36f9cde2021-11-22 11:18:21 -0600289 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530290 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600291 log<level::INFO>(
292 fmt::format("Manager::createObjects(): OCC{} is the master",
293 statusObjects.back()->getOccInstanceID())
294 .c_str());
295 _pollTimer->setEnabled(false);
296
Chris Cain78e86012021-03-04 16:15:31 -0600297#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600298 // Set the master OCC on the PowerMode object
299 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600300#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600301 }
302
303 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
304#ifdef POWER10
305 ,
306 pmode
307#endif
308 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530309}
310
Sheldon Bailey373af752022-02-21 15:14:00 -0600311void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530312{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500313 using InternalFailure =
314 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530315
316 // At this time, it won't happen but keeping it
317 // here just in case something changes in the future
318 if ((activeCount == 0) && (!status))
319 {
Sheldon Bailey373af752022-02-21 15:14:00 -0600320 log<level::ERR>(
321 fmt::format("Invalid update on OCCActive with OCC{}", instance)
322 .c_str());
323
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530324 elog<InternalFailure>();
325 }
326
Chris Caina7b74dc2021-11-10 17:03:43 -0600327 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600328 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600329 // OCC went active
330 ++activeCount;
331
332#ifdef POWER10
333 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600334 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600335 // First OCC went active (allow some time for all OCCs to go active)
Chris Cainbd551de2022-04-26 13:41:16 -0500336 waitForAllOccsTimer->restartOnce(60s);
Matt Spinler53f68142021-08-25 15:47:31 -0500337 }
338#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600339
340 if (activeCount == statusObjects.size())
341 {
342#ifdef POWER10
343 // All OCCs are now running
344 if (waitForAllOccsTimer->isEnabled())
345 {
346 // stop occ wait timer
347 waitForAllOccsTimer->setEnabled(false);
348 }
349#endif
350
351 // Verify master OCC and start presence monitor
352 validateOccMaster();
353 }
354
355 // Start poll timer if not already started
356 if (!_pollTimer->isEnabled())
357 {
358 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600359 fmt::format("Manager: OCCs will be polled every {} seconds",
360 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600361 .c_str());
362
363 // Send poll and start OCC poll timer
364 pollerTimerExpired();
365 }
366 }
367 else
368 {
369 // OCC went away
370 --activeCount;
371
372 if (activeCount == 0)
373 {
374 // No OCCs are running
375
376 // Stop OCC poll timer
377 if (_pollTimer->isEnabled())
378 {
379 log<level::INFO>(
380 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
381 _pollTimer->setEnabled(false);
382 }
383
384#ifdef POWER10
385 // stop wait timer
386 if (waitForAllOccsTimer->isEnabled())
387 {
388 waitForAllOccsTimer->setEnabled(false);
389 }
390#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600391 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600392#ifdef READ_OCC_SENSORS
393 // Clear OCC sensors
394 setSensorValueToNonFunctional(instance);
395#endif
Chris Caina8857c52021-01-27 11:53:05 -0600396 }
Chris Cainbae4d072022-02-28 09:46:50 -0600397
398#ifdef POWER10
399 if (waitingForAllOccActiveSensors)
400 {
401 checkAllActiveSensors();
402 }
403#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530404}
405
#ifdef I2C_OCC
/**
 * @brief Create a Status object for every OCC hwmon device under DEV_PATH.
 *
 * The first device is treated as the master OCC: the PowerCap object is
 * created from its Status object and, on POWER10, its object path is
 * registered with the PowerMode object.
 *
 * If no devices are found an error is logged and nothing is created.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    if (deviceNames.empty())
    {
        // Without this check statusObjects.front() below would be called on
        // a container nothing was added to.
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC hwmon devices found");
        return;
    }

    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }

    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front());
#ifdef POWER10
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    // Set the master OCC on the PowerMode object.  The loop above rewrote
    // each entry of deviceNames in place, so the first entry is the name the
    // first object path was built from.
    pmode->setMasterOcc(fs::path(OCC_CONTROL_ROOT) / deviceNames.front());
#endif
}
#endif
432
Tom Joseph815f9f52020-07-27 12:12:13 +0530433#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500434void Manager::sbeTimeout(unsigned int instance)
435{
Eddie James2a751d72022-03-04 09:16:12 -0600436 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
437 [instance](const auto& obj) {
438 return instance == obj->getOccInstanceID();
439 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500440
Eddie Jamescb018da2022-03-05 11:49:37 -0600441 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600442 {
Chris Cainbae4d072022-02-28 09:46:50 -0600443 log<level::INFO>(
444 fmt::format("SBE timeout, requesting HRESET (OCC{})", instance)
445 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500446
Eddie James2a751d72022-03-04 09:16:12 -0600447 setSBEState(instance, SBE_STATE_NOT_USABLE);
448
449 pldmHandle->sendHRESET(instance);
450 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500451}
452
Tom Joseph815f9f52020-07-27 12:12:13 +0530453bool Manager::updateOCCActive(instanceID instance, bool status)
454{
Chris Cain7e374fb2022-04-07 09:47:23 -0500455 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
456 [instance](const auto& obj) {
457 return instance == obj->getOccInstanceID();
458 });
459
460 if (obj != statusObjects.end())
461 {
462 return (*obj)->occActive(status);
463 }
464 else
465 {
466 log<level::WARNING>(
467 fmt::format(
468 "Manager::updateOCCActive: No status object to update for OCC{} (active={})",
469 instance, status)
470 .c_str());
Chris Cainbd551de2022-04-26 13:41:16 -0500471 if (status == true)
472 {
473 // OCC went active
474 queuedActiveState.insert(instance);
475 }
476 else
477 {
478 auto match = queuedActiveState.find(instance);
479 if (match != queuedActiveState.end())
480 {
481 // OCC was disabled
482 queuedActiveState.erase(match);
483 }
484 }
Chris Cain7e374fb2022-04-07 09:47:23 -0500485 return false;
486 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530487}
Eddie Jamescbad2192021-10-07 09:39:39 -0500488
489void Manager::sbeHRESETResult(instanceID instance, bool success)
490{
491 if (success)
492 {
Chris Cainbae4d072022-02-28 09:46:50 -0600493 log<level::INFO>(
494 fmt::format("HRESET succeeded (OCC{})", instance).c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500495
496 setSBEState(instance, SBE_STATE_BOOTED);
497
498 return;
499 }
500
501 setSBEState(instance, SBE_STATE_FAILED);
502
503 if (sbeCanDump(instance))
504 {
Chris Cainbae4d072022-02-28 09:46:50 -0600505 log<level::INFO>(
506 fmt::format("HRESET failed (OCC{}), triggering SBE dump", instance)
507 .c_str());
Eddie Jamescbad2192021-10-07 09:39:39 -0500508
509 auto& bus = utils::getBus();
510 uint32_t src6 = instance << 16;
511 uint32_t logId =
512 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
513 src6, "SBE command timeout");
514
515 try
516 {
George Liuf3a4a692021-12-28 13:59:51 +0800517 constexpr auto path = "/org/openpower/dump";
518 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
519 constexpr auto function = "CreateDump";
520
Eddie Jamescbad2192021-10-07 09:39:39 -0500521 std::string service = utils::getService(path, interface);
522 auto method =
523 bus.new_method_call(service.c_str(), path, interface, function);
524
525 std::map<std::string, std::variant<std::string, uint64_t>>
526 createParams{
527 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
528 uint64_t(logId)},
529 {"com.ibm.Dump.Create.CreateParameters.DumpType",
530 "com.ibm.Dump.Create.DumpType.SBE"},
531 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
532 uint64_t(instance)},
533 };
534
535 method.append(createParams);
536
537 auto response = bus.call(method);
538 }
539 catch (const sdbusplus::exception::exception& e)
540 {
541 constexpr auto ERROR_DUMP_DISABLED =
542 "xyz.openbmc_project.Dump.Create.Error.Disabled";
543 if (e.name() == ERROR_DUMP_DISABLED)
544 {
545 log<level::INFO>("Dump is disabled, skipping");
546 }
547 else
548 {
549 log<level::ERR>("Dump failed");
550 }
551 }
552 }
553}
554
555bool Manager::sbeCanDump(unsigned int instance)
556{
557 struct pdbg_target* proc = getPdbgTarget(instance);
558
559 if (!proc)
560 {
561 // allow the dump in the error case
562 return true;
563 }
564
565 try
566 {
567 if (!openpower::phal::sbe::isDumpAllowed(proc))
568 {
569 return false;
570 }
571
572 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
573 {
574 return false;
575 }
576 }
577 catch (openpower::phal::exception::SbeError& e)
578 {
579 log<level::INFO>("Failed to query SBE state");
580 }
581
582 // allow the dump in the error case
583 return true;
584}
585
586void Manager::setSBEState(unsigned int instance, enum sbe_state state)
587{
588 struct pdbg_target* proc = getPdbgTarget(instance);
589
590 if (!proc)
591 {
592 return;
593 }
594
595 try
596 {
597 openpower::phal::sbe::setState(proc, state);
598 }
599 catch (const openpower::phal::exception::SbeError& e)
600 {
601 log<level::ERR>("Failed to set SBE state");
602 }
603}
604
605struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
606{
607 if (!pdbgInitialized)
608 {
609 try
610 {
611 openpower::phal::pdbg::init();
612 pdbgInitialized = true;
613 }
614 catch (const openpower::phal::exception::PdbgError& e)
615 {
616 log<level::ERR>("pdbg initialization failed");
617 return nullptr;
618 }
619 }
620
621 struct pdbg_target* proc = nullptr;
622 pdbg_for_each_class_target("proc", proc)
623 {
624 if (pdbg_target_index(proc) == instance)
625 {
626 return proc;
627 }
628 }
629
630 log<level::ERR>("Failed to get pdbg target");
631 return nullptr;
632}
Tom Joseph815f9f52020-07-27 12:12:13 +0530633#endif
634
Chris Caina8857c52021-01-27 11:53:05 -0600635void Manager::pollerTimerExpired()
636{
Chris Caina8857c52021-01-27 11:53:05 -0600637 if (!_pollTimer)
638 {
639 log<level::ERR>(
640 "Manager::pollerTimerExpired() ERROR: Timer not defined");
641 return;
642 }
643
644 for (auto& obj : statusObjects)
645 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600646 if (!obj->occActive())
647 {
648 // OCC is not running yet
649#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600650 auto id = obj->getOccInstanceID();
Sheldon Bailey373af752022-02-21 15:14:00 -0600651 setSensorValueToNonFunctional(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600652#endif
653 continue;
654 }
655
Chris Caina8857c52021-01-27 11:53:05 -0600656 // Read sysfs to force kernel to poll OCC
657 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800658
659#ifdef READ_OCC_SENSORS
660 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600661 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800662#endif
Chris Caina8857c52021-01-27 11:53:05 -0600663 }
664
Chris Caina7b74dc2021-11-10 17:03:43 -0600665 if (activeCount > 0)
666 {
667 // Restart OCC poll timer
668 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
669 }
670 else
671 {
672 // No OCCs running, so poll timer will not be restarted
673 log<level::INFO>(
674 fmt::format(
675 "Manager::pollerTimerExpired: poll timer will not be restarted")
676 .c_str());
677 }
Chris Caina8857c52021-01-27 11:53:05 -0600678}
679
Chicago Duanbb895cb2021-06-18 19:37:16 +0800680#ifdef READ_OCC_SENSORS
681void Manager::readTempSensors(const fs::path& path, uint32_t id)
682{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800683 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
684 for (auto& file : fs::directory_iterator(path))
685 {
686 if (!std::regex_search(file.path().string(), expr))
687 {
688 continue;
689 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800690
Matt Spinlera26f1522021-08-25 15:50:20 -0500691 uint32_t labelValue{0};
692
693 try
694 {
695 labelValue = readFile<uint32_t>(file.path());
696 }
697 catch (const std::system_error& e)
698 {
699 log<level::DEBUG>(
700 fmt::format("readTempSensors: Failed reading {}, errno = {}",
701 file.path().string(), e.code().value())
702 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800703 continue;
704 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800705
706 const std::string& tempLabel = "label";
707 const std::string filePathString = file.path().string().substr(
708 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500709
710 uint32_t fruTypeValue{0};
711 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800712 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500713 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
714 }
715 catch (const std::system_error& e)
716 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800717 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500718 fmt::format("readTempSensors: Failed reading {}, errno = {}",
719 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800720 .c_str());
721 continue;
722 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800723
724 std::string sensorPath =
725 OCC_SENSORS_ROOT + std::string("/temperature/");
726
Matt Spinlerace67d82021-10-18 13:41:57 -0500727 std::string dvfsTempPath;
728
Chicago Duanbb895cb2021-06-18 19:37:16 +0800729 if (fruTypeValue == VRMVdd)
730 {
731 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
732 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500733 else if (fruTypeValue == processorIoRing)
734 {
735 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
736 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
737 std::to_string(id) + "_ioring_dvfs_temp";
738 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800739 else
740 {
Matt Spinler14d14022021-08-25 15:38:29 -0500741 uint16_t type = (labelValue & 0xFF000000) >> 24;
742 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800743
744 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
745 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500746 if (fruTypeValue == fruTypeNotAvailable)
747 {
748 // Not all DIMM related temps are available to read
749 // (no _input file in this case)
750 continue;
751 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800752 auto iter = dimmTempSensorName.find(fruTypeValue);
753 if (iter == dimmTempSensorName.end())
754 {
George Liub5ca1012021-09-10 12:53:11 +0800755 log<level::ERR>(
756 fmt::format(
757 "readTempSensors: Fru type error! fruTypeValue = {}) ",
758 fruTypeValue)
759 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800760 continue;
761 }
762
763 sensorPath.append("dimm" + std::to_string(instanceID) +
764 iter->second);
765 }
766 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
767 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500768 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800769 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500770 // The OCC reports small core temps, of which there are
771 // two per big core. All current P10 systems are in big
772 // core mode, so use a big core name.
773 uint16_t coreNum = instanceID / 2;
774 uint16_t tempNum = instanceID % 2;
775 sensorPath.append("proc" + std::to_string(id) + "_core" +
776 std::to_string(coreNum) + "_" +
777 std::to_string(tempNum) + "_temp");
778
779 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
780 "/temperature/proc" + std::to_string(id) +
781 "_core_dvfs_temp";
782 }
783 else
784 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800785 continue;
786 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800787 }
788 else
789 {
790 continue;
791 }
792 }
793
Matt Spinlerace67d82021-10-18 13:41:57 -0500794 // The dvfs temp file only needs to be read once per chip per type.
795 if (!dvfsTempPath.empty() &&
796 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
797 {
798 try
799 {
800 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
801
802 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
803 dvfsTempPath, dvfsValue * std::pow(10, -3));
804 }
805 catch (const std::system_error& e)
806 {
807 log<level::DEBUG>(
808 fmt::format(
809 "readTempSensors: Failed reading {}, errno = {}",
810 filePathString + maxSuffix, e.code().value())
811 .c_str());
812 }
813 }
814
Matt Spinlera26f1522021-08-25 15:50:20 -0500815 uint32_t faultValue{0};
816 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800817 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500818 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
819 }
820 catch (const std::system_error& e)
821 {
822 log<level::DEBUG>(
823 fmt::format("readTempSensors: Failed reading {}, errno = {}",
824 filePathString + faultSuffix, e.code().value())
825 .c_str());
826 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800827 }
828
Matt Spinlera26f1522021-08-25 15:50:20 -0500829 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800830 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600831 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500832 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800833
Chris Cain5d66a0a2022-02-09 08:52:10 -0600834 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
835 false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800836
Matt Spinlera26f1522021-08-25 15:50:20 -0500837 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800838 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500839
840 double tempValue{0};
841
842 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800843 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500844 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800845 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500846 catch (const std::system_error& e)
847 {
848 log<level::DEBUG>(
849 fmt::format("readTempSensors: Failed reading {}, errno = {}",
850 filePathString + inputSuffix, e.code().value())
851 .c_str());
852 continue;
853 }
854
Chris Cain5d66a0a2022-02-09 08:52:10 -0600855 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500856 sensorPath, tempValue * std::pow(10, -3));
857
Chris Cain5d66a0a2022-02-09 08:52:10 -0600858 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
859 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500860
Chris Cain6fa848a2022-01-24 14:54:38 -0600861 // At this point, the sensor will be created for sure.
862 if (existingSensors.find(sensorPath) == existingSensors.end())
863 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600864 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
865 sensorPath);
Chris Cain6fa848a2022-01-24 14:54:38 -0600866 }
867
Matt Spinlera26f1522021-08-25 15:50:20 -0500868 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800869 }
870 return;
871}
872
873std::optional<std::string>
874 Manager::getPowerLabelFunctionID(const std::string& value)
875{
876 // If the value is "system", then the FunctionID is "system".
877 if (value == "system")
878 {
879 return value;
880 }
881
882 // If the value is not "system", then the label value have 3 numbers, of
883 // which we only care about the middle one:
884 // <sensor id>_<function id>_<apss channel>
885 // eg: The value is "0_10_5" , then the FunctionID is "10".
886 if (value.find("_") == std::string::npos)
887 {
888 return std::nullopt;
889 }
890
891 auto powerLabelValue = value.substr((value.find("_") + 1));
892
893 if (powerLabelValue.find("_") == std::string::npos)
894 {
895 return std::nullopt;
896 }
897
898 return powerLabelValue.substr(0, powerLabelValue.find("_"));
899}
900
901void Manager::readPowerSensors(const fs::path& path, uint32_t id)
902{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800903 std::regex expr{"power\\d+_label$"}; // Example: power5_label
904 for (auto& file : fs::directory_iterator(path))
905 {
906 if (!std::regex_search(file.path().string(), expr))
907 {
908 continue;
909 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800910
Matt Spinlera26f1522021-08-25 15:50:20 -0500911 std::string labelValue;
912 try
913 {
914 labelValue = readFile<std::string>(file.path());
915 }
916 catch (const std::system_error& e)
917 {
918 log<level::DEBUG>(
919 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
920 file.path().string(), e.code().value())
921 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800922 continue;
923 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800924
925 auto functionID = getPowerLabelFunctionID(labelValue);
926 if (functionID == std::nullopt)
927 {
928 continue;
929 }
930
931 const std::string& tempLabel = "label";
932 const std::string filePathString = file.path().string().substr(
933 0, file.path().string().length() - tempLabel.length());
934
935 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
936
937 auto iter = powerSensorName.find(*functionID);
938 if (iter == powerSensorName.end())
939 {
940 continue;
941 }
942 sensorPath.append(iter->second);
943
Matt Spinlera26f1522021-08-25 15:50:20 -0500944 double tempValue{0};
945
946 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800947 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500948 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800949 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500950 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800951 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800952 log<level::DEBUG>(
Chris Cain5d66a0a2022-02-09 08:52:10 -0600953 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500954 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800955 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500956 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800957 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500958
Chris Cain5d66a0a2022-02-09 08:52:10 -0600959 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -0600960 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
961
Chris Cain5d66a0a2022-02-09 08:52:10 -0600962 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500963 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
964
Chris Cain5d66a0a2022-02-09 08:52:10 -0600965 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
966 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500967
Matt Spinler5901abd2021-09-23 13:50:03 -0500968 if (existingSensors.find(sensorPath) == existingSensors.end())
969 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600970 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
971 sensorPath);
Matt Spinler5901abd2021-09-23 13:50:03 -0500972 }
973
Matt Spinlera26f1522021-08-25 15:50:20 -0500974 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800975 }
976 return;
977}
978
979void Manager::setSensorValueToNaN(uint32_t id)
980{
981 for (const auto& [sensorPath, occId] : existingSensors)
982 {
983 if (occId == id)
984 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600985 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +0800986 sensorPath, std::numeric_limits<double>::quiet_NaN());
987 }
988 }
989 return;
990}
991
Sheldon Bailey373af752022-02-21 15:14:00 -0600992void Manager::setSensorValueToNonFunctional(uint32_t id) const
993{
994 for (const auto& [sensorPath, occId] : existingSensors)
995 {
996 if (occId == id)
997 {
998 dbus::OccDBusSensors::getOccDBus().setValue(
999 sensorPath, std::numeric_limits<double>::quiet_NaN());
1000
1001 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
1002 false);
1003 }
1004 }
1005 return;
1006}
1007
Chris Cain5d66a0a2022-02-09 08:52:10 -06001008void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +08001009{
Chris Caine2d0a432022-03-28 11:08:49 -05001010 static bool tracedError[8] = {0};
1011 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -06001012 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +08001013
Chris Caine2d0a432022-03-28 11:08:49 -05001014 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +08001015 {
Chris Caine2d0a432022-03-28 11:08:49 -05001016 // Read temperature sensors
1017 readTempSensors(sensorPath, id);
1018
1019 if (occ->isMasterOcc())
1020 {
1021 // Read power sensors
1022 readPowerSensors(sensorPath, id);
1023 }
1024 tracedError[id] = false;
1025 }
1026 else
1027 {
1028 if (!tracedError[id])
1029 {
1030 log<level::ERR>(
1031 fmt::format(
1032 "Manager::getSensorValues: OCC{} sensor path missing: {}",
1033 id, sensorPath.c_str())
1034 .c_str());
1035 tracedError[id] = true;
1036 }
Chicago Duanbb895cb2021-06-18 19:37:16 +08001037 }
1038
1039 return;
1040}
1041#endif
Chris Cain17257672021-10-22 13:41:03 -05001042
1043// Read the altitude from DBus
1044void Manager::readAltitude()
1045{
1046 static bool traceAltitudeErr = true;
1047
1048 utils::PropertyValue altitudeProperty{};
1049 try
1050 {
1051 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1052 ALTITUDE_PROP);
1053 auto sensorVal = std::get<double>(altitudeProperty);
1054 if (sensorVal < 0xFFFF)
1055 {
1056 if (sensorVal < 0)
1057 {
1058 altitude = 0;
1059 }
1060 else
1061 {
1062 // Round to nearest meter
1063 altitude = uint16_t(sensorVal + 0.5);
1064 }
1065 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
1066 sensorVal, altitude)
1067 .c_str());
1068 traceAltitudeErr = true;
1069 }
1070 else
1071 {
1072 if (traceAltitudeErr)
1073 {
1074 traceAltitudeErr = false;
1075 log<level::DEBUG>(
1076 fmt::format("Invalid altitude value: {}", sensorVal)
1077 .c_str());
1078 }
1079 }
1080 }
1081 catch (const sdbusplus::exception::exception& e)
1082 {
1083 if (traceAltitudeErr)
1084 {
1085 traceAltitudeErr = false;
1086 log<level::INFO>(
1087 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
1088 }
1089 altitude = 0xFFFF; // not available
1090 }
1091}
1092
1093// Callback function when ambient temperature changes
1094void Manager::ambientCallback(sdbusplus::message::message& msg)
1095{
1096 double currentTemp = 0;
1097 uint8_t truncatedTemp = 0xFF;
1098 std::string msgSensor;
1099 std::map<std::string, std::variant<double>> msgData;
1100 msg.read(msgSensor, msgData);
1101
1102 auto valPropMap = msgData.find(AMBIENT_PROP);
1103 if (valPropMap == msgData.end())
1104 {
1105 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
1106 return;
1107 }
1108 currentTemp = std::get<double>(valPropMap->second);
1109 if (std::isnan(currentTemp))
1110 {
1111 truncatedTemp = 0xFF;
1112 }
1113 else
1114 {
1115 if (currentTemp < 0)
1116 {
1117 truncatedTemp = 0;
1118 }
1119 else
1120 {
1121 // Round to nearest degree C
1122 truncatedTemp = uint8_t(currentTemp + 0.5);
1123 }
1124 }
1125
1126 // If ambient changes, notify OCCs
1127 if (truncatedTemp != ambient)
1128 {
1129 log<level::DEBUG>(
1130 fmt::format("ambientCallback: Ambient change from {} to {}C",
1131 ambient, currentTemp)
1132 .c_str());
1133
1134 ambient = truncatedTemp;
1135 if (altitude == 0xFFFF)
1136 {
1137 // No altitude yet, try reading again
1138 readAltitude();
1139 }
1140
1141 log<level::DEBUG>(
1142 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
1143 altitude)
1144 .c_str());
1145#ifdef POWER10
1146 // Send ambient and altitude to all OCCs
1147 for (auto& obj : statusObjects)
1148 {
1149 if (obj->occActive())
1150 {
1151 obj->sendAmbient(ambient, altitude);
1152 }
1153 }
1154#endif // POWER10
1155 }
1156}
1157
1158// return the current ambient and altitude readings
1159void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1160 uint16_t& altitudeValue) const
1161{
1162 ambientValid = true;
1163 ambientTemp = ambient;
1164 altitudeValue = altitude;
1165
1166 if (ambient == 0xFF)
1167 {
1168 ambientValid = false;
1169 }
1170}
1171
Chris Caina7b74dc2021-11-10 17:03:43 -06001172#ifdef POWER10
1173void Manager::occsNotAllRunning()
1174{
Chris Cain6fa848a2022-01-24 14:54:38 -06001175 // Function will also gets called when occ-control app gets
1176 // restarted. (occ active sensors do not change, so the Status
1177 // object does not call Manager back for all OCCs)
Chris Caina7b74dc2021-11-10 17:03:43 -06001178
1179 if (activeCount != statusObjects.size())
1180 {
1181 // Not all OCCs went active
1182 log<level::WARNING>(
1183 fmt::format(
1184 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
1185 activeCount, statusObjects.size())
1186 .c_str());
1187 // Procs may be garded, so may not need reset.
1188 }
1189
1190 validateOccMaster();
1191}
1192#endif // POWER10
1193
1194// Verify single master OCC and start presence monitor
1195void Manager::validateOccMaster()
1196{
1197 int masterInstance = -1;
1198 for (auto& obj : statusObjects)
1199 {
Chris Cainbd551de2022-04-26 13:41:16 -05001200 auto instance = obj->getOccInstanceID();
Chris Cainbae4d072022-02-28 09:46:50 -06001201#ifdef POWER10
1202 if (!obj->occActive())
1203 {
1204 if (utils::isHostRunning())
1205 {
Chris Cainbd551de2022-04-26 13:41:16 -05001206 // Check if sensor was queued while waiting for discovery
1207 auto match = queuedActiveState.find(instance);
1208 if (match != queuedActiveState.end())
Chris Cainbae4d072022-02-28 09:46:50 -06001209 {
1210 log<level::INFO>(
1211 fmt::format(
Chris Cainbd551de2022-04-26 13:41:16 -05001212 "validateOccMaster: OCC{} is ACTIVE (queued)",
1213 instance)
Chris Cainbae4d072022-02-28 09:46:50 -06001214 .c_str());
Chris Cainbd551de2022-04-26 13:41:16 -05001215 obj->occActive(true);
1216 }
1217 else
1218 {
1219 // OCC does not appear to be active yet, check active sensor
1220 pldmHandle->checkActiveSensor(instance);
1221 if (obj->occActive())
1222 {
1223 log<level::INFO>(
1224 fmt::format(
1225 "validateOccMaster: OCC{} is ACTIVE after reading sensor",
1226 instance)
1227 .c_str());
1228 }
Chris Cainbae4d072022-02-28 09:46:50 -06001229 }
1230 }
1231 else
1232 {
1233 log<level::WARNING>(
1234 fmt::format(
1235 "validateOccMaster: HOST is not running (OCC{})",
Chris Cainbd551de2022-04-26 13:41:16 -05001236 instance)
Chris Cainbae4d072022-02-28 09:46:50 -06001237 .c_str());
1238 return;
1239 }
1240 }
1241#endif // POWER10
1242
Chris Caina7b74dc2021-11-10 17:03:43 -06001243 if (obj->isMasterOcc())
1244 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001245 obj->addPresenceWatchMaster();
1246
Chris Caina7b74dc2021-11-10 17:03:43 -06001247 if (masterInstance == -1)
1248 {
Chris Cainbd551de2022-04-26 13:41:16 -05001249 masterInstance = instance;
Chris Caina7b74dc2021-11-10 17:03:43 -06001250 }
1251 else
1252 {
1253 log<level::ERR>(
1254 fmt::format(
1255 "validateOccMaster: Multiple OCC masters! ({} and {})",
Chris Cainbd551de2022-04-26 13:41:16 -05001256 masterInstance, instance)
Chris Caina7b74dc2021-11-10 17:03:43 -06001257 .c_str());
1258 // request reset
1259 obj->deviceError();
1260 }
1261 }
1262 }
Chris Cainbae4d072022-02-28 09:46:50 -06001263
Chris Caina7b74dc2021-11-10 17:03:43 -06001264 if (masterInstance < 0)
1265 {
Chris Cainbae4d072022-02-28 09:46:50 -06001266 log<level::ERR>(
1267 fmt::format("validateOccMaster: Master OCC not found! (of {} OCCs)",
1268 statusObjects.size())
1269 .c_str());
Chris Caina7b74dc2021-11-10 17:03:43 -06001270 // request reset
1271 statusObjects.front()->deviceError();
1272 }
1273 else
1274 {
1275 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -06001276 fmt::format("validateOccMaster: OCC{} is master of {} OCCs",
1277 masterInstance, activeCount)
Chris Caina7b74dc2021-11-10 17:03:43 -06001278 .c_str());
1279 }
1280}
1281
Chris Cain40501a22022-03-14 17:33:27 -05001282void Manager::updatePcapBounds() const
1283{
1284 if (pcap)
1285 {
1286 pcap->updatePcapBounds();
1287 }
1288}
1289
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301290} // namespace occ
1291} // namespace open_power