blob: a77d49bd043ee927f7815e8e47637eb10381e193 [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature
// (per the comment there, no _input file exists in that case).
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// File name suffixes appended to a hwmon sensor prefix such as "temp5_".
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// While this file exists, Manager::findAndCreateObjects() postpones the
// search for OCC devices and retries later.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read one value of type T from the start of a file.
 *
 * The value is extracted with operator>>, so leading whitespace is skipped
 * and extraction stops at the first character that cannot belong to T.
 *
 * @tparam T - Type of the value to extract
 * @param[in] path - File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error (generic category) when the file cannot be
 *         opened or the value cannot be extracted. The error code is errno
 *         when the failure set it, otherwise std::errc::io_error, so callers
 *         never see a "success" (0) or stale error code.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data;

    // Stream failures such as a parse error or reaching EOF do not set
    // errno, so clear it to keep a value left by an earlier call from being
    // reported as the cause of this failure.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Capture errno before anything else can overwrite it.
        const auto err = errno;
        throw std::system_error(
            (err != 0) ? err : static_cast<int>(std::errc::io_error),
            std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain1718fd82022-02-16 16:39:50 -060068 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050069 {
Chris Cain1718fd82022-02-16 16:39:50 -060070 // Create the OCCs based on on the /dev/occX devices
71 auto occs = findOCCsInDev();
Matt Spinlerd267cec2021-09-01 14:49:19 -050072
Chris Cain1718fd82022-02-16 16:39:50 -060073 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
74 {
75 // Something changed or no OCCs yet, try again in 10s.
76 // Note on the first pass prevOCCSearch will be empty,
77 // so there will be at least one delay to give things
78 // a chance to settle.
79 prevOCCSearch = occs;
80
81 discoverTimer->restartOnce(10s);
82 }
83 else
84 {
85 discoverTimer.reset();
86
87 // createObjects requires OCC0 first.
88 std::sort(occs.begin(), occs.end());
89
90 for (auto id : occs)
91 {
92 createObjects(std::string(OCC_NAME) + std::to_string(id));
93 }
94 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050095 }
96 else
97 {
Chris Cain1718fd82022-02-16 16:39:50 -060098 log<level::INFO>(
99 fmt::format(
100 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
101 HOST_ON_FILE)
102 .c_str());
103 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500104 }
105#endif
106}
107
108std::vector<int> Manager::findOCCsInDev()
109{
110 std::vector<int> occs;
111 std::regex expr{R"(occ(\d+)$)"};
112
113 for (auto& file : fs::directory_iterator("/dev"))
114 {
115 std::smatch match;
116 std::string path{file.path().string()};
117 if (std::regex_search(path, match, expr))
118 {
119 auto num = std::stoi(match[1].str());
120
121 // /dev numbering starts at 1, ours starts at 0.
122 occs.push_back(num - 1);
123 }
124 }
125
126 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530127}
128
129int Manager::cpuCreated(sdbusplus::message::message& msg)
130{
George Liubcef3b42021-09-10 12:39:02 +0800131 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530132
133 sdbusplus::message::object_path o;
134 msg.read(o);
135 fs::path cpuPath(std::string(std::move(o)));
136
137 auto name = cpuPath.filename().string();
138 auto index = name.find(CPU_NAME);
139 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
140
141 createObjects(name);
142
143 return 0;
144}
145
146void Manager::createObjects(const std::string& occ)
147{
148 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
149
Chris Cain6fa848a2022-01-24 14:54:38 -0600150#ifdef POWER10
151 if (!pmode)
152 {
Chris Cain1be43372021-12-09 19:29:37 -0600153 // Create the power mode object
Chris Cain5d66a0a2022-02-09 08:52:10 -0600154 pmode = std::make_unique<powermode::PowerMode>(
Chris Cain1be43372021-12-09 19:29:37 -0600155 *this, powermode::PMODE_PATH, powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600156 }
157#endif
158
Gunnar Mills94df8c92018-09-14 14:50:03 -0500159 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800160 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600161#ifdef POWER10
162 pmode,
163#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500164 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Sheldon Bailey373af752022-02-21 15:14:00 -0600165 std::placeholders::_1, std::placeholders::_2)
Tom Joseph00325232020-07-29 17:51:48 +0530166#ifdef PLDM
167 ,
168 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
169 std::placeholders::_1)
170#endif
171 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530172
Chris Cain40501a22022-03-14 17:33:27 -0500173 // Create the power cap monitor object
174 if (!pcap)
175 {
176 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
177 *statusObjects.back());
178 }
179
Chris Cain36f9cde2021-11-22 11:18:21 -0600180 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530181 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600182 log<level::INFO>(
183 fmt::format("Manager::createObjects(): OCC{} is the master",
184 statusObjects.back()->getOccInstanceID())
185 .c_str());
186 _pollTimer->setEnabled(false);
187
Chris Cain78e86012021-03-04 16:15:31 -0600188#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600189 // Set the master OCC on the PowerMode object
190 pmode->setMasterOcc(path);
Chris Cain40501a22022-03-14 17:33:27 -0500191 // Update power cap bounds
192 pcap->updatePcapBounds();
Chris Cain78e86012021-03-04 16:15:31 -0600193#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600194 }
195
196 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
197#ifdef POWER10
198 ,
199 pmode
200#endif
201 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530202}
203
Sheldon Bailey373af752022-02-21 15:14:00 -0600204void Manager::statusCallBack(instanceID instance, bool status)
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530205{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500206 using InternalFailure =
207 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530208
209 // At this time, it won't happen but keeping it
210 // here just in case something changes in the future
211 if ((activeCount == 0) && (!status))
212 {
Sheldon Bailey373af752022-02-21 15:14:00 -0600213 log<level::ERR>(
214 fmt::format("Invalid update on OCCActive with OCC{}", instance)
215 .c_str());
216
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530217 elog<InternalFailure>();
218 }
219
Chris Caina7b74dc2021-11-10 17:03:43 -0600220 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600221 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600222 // OCC went active
223 ++activeCount;
224
225#ifdef POWER10
226 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600227 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600228 // First OCC went active (allow some time for all OCCs to go active)
229 waitForAllOccsTimer->restartOnce(30s);
Matt Spinler53f68142021-08-25 15:47:31 -0500230 }
231#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600232
233 if (activeCount == statusObjects.size())
234 {
235#ifdef POWER10
236 // All OCCs are now running
237 if (waitForAllOccsTimer->isEnabled())
238 {
239 // stop occ wait timer
240 waitForAllOccsTimer->setEnabled(false);
241 }
242#endif
243
244 // Verify master OCC and start presence monitor
245 validateOccMaster();
246 }
247
248 // Start poll timer if not already started
249 if (!_pollTimer->isEnabled())
250 {
251 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600252 fmt::format("Manager: OCCs will be polled every {} seconds",
253 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600254 .c_str());
255
256 // Send poll and start OCC poll timer
257 pollerTimerExpired();
258 }
259 }
260 else
261 {
262 // OCC went away
263 --activeCount;
264
265 if (activeCount == 0)
266 {
267 // No OCCs are running
268
269 // Stop OCC poll timer
270 if (_pollTimer->isEnabled())
271 {
272 log<level::INFO>(
273 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
274 _pollTimer->setEnabled(false);
275 }
276
277#ifdef POWER10
278 // stop wait timer
279 if (waitForAllOccsTimer->isEnabled())
280 {
281 waitForAllOccsTimer->setEnabled(false);
282 }
283#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600284 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600285#ifdef READ_OCC_SENSORS
286 // Clear OCC sensors
287 setSensorValueToNonFunctional(instance);
288#endif
Chris Caina8857c52021-01-27 11:53:05 -0600289 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530290}
291
292#ifdef I2C_OCC
293void Manager::initStatusObjects()
294{
295 // Make sure we have a valid path string
296 static_assert(sizeof(DEV_PATH) != 0);
297
298 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
299 for (auto& name : deviceNames)
300 {
301 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800302 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530303 auto path = fs::path(OCC_CONTROL_ROOT) / name;
304 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800305 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530306 }
Chris Cain40501a22022-03-14 17:33:27 -0500307 // The first device is master occ
308 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
309 *statusObjects.front());
Chris Cain78e86012021-03-04 16:15:31 -0600310#ifdef POWER10
Chris Cain5d66a0a2022-02-09 08:52:10 -0600311 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
312 powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600313 // Set the master OCC on the PowerMode object
314 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600315#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530316}
317#endif
318
Tom Joseph815f9f52020-07-27 12:12:13 +0530319#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500320void Manager::sbeTimeout(unsigned int instance)
321{
Eddie James2a751d72022-03-04 09:16:12 -0600322 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
323 [instance](const auto& obj) {
324 return instance == obj->getOccInstanceID();
325 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500326
Eddie Jamescb018da2022-03-05 11:49:37 -0600327 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600328 {
329 log<level::INFO>("SBE timeout, requesting HRESET",
330 entry("SBE=%d", instance));
Eddie Jamescbad2192021-10-07 09:39:39 -0500331
Eddie James2a751d72022-03-04 09:16:12 -0600332 setSBEState(instance, SBE_STATE_NOT_USABLE);
333
334 pldmHandle->sendHRESET(instance);
335 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500336}
337
Tom Joseph815f9f52020-07-27 12:12:13 +0530338bool Manager::updateOCCActive(instanceID instance, bool status)
339{
Chris Cain7e374fb2022-04-07 09:47:23 -0500340 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
341 [instance](const auto& obj) {
342 return instance == obj->getOccInstanceID();
343 });
344
345 if (obj != statusObjects.end())
346 {
347 return (*obj)->occActive(status);
348 }
349 else
350 {
351 log<level::WARNING>(
352 fmt::format(
353 "Manager::updateOCCActive: No status object to update for OCC{} (active={})",
354 instance, status)
355 .c_str());
356 return false;
357 }
Tom Joseph815f9f52020-07-27 12:12:13 +0530358}
Eddie Jamescbad2192021-10-07 09:39:39 -0500359
360void Manager::sbeHRESETResult(instanceID instance, bool success)
361{
362 if (success)
363 {
364 log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
365
366 setSBEState(instance, SBE_STATE_BOOTED);
367
368 return;
369 }
370
371 setSBEState(instance, SBE_STATE_FAILED);
372
373 if (sbeCanDump(instance))
374 {
Eddie Jamescbad2192021-10-07 09:39:39 -0500375 log<level::INFO>("HRESET failed, triggering SBE dump",
376 entry("SBE=%d", instance));
377
378 auto& bus = utils::getBus();
379 uint32_t src6 = instance << 16;
380 uint32_t logId =
381 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
382 src6, "SBE command timeout");
383
384 try
385 {
George Liuf3a4a692021-12-28 13:59:51 +0800386 constexpr auto path = "/org/openpower/dump";
387 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
388 constexpr auto function = "CreateDump";
389
Eddie Jamescbad2192021-10-07 09:39:39 -0500390 std::string service = utils::getService(path, interface);
391 auto method =
392 bus.new_method_call(service.c_str(), path, interface, function);
393
394 std::map<std::string, std::variant<std::string, uint64_t>>
395 createParams{
396 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
397 uint64_t(logId)},
398 {"com.ibm.Dump.Create.CreateParameters.DumpType",
399 "com.ibm.Dump.Create.DumpType.SBE"},
400 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
401 uint64_t(instance)},
402 };
403
404 method.append(createParams);
405
406 auto response = bus.call(method);
407 }
408 catch (const sdbusplus::exception::exception& e)
409 {
410 constexpr auto ERROR_DUMP_DISABLED =
411 "xyz.openbmc_project.Dump.Create.Error.Disabled";
412 if (e.name() == ERROR_DUMP_DISABLED)
413 {
414 log<level::INFO>("Dump is disabled, skipping");
415 }
416 else
417 {
418 log<level::ERR>("Dump failed");
419 }
420 }
421 }
422}
423
424bool Manager::sbeCanDump(unsigned int instance)
425{
426 struct pdbg_target* proc = getPdbgTarget(instance);
427
428 if (!proc)
429 {
430 // allow the dump in the error case
431 return true;
432 }
433
434 try
435 {
436 if (!openpower::phal::sbe::isDumpAllowed(proc))
437 {
438 return false;
439 }
440
441 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
442 {
443 return false;
444 }
445 }
446 catch (openpower::phal::exception::SbeError& e)
447 {
448 log<level::INFO>("Failed to query SBE state");
449 }
450
451 // allow the dump in the error case
452 return true;
453}
454
455void Manager::setSBEState(unsigned int instance, enum sbe_state state)
456{
457 struct pdbg_target* proc = getPdbgTarget(instance);
458
459 if (!proc)
460 {
461 return;
462 }
463
464 try
465 {
466 openpower::phal::sbe::setState(proc, state);
467 }
468 catch (const openpower::phal::exception::SbeError& e)
469 {
470 log<level::ERR>("Failed to set SBE state");
471 }
472}
473
474struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
475{
476 if (!pdbgInitialized)
477 {
478 try
479 {
480 openpower::phal::pdbg::init();
481 pdbgInitialized = true;
482 }
483 catch (const openpower::phal::exception::PdbgError& e)
484 {
485 log<level::ERR>("pdbg initialization failed");
486 return nullptr;
487 }
488 }
489
490 struct pdbg_target* proc = nullptr;
491 pdbg_for_each_class_target("proc", proc)
492 {
493 if (pdbg_target_index(proc) == instance)
494 {
495 return proc;
496 }
497 }
498
499 log<level::ERR>("Failed to get pdbg target");
500 return nullptr;
501}
Tom Joseph815f9f52020-07-27 12:12:13 +0530502#endif
503
Chris Caina8857c52021-01-27 11:53:05 -0600504void Manager::pollerTimerExpired()
505{
Chris Caina8857c52021-01-27 11:53:05 -0600506 if (!_pollTimer)
507 {
508 log<level::ERR>(
509 "Manager::pollerTimerExpired() ERROR: Timer not defined");
510 return;
511 }
512
513 for (auto& obj : statusObjects)
514 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600515 if (!obj->occActive())
516 {
517 // OCC is not running yet
518#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600519 auto id = obj->getOccInstanceID();
Sheldon Bailey373af752022-02-21 15:14:00 -0600520 setSensorValueToNonFunctional(id);
Chris Caina7b74dc2021-11-10 17:03:43 -0600521#endif
522 continue;
523 }
524
Chris Caina8857c52021-01-27 11:53:05 -0600525 // Read sysfs to force kernel to poll OCC
526 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800527
528#ifdef READ_OCC_SENSORS
529 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600530 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800531#endif
Chris Caina8857c52021-01-27 11:53:05 -0600532 }
533
Chris Caina7b74dc2021-11-10 17:03:43 -0600534 if (activeCount > 0)
535 {
536 // Restart OCC poll timer
537 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
538 }
539 else
540 {
541 // No OCCs running, so poll timer will not be restarted
542 log<level::INFO>(
543 fmt::format(
544 "Manager::pollerTimerExpired: poll timer will not be restarted")
545 .c_str());
546 }
Chris Caina8857c52021-01-27 11:53:05 -0600547}
548
Chicago Duanbb895cb2021-06-18 19:37:16 +0800549#ifdef READ_OCC_SENSORS
550void Manager::readTempSensors(const fs::path& path, uint32_t id)
551{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800552 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
553 for (auto& file : fs::directory_iterator(path))
554 {
555 if (!std::regex_search(file.path().string(), expr))
556 {
557 continue;
558 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800559
Matt Spinlera26f1522021-08-25 15:50:20 -0500560 uint32_t labelValue{0};
561
562 try
563 {
564 labelValue = readFile<uint32_t>(file.path());
565 }
566 catch (const std::system_error& e)
567 {
568 log<level::DEBUG>(
569 fmt::format("readTempSensors: Failed reading {}, errno = {}",
570 file.path().string(), e.code().value())
571 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800572 continue;
573 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800574
575 const std::string& tempLabel = "label";
576 const std::string filePathString = file.path().string().substr(
577 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500578
579 uint32_t fruTypeValue{0};
580 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800581 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500582 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
583 }
584 catch (const std::system_error& e)
585 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800586 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500587 fmt::format("readTempSensors: Failed reading {}, errno = {}",
588 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800589 .c_str());
590 continue;
591 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800592
593 std::string sensorPath =
594 OCC_SENSORS_ROOT + std::string("/temperature/");
595
Matt Spinlerace67d82021-10-18 13:41:57 -0500596 std::string dvfsTempPath;
597
Chicago Duanbb895cb2021-06-18 19:37:16 +0800598 if (fruTypeValue == VRMVdd)
599 {
600 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
601 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500602 else if (fruTypeValue == processorIoRing)
603 {
604 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
605 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
606 std::to_string(id) + "_ioring_dvfs_temp";
607 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800608 else
609 {
Matt Spinler14d14022021-08-25 15:38:29 -0500610 uint16_t type = (labelValue & 0xFF000000) >> 24;
611 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800612
613 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
614 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500615 if (fruTypeValue == fruTypeNotAvailable)
616 {
617 // Not all DIMM related temps are available to read
618 // (no _input file in this case)
619 continue;
620 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800621 auto iter = dimmTempSensorName.find(fruTypeValue);
622 if (iter == dimmTempSensorName.end())
623 {
George Liub5ca1012021-09-10 12:53:11 +0800624 log<level::ERR>(
625 fmt::format(
626 "readTempSensors: Fru type error! fruTypeValue = {}) ",
627 fruTypeValue)
628 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800629 continue;
630 }
631
632 sensorPath.append("dimm" + std::to_string(instanceID) +
633 iter->second);
634 }
635 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
636 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500637 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800638 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500639 // The OCC reports small core temps, of which there are
640 // two per big core. All current P10 systems are in big
641 // core mode, so use a big core name.
642 uint16_t coreNum = instanceID / 2;
643 uint16_t tempNum = instanceID % 2;
644 sensorPath.append("proc" + std::to_string(id) + "_core" +
645 std::to_string(coreNum) + "_" +
646 std::to_string(tempNum) + "_temp");
647
648 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
649 "/temperature/proc" + std::to_string(id) +
650 "_core_dvfs_temp";
651 }
652 else
653 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800654 continue;
655 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800656 }
657 else
658 {
659 continue;
660 }
661 }
662
Matt Spinlerace67d82021-10-18 13:41:57 -0500663 // The dvfs temp file only needs to be read once per chip per type.
664 if (!dvfsTempPath.empty() &&
665 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
666 {
667 try
668 {
669 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
670
671 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
672 dvfsTempPath, dvfsValue * std::pow(10, -3));
673 }
674 catch (const std::system_error& e)
675 {
676 log<level::DEBUG>(
677 fmt::format(
678 "readTempSensors: Failed reading {}, errno = {}",
679 filePathString + maxSuffix, e.code().value())
680 .c_str());
681 }
682 }
683
Matt Spinlera26f1522021-08-25 15:50:20 -0500684 uint32_t faultValue{0};
685 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800686 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500687 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
688 }
689 catch (const std::system_error& e)
690 {
691 log<level::DEBUG>(
692 fmt::format("readTempSensors: Failed reading {}, errno = {}",
693 filePathString + faultSuffix, e.code().value())
694 .c_str());
695 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800696 }
697
Matt Spinlera26f1522021-08-25 15:50:20 -0500698 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800699 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600700 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500701 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800702
Chris Cain5d66a0a2022-02-09 08:52:10 -0600703 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
704 false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800705
Matt Spinlera26f1522021-08-25 15:50:20 -0500706 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800707 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500708
709 double tempValue{0};
710
711 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800712 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500713 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800714 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500715 catch (const std::system_error& e)
716 {
717 log<level::DEBUG>(
718 fmt::format("readTempSensors: Failed reading {}, errno = {}",
719 filePathString + inputSuffix, e.code().value())
720 .c_str());
721 continue;
722 }
723
Chris Cain5d66a0a2022-02-09 08:52:10 -0600724 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500725 sensorPath, tempValue * std::pow(10, -3));
726
Chris Cain5d66a0a2022-02-09 08:52:10 -0600727 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
728 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500729
Chris Cain6fa848a2022-01-24 14:54:38 -0600730 // At this point, the sensor will be created for sure.
731 if (existingSensors.find(sensorPath) == existingSensors.end())
732 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600733 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
734 sensorPath);
Chris Cain6fa848a2022-01-24 14:54:38 -0600735 }
736
Matt Spinlera26f1522021-08-25 15:50:20 -0500737 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800738 }
739 return;
740}
741
742std::optional<std::string>
743 Manager::getPowerLabelFunctionID(const std::string& value)
744{
745 // If the value is "system", then the FunctionID is "system".
746 if (value == "system")
747 {
748 return value;
749 }
750
751 // If the value is not "system", then the label value have 3 numbers, of
752 // which we only care about the middle one:
753 // <sensor id>_<function id>_<apss channel>
754 // eg: The value is "0_10_5" , then the FunctionID is "10".
755 if (value.find("_") == std::string::npos)
756 {
757 return std::nullopt;
758 }
759
760 auto powerLabelValue = value.substr((value.find("_") + 1));
761
762 if (powerLabelValue.find("_") == std::string::npos)
763 {
764 return std::nullopt;
765 }
766
767 return powerLabelValue.substr(0, powerLabelValue.find("_"));
768}
769
770void Manager::readPowerSensors(const fs::path& path, uint32_t id)
771{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800772 std::regex expr{"power\\d+_label$"}; // Example: power5_label
773 for (auto& file : fs::directory_iterator(path))
774 {
775 if (!std::regex_search(file.path().string(), expr))
776 {
777 continue;
778 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800779
Matt Spinlera26f1522021-08-25 15:50:20 -0500780 std::string labelValue;
781 try
782 {
783 labelValue = readFile<std::string>(file.path());
784 }
785 catch (const std::system_error& e)
786 {
787 log<level::DEBUG>(
788 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
789 file.path().string(), e.code().value())
790 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800791 continue;
792 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800793
794 auto functionID = getPowerLabelFunctionID(labelValue);
795 if (functionID == std::nullopt)
796 {
797 continue;
798 }
799
800 const std::string& tempLabel = "label";
801 const std::string filePathString = file.path().string().substr(
802 0, file.path().string().length() - tempLabel.length());
803
804 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
805
806 auto iter = powerSensorName.find(*functionID);
807 if (iter == powerSensorName.end())
808 {
809 continue;
810 }
811 sensorPath.append(iter->second);
812
Matt Spinlera26f1522021-08-25 15:50:20 -0500813 double tempValue{0};
814
815 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800816 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500817 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800818 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500819 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800820 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800821 log<level::DEBUG>(
Chris Cain5d66a0a2022-02-09 08:52:10 -0600822 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500823 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800824 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500825 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800826 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500827
Chris Cain5d66a0a2022-02-09 08:52:10 -0600828 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -0600829 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
830
Chris Cain5d66a0a2022-02-09 08:52:10 -0600831 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500832 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
833
Chris Cain5d66a0a2022-02-09 08:52:10 -0600834 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
835 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500836
Matt Spinler5901abd2021-09-23 13:50:03 -0500837 if (existingSensors.find(sensorPath) == existingSensors.end())
838 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600839 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
840 sensorPath);
Matt Spinler5901abd2021-09-23 13:50:03 -0500841 }
842
Matt Spinlera26f1522021-08-25 15:50:20 -0500843 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800844 }
845 return;
846}
847
848void Manager::setSensorValueToNaN(uint32_t id)
849{
850 for (const auto& [sensorPath, occId] : existingSensors)
851 {
852 if (occId == id)
853 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600854 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +0800855 sensorPath, std::numeric_limits<double>::quiet_NaN());
856 }
857 }
858 return;
859}
860
Sheldon Bailey373af752022-02-21 15:14:00 -0600861void Manager::setSensorValueToNonFunctional(uint32_t id) const
862{
863 for (const auto& [sensorPath, occId] : existingSensors)
864 {
865 if (occId == id)
866 {
867 dbus::OccDBusSensors::getOccDBus().setValue(
868 sensorPath, std::numeric_limits<double>::quiet_NaN());
869
870 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
871 false);
872 }
873 }
874 return;
875}
876
Chris Cain5d66a0a2022-02-09 08:52:10 -0600877void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800878{
Chris Caine2d0a432022-03-28 11:08:49 -0500879 static bool tracedError[8] = {0};
880 const fs::path sensorPath = occ->getHwmonPath();
Chris Cain5d66a0a2022-02-09 08:52:10 -0600881 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800882
Chris Caine2d0a432022-03-28 11:08:49 -0500883 if (fs::exists(sensorPath))
Chicago Duanbb895cb2021-06-18 19:37:16 +0800884 {
Chris Caine2d0a432022-03-28 11:08:49 -0500885 // Read temperature sensors
886 readTempSensors(sensorPath, id);
887
888 if (occ->isMasterOcc())
889 {
890 // Read power sensors
891 readPowerSensors(sensorPath, id);
892 }
893 tracedError[id] = false;
894 }
895 else
896 {
897 if (!tracedError[id])
898 {
899 log<level::ERR>(
900 fmt::format(
901 "Manager::getSensorValues: OCC{} sensor path missing: {}",
902 id, sensorPath.c_str())
903 .c_str());
904 tracedError[id] = true;
905 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800906 }
907
908 return;
909}
910#endif
Chris Cain17257672021-10-22 13:41:03 -0500911
912// Read the altitude from DBus
913void Manager::readAltitude()
914{
915 static bool traceAltitudeErr = true;
916
917 utils::PropertyValue altitudeProperty{};
918 try
919 {
920 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
921 ALTITUDE_PROP);
922 auto sensorVal = std::get<double>(altitudeProperty);
923 if (sensorVal < 0xFFFF)
924 {
925 if (sensorVal < 0)
926 {
927 altitude = 0;
928 }
929 else
930 {
931 // Round to nearest meter
932 altitude = uint16_t(sensorVal + 0.5);
933 }
934 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
935 sensorVal, altitude)
936 .c_str());
937 traceAltitudeErr = true;
938 }
939 else
940 {
941 if (traceAltitudeErr)
942 {
943 traceAltitudeErr = false;
944 log<level::DEBUG>(
945 fmt::format("Invalid altitude value: {}", sensorVal)
946 .c_str());
947 }
948 }
949 }
950 catch (const sdbusplus::exception::exception& e)
951 {
952 if (traceAltitudeErr)
953 {
954 traceAltitudeErr = false;
955 log<level::INFO>(
956 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
957 }
958 altitude = 0xFFFF; // not available
959 }
960}
961
962// Callback function when ambient temperature changes
963void Manager::ambientCallback(sdbusplus::message::message& msg)
964{
965 double currentTemp = 0;
966 uint8_t truncatedTemp = 0xFF;
967 std::string msgSensor;
968 std::map<std::string, std::variant<double>> msgData;
969 msg.read(msgSensor, msgData);
970
971 auto valPropMap = msgData.find(AMBIENT_PROP);
972 if (valPropMap == msgData.end())
973 {
974 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
975 return;
976 }
977 currentTemp = std::get<double>(valPropMap->second);
978 if (std::isnan(currentTemp))
979 {
980 truncatedTemp = 0xFF;
981 }
982 else
983 {
984 if (currentTemp < 0)
985 {
986 truncatedTemp = 0;
987 }
988 else
989 {
990 // Round to nearest degree C
991 truncatedTemp = uint8_t(currentTemp + 0.5);
992 }
993 }
994
995 // If ambient changes, notify OCCs
996 if (truncatedTemp != ambient)
997 {
998 log<level::DEBUG>(
999 fmt::format("ambientCallback: Ambient change from {} to {}C",
1000 ambient, currentTemp)
1001 .c_str());
1002
1003 ambient = truncatedTemp;
1004 if (altitude == 0xFFFF)
1005 {
1006 // No altitude yet, try reading again
1007 readAltitude();
1008 }
1009
1010 log<level::DEBUG>(
1011 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
1012 altitude)
1013 .c_str());
1014#ifdef POWER10
1015 // Send ambient and altitude to all OCCs
1016 for (auto& obj : statusObjects)
1017 {
1018 if (obj->occActive())
1019 {
1020 obj->sendAmbient(ambient, altitude);
1021 }
1022 }
1023#endif // POWER10
1024 }
1025}
1026
1027// return the current ambient and altitude readings
1028void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1029 uint16_t& altitudeValue) const
1030{
1031 ambientValid = true;
1032 ambientTemp = ambient;
1033 altitudeValue = altitude;
1034
1035 if (ambient == 0xFF)
1036 {
1037 ambientValid = false;
1038 }
1039}
1040
Chris Caina7b74dc2021-11-10 17:03:43 -06001041#ifdef POWER10
1042void Manager::occsNotAllRunning()
1043{
Chris Cain6fa848a2022-01-24 14:54:38 -06001044 // Function will also gets called when occ-control app gets
1045 // restarted. (occ active sensors do not change, so the Status
1046 // object does not call Manager back for all OCCs)
Chris Caina7b74dc2021-11-10 17:03:43 -06001047
1048 if (activeCount != statusObjects.size())
1049 {
1050 // Not all OCCs went active
1051 log<level::WARNING>(
1052 fmt::format(
1053 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
1054 activeCount, statusObjects.size())
1055 .c_str());
1056 // Procs may be garded, so may not need reset.
1057 }
1058
1059 validateOccMaster();
1060}
1061#endif // POWER10
1062
1063// Verify single master OCC and start presence monitor
1064void Manager::validateOccMaster()
1065{
1066 int masterInstance = -1;
1067 for (auto& obj : statusObjects)
1068 {
Chris Caina7b74dc2021-11-10 17:03:43 -06001069 if (obj->isMasterOcc())
1070 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001071 obj->addPresenceWatchMaster();
1072
Chris Caina7b74dc2021-11-10 17:03:43 -06001073 if (masterInstance == -1)
1074 {
1075 masterInstance = obj->getOccInstanceID();
1076 }
1077 else
1078 {
1079 log<level::ERR>(
1080 fmt::format(
1081 "validateOccMaster: Multiple OCC masters! ({} and {})",
1082 masterInstance, obj->getOccInstanceID())
1083 .c_str());
1084 // request reset
1085 obj->deviceError();
1086 }
1087 }
1088 }
1089 if (masterInstance < 0)
1090 {
1091 log<level::ERR>("validateOccMaster: Master OCC not found!");
1092 // request reset
1093 statusObjects.front()->deviceError();
1094 }
1095 else
1096 {
1097 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -06001098 fmt::format("validateOccMaster: OCC{} is master of {} OCCs",
1099 masterInstance, activeCount)
Chris Caina7b74dc2021-11-10 17:03:43 -06001100 .c_str());
1101 }
1102}
1103
Chris Cain40501a22022-03-14 17:33:27 -05001104void Manager::updatePcapBounds() const
1105{
1106 if (pcap)
1107 {
1108 pcap->updatePcapBounds();
1109 }
1110}
1111
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301112} // namespace occ
1113} // namespace open_power