blob: 78ac9f22bbc81bf147f9e61ce2963016726ca105 [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// FRU type value for which readTempSensors() skips a DIMM temperature
// sensor (no _input file is available in that case).
constexpr uint32_t fruTypeNotAvailable{0xFF};

// File name suffixes appended to the common prefix of a hwmon sensor
// (the label file name with its trailing "label" removed).
constexpr const char* fruTypeSuffix{"fru_type"};
constexpr const char* faultSuffix{"fault"};
constexpr const char* inputSuffix{"input"};
constexpr const char* maxSuffix{"max"};

// While this file exists, findAndCreateObjects() postpones the OCC search.
const char* const HOST_ON_FILE{"/run/openbmc/host@0-on"};
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read a single whitespace-delimited value of type T from a file.
 *
 * @tparam T - type extracted from the stream with operator>>
 * @param[in] path - file to read
 *
 * @return the extracted value
 *
 * @throws std::system_error (generic category) when the file cannot be
 *         opened, the value cannot be extracted, or end-of-file is hit
 *         during extraction. The error code is the errno observed for the
 *         failure, or EIO when the failure did not set errno (for example
 *         when the content does not parse as T).
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data;

    // Start from a clean errno so that a stale value left by an unrelated
    // earlier call is never reported as the reason for this failure.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Stream failures such as a parse error do not set errno; never
        // throw a system_error whose code is 0 ("Success").
        const int err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain1718fd82022-02-16 16:39:50 -060068 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050069 {
Chris Cain1718fd82022-02-16 16:39:50 -060070 // Create the OCCs based on on the /dev/occX devices
71 auto occs = findOCCsInDev();
Matt Spinlerd267cec2021-09-01 14:49:19 -050072
Chris Cain1718fd82022-02-16 16:39:50 -060073 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
74 {
75 // Something changed or no OCCs yet, try again in 10s.
76 // Note on the first pass prevOCCSearch will be empty,
77 // so there will be at least one delay to give things
78 // a chance to settle.
79 prevOCCSearch = occs;
80
81 discoverTimer->restartOnce(10s);
82 }
83 else
84 {
85 discoverTimer.reset();
86
87 // createObjects requires OCC0 first.
88 std::sort(occs.begin(), occs.end());
89
90 for (auto id : occs)
91 {
92 createObjects(std::string(OCC_NAME) + std::to_string(id));
93 }
94 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050095 }
96 else
97 {
Chris Cain1718fd82022-02-16 16:39:50 -060098 log<level::INFO>(
99 fmt::format(
100 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
101 HOST_ON_FILE)
102 .c_str());
103 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500104 }
105#endif
106}
107
108std::vector<int> Manager::findOCCsInDev()
109{
110 std::vector<int> occs;
111 std::regex expr{R"(occ(\d+)$)"};
112
113 for (auto& file : fs::directory_iterator("/dev"))
114 {
115 std::smatch match;
116 std::string path{file.path().string()};
117 if (std::regex_search(path, match, expr))
118 {
119 auto num = std::stoi(match[1].str());
120
121 // /dev numbering starts at 1, ours starts at 0.
122 occs.push_back(num - 1);
123 }
124 }
125
126 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530127}
128
129int Manager::cpuCreated(sdbusplus::message::message& msg)
130{
George Liubcef3b42021-09-10 12:39:02 +0800131 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530132
133 sdbusplus::message::object_path o;
134 msg.read(o);
135 fs::path cpuPath(std::string(std::move(o)));
136
137 auto name = cpuPath.filename().string();
138 auto index = name.find(CPU_NAME);
139 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
140
141 createObjects(name);
142
143 return 0;
144}
145
146void Manager::createObjects(const std::string& occ)
147{
148 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
149
Chris Cain6fa848a2022-01-24 14:54:38 -0600150#ifdef POWER10
151 if (!pmode)
152 {
Chris Cain1be43372021-12-09 19:29:37 -0600153 // Create the power mode object
Chris Cain5d66a0a2022-02-09 08:52:10 -0600154 pmode = std::make_unique<powermode::PowerMode>(
Chris Cain1be43372021-12-09 19:29:37 -0600155 *this, powermode::PMODE_PATH, powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600156 }
157#endif
158
Gunnar Mills94df8c92018-09-14 14:50:03 -0500159 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800160 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600161#ifdef POWER10
162 pmode,
163#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500164 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Tom Joseph00325232020-07-29 17:51:48 +0530165 std::placeholders::_1)
166#ifdef PLDM
167 ,
168 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
169 std::placeholders::_1)
170#endif
171 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530172
Chris Cain40501a22022-03-14 17:33:27 -0500173 // Create the power cap monitor object
174 if (!pcap)
175 {
176 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
177 *statusObjects.back());
178 }
179
Chris Cain36f9cde2021-11-22 11:18:21 -0600180 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530181 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600182 log<level::INFO>(
183 fmt::format("Manager::createObjects(): OCC{} is the master",
184 statusObjects.back()->getOccInstanceID())
185 .c_str());
186 _pollTimer->setEnabled(false);
187
Chris Cain78e86012021-03-04 16:15:31 -0600188#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600189 // Set the master OCC on the PowerMode object
190 pmode->setMasterOcc(path);
Chris Cain40501a22022-03-14 17:33:27 -0500191 // Update power cap bounds
192 pcap->updatePcapBounds();
Chris Cain78e86012021-03-04 16:15:31 -0600193#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600194 }
195
196 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
197#ifdef POWER10
198 ,
199 pmode
200#endif
201 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530202}
203
204void Manager::statusCallBack(bool status)
205{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500206 using InternalFailure =
207 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530208
209 // At this time, it won't happen but keeping it
210 // here just in case something changes in the future
211 if ((activeCount == 0) && (!status))
212 {
213 log<level::ERR>("Invalid update on OCCActive");
214 elog<InternalFailure>();
215 }
216
Chris Caina7b74dc2021-11-10 17:03:43 -0600217 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600218 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600219 // OCC went active
220 ++activeCount;
221
222#ifdef POWER10
223 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600224 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600225 // First OCC went active (allow some time for all OCCs to go active)
226 waitForAllOccsTimer->restartOnce(30s);
Matt Spinler53f68142021-08-25 15:47:31 -0500227 }
228#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600229
230 if (activeCount == statusObjects.size())
231 {
232#ifdef POWER10
233 // All OCCs are now running
234 if (waitForAllOccsTimer->isEnabled())
235 {
236 // stop occ wait timer
237 waitForAllOccsTimer->setEnabled(false);
238 }
239#endif
240
241 // Verify master OCC and start presence monitor
242 validateOccMaster();
243 }
244
245 // Start poll timer if not already started
246 if (!_pollTimer->isEnabled())
247 {
248 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600249 fmt::format("Manager: OCCs will be polled every {} seconds",
250 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600251 .c_str());
252
253 // Send poll and start OCC poll timer
254 pollerTimerExpired();
255 }
256 }
257 else
258 {
259 // OCC went away
260 --activeCount;
261
262 if (activeCount == 0)
263 {
264 // No OCCs are running
265
266 // Stop OCC poll timer
267 if (_pollTimer->isEnabled())
268 {
269 log<level::INFO>(
270 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
271 _pollTimer->setEnabled(false);
272 }
273
274#ifdef POWER10
275 // stop wait timer
276 if (waitForAllOccsTimer->isEnabled())
277 {
278 waitForAllOccsTimer->setEnabled(false);
279 }
280#endif
281
282#ifdef READ_OCC_SENSORS
283 // Clear OCC sensors
284 for (auto& obj : statusObjects)
285 {
286 setSensorValueToNaN(obj->getOccInstanceID());
287 }
288#endif
289 }
Chris Caina8857c52021-01-27 11:53:05 -0600290 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530291}
292
293#ifdef I2C_OCC
294void Manager::initStatusObjects()
295{
296 // Make sure we have a valid path string
297 static_assert(sizeof(DEV_PATH) != 0);
298
299 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
300 for (auto& name : deviceNames)
301 {
302 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800303 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530304 auto path = fs::path(OCC_CONTROL_ROOT) / name;
305 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800306 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530307 }
Chris Cain40501a22022-03-14 17:33:27 -0500308 // The first device is master occ
309 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
310 *statusObjects.front());
Chris Cain78e86012021-03-04 16:15:31 -0600311#ifdef POWER10
Chris Cain5d66a0a2022-02-09 08:52:10 -0600312 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
313 powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600314 // Set the master OCC on the PowerMode object
315 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600316#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530317}
318#endif
319
Tom Joseph815f9f52020-07-27 12:12:13 +0530320#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500321void Manager::sbeTimeout(unsigned int instance)
322{
Eddie James2a751d72022-03-04 09:16:12 -0600323 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
324 [instance](const auto& obj) {
325 return instance == obj->getOccInstanceID();
326 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500327
Eddie Jamescb018da2022-03-05 11:49:37 -0600328 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600329 {
330 log<level::INFO>("SBE timeout, requesting HRESET",
331 entry("SBE=%d", instance));
Eddie Jamescbad2192021-10-07 09:39:39 -0500332
Eddie James2a751d72022-03-04 09:16:12 -0600333 setSBEState(instance, SBE_STATE_NOT_USABLE);
334
335 pldmHandle->sendHRESET(instance);
336 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500337}
338
Tom Joseph815f9f52020-07-27 12:12:13 +0530339bool Manager::updateOCCActive(instanceID instance, bool status)
340{
341 return (statusObjects[instance])->occActive(status);
342}
Eddie Jamescbad2192021-10-07 09:39:39 -0500343
344void Manager::sbeHRESETResult(instanceID instance, bool success)
345{
346 if (success)
347 {
348 log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
349
350 setSBEState(instance, SBE_STATE_BOOTED);
351
352 return;
353 }
354
355 setSBEState(instance, SBE_STATE_FAILED);
356
357 if (sbeCanDump(instance))
358 {
Eddie Jamescbad2192021-10-07 09:39:39 -0500359 log<level::INFO>("HRESET failed, triggering SBE dump",
360 entry("SBE=%d", instance));
361
362 auto& bus = utils::getBus();
363 uint32_t src6 = instance << 16;
364 uint32_t logId =
365 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
366 src6, "SBE command timeout");
367
368 try
369 {
George Liuf3a4a692021-12-28 13:59:51 +0800370 constexpr auto path = "/org/openpower/dump";
371 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
372 constexpr auto function = "CreateDump";
373
Eddie Jamescbad2192021-10-07 09:39:39 -0500374 std::string service = utils::getService(path, interface);
375 auto method =
376 bus.new_method_call(service.c_str(), path, interface, function);
377
378 std::map<std::string, std::variant<std::string, uint64_t>>
379 createParams{
380 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
381 uint64_t(logId)},
382 {"com.ibm.Dump.Create.CreateParameters.DumpType",
383 "com.ibm.Dump.Create.DumpType.SBE"},
384 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
385 uint64_t(instance)},
386 };
387
388 method.append(createParams);
389
390 auto response = bus.call(method);
391 }
392 catch (const sdbusplus::exception::exception& e)
393 {
394 constexpr auto ERROR_DUMP_DISABLED =
395 "xyz.openbmc_project.Dump.Create.Error.Disabled";
396 if (e.name() == ERROR_DUMP_DISABLED)
397 {
398 log<level::INFO>("Dump is disabled, skipping");
399 }
400 else
401 {
402 log<level::ERR>("Dump failed");
403 }
404 }
405 }
406}
407
408bool Manager::sbeCanDump(unsigned int instance)
409{
410 struct pdbg_target* proc = getPdbgTarget(instance);
411
412 if (!proc)
413 {
414 // allow the dump in the error case
415 return true;
416 }
417
418 try
419 {
420 if (!openpower::phal::sbe::isDumpAllowed(proc))
421 {
422 return false;
423 }
424
425 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
426 {
427 return false;
428 }
429 }
430 catch (openpower::phal::exception::SbeError& e)
431 {
432 log<level::INFO>("Failed to query SBE state");
433 }
434
435 // allow the dump in the error case
436 return true;
437}
438
439void Manager::setSBEState(unsigned int instance, enum sbe_state state)
440{
441 struct pdbg_target* proc = getPdbgTarget(instance);
442
443 if (!proc)
444 {
445 return;
446 }
447
448 try
449 {
450 openpower::phal::sbe::setState(proc, state);
451 }
452 catch (const openpower::phal::exception::SbeError& e)
453 {
454 log<level::ERR>("Failed to set SBE state");
455 }
456}
457
458struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
459{
460 if (!pdbgInitialized)
461 {
462 try
463 {
464 openpower::phal::pdbg::init();
465 pdbgInitialized = true;
466 }
467 catch (const openpower::phal::exception::PdbgError& e)
468 {
469 log<level::ERR>("pdbg initialization failed");
470 return nullptr;
471 }
472 }
473
474 struct pdbg_target* proc = nullptr;
475 pdbg_for_each_class_target("proc", proc)
476 {
477 if (pdbg_target_index(proc) == instance)
478 {
479 return proc;
480 }
481 }
482
483 log<level::ERR>("Failed to get pdbg target");
484 return nullptr;
485}
Tom Joseph815f9f52020-07-27 12:12:13 +0530486#endif
487
Chris Caina8857c52021-01-27 11:53:05 -0600488void Manager::pollerTimerExpired()
489{
Chris Caina8857c52021-01-27 11:53:05 -0600490 if (!_pollTimer)
491 {
492 log<level::ERR>(
493 "Manager::pollerTimerExpired() ERROR: Timer not defined");
494 return;
495 }
496
497 for (auto& obj : statusObjects)
498 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600499 if (!obj->occActive())
500 {
501 // OCC is not running yet
502#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600503 auto id = obj->getOccInstanceID();
Chris Caina7b74dc2021-11-10 17:03:43 -0600504 setSensorValueToNaN(id);
505#endif
506 continue;
507 }
508
Chris Caina8857c52021-01-27 11:53:05 -0600509 // Read sysfs to force kernel to poll OCC
510 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800511
512#ifdef READ_OCC_SENSORS
513 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600514 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800515#endif
Chris Caina8857c52021-01-27 11:53:05 -0600516 }
517
Chris Caina7b74dc2021-11-10 17:03:43 -0600518 if (activeCount > 0)
519 {
520 // Restart OCC poll timer
521 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
522 }
523 else
524 {
525 // No OCCs running, so poll timer will not be restarted
526 log<level::INFO>(
527 fmt::format(
528 "Manager::pollerTimerExpired: poll timer will not be restarted")
529 .c_str());
530 }
Chris Caina8857c52021-01-27 11:53:05 -0600531}
532
Chicago Duanbb895cb2021-06-18 19:37:16 +0800533#ifdef READ_OCC_SENSORS
534void Manager::readTempSensors(const fs::path& path, uint32_t id)
535{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800536 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
537 for (auto& file : fs::directory_iterator(path))
538 {
539 if (!std::regex_search(file.path().string(), expr))
540 {
541 continue;
542 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800543
Matt Spinlera26f1522021-08-25 15:50:20 -0500544 uint32_t labelValue{0};
545
546 try
547 {
548 labelValue = readFile<uint32_t>(file.path());
549 }
550 catch (const std::system_error& e)
551 {
552 log<level::DEBUG>(
553 fmt::format("readTempSensors: Failed reading {}, errno = {}",
554 file.path().string(), e.code().value())
555 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800556 continue;
557 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800558
559 const std::string& tempLabel = "label";
560 const std::string filePathString = file.path().string().substr(
561 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500562
563 uint32_t fruTypeValue{0};
564 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800565 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500566 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
567 }
568 catch (const std::system_error& e)
569 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800570 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500571 fmt::format("readTempSensors: Failed reading {}, errno = {}",
572 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800573 .c_str());
574 continue;
575 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800576
577 std::string sensorPath =
578 OCC_SENSORS_ROOT + std::string("/temperature/");
579
Matt Spinlerace67d82021-10-18 13:41:57 -0500580 std::string dvfsTempPath;
581
Chicago Duanbb895cb2021-06-18 19:37:16 +0800582 if (fruTypeValue == VRMVdd)
583 {
584 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
585 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500586 else if (fruTypeValue == processorIoRing)
587 {
588 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
589 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
590 std::to_string(id) + "_ioring_dvfs_temp";
591 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800592 else
593 {
Matt Spinler14d14022021-08-25 15:38:29 -0500594 uint16_t type = (labelValue & 0xFF000000) >> 24;
595 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800596
597 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
598 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500599 if (fruTypeValue == fruTypeNotAvailable)
600 {
601 // Not all DIMM related temps are available to read
602 // (no _input file in this case)
603 continue;
604 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800605 auto iter = dimmTempSensorName.find(fruTypeValue);
606 if (iter == dimmTempSensorName.end())
607 {
George Liub5ca1012021-09-10 12:53:11 +0800608 log<level::ERR>(
609 fmt::format(
610 "readTempSensors: Fru type error! fruTypeValue = {}) ",
611 fruTypeValue)
612 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800613 continue;
614 }
615
616 sensorPath.append("dimm" + std::to_string(instanceID) +
617 iter->second);
618 }
619 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
620 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500621 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800622 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500623 // The OCC reports small core temps, of which there are
624 // two per big core. All current P10 systems are in big
625 // core mode, so use a big core name.
626 uint16_t coreNum = instanceID / 2;
627 uint16_t tempNum = instanceID % 2;
628 sensorPath.append("proc" + std::to_string(id) + "_core" +
629 std::to_string(coreNum) + "_" +
630 std::to_string(tempNum) + "_temp");
631
632 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
633 "/temperature/proc" + std::to_string(id) +
634 "_core_dvfs_temp";
635 }
636 else
637 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800638 continue;
639 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800640 }
641 else
642 {
643 continue;
644 }
645 }
646
Matt Spinlerace67d82021-10-18 13:41:57 -0500647 // The dvfs temp file only needs to be read once per chip per type.
648 if (!dvfsTempPath.empty() &&
649 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
650 {
651 try
652 {
653 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
654
655 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
656 dvfsTempPath, dvfsValue * std::pow(10, -3));
657 }
658 catch (const std::system_error& e)
659 {
660 log<level::DEBUG>(
661 fmt::format(
662 "readTempSensors: Failed reading {}, errno = {}",
663 filePathString + maxSuffix, e.code().value())
664 .c_str());
665 }
666 }
667
Matt Spinlera26f1522021-08-25 15:50:20 -0500668 uint32_t faultValue{0};
669 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800670 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500671 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
672 }
673 catch (const std::system_error& e)
674 {
675 log<level::DEBUG>(
676 fmt::format("readTempSensors: Failed reading {}, errno = {}",
677 filePathString + faultSuffix, e.code().value())
678 .c_str());
679 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800680 }
681
Matt Spinlera26f1522021-08-25 15:50:20 -0500682 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800683 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600684 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500685 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800686
Chris Cain5d66a0a2022-02-09 08:52:10 -0600687 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
688 false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800689
Matt Spinlera26f1522021-08-25 15:50:20 -0500690 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800691 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500692
693 double tempValue{0};
694
695 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800696 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500697 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800698 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500699 catch (const std::system_error& e)
700 {
701 log<level::DEBUG>(
702 fmt::format("readTempSensors: Failed reading {}, errno = {}",
703 filePathString + inputSuffix, e.code().value())
704 .c_str());
705 continue;
706 }
707
Chris Cain5d66a0a2022-02-09 08:52:10 -0600708 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500709 sensorPath, tempValue * std::pow(10, -3));
710
Chris Cain5d66a0a2022-02-09 08:52:10 -0600711 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
712 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500713
Chris Cain6fa848a2022-01-24 14:54:38 -0600714 // At this point, the sensor will be created for sure.
715 if (existingSensors.find(sensorPath) == existingSensors.end())
716 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600717 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
718 sensorPath);
Chris Cain6fa848a2022-01-24 14:54:38 -0600719 }
720
Matt Spinlera26f1522021-08-25 15:50:20 -0500721 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800722 }
723 return;
724}
725
726std::optional<std::string>
727 Manager::getPowerLabelFunctionID(const std::string& value)
728{
729 // If the value is "system", then the FunctionID is "system".
730 if (value == "system")
731 {
732 return value;
733 }
734
735 // If the value is not "system", then the label value have 3 numbers, of
736 // which we only care about the middle one:
737 // <sensor id>_<function id>_<apss channel>
738 // eg: The value is "0_10_5" , then the FunctionID is "10".
739 if (value.find("_") == std::string::npos)
740 {
741 return std::nullopt;
742 }
743
744 auto powerLabelValue = value.substr((value.find("_") + 1));
745
746 if (powerLabelValue.find("_") == std::string::npos)
747 {
748 return std::nullopt;
749 }
750
751 return powerLabelValue.substr(0, powerLabelValue.find("_"));
752}
753
754void Manager::readPowerSensors(const fs::path& path, uint32_t id)
755{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800756 std::regex expr{"power\\d+_label$"}; // Example: power5_label
757 for (auto& file : fs::directory_iterator(path))
758 {
759 if (!std::regex_search(file.path().string(), expr))
760 {
761 continue;
762 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800763
Matt Spinlera26f1522021-08-25 15:50:20 -0500764 std::string labelValue;
765 try
766 {
767 labelValue = readFile<std::string>(file.path());
768 }
769 catch (const std::system_error& e)
770 {
771 log<level::DEBUG>(
772 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
773 file.path().string(), e.code().value())
774 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800775 continue;
776 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800777
778 auto functionID = getPowerLabelFunctionID(labelValue);
779 if (functionID == std::nullopt)
780 {
781 continue;
782 }
783
784 const std::string& tempLabel = "label";
785 const std::string filePathString = file.path().string().substr(
786 0, file.path().string().length() - tempLabel.length());
787
788 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
789
790 auto iter = powerSensorName.find(*functionID);
791 if (iter == powerSensorName.end())
792 {
793 continue;
794 }
795 sensorPath.append(iter->second);
796
Matt Spinlera26f1522021-08-25 15:50:20 -0500797 double tempValue{0};
798
799 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800800 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500801 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800802 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500803 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800804 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800805 log<level::DEBUG>(
Chris Cain5d66a0a2022-02-09 08:52:10 -0600806 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500807 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800808 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500809 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800810 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500811
Chris Cain5d66a0a2022-02-09 08:52:10 -0600812 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -0600813 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
814
Chris Cain5d66a0a2022-02-09 08:52:10 -0600815 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500816 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
817
Chris Cain5d66a0a2022-02-09 08:52:10 -0600818 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
819 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500820
Matt Spinler5901abd2021-09-23 13:50:03 -0500821 if (existingSensors.find(sensorPath) == existingSensors.end())
822 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600823 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
824 sensorPath);
Matt Spinler5901abd2021-09-23 13:50:03 -0500825 }
826
Matt Spinlera26f1522021-08-25 15:50:20 -0500827 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800828 }
829 return;
830}
831
832void Manager::setSensorValueToNaN(uint32_t id)
833{
834 for (const auto& [sensorPath, occId] : existingSensors)
835 {
836 if (occId == id)
837 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600838 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +0800839 sensorPath, std::numeric_limits<double>::quiet_NaN());
840 }
841 }
842 return;
843}
844
Chris Cain5d66a0a2022-02-09 08:52:10 -0600845void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800846{
Chris Cain5d66a0a2022-02-09 08:52:10 -0600847 const fs::path fileName = occ->getHwmonPath();
848 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800849
850 // Read temperature sensors
851 readTempSensors(fileName, id);
852
Chris Cain5d66a0a2022-02-09 08:52:10 -0600853 if (occ->isMasterOcc())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800854 {
855 // Read power sensors
856 readPowerSensors(fileName, id);
857 }
858
859 return;
860}
861#endif
Chris Cain17257672021-10-22 13:41:03 -0500862
863// Read the altitude from DBus
864void Manager::readAltitude()
865{
866 static bool traceAltitudeErr = true;
867
868 utils::PropertyValue altitudeProperty{};
869 try
870 {
871 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
872 ALTITUDE_PROP);
873 auto sensorVal = std::get<double>(altitudeProperty);
874 if (sensorVal < 0xFFFF)
875 {
876 if (sensorVal < 0)
877 {
878 altitude = 0;
879 }
880 else
881 {
882 // Round to nearest meter
883 altitude = uint16_t(sensorVal + 0.5);
884 }
885 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
886 sensorVal, altitude)
887 .c_str());
888 traceAltitudeErr = true;
889 }
890 else
891 {
892 if (traceAltitudeErr)
893 {
894 traceAltitudeErr = false;
895 log<level::DEBUG>(
896 fmt::format("Invalid altitude value: {}", sensorVal)
897 .c_str());
898 }
899 }
900 }
901 catch (const sdbusplus::exception::exception& e)
902 {
903 if (traceAltitudeErr)
904 {
905 traceAltitudeErr = false;
906 log<level::INFO>(
907 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
908 }
909 altitude = 0xFFFF; // not available
910 }
911}
912
913// Callback function when ambient temperature changes
914void Manager::ambientCallback(sdbusplus::message::message& msg)
915{
916 double currentTemp = 0;
917 uint8_t truncatedTemp = 0xFF;
918 std::string msgSensor;
919 std::map<std::string, std::variant<double>> msgData;
920 msg.read(msgSensor, msgData);
921
922 auto valPropMap = msgData.find(AMBIENT_PROP);
923 if (valPropMap == msgData.end())
924 {
925 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
926 return;
927 }
928 currentTemp = std::get<double>(valPropMap->second);
929 if (std::isnan(currentTemp))
930 {
931 truncatedTemp = 0xFF;
932 }
933 else
934 {
935 if (currentTemp < 0)
936 {
937 truncatedTemp = 0;
938 }
939 else
940 {
941 // Round to nearest degree C
942 truncatedTemp = uint8_t(currentTemp + 0.5);
943 }
944 }
945
946 // If ambient changes, notify OCCs
947 if (truncatedTemp != ambient)
948 {
949 log<level::DEBUG>(
950 fmt::format("ambientCallback: Ambient change from {} to {}C",
951 ambient, currentTemp)
952 .c_str());
953
954 ambient = truncatedTemp;
955 if (altitude == 0xFFFF)
956 {
957 // No altitude yet, try reading again
958 readAltitude();
959 }
960
961 log<level::DEBUG>(
962 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
963 altitude)
964 .c_str());
965#ifdef POWER10
966 // Send ambient and altitude to all OCCs
967 for (auto& obj : statusObjects)
968 {
969 if (obj->occActive())
970 {
971 obj->sendAmbient(ambient, altitude);
972 }
973 }
974#endif // POWER10
975 }
976}
977
978// return the current ambient and altitude readings
979void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
980 uint16_t& altitudeValue) const
981{
982 ambientValid = true;
983 ambientTemp = ambient;
984 altitudeValue = altitude;
985
986 if (ambient == 0xFF)
987 {
988 ambientValid = false;
989 }
990}
991
Chris Caina7b74dc2021-11-10 17:03:43 -0600992#ifdef POWER10
993void Manager::occsNotAllRunning()
994{
Chris Cain6fa848a2022-01-24 14:54:38 -0600995 // Function will also gets called when occ-control app gets
996 // restarted. (occ active sensors do not change, so the Status
997 // object does not call Manager back for all OCCs)
Chris Caina7b74dc2021-11-10 17:03:43 -0600998
999 if (activeCount != statusObjects.size())
1000 {
1001 // Not all OCCs went active
1002 log<level::WARNING>(
1003 fmt::format(
1004 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
1005 activeCount, statusObjects.size())
1006 .c_str());
1007 // Procs may be garded, so may not need reset.
1008 }
1009
1010 validateOccMaster();
1011}
1012#endif // POWER10
1013
1014// Verify single master OCC and start presence monitor
1015void Manager::validateOccMaster()
1016{
1017 int masterInstance = -1;
1018 for (auto& obj : statusObjects)
1019 {
Chris Caina7b74dc2021-11-10 17:03:43 -06001020 if (obj->isMasterOcc())
1021 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001022 obj->addPresenceWatchMaster();
1023
Chris Caina7b74dc2021-11-10 17:03:43 -06001024 if (masterInstance == -1)
1025 {
1026 masterInstance = obj->getOccInstanceID();
1027 }
1028 else
1029 {
1030 log<level::ERR>(
1031 fmt::format(
1032 "validateOccMaster: Multiple OCC masters! ({} and {})",
1033 masterInstance, obj->getOccInstanceID())
1034 .c_str());
1035 // request reset
1036 obj->deviceError();
1037 }
1038 }
1039 }
1040 if (masterInstance < 0)
1041 {
1042 log<level::ERR>("validateOccMaster: Master OCC not found!");
1043 // request reset
1044 statusObjects.front()->deviceError();
1045 }
1046 else
1047 {
1048 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -06001049 fmt::format("validateOccMaster: OCC{} is master of {} OCCs",
1050 masterInstance, activeCount)
Chris Caina7b74dc2021-11-10 17:03:43 -06001051 .c_str());
1052 }
1053}
1054
Chris Cain40501a22022-03-14 17:33:27 -05001055void Manager::updatePcapBounds() const
1056{
1057 if (pcap)
1058 {
1059 pcap->updatePcapBounds();
1060 }
1061}
1062
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301063} // namespace occ
1064} // namespace open_power