blob: ea02cf3f2eeaa059e8a93a4359faf68fe8d96db9 [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature
// sensor (no _input file is available to read in that case).
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// File name suffixes appended to a sensor's "<name>N_" prefix (the label
// file path with the trailing "label" removed) to locate its sibling files.
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// File whose existence is checked by Manager::findAndCreateObjects() before
// it searches /dev for OCC devices.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read a single value of type T from the start of a file.
 *
 * The value is extracted with operator>>, so leading whitespace is skipped
 * and extraction stops at the first character that is not part of the value.
 *
 * @param[in] path - file to read
 *
 * @return the extracted value
 *
 * @throws std::system_error (generic category) if the file cannot be opened
 *         or the value cannot be extracted. The code is errno when the
 *         failure set it, otherwise std::errc::io_error.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data;

    // Drop any stale errno so the code reported below belongs to this read.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // A stream failure that is not an OS error (for example contents
        // that cannot be parsed as T) leaves errno at 0. Do not report that
        // as "success" to callers that log e.code().value().
        const int err =
            (errno != 0) ? errno : static_cast<int>(std::errc::io_error);
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain1718fd82022-02-16 16:39:50 -060068 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050069 {
Chris Cain1718fd82022-02-16 16:39:50 -060070 // Create the OCCs based on on the /dev/occX devices
71 auto occs = findOCCsInDev();
Matt Spinlerd267cec2021-09-01 14:49:19 -050072
Chris Cain1718fd82022-02-16 16:39:50 -060073 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
74 {
75 // Something changed or no OCCs yet, try again in 10s.
76 // Note on the first pass prevOCCSearch will be empty,
77 // so there will be at least one delay to give things
78 // a chance to settle.
79 prevOCCSearch = occs;
80
81 discoverTimer->restartOnce(10s);
82 }
83 else
84 {
85 discoverTimer.reset();
86
87 // createObjects requires OCC0 first.
88 std::sort(occs.begin(), occs.end());
89
90 for (auto id : occs)
91 {
92 createObjects(std::string(OCC_NAME) + std::to_string(id));
93 }
94 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050095 }
96 else
97 {
Chris Cain1718fd82022-02-16 16:39:50 -060098 log<level::INFO>(
99 fmt::format(
100 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
101 HOST_ON_FILE)
102 .c_str());
103 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500104 }
105#endif
106}
107
108std::vector<int> Manager::findOCCsInDev()
109{
110 std::vector<int> occs;
111 std::regex expr{R"(occ(\d+)$)"};
112
113 for (auto& file : fs::directory_iterator("/dev"))
114 {
115 std::smatch match;
116 std::string path{file.path().string()};
117 if (std::regex_search(path, match, expr))
118 {
119 auto num = std::stoi(match[1].str());
120
121 // /dev numbering starts at 1, ours starts at 0.
122 occs.push_back(num - 1);
123 }
124 }
125
126 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530127}
128
129int Manager::cpuCreated(sdbusplus::message::message& msg)
130{
George Liubcef3b42021-09-10 12:39:02 +0800131 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530132
133 sdbusplus::message::object_path o;
134 msg.read(o);
135 fs::path cpuPath(std::string(std::move(o)));
136
137 auto name = cpuPath.filename().string();
138 auto index = name.find(CPU_NAME);
139 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
140
141 createObjects(name);
142
143 return 0;
144}
145
146void Manager::createObjects(const std::string& occ)
147{
148 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
149
Chris Cain6fa848a2022-01-24 14:54:38 -0600150#ifdef POWER10
151 if (!pmode)
152 {
Chris Cain1be43372021-12-09 19:29:37 -0600153 // Create the power mode object
Chris Cain5d66a0a2022-02-09 08:52:10 -0600154 pmode = std::make_unique<powermode::PowerMode>(
Chris Cain1be43372021-12-09 19:29:37 -0600155 *this, powermode::PMODE_PATH, powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600156 }
157#endif
158
Gunnar Mills94df8c92018-09-14 14:50:03 -0500159 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800160 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600161#ifdef POWER10
162 pmode,
163#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500164 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Tom Joseph00325232020-07-29 17:51:48 +0530165 std::placeholders::_1)
166#ifdef PLDM
167 ,
168 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
169 std::placeholders::_1)
170#endif
171 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530172
Chris Cain36f9cde2021-11-22 11:18:21 -0600173 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530174 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600175 log<level::INFO>(
176 fmt::format("Manager::createObjects(): OCC{} is the master",
177 statusObjects.back()->getOccInstanceID())
178 .c_str());
179 _pollTimer->setEnabled(false);
180
Chris Cain78e86012021-03-04 16:15:31 -0600181#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600182 // Set the master OCC on the PowerMode object
183 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600184#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600185 }
186
187 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
188#ifdef POWER10
189 ,
190 pmode
191#endif
192 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530193}
194
195void Manager::statusCallBack(bool status)
196{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500197 using InternalFailure =
198 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530199
200 // At this time, it won't happen but keeping it
201 // here just in case something changes in the future
202 if ((activeCount == 0) && (!status))
203 {
204 log<level::ERR>("Invalid update on OCCActive");
205 elog<InternalFailure>();
206 }
207
Chris Caina7b74dc2021-11-10 17:03:43 -0600208 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600209 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600210 // OCC went active
211 ++activeCount;
212
213#ifdef POWER10
214 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600215 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600216 // First OCC went active (allow some time for all OCCs to go active)
217 waitForAllOccsTimer->restartOnce(30s);
Matt Spinler53f68142021-08-25 15:47:31 -0500218 }
219#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600220
221 if (activeCount == statusObjects.size())
222 {
223#ifdef POWER10
224 // All OCCs are now running
225 if (waitForAllOccsTimer->isEnabled())
226 {
227 // stop occ wait timer
228 waitForAllOccsTimer->setEnabled(false);
229 }
230#endif
231
232 // Verify master OCC and start presence monitor
233 validateOccMaster();
234 }
235
236 // Start poll timer if not already started
237 if (!_pollTimer->isEnabled())
238 {
239 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600240 fmt::format("Manager: OCCs will be polled every {} seconds",
241 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600242 .c_str());
243
244 // Send poll and start OCC poll timer
245 pollerTimerExpired();
246 }
247 }
248 else
249 {
250 // OCC went away
251 --activeCount;
252
253 if (activeCount == 0)
254 {
255 // No OCCs are running
256
257 // Stop OCC poll timer
258 if (_pollTimer->isEnabled())
259 {
260 log<level::INFO>(
261 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
262 _pollTimer->setEnabled(false);
263 }
264
265#ifdef POWER10
266 // stop wait timer
267 if (waitForAllOccsTimer->isEnabled())
268 {
269 waitForAllOccsTimer->setEnabled(false);
270 }
271#endif
272
273#ifdef READ_OCC_SENSORS
274 // Clear OCC sensors
275 for (auto& obj : statusObjects)
276 {
277 setSensorValueToNaN(obj->getOccInstanceID());
278 }
279#endif
280 }
Chris Caina8857c52021-01-27 11:53:05 -0600281 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530282}
283
284#ifdef I2C_OCC
285void Manager::initStatusObjects()
286{
287 // Make sure we have a valid path string
288 static_assert(sizeof(DEV_PATH) != 0);
289
290 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
291 for (auto& name : deviceNames)
292 {
293 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800294 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530295 auto path = fs::path(OCC_CONTROL_ROOT) / name;
296 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800297 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530298 }
Chris Cain78e86012021-03-04 16:15:31 -0600299#ifdef POWER10
Chris Cain5d66a0a2022-02-09 08:52:10 -0600300 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
301 powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600302 // Set the master OCC on the PowerMode object
303 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600304#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530305}
306#endif
307
Tom Joseph815f9f52020-07-27 12:12:13 +0530308#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500309void Manager::sbeTimeout(unsigned int instance)
310{
Eddie James2a751d72022-03-04 09:16:12 -0600311 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
312 [instance](const auto& obj) {
313 return instance == obj->getOccInstanceID();
314 });
Eddie Jamescbad2192021-10-07 09:39:39 -0500315
Eddie Jamescb018da2022-03-05 11:49:37 -0600316 if (obj != statusObjects.end() && (*obj)->occActive())
Eddie James2a751d72022-03-04 09:16:12 -0600317 {
318 log<level::INFO>("SBE timeout, requesting HRESET",
319 entry("SBE=%d", instance));
Eddie Jamescbad2192021-10-07 09:39:39 -0500320
Eddie James2a751d72022-03-04 09:16:12 -0600321 setSBEState(instance, SBE_STATE_NOT_USABLE);
322
323 pldmHandle->sendHRESET(instance);
324 }
Eddie Jamescbad2192021-10-07 09:39:39 -0500325}
326
Tom Joseph815f9f52020-07-27 12:12:13 +0530327bool Manager::updateOCCActive(instanceID instance, bool status)
328{
329 return (statusObjects[instance])->occActive(status);
330}
Eddie Jamescbad2192021-10-07 09:39:39 -0500331
332void Manager::sbeHRESETResult(instanceID instance, bool success)
333{
334 if (success)
335 {
336 log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
337
338 setSBEState(instance, SBE_STATE_BOOTED);
339
340 return;
341 }
342
343 setSBEState(instance, SBE_STATE_FAILED);
344
345 if (sbeCanDump(instance))
346 {
Eddie Jamescbad2192021-10-07 09:39:39 -0500347 log<level::INFO>("HRESET failed, triggering SBE dump",
348 entry("SBE=%d", instance));
349
350 auto& bus = utils::getBus();
351 uint32_t src6 = instance << 16;
352 uint32_t logId =
353 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
354 src6, "SBE command timeout");
355
356 try
357 {
George Liuf3a4a692021-12-28 13:59:51 +0800358 constexpr auto path = "/org/openpower/dump";
359 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
360 constexpr auto function = "CreateDump";
361
Eddie Jamescbad2192021-10-07 09:39:39 -0500362 std::string service = utils::getService(path, interface);
363 auto method =
364 bus.new_method_call(service.c_str(), path, interface, function);
365
366 std::map<std::string, std::variant<std::string, uint64_t>>
367 createParams{
368 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
369 uint64_t(logId)},
370 {"com.ibm.Dump.Create.CreateParameters.DumpType",
371 "com.ibm.Dump.Create.DumpType.SBE"},
372 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
373 uint64_t(instance)},
374 };
375
376 method.append(createParams);
377
378 auto response = bus.call(method);
379 }
380 catch (const sdbusplus::exception::exception& e)
381 {
382 constexpr auto ERROR_DUMP_DISABLED =
383 "xyz.openbmc_project.Dump.Create.Error.Disabled";
384 if (e.name() == ERROR_DUMP_DISABLED)
385 {
386 log<level::INFO>("Dump is disabled, skipping");
387 }
388 else
389 {
390 log<level::ERR>("Dump failed");
391 }
392 }
393 }
394}
395
396bool Manager::sbeCanDump(unsigned int instance)
397{
398 struct pdbg_target* proc = getPdbgTarget(instance);
399
400 if (!proc)
401 {
402 // allow the dump in the error case
403 return true;
404 }
405
406 try
407 {
408 if (!openpower::phal::sbe::isDumpAllowed(proc))
409 {
410 return false;
411 }
412
413 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
414 {
415 return false;
416 }
417 }
418 catch (openpower::phal::exception::SbeError& e)
419 {
420 log<level::INFO>("Failed to query SBE state");
421 }
422
423 // allow the dump in the error case
424 return true;
425}
426
427void Manager::setSBEState(unsigned int instance, enum sbe_state state)
428{
429 struct pdbg_target* proc = getPdbgTarget(instance);
430
431 if (!proc)
432 {
433 return;
434 }
435
436 try
437 {
438 openpower::phal::sbe::setState(proc, state);
439 }
440 catch (const openpower::phal::exception::SbeError& e)
441 {
442 log<level::ERR>("Failed to set SBE state");
443 }
444}
445
446struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
447{
448 if (!pdbgInitialized)
449 {
450 try
451 {
452 openpower::phal::pdbg::init();
453 pdbgInitialized = true;
454 }
455 catch (const openpower::phal::exception::PdbgError& e)
456 {
457 log<level::ERR>("pdbg initialization failed");
458 return nullptr;
459 }
460 }
461
462 struct pdbg_target* proc = nullptr;
463 pdbg_for_each_class_target("proc", proc)
464 {
465 if (pdbg_target_index(proc) == instance)
466 {
467 return proc;
468 }
469 }
470
471 log<level::ERR>("Failed to get pdbg target");
472 return nullptr;
473}
Tom Joseph815f9f52020-07-27 12:12:13 +0530474#endif
475
Chris Caina8857c52021-01-27 11:53:05 -0600476void Manager::pollerTimerExpired()
477{
Chris Caina8857c52021-01-27 11:53:05 -0600478 if (!_pollTimer)
479 {
480 log<level::ERR>(
481 "Manager::pollerTimerExpired() ERROR: Timer not defined");
482 return;
483 }
484
485 for (auto& obj : statusObjects)
486 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600487 if (!obj->occActive())
488 {
489 // OCC is not running yet
490#ifdef READ_OCC_SENSORS
Chris Cain5d66a0a2022-02-09 08:52:10 -0600491 auto id = obj->getOccInstanceID();
Chris Caina7b74dc2021-11-10 17:03:43 -0600492 setSensorValueToNaN(id);
493#endif
494 continue;
495 }
496
Chris Caina8857c52021-01-27 11:53:05 -0600497 // Read sysfs to force kernel to poll OCC
498 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800499
500#ifdef READ_OCC_SENSORS
501 // Read occ sensor values
Chris Cain5d66a0a2022-02-09 08:52:10 -0600502 getSensorValues(obj);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800503#endif
Chris Caina8857c52021-01-27 11:53:05 -0600504 }
505
Chris Caina7b74dc2021-11-10 17:03:43 -0600506 if (activeCount > 0)
507 {
508 // Restart OCC poll timer
509 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
510 }
511 else
512 {
513 // No OCCs running, so poll timer will not be restarted
514 log<level::INFO>(
515 fmt::format(
516 "Manager::pollerTimerExpired: poll timer will not be restarted")
517 .c_str());
518 }
Chris Caina8857c52021-01-27 11:53:05 -0600519}
520
Chicago Duanbb895cb2021-06-18 19:37:16 +0800521#ifdef READ_OCC_SENSORS
522void Manager::readTempSensors(const fs::path& path, uint32_t id)
523{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800524 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
525 for (auto& file : fs::directory_iterator(path))
526 {
527 if (!std::regex_search(file.path().string(), expr))
528 {
529 continue;
530 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800531
Matt Spinlera26f1522021-08-25 15:50:20 -0500532 uint32_t labelValue{0};
533
534 try
535 {
536 labelValue = readFile<uint32_t>(file.path());
537 }
538 catch (const std::system_error& e)
539 {
540 log<level::DEBUG>(
541 fmt::format("readTempSensors: Failed reading {}, errno = {}",
542 file.path().string(), e.code().value())
543 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800544 continue;
545 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800546
547 const std::string& tempLabel = "label";
548 const std::string filePathString = file.path().string().substr(
549 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500550
551 uint32_t fruTypeValue{0};
552 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800553 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500554 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
555 }
556 catch (const std::system_error& e)
557 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800558 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500559 fmt::format("readTempSensors: Failed reading {}, errno = {}",
560 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800561 .c_str());
562 continue;
563 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800564
565 std::string sensorPath =
566 OCC_SENSORS_ROOT + std::string("/temperature/");
567
Matt Spinlerace67d82021-10-18 13:41:57 -0500568 std::string dvfsTempPath;
569
Chicago Duanbb895cb2021-06-18 19:37:16 +0800570 if (fruTypeValue == VRMVdd)
571 {
572 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
573 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500574 else if (fruTypeValue == processorIoRing)
575 {
576 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
577 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
578 std::to_string(id) + "_ioring_dvfs_temp";
579 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800580 else
581 {
Matt Spinler14d14022021-08-25 15:38:29 -0500582 uint16_t type = (labelValue & 0xFF000000) >> 24;
583 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800584
585 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
586 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500587 if (fruTypeValue == fruTypeNotAvailable)
588 {
589 // Not all DIMM related temps are available to read
590 // (no _input file in this case)
591 continue;
592 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800593 auto iter = dimmTempSensorName.find(fruTypeValue);
594 if (iter == dimmTempSensorName.end())
595 {
George Liub5ca1012021-09-10 12:53:11 +0800596 log<level::ERR>(
597 fmt::format(
598 "readTempSensors: Fru type error! fruTypeValue = {}) ",
599 fruTypeValue)
600 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800601 continue;
602 }
603
604 sensorPath.append("dimm" + std::to_string(instanceID) +
605 iter->second);
606 }
607 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
608 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500609 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800610 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500611 // The OCC reports small core temps, of which there are
612 // two per big core. All current P10 systems are in big
613 // core mode, so use a big core name.
614 uint16_t coreNum = instanceID / 2;
615 uint16_t tempNum = instanceID % 2;
616 sensorPath.append("proc" + std::to_string(id) + "_core" +
617 std::to_string(coreNum) + "_" +
618 std::to_string(tempNum) + "_temp");
619
620 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
621 "/temperature/proc" + std::to_string(id) +
622 "_core_dvfs_temp";
623 }
624 else
625 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800626 continue;
627 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800628 }
629 else
630 {
631 continue;
632 }
633 }
634
Matt Spinlerace67d82021-10-18 13:41:57 -0500635 // The dvfs temp file only needs to be read once per chip per type.
636 if (!dvfsTempPath.empty() &&
637 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
638 {
639 try
640 {
641 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
642
643 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
644 dvfsTempPath, dvfsValue * std::pow(10, -3));
645 }
646 catch (const std::system_error& e)
647 {
648 log<level::DEBUG>(
649 fmt::format(
650 "readTempSensors: Failed reading {}, errno = {}",
651 filePathString + maxSuffix, e.code().value())
652 .c_str());
653 }
654 }
655
Matt Spinlera26f1522021-08-25 15:50:20 -0500656 uint32_t faultValue{0};
657 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800658 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500659 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
660 }
661 catch (const std::system_error& e)
662 {
663 log<level::DEBUG>(
664 fmt::format("readTempSensors: Failed reading {}, errno = {}",
665 filePathString + faultSuffix, e.code().value())
666 .c_str());
667 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800668 }
669
Matt Spinlera26f1522021-08-25 15:50:20 -0500670 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800671 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600672 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500673 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800674
Chris Cain5d66a0a2022-02-09 08:52:10 -0600675 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
676 false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800677
Matt Spinlera26f1522021-08-25 15:50:20 -0500678 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800679 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500680
681 double tempValue{0};
682
683 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800684 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500685 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800686 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500687 catch (const std::system_error& e)
688 {
689 log<level::DEBUG>(
690 fmt::format("readTempSensors: Failed reading {}, errno = {}",
691 filePathString + inputSuffix, e.code().value())
692 .c_str());
693 continue;
694 }
695
Chris Cain5d66a0a2022-02-09 08:52:10 -0600696 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500697 sensorPath, tempValue * std::pow(10, -3));
698
Chris Cain5d66a0a2022-02-09 08:52:10 -0600699 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
700 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500701
Chris Cain6fa848a2022-01-24 14:54:38 -0600702 // At this point, the sensor will be created for sure.
703 if (existingSensors.find(sensorPath) == existingSensors.end())
704 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600705 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
706 sensorPath);
Chris Cain6fa848a2022-01-24 14:54:38 -0600707 }
708
Matt Spinlera26f1522021-08-25 15:50:20 -0500709 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800710 }
711 return;
712}
713
714std::optional<std::string>
715 Manager::getPowerLabelFunctionID(const std::string& value)
716{
717 // If the value is "system", then the FunctionID is "system".
718 if (value == "system")
719 {
720 return value;
721 }
722
723 // If the value is not "system", then the label value have 3 numbers, of
724 // which we only care about the middle one:
725 // <sensor id>_<function id>_<apss channel>
726 // eg: The value is "0_10_5" , then the FunctionID is "10".
727 if (value.find("_") == std::string::npos)
728 {
729 return std::nullopt;
730 }
731
732 auto powerLabelValue = value.substr((value.find("_") + 1));
733
734 if (powerLabelValue.find("_") == std::string::npos)
735 {
736 return std::nullopt;
737 }
738
739 return powerLabelValue.substr(0, powerLabelValue.find("_"));
740}
741
742void Manager::readPowerSensors(const fs::path& path, uint32_t id)
743{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800744 std::regex expr{"power\\d+_label$"}; // Example: power5_label
745 for (auto& file : fs::directory_iterator(path))
746 {
747 if (!std::regex_search(file.path().string(), expr))
748 {
749 continue;
750 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800751
Matt Spinlera26f1522021-08-25 15:50:20 -0500752 std::string labelValue;
753 try
754 {
755 labelValue = readFile<std::string>(file.path());
756 }
757 catch (const std::system_error& e)
758 {
759 log<level::DEBUG>(
760 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
761 file.path().string(), e.code().value())
762 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800763 continue;
764 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800765
766 auto functionID = getPowerLabelFunctionID(labelValue);
767 if (functionID == std::nullopt)
768 {
769 continue;
770 }
771
772 const std::string& tempLabel = "label";
773 const std::string filePathString = file.path().string().substr(
774 0, file.path().string().length() - tempLabel.length());
775
776 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
777
778 auto iter = powerSensorName.find(*functionID);
779 if (iter == powerSensorName.end())
780 {
781 continue;
782 }
783 sensorPath.append(iter->second);
784
Matt Spinlera26f1522021-08-25 15:50:20 -0500785 double tempValue{0};
786
787 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800788 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500789 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800790 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500791 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800792 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800793 log<level::DEBUG>(
Chris Cain5d66a0a2022-02-09 08:52:10 -0600794 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
Matt Spinlera26f1522021-08-25 15:50:20 -0500795 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800796 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500797 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800798 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500799
Chris Cain5d66a0a2022-02-09 08:52:10 -0600800 dbus::OccDBusSensors::getOccDBus().setUnit(
Chris Caind84a8332022-01-13 08:58:45 -0600801 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
802
Chris Cain5d66a0a2022-02-09 08:52:10 -0600803 dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500804 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
805
Chris Cain5d66a0a2022-02-09 08:52:10 -0600806 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(sensorPath,
807 true);
Matt Spinlera26f1522021-08-25 15:50:20 -0500808
Matt Spinler5901abd2021-09-23 13:50:03 -0500809 if (existingSensors.find(sensorPath) == existingSensors.end())
810 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600811 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
812 sensorPath);
Matt Spinler5901abd2021-09-23 13:50:03 -0500813 }
814
Matt Spinlera26f1522021-08-25 15:50:20 -0500815 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800816 }
817 return;
818}
819
820void Manager::setSensorValueToNaN(uint32_t id)
821{
822 for (const auto& [sensorPath, occId] : existingSensors)
823 {
824 if (occId == id)
825 {
Chris Cain5d66a0a2022-02-09 08:52:10 -0600826 dbus::OccDBusSensors::getOccDBus().setValue(
Chicago Duanbb895cb2021-06-18 19:37:16 +0800827 sensorPath, std::numeric_limits<double>::quiet_NaN());
828 }
829 }
830 return;
831}
832
Chris Cain5d66a0a2022-02-09 08:52:10 -0600833void Manager::getSensorValues(std::unique_ptr<Status>& occ)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800834{
Chris Cain5d66a0a2022-02-09 08:52:10 -0600835 const fs::path fileName = occ->getHwmonPath();
836 const uint32_t id = occ->getOccInstanceID();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800837
838 // Read temperature sensors
839 readTempSensors(fileName, id);
840
Chris Cain5d66a0a2022-02-09 08:52:10 -0600841 if (occ->isMasterOcc())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800842 {
843 // Read power sensors
844 readPowerSensors(fileName, id);
845 }
846
847 return;
848}
849#endif
Chris Cain17257672021-10-22 13:41:03 -0500850
851// Read the altitude from DBus
852void Manager::readAltitude()
853{
854 static bool traceAltitudeErr = true;
855
856 utils::PropertyValue altitudeProperty{};
857 try
858 {
859 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
860 ALTITUDE_PROP);
861 auto sensorVal = std::get<double>(altitudeProperty);
862 if (sensorVal < 0xFFFF)
863 {
864 if (sensorVal < 0)
865 {
866 altitude = 0;
867 }
868 else
869 {
870 // Round to nearest meter
871 altitude = uint16_t(sensorVal + 0.5);
872 }
873 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
874 sensorVal, altitude)
875 .c_str());
876 traceAltitudeErr = true;
877 }
878 else
879 {
880 if (traceAltitudeErr)
881 {
882 traceAltitudeErr = false;
883 log<level::DEBUG>(
884 fmt::format("Invalid altitude value: {}", sensorVal)
885 .c_str());
886 }
887 }
888 }
889 catch (const sdbusplus::exception::exception& e)
890 {
891 if (traceAltitudeErr)
892 {
893 traceAltitudeErr = false;
894 log<level::INFO>(
895 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
896 }
897 altitude = 0xFFFF; // not available
898 }
899}
900
901// Callback function when ambient temperature changes
902void Manager::ambientCallback(sdbusplus::message::message& msg)
903{
904 double currentTemp = 0;
905 uint8_t truncatedTemp = 0xFF;
906 std::string msgSensor;
907 std::map<std::string, std::variant<double>> msgData;
908 msg.read(msgSensor, msgData);
909
910 auto valPropMap = msgData.find(AMBIENT_PROP);
911 if (valPropMap == msgData.end())
912 {
913 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
914 return;
915 }
916 currentTemp = std::get<double>(valPropMap->second);
917 if (std::isnan(currentTemp))
918 {
919 truncatedTemp = 0xFF;
920 }
921 else
922 {
923 if (currentTemp < 0)
924 {
925 truncatedTemp = 0;
926 }
927 else
928 {
929 // Round to nearest degree C
930 truncatedTemp = uint8_t(currentTemp + 0.5);
931 }
932 }
933
934 // If ambient changes, notify OCCs
935 if (truncatedTemp != ambient)
936 {
937 log<level::DEBUG>(
938 fmt::format("ambientCallback: Ambient change from {} to {}C",
939 ambient, currentTemp)
940 .c_str());
941
942 ambient = truncatedTemp;
943 if (altitude == 0xFFFF)
944 {
945 // No altitude yet, try reading again
946 readAltitude();
947 }
948
949 log<level::DEBUG>(
950 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
951 altitude)
952 .c_str());
953#ifdef POWER10
954 // Send ambient and altitude to all OCCs
955 for (auto& obj : statusObjects)
956 {
957 if (obj->occActive())
958 {
959 obj->sendAmbient(ambient, altitude);
960 }
961 }
962#endif // POWER10
963 }
964}
965
966// return the current ambient and altitude readings
967void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
968 uint16_t& altitudeValue) const
969{
970 ambientValid = true;
971 ambientTemp = ambient;
972 altitudeValue = altitude;
973
974 if (ambient == 0xFF)
975 {
976 ambientValid = false;
977 }
978}
979
Chris Caina7b74dc2021-11-10 17:03:43 -0600980#ifdef POWER10
981void Manager::occsNotAllRunning()
982{
Chris Cain6fa848a2022-01-24 14:54:38 -0600983 // Function will also gets called when occ-control app gets
984 // restarted. (occ active sensors do not change, so the Status
985 // object does not call Manager back for all OCCs)
Chris Caina7b74dc2021-11-10 17:03:43 -0600986
987 if (activeCount != statusObjects.size())
988 {
989 // Not all OCCs went active
990 log<level::WARNING>(
991 fmt::format(
992 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
993 activeCount, statusObjects.size())
994 .c_str());
995 // Procs may be garded, so may not need reset.
996 }
997
998 validateOccMaster();
999}
1000#endif // POWER10
1001
1002// Verify single master OCC and start presence monitor
1003void Manager::validateOccMaster()
1004{
1005 int masterInstance = -1;
1006 for (auto& obj : statusObjects)
1007 {
Chris Caina7b74dc2021-11-10 17:03:43 -06001008 if (obj->isMasterOcc())
1009 {
Chris Cain5d66a0a2022-02-09 08:52:10 -06001010 obj->addPresenceWatchMaster();
1011
Chris Caina7b74dc2021-11-10 17:03:43 -06001012 if (masterInstance == -1)
1013 {
1014 masterInstance = obj->getOccInstanceID();
1015 }
1016 else
1017 {
1018 log<level::ERR>(
1019 fmt::format(
1020 "validateOccMaster: Multiple OCC masters! ({} and {})",
1021 masterInstance, obj->getOccInstanceID())
1022 .c_str());
1023 // request reset
1024 obj->deviceError();
1025 }
1026 }
1027 }
1028 if (masterInstance < 0)
1029 {
1030 log<level::ERR>("validateOccMaster: Master OCC not found!");
1031 // request reset
1032 statusObjects.front()->deviceError();
1033 }
1034 else
1035 {
1036 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -06001037 fmt::format("validateOccMaster: OCC{} is master of {} OCCs",
1038 masterInstance, activeCount)
Chris Caina7b74dc2021-11-10 17:03:43 -06001039 .c_str());
1040 }
1041}
1042
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301043} // namespace occ
1044} // namespace open_power