blob: 84d991a0f4a1bfd125d8478140307aa94b7a233c [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value for which readTempSensors() skips a DIMM temperature sensor.
constexpr uint32_t fruTypeNotAvailable{0xFF};

// File name suffixes appended to a hwmon "<sensor>N_" prefix when reading the
// individual attribute files of a sensor.
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";

// While this file exists, findAndCreateObjects() only logs and re-arms the
// discovery timer instead of looking for OCC devices.
const char* const HOST_ON_FILE = "/run/openbmc/host@0-on";
31
Chris Caina8857c52021-01-27 11:53:05 -060032using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060033using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060034
/**
 * @brief Read one formatted value of type T from the start of a file.
 *
 * The value is extracted with operator>>, so leading whitespace is skipped
 * and, for std::string, reading stops at the first whitespace character.
 *
 * @tparam T    Type to extract (must support stream extraction)
 * @param[in] path  File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error (generic category) if the file cannot be opened
 *         or the value cannot be extracted. The code is the errno observed at
 *         the time of the failure, or EIO when errno was not set.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // eofbit is deliberately not part of the exception mask: hitting
    // end-of-file directly after a successful extraction (a file without a
    // trailing newline) is not an error. An empty file still fails because
    // the extraction itself sets failbit.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data{};

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // A pure parse failure does not set errno; do not report "Success".
        const auto err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
57
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053058void Manager::findAndCreateObjects()
59{
Matt Spinlerd267cec2021-09-01 14:49:19 -050060#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050061 for (auto id = 0; id < MAX_CPUS; ++id)
62 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060063 // Create one occ per cpu
64 auto occ = std::string(OCC_NAME) + std::to_string(id);
65 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053066 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050067#else
Chris Cain1718fd82022-02-16 16:39:50 -060068 if (!fs::exists(HOST_ON_FILE))
Matt Spinlerd267cec2021-09-01 14:49:19 -050069 {
Chris Cain1718fd82022-02-16 16:39:50 -060070 // Create the OCCs based on on the /dev/occX devices
71 auto occs = findOCCsInDev();
Matt Spinlerd267cec2021-09-01 14:49:19 -050072
Chris Cain1718fd82022-02-16 16:39:50 -060073 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
74 {
75 // Something changed or no OCCs yet, try again in 10s.
76 // Note on the first pass prevOCCSearch will be empty,
77 // so there will be at least one delay to give things
78 // a chance to settle.
79 prevOCCSearch = occs;
80
81 discoverTimer->restartOnce(10s);
82 }
83 else
84 {
85 discoverTimer.reset();
86
87 // createObjects requires OCC0 first.
88 std::sort(occs.begin(), occs.end());
89
90 for (auto id : occs)
91 {
92 createObjects(std::string(OCC_NAME) + std::to_string(id));
93 }
94 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050095 }
96 else
97 {
Chris Cain1718fd82022-02-16 16:39:50 -060098 log<level::INFO>(
99 fmt::format(
100 "Manager::findAndCreateObjects(): Waiting for {} to complete...",
101 HOST_ON_FILE)
102 .c_str());
103 discoverTimer->restartOnce(10s);
Matt Spinlerd267cec2021-09-01 14:49:19 -0500104 }
105#endif
106}
107
108std::vector<int> Manager::findOCCsInDev()
109{
110 std::vector<int> occs;
111 std::regex expr{R"(occ(\d+)$)"};
112
113 for (auto& file : fs::directory_iterator("/dev"))
114 {
115 std::smatch match;
116 std::string path{file.path().string()};
117 if (std::regex_search(path, match, expr))
118 {
119 auto num = std::stoi(match[1].str());
120
121 // /dev numbering starts at 1, ours starts at 0.
122 occs.push_back(num - 1);
123 }
124 }
125
126 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530127}
128
129int Manager::cpuCreated(sdbusplus::message::message& msg)
130{
George Liubcef3b42021-09-10 12:39:02 +0800131 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530132
133 sdbusplus::message::object_path o;
134 msg.read(o);
135 fs::path cpuPath(std::string(std::move(o)));
136
137 auto name = cpuPath.filename().string();
138 auto index = name.find(CPU_NAME);
139 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
140
141 createObjects(name);
142
143 return 0;
144}
145
146void Manager::createObjects(const std::string& occ)
147{
148 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
149
Chris Cain6fa848a2022-01-24 14:54:38 -0600150#ifdef POWER10
151 if (!pmode)
152 {
Chris Cain1be43372021-12-09 19:29:37 -0600153 // Create the power mode object
154 pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
155 *this, powermode::PMODE_PATH, powermode::PIPS_PATH);
Chris Cain6fa848a2022-01-24 14:54:38 -0600156 }
157#endif
158
Gunnar Mills94df8c92018-09-14 14:50:03 -0500159 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800160 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600161#ifdef POWER10
162 pmode,
163#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500164 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Tom Joseph00325232020-07-29 17:51:48 +0530165 std::placeholders::_1)
166#ifdef PLDM
167 ,
168 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
169 std::placeholders::_1)
170#endif
171 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530172
Chris Cain36f9cde2021-11-22 11:18:21 -0600173 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530174 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600175 log<level::INFO>(
176 fmt::format("Manager::createObjects(): OCC{} is the master",
177 statusObjects.back()->getOccInstanceID())
178 .c_str());
179 _pollTimer->setEnabled(false);
180
181 // Create the power cap monitor object for master OCC
182 if (!pcap)
183 {
184 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
Chris Cain1be43372021-12-09 19:29:37 -0600185 *statusObjects.back());
Chris Cain36f9cde2021-11-22 11:18:21 -0600186 }
Chris Cain78e86012021-03-04 16:15:31 -0600187
188#ifdef POWER10
Chris Cain6fa848a2022-01-24 14:54:38 -0600189 // Set the master OCC on the PowerMode object
190 pmode->setMasterOcc(path);
Chris Cain78e86012021-03-04 16:15:31 -0600191#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600192 }
193
194 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
195#ifdef POWER10
196 ,
197 pmode
198#endif
199 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530200}
201
202void Manager::statusCallBack(bool status)
203{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500204 using InternalFailure =
205 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530206
207 // At this time, it won't happen but keeping it
208 // here just in case something changes in the future
209 if ((activeCount == 0) && (!status))
210 {
211 log<level::ERR>("Invalid update on OCCActive");
212 elog<InternalFailure>();
213 }
214
Chris Caina7b74dc2021-11-10 17:03:43 -0600215 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600216 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600217 // OCC went active
218 ++activeCount;
219
220#ifdef POWER10
221 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600222 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600223 // First OCC went active (allow some time for all OCCs to go active)
224 waitForAllOccsTimer->restartOnce(30s);
Matt Spinler53f68142021-08-25 15:47:31 -0500225 }
226#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600227
228 if (activeCount == statusObjects.size())
229 {
230#ifdef POWER10
231 // All OCCs are now running
232 if (waitForAllOccsTimer->isEnabled())
233 {
234 // stop occ wait timer
235 waitForAllOccsTimer->setEnabled(false);
236 }
237#endif
238
239 // Verify master OCC and start presence monitor
240 validateOccMaster();
241 }
242
243 // Start poll timer if not already started
244 if (!_pollTimer->isEnabled())
245 {
246 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600247 fmt::format("Manager: OCCs will be polled every {} seconds",
248 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600249 .c_str());
250
251 // Send poll and start OCC poll timer
252 pollerTimerExpired();
253 }
254 }
255 else
256 {
257 // OCC went away
258 --activeCount;
259
260 if (activeCount == 0)
261 {
262 // No OCCs are running
263
264 // Stop OCC poll timer
265 if (_pollTimer->isEnabled())
266 {
267 log<level::INFO>(
268 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
269 _pollTimer->setEnabled(false);
270 }
271
272#ifdef POWER10
273 // stop wait timer
274 if (waitForAllOccsTimer->isEnabled())
275 {
276 waitForAllOccsTimer->setEnabled(false);
277 }
278#endif
279
280#ifdef READ_OCC_SENSORS
281 // Clear OCC sensors
282 for (auto& obj : statusObjects)
283 {
284 setSensorValueToNaN(obj->getOccInstanceID());
285 }
286#endif
287 }
Chris Caina8857c52021-01-27 11:53:05 -0600288 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530289}
290
#ifdef I2C_OCC
/**
 * @brief Create the Status objects for the OCC hwmon devices found under
 *        DEV_PATH, plus the PowerCap (and, with POWER10, PowerMode) object.
 *
 * The first device returned by getOccHwmonDevices() is treated as the master
 * OCC. Nothing is created when no devices are found.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    if (deviceNames.empty())
    {
        // front() below would be undefined without any device
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC hwmon devices found");
        return;
    }

    // Keep the untranslated name of the first device; the loop below
    // rewrites the entries of deviceNames in place.
    auto occMasterName = deviceNames.front();
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }
    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front(), occMasterName);
#ifdef POWER10
    pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
        *this, open_power::occ::powermode::PMODE_PATH,
        open_power::occ::powermode::PIPS_PATH);
    // Set the master OCC on the PowerMode object. The per-device path is
    // local to the loop above, so rebuild it from the (already translated)
    // first device name.
    const auto masterPath = fs::path(OCC_CONTROL_ROOT) / deviceNames.front();
    pmode->setMasterOcc(masterPath);
#endif
}
#endif
319
Tom Joseph815f9f52020-07-27 12:12:13 +0530320#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500321void Manager::sbeTimeout(unsigned int instance)
322{
323 log<level::INFO>("SBE timeout, requesting HRESET",
324 entry("SBE=%d", instance));
325
326 setSBEState(instance, SBE_STATE_NOT_USABLE);
327
328 pldmHandle->sendHRESET(instance);
329}
330
Tom Joseph815f9f52020-07-27 12:12:13 +0530331bool Manager::updateOCCActive(instanceID instance, bool status)
332{
333 return (statusObjects[instance])->occActive(status);
334}
Eddie Jamescbad2192021-10-07 09:39:39 -0500335
336void Manager::sbeHRESETResult(instanceID instance, bool success)
337{
338 if (success)
339 {
340 log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
341
342 setSBEState(instance, SBE_STATE_BOOTED);
343
344 return;
345 }
346
347 setSBEState(instance, SBE_STATE_FAILED);
348
349 if (sbeCanDump(instance))
350 {
Eddie Jamescbad2192021-10-07 09:39:39 -0500351 log<level::INFO>("HRESET failed, triggering SBE dump",
352 entry("SBE=%d", instance));
353
354 auto& bus = utils::getBus();
355 uint32_t src6 = instance << 16;
356 uint32_t logId =
357 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
358 src6, "SBE command timeout");
359
360 try
361 {
George Liuf3a4a692021-12-28 13:59:51 +0800362 constexpr auto path = "/org/openpower/dump";
363 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
364 constexpr auto function = "CreateDump";
365
Eddie Jamescbad2192021-10-07 09:39:39 -0500366 std::string service = utils::getService(path, interface);
367 auto method =
368 bus.new_method_call(service.c_str(), path, interface, function);
369
370 std::map<std::string, std::variant<std::string, uint64_t>>
371 createParams{
372 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
373 uint64_t(logId)},
374 {"com.ibm.Dump.Create.CreateParameters.DumpType",
375 "com.ibm.Dump.Create.DumpType.SBE"},
376 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
377 uint64_t(instance)},
378 };
379
380 method.append(createParams);
381
382 auto response = bus.call(method);
383 }
384 catch (const sdbusplus::exception::exception& e)
385 {
386 constexpr auto ERROR_DUMP_DISABLED =
387 "xyz.openbmc_project.Dump.Create.Error.Disabled";
388 if (e.name() == ERROR_DUMP_DISABLED)
389 {
390 log<level::INFO>("Dump is disabled, skipping");
391 }
392 else
393 {
394 log<level::ERR>("Dump failed");
395 }
396 }
397 }
398}
399
400bool Manager::sbeCanDump(unsigned int instance)
401{
402 struct pdbg_target* proc = getPdbgTarget(instance);
403
404 if (!proc)
405 {
406 // allow the dump in the error case
407 return true;
408 }
409
410 try
411 {
412 if (!openpower::phal::sbe::isDumpAllowed(proc))
413 {
414 return false;
415 }
416
417 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
418 {
419 return false;
420 }
421 }
422 catch (openpower::phal::exception::SbeError& e)
423 {
424 log<level::INFO>("Failed to query SBE state");
425 }
426
427 // allow the dump in the error case
428 return true;
429}
430
431void Manager::setSBEState(unsigned int instance, enum sbe_state state)
432{
433 struct pdbg_target* proc = getPdbgTarget(instance);
434
435 if (!proc)
436 {
437 return;
438 }
439
440 try
441 {
442 openpower::phal::sbe::setState(proc, state);
443 }
444 catch (const openpower::phal::exception::SbeError& e)
445 {
446 log<level::ERR>("Failed to set SBE state");
447 }
448}
449
450struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
451{
452 if (!pdbgInitialized)
453 {
454 try
455 {
456 openpower::phal::pdbg::init();
457 pdbgInitialized = true;
458 }
459 catch (const openpower::phal::exception::PdbgError& e)
460 {
461 log<level::ERR>("pdbg initialization failed");
462 return nullptr;
463 }
464 }
465
466 struct pdbg_target* proc = nullptr;
467 pdbg_for_each_class_target("proc", proc)
468 {
469 if (pdbg_target_index(proc) == instance)
470 {
471 return proc;
472 }
473 }
474
475 log<level::ERR>("Failed to get pdbg target");
476 return nullptr;
477}
Tom Joseph815f9f52020-07-27 12:12:13 +0530478#endif
479
Chris Caina8857c52021-01-27 11:53:05 -0600480void Manager::pollerTimerExpired()
481{
Chris Caina8857c52021-01-27 11:53:05 -0600482 if (!_pollTimer)
483 {
484 log<level::ERR>(
485 "Manager::pollerTimerExpired() ERROR: Timer not defined");
486 return;
487 }
488
489 for (auto& obj : statusObjects)
490 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600491#ifdef READ_OCC_SENSORS
492 auto id = obj->getOccInstanceID();
493#endif
494 if (!obj->occActive())
495 {
496 // OCC is not running yet
497#ifdef READ_OCC_SENSORS
498 setSensorValueToNaN(id);
499#endif
500 continue;
501 }
502
Chris Caina8857c52021-01-27 11:53:05 -0600503 // Read sysfs to force kernel to poll OCC
504 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800505
506#ifdef READ_OCC_SENSORS
507 // Read occ sensor values
Chicago Duanbb895cb2021-06-18 19:37:16 +0800508 getSensorValues(id, obj->isMasterOcc());
509#endif
Chris Caina8857c52021-01-27 11:53:05 -0600510 }
511
Chris Caina7b74dc2021-11-10 17:03:43 -0600512 if (activeCount > 0)
513 {
514 // Restart OCC poll timer
515 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
516 }
517 else
518 {
519 // No OCCs running, so poll timer will not be restarted
520 log<level::INFO>(
521 fmt::format(
522 "Manager::pollerTimerExpired: poll timer will not be restarted")
523 .c_str());
524 }
Chris Caina8857c52021-01-27 11:53:05 -0600525}
526
Chicago Duanbb895cb2021-06-18 19:37:16 +0800527#ifdef READ_OCC_SENSORS
528void Manager::readTempSensors(const fs::path& path, uint32_t id)
529{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800530 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
531 for (auto& file : fs::directory_iterator(path))
532 {
533 if (!std::regex_search(file.path().string(), expr))
534 {
535 continue;
536 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800537
Matt Spinlera26f1522021-08-25 15:50:20 -0500538 uint32_t labelValue{0};
539
540 try
541 {
542 labelValue = readFile<uint32_t>(file.path());
543 }
544 catch (const std::system_error& e)
545 {
546 log<level::DEBUG>(
547 fmt::format("readTempSensors: Failed reading {}, errno = {}",
548 file.path().string(), e.code().value())
549 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800550 continue;
551 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800552
553 const std::string& tempLabel = "label";
554 const std::string filePathString = file.path().string().substr(
555 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500556
557 uint32_t fruTypeValue{0};
558 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800559 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500560 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
561 }
562 catch (const std::system_error& e)
563 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800564 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500565 fmt::format("readTempSensors: Failed reading {}, errno = {}",
566 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800567 .c_str());
568 continue;
569 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800570
571 std::string sensorPath =
572 OCC_SENSORS_ROOT + std::string("/temperature/");
573
Matt Spinlerace67d82021-10-18 13:41:57 -0500574 std::string dvfsTempPath;
575
Chicago Duanbb895cb2021-06-18 19:37:16 +0800576 if (fruTypeValue == VRMVdd)
577 {
578 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
579 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500580 else if (fruTypeValue == processorIoRing)
581 {
582 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
583 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
584 std::to_string(id) + "_ioring_dvfs_temp";
585 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800586 else
587 {
Matt Spinler14d14022021-08-25 15:38:29 -0500588 uint16_t type = (labelValue & 0xFF000000) >> 24;
589 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800590
591 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
592 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500593 if (fruTypeValue == fruTypeNotAvailable)
594 {
595 // Not all DIMM related temps are available to read
596 // (no _input file in this case)
597 continue;
598 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800599 auto iter = dimmTempSensorName.find(fruTypeValue);
600 if (iter == dimmTempSensorName.end())
601 {
George Liub5ca1012021-09-10 12:53:11 +0800602 log<level::ERR>(
603 fmt::format(
604 "readTempSensors: Fru type error! fruTypeValue = {}) ",
605 fruTypeValue)
606 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800607 continue;
608 }
609
610 sensorPath.append("dimm" + std::to_string(instanceID) +
611 iter->second);
612 }
613 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
614 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500615 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800616 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500617 // The OCC reports small core temps, of which there are
618 // two per big core. All current P10 systems are in big
619 // core mode, so use a big core name.
620 uint16_t coreNum = instanceID / 2;
621 uint16_t tempNum = instanceID % 2;
622 sensorPath.append("proc" + std::to_string(id) + "_core" +
623 std::to_string(coreNum) + "_" +
624 std::to_string(tempNum) + "_temp");
625
626 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
627 "/temperature/proc" + std::to_string(id) +
628 "_core_dvfs_temp";
629 }
630 else
631 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800632 continue;
633 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800634 }
635 else
636 {
637 continue;
638 }
639 }
640
Matt Spinlerace67d82021-10-18 13:41:57 -0500641 // The dvfs temp file only needs to be read once per chip per type.
642 if (!dvfsTempPath.empty() &&
643 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
644 {
645 try
646 {
647 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
648
649 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
650 dvfsTempPath, dvfsValue * std::pow(10, -3));
651 }
652 catch (const std::system_error& e)
653 {
654 log<level::DEBUG>(
655 fmt::format(
656 "readTempSensors: Failed reading {}, errno = {}",
657 filePathString + maxSuffix, e.code().value())
658 .c_str());
659 }
660 }
661
Matt Spinlera26f1522021-08-25 15:50:20 -0500662 uint32_t faultValue{0};
663 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800664 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500665 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
666 }
667 catch (const std::system_error& e)
668 {
669 log<level::DEBUG>(
670 fmt::format("readTempSensors: Failed reading {}, errno = {}",
671 filePathString + faultSuffix, e.code().value())
672 .c_str());
673 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800674 }
675
Matt Spinlera26f1522021-08-25 15:50:20 -0500676 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800677 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800678 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500679 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800680
681 open_power::occ::dbus::OccDBusSensors::getOccDBus()
Matt Spinlera26f1522021-08-25 15:50:20 -0500682 .setOperationalStatus(sensorPath, false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800683
Matt Spinlera26f1522021-08-25 15:50:20 -0500684 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800685 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500686
687 double tempValue{0};
688
689 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800690 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500691 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800692 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500693 catch (const std::system_error& e)
694 {
695 log<level::DEBUG>(
696 fmt::format("readTempSensors: Failed reading {}, errno = {}",
697 filePathString + inputSuffix, e.code().value())
698 .c_str());
699 continue;
700 }
701
702 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
703 sensorPath, tempValue * std::pow(10, -3));
704
705 open_power::occ::dbus::OccDBusSensors::getOccDBus()
706 .setOperationalStatus(sensorPath, true);
707
Chris Cain6fa848a2022-01-24 14:54:38 -0600708 // At this point, the sensor will be created for sure.
709 if (existingSensors.find(sensorPath) == existingSensors.end())
710 {
711 open_power::occ::dbus::OccDBusSensors::getOccDBus()
712 .setChassisAssociation(sensorPath);
713 }
714
Matt Spinlera26f1522021-08-25 15:50:20 -0500715 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800716 }
717 return;
718}
719
720std::optional<std::string>
721 Manager::getPowerLabelFunctionID(const std::string& value)
722{
723 // If the value is "system", then the FunctionID is "system".
724 if (value == "system")
725 {
726 return value;
727 }
728
729 // If the value is not "system", then the label value have 3 numbers, of
730 // which we only care about the middle one:
731 // <sensor id>_<function id>_<apss channel>
732 // eg: The value is "0_10_5" , then the FunctionID is "10".
733 if (value.find("_") == std::string::npos)
734 {
735 return std::nullopt;
736 }
737
738 auto powerLabelValue = value.substr((value.find("_") + 1));
739
740 if (powerLabelValue.find("_") == std::string::npos)
741 {
742 return std::nullopt;
743 }
744
745 return powerLabelValue.substr(0, powerLabelValue.find("_"));
746}
747
748void Manager::readPowerSensors(const fs::path& path, uint32_t id)
749{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800750 std::regex expr{"power\\d+_label$"}; // Example: power5_label
751 for (auto& file : fs::directory_iterator(path))
752 {
753 if (!std::regex_search(file.path().string(), expr))
754 {
755 continue;
756 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800757
Matt Spinlera26f1522021-08-25 15:50:20 -0500758 std::string labelValue;
759 try
760 {
761 labelValue = readFile<std::string>(file.path());
762 }
763 catch (const std::system_error& e)
764 {
765 log<level::DEBUG>(
766 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
767 file.path().string(), e.code().value())
768 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800769 continue;
770 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800771
772 auto functionID = getPowerLabelFunctionID(labelValue);
773 if (functionID == std::nullopt)
774 {
775 continue;
776 }
777
778 const std::string& tempLabel = "label";
779 const std::string filePathString = file.path().string().substr(
780 0, file.path().string().length() - tempLabel.length());
781
782 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
783
784 auto iter = powerSensorName.find(*functionID);
785 if (iter == powerSensorName.end())
786 {
787 continue;
788 }
789 sensorPath.append(iter->second);
790
Matt Spinlera26f1522021-08-25 15:50:20 -0500791 double tempValue{0};
792
793 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800794 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500795 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800796 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500797 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800798 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800799 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500800 fmt::format("readTempSensors: Failed reading {}, errno = {}",
801 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800802 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500803 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800804 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500805
Chris Caind84a8332022-01-13 08:58:45 -0600806 open_power::occ::dbus::OccDBusSensors::getOccDBus().setUnit(
807 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
808
Matt Spinlera26f1522021-08-25 15:50:20 -0500809 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
810 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
811
812 open_power::occ::dbus::OccDBusSensors::getOccDBus()
813 .setOperationalStatus(sensorPath, true);
814
Matt Spinler5901abd2021-09-23 13:50:03 -0500815 if (existingSensors.find(sensorPath) == existingSensors.end())
816 {
817 open_power::occ::dbus::OccDBusSensors::getOccDBus()
818 .setChassisAssociation(sensorPath);
819 }
820
Matt Spinlera26f1522021-08-25 15:50:20 -0500821 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800822 }
823 return;
824}
825
826void Manager::setSensorValueToNaN(uint32_t id)
827{
828 for (const auto& [sensorPath, occId] : existingSensors)
829 {
830 if (occId == id)
831 {
832 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
833 sensorPath, std::numeric_limits<double>::quiet_NaN());
834 }
835 }
836 return;
837}
838
839void Manager::getSensorValues(uint32_t id, bool masterOcc)
840{
841 const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1);
842
843 fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"};
844
845 // Need to get the hwmonXX directory name, there better only be 1 dir
846 assert(std::distance(fs::directory_iterator(fileName),
847 fs::directory_iterator{}) == 1);
848 // Now set our path to this full path, including this hwmonXX directory
849 fileName = fs::path(*fs::directory_iterator(fileName));
850
851 // Read temperature sensors
852 readTempSensors(fileName, id);
853
854 if (masterOcc)
855 {
856 // Read power sensors
857 readPowerSensors(fileName, id);
858 }
859
860 return;
861}
862#endif
Chris Cain17257672021-10-22 13:41:03 -0500863
864// Read the altitude from DBus
865void Manager::readAltitude()
866{
867 static bool traceAltitudeErr = true;
868
869 utils::PropertyValue altitudeProperty{};
870 try
871 {
872 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
873 ALTITUDE_PROP);
874 auto sensorVal = std::get<double>(altitudeProperty);
875 if (sensorVal < 0xFFFF)
876 {
877 if (sensorVal < 0)
878 {
879 altitude = 0;
880 }
881 else
882 {
883 // Round to nearest meter
884 altitude = uint16_t(sensorVal + 0.5);
885 }
886 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
887 sensorVal, altitude)
888 .c_str());
889 traceAltitudeErr = true;
890 }
891 else
892 {
893 if (traceAltitudeErr)
894 {
895 traceAltitudeErr = false;
896 log<level::DEBUG>(
897 fmt::format("Invalid altitude value: {}", sensorVal)
898 .c_str());
899 }
900 }
901 }
902 catch (const sdbusplus::exception::exception& e)
903 {
904 if (traceAltitudeErr)
905 {
906 traceAltitudeErr = false;
907 log<level::INFO>(
908 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
909 }
910 altitude = 0xFFFF; // not available
911 }
912}
913
914// Callback function when ambient temperature changes
915void Manager::ambientCallback(sdbusplus::message::message& msg)
916{
917 double currentTemp = 0;
918 uint8_t truncatedTemp = 0xFF;
919 std::string msgSensor;
920 std::map<std::string, std::variant<double>> msgData;
921 msg.read(msgSensor, msgData);
922
923 auto valPropMap = msgData.find(AMBIENT_PROP);
924 if (valPropMap == msgData.end())
925 {
926 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
927 return;
928 }
929 currentTemp = std::get<double>(valPropMap->second);
930 if (std::isnan(currentTemp))
931 {
932 truncatedTemp = 0xFF;
933 }
934 else
935 {
936 if (currentTemp < 0)
937 {
938 truncatedTemp = 0;
939 }
940 else
941 {
942 // Round to nearest degree C
943 truncatedTemp = uint8_t(currentTemp + 0.5);
944 }
945 }
946
947 // If ambient changes, notify OCCs
948 if (truncatedTemp != ambient)
949 {
950 log<level::DEBUG>(
951 fmt::format("ambientCallback: Ambient change from {} to {}C",
952 ambient, currentTemp)
953 .c_str());
954
955 ambient = truncatedTemp;
956 if (altitude == 0xFFFF)
957 {
958 // No altitude yet, try reading again
959 readAltitude();
960 }
961
962 log<level::DEBUG>(
963 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
964 altitude)
965 .c_str());
966#ifdef POWER10
967 // Send ambient and altitude to all OCCs
968 for (auto& obj : statusObjects)
969 {
970 if (obj->occActive())
971 {
972 obj->sendAmbient(ambient, altitude);
973 }
974 }
975#endif // POWER10
976 }
977}
978
979// return the current ambient and altitude readings
980void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
981 uint16_t& altitudeValue) const
982{
983 ambientValid = true;
984 ambientTemp = ambient;
985 altitudeValue = altitude;
986
987 if (ambient == 0xFF)
988 {
989 ambientValid = false;
990 }
991}
992
#ifdef POWER10
/**
 * @brief Log a warning if the active OCC count differs from the number of
 *        Status objects, then validate the master OCC.
 *
 * This function also gets called when the occ-control app gets restarted
 * (occ active sensors do not change, so the Status object does not call
 * Manager back for all OCCs).
 */
void Manager::occsNotAllRunning()
{
    const auto expectedCount = statusObjects.size();

    if (activeCount != expectedCount)
    {
        // Not all OCCs went active.
        // Procs may be garded, so may not need reset.
        log<level::WARNING>(
            fmt::format(
                "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
                activeCount, expectedCount)
                .c_str());
    }

    validateOccMaster();
}
#endif // POWER10
1014
1015// Verify single master OCC and start presence monitor
1016void Manager::validateOccMaster()
1017{
1018 int masterInstance = -1;
1019 for (auto& obj : statusObjects)
1020 {
1021 obj->addPresenceWatchMaster();
1022 if (obj->isMasterOcc())
1023 {
1024 if (masterInstance == -1)
1025 {
1026 masterInstance = obj->getOccInstanceID();
1027 }
1028 else
1029 {
1030 log<level::ERR>(
1031 fmt::format(
1032 "validateOccMaster: Multiple OCC masters! ({} and {})",
1033 masterInstance, obj->getOccInstanceID())
1034 .c_str());
1035 // request reset
1036 obj->deviceError();
1037 }
1038 }
1039 }
1040 if (masterInstance < 0)
1041 {
1042 log<level::ERR>("validateOccMaster: Master OCC not found!");
1043 // request reset
1044 statusObjects.front()->deviceError();
1045 }
1046 else
1047 {
1048 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -06001049 fmt::format("validateOccMaster: OCC{} is master of {} OCCs",
1050 masterInstance, activeCount)
Chris Caina7b74dc2021-11-10 17:03:43 -06001051 .c_str());
1052 }
1053}
1054
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301055} // namespace occ
1056} // namespace open_power