blob: d24aa68a4afcb21c3b0f5cd5071a9484371164c9 [file] [log] [blame]
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301#include "config.h"
2
Gunnar Mills94df8c92018-09-14 14:50:03 -05003#include "occ_manager.hpp"
4
5#include "i2c_occ.hpp"
Chicago Duanbb895cb2021-06-18 19:37:16 +08006#include "occ_dbus.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007#include "utils.hpp"
8
George Liub5ca1012021-09-10 12:53:11 +08009#include <phosphor-logging/elog-errors.hpp>
10#include <phosphor-logging/log.hpp>
11#include <xyz/openbmc_project/Common/error.hpp>
12
Matt Spinlerd267cec2021-09-01 14:49:19 -050013#include <chrono>
Chicago Duanbb895cb2021-06-18 19:37:16 +080014#include <cmath>
George Liubcef3b42021-09-10 12:39:02 +080015#include <filesystem>
Chris Cain36f9cde2021-11-22 11:18:21 -060016#include <fstream>
Chicago Duanbb895cb2021-06-18 19:37:16 +080017#include <regex>
Gunnar Mills94df8c92018-09-14 14:50:03 -050018
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053019namespace open_power
20{
21namespace occ
22{
23
// fru_type value that is treated as "not available" when reading DIMM
// temperature sensors.
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// Suffixes appended to a sensor's hwmon file-name prefix (for example
// "temp5_") to form the name of one of its attribute files.
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";
Matt Spinler8b8abee2021-08-25 15:18:21 -050029
Chris Caina8857c52021-01-27 11:53:05 -060030using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060031using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060032
/**
 * @brief Read one value of type T from the start of a file.
 *
 * The value is extracted with operator>>, so leading whitespace is skipped
 * and, for strings, reading stops at the first whitespace character.
 *
 * @tparam T - Type to extract (must be default constructible and support
 *             stream extraction)
 * @param[in] path - File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error (generic category) if the file cannot be opened
 *         or the value cannot be extracted.  The code is errno when errno is
 *         set, otherwise EIO, so the reported code is never zero.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;

    // eofbit is deliberately left out of the mask: reaching end-of-file
    // right after a complete value (a file without a trailing newline) is a
    // successful read.  A genuinely empty file still raises failbit.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data{};

    // Clear errno so a stale value from an unrelated call is not reported.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // A parse failure does not set errno; report a generic I/O error
        // instead of a misleading "success" code.
        const auto err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
55
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053056void Manager::findAndCreateObjects()
57{
Matt Spinlerd267cec2021-09-01 14:49:19 -050058#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050059 for (auto id = 0; id < MAX_CPUS; ++id)
60 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060061 // Create one occ per cpu
62 auto occ = std::string(OCC_NAME) + std::to_string(id);
63 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053064 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050065#else
66 // Create the OCCs based on on the /dev/occX devices
67 auto occs = findOCCsInDev();
68
69 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
70 {
71 // Something changed or no OCCs yet, try again in 10s.
72 // Note on the first pass prevOCCSearch will be empty,
73 // so there will be at least one delay to give things
74 // a chance to settle.
75 prevOCCSearch = occs;
76
Matt Spinlerd267cec2021-09-01 14:49:19 -050077 discoverTimer->restartOnce(10s);
78 }
79 else
80 {
81 discoverTimer.reset();
82
83 // createObjects requires OCC0 first.
84 std::sort(occs.begin(), occs.end());
85
86 for (auto id : occs)
87 {
88 createObjects(std::string(OCC_NAME) + std::to_string(id));
89 }
90 }
91#endif
92}
93
94std::vector<int> Manager::findOCCsInDev()
95{
96 std::vector<int> occs;
97 std::regex expr{R"(occ(\d+)$)"};
98
99 for (auto& file : fs::directory_iterator("/dev"))
100 {
101 std::smatch match;
102 std::string path{file.path().string()};
103 if (std::regex_search(path, match, expr))
104 {
105 auto num = std::stoi(match[1].str());
106
107 // /dev numbering starts at 1, ours starts at 0.
108 occs.push_back(num - 1);
109 }
110 }
111
112 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530113}
114
115int Manager::cpuCreated(sdbusplus::message::message& msg)
116{
George Liubcef3b42021-09-10 12:39:02 +0800117 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530118
119 sdbusplus::message::object_path o;
120 msg.read(o);
121 fs::path cpuPath(std::string(std::move(o)));
122
123 auto name = cpuPath.filename().string();
124 auto index = name.find(CPU_NAME);
125 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
126
127 createObjects(name);
128
129 return 0;
130}
131
132void Manager::createObjects(const std::string& occ)
133{
134 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
135
Gunnar Mills94df8c92018-09-14 14:50:03 -0500136 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800137 event, path.c_str(), *this,
Chris Cain36f9cde2021-11-22 11:18:21 -0600138#ifdef POWER10
139 pmode,
140#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -0500141 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Tom Joseph00325232020-07-29 17:51:48 +0530142 std::placeholders::_1)
143#ifdef PLDM
144 ,
145 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
146 std::placeholders::_1)
147#endif
148 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530149
Chris Cain36f9cde2021-11-22 11:18:21 -0600150 if (statusObjects.back()->isMasterOcc())
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530151 {
Chris Cain36f9cde2021-11-22 11:18:21 -0600152 log<level::INFO>(
153 fmt::format("Manager::createObjects(): OCC{} is the master",
154 statusObjects.back()->getOccInstanceID())
155 .c_str());
156 _pollTimer->setEnabled(false);
157
158 // Create the power cap monitor object for master OCC
159 if (!pcap)
160 {
161 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
162 *statusObjects.front());
163 }
Chris Cain78e86012021-03-04 16:15:31 -0600164
165#ifdef POWER10
Chris Cain36f9cde2021-11-22 11:18:21 -0600166 // Create the power mode object for master OCC
167 if (!pmode)
168 {
169 pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
170 *this, path.c_str());
171 }
Chris Cain78e86012021-03-04 16:15:31 -0600172#endif
Chris Cain36f9cde2021-11-22 11:18:21 -0600173 }
174
175 passThroughObjects.emplace_back(std::make_unique<PassThrough>(path.c_str()
176#ifdef POWER10
177 ,
178 pmode
179#endif
180 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530181}
182
183void Manager::statusCallBack(bool status)
184{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500185 using InternalFailure =
186 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530187
188 // At this time, it won't happen but keeping it
189 // here just in case something changes in the future
190 if ((activeCount == 0) && (!status))
191 {
192 log<level::ERR>("Invalid update on OCCActive");
193 elog<InternalFailure>();
194 }
195
Chris Caina7b74dc2021-11-10 17:03:43 -0600196 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600197 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600198 // OCC went active
199 ++activeCount;
200
201#ifdef POWER10
202 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600203 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600204 // First OCC went active (allow some time for all OCCs to go active)
205 waitForAllOccsTimer->restartOnce(30s);
Matt Spinler53f68142021-08-25 15:47:31 -0500206 }
207#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600208
209 if (activeCount == statusObjects.size())
210 {
211#ifdef POWER10
212 // All OCCs are now running
213 if (waitForAllOccsTimer->isEnabled())
214 {
215 // stop occ wait timer
216 waitForAllOccsTimer->setEnabled(false);
217 }
218#endif
219
220 // Verify master OCC and start presence monitor
221 validateOccMaster();
222 }
223
224 // Start poll timer if not already started
225 if (!_pollTimer->isEnabled())
226 {
227 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600228 fmt::format("Manager: OCCs will be polled every {} seconds",
229 pollInterval)
Chris Caina7b74dc2021-11-10 17:03:43 -0600230 .c_str());
231
232 // Send poll and start OCC poll timer
233 pollerTimerExpired();
234 }
235 }
236 else
237 {
238 // OCC went away
239 --activeCount;
240
241 if (activeCount == 0)
242 {
243 // No OCCs are running
244
245 // Stop OCC poll timer
246 if (_pollTimer->isEnabled())
247 {
248 log<level::INFO>(
249 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
250 _pollTimer->setEnabled(false);
251 }
252
253#ifdef POWER10
254 // stop wait timer
255 if (waitForAllOccsTimer->isEnabled())
256 {
257 waitForAllOccsTimer->setEnabled(false);
258 }
259#endif
260
261#ifdef READ_OCC_SENSORS
262 // Clear OCC sensors
263 for (auto& obj : statusObjects)
264 {
265 setSensorValueToNaN(obj->getOccInstanceID());
266 }
267#endif
268 }
Chris Caina8857c52021-01-27 11:53:05 -0600269 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530270}
271
272#ifdef I2C_OCC
273void Manager::initStatusObjects()
274{
275 // Make sure we have a valid path string
276 static_assert(sizeof(DEV_PATH) != 0);
277
278 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
Lei YU41470e52017-11-30 16:03:50 +0800279 auto occMasterName = deviceNames.front();
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530280 for (auto& name : deviceNames)
281 {
282 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800283 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530284 auto path = fs::path(OCC_CONTROL_ROOT) / name;
285 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800286 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530287 }
Lei YU41470e52017-11-30 16:03:50 +0800288 // The first device is master occ
289 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
George Liuf3b75142021-06-10 11:22:50 +0800290 *statusObjects.front(), occMasterName);
Chris Cain78e86012021-03-04 16:15:31 -0600291#ifdef POWER10
292 pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
Chris Cain36f9cde2021-11-22 11:18:21 -0600293 *this, path.c_str());
Chris Cain78e86012021-03-04 16:15:31 -0600294#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530295}
296#endif
297
Tom Joseph815f9f52020-07-27 12:12:13 +0530298#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500299void Manager::sbeTimeout(unsigned int instance)
300{
301 log<level::INFO>("SBE timeout, requesting HRESET",
302 entry("SBE=%d", instance));
303
304 setSBEState(instance, SBE_STATE_NOT_USABLE);
305
306 pldmHandle->sendHRESET(instance);
307}
308
Tom Joseph815f9f52020-07-27 12:12:13 +0530309bool Manager::updateOCCActive(instanceID instance, bool status)
310{
311 return (statusObjects[instance])->occActive(status);
312}
Eddie Jamescbad2192021-10-07 09:39:39 -0500313
314void Manager::sbeHRESETResult(instanceID instance, bool success)
315{
316 if (success)
317 {
318 log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
319
320 setSBEState(instance, SBE_STATE_BOOTED);
321
322 return;
323 }
324
325 setSBEState(instance, SBE_STATE_FAILED);
326
327 if (sbeCanDump(instance))
328 {
Eddie Jamescbad2192021-10-07 09:39:39 -0500329 log<level::INFO>("HRESET failed, triggering SBE dump",
330 entry("SBE=%d", instance));
331
332 auto& bus = utils::getBus();
333 uint32_t src6 = instance << 16;
334 uint32_t logId =
335 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
336 src6, "SBE command timeout");
337
338 try
339 {
George Liuf3a4a692021-12-28 13:59:51 +0800340 constexpr auto path = "/org/openpower/dump";
341 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
342 constexpr auto function = "CreateDump";
343
Eddie Jamescbad2192021-10-07 09:39:39 -0500344 std::string service = utils::getService(path, interface);
345 auto method =
346 bus.new_method_call(service.c_str(), path, interface, function);
347
348 std::map<std::string, std::variant<std::string, uint64_t>>
349 createParams{
350 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
351 uint64_t(logId)},
352 {"com.ibm.Dump.Create.CreateParameters.DumpType",
353 "com.ibm.Dump.Create.DumpType.SBE"},
354 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
355 uint64_t(instance)},
356 };
357
358 method.append(createParams);
359
360 auto response = bus.call(method);
361 }
362 catch (const sdbusplus::exception::exception& e)
363 {
364 constexpr auto ERROR_DUMP_DISABLED =
365 "xyz.openbmc_project.Dump.Create.Error.Disabled";
366 if (e.name() == ERROR_DUMP_DISABLED)
367 {
368 log<level::INFO>("Dump is disabled, skipping");
369 }
370 else
371 {
372 log<level::ERR>("Dump failed");
373 }
374 }
375 }
376}
377
378bool Manager::sbeCanDump(unsigned int instance)
379{
380 struct pdbg_target* proc = getPdbgTarget(instance);
381
382 if (!proc)
383 {
384 // allow the dump in the error case
385 return true;
386 }
387
388 try
389 {
390 if (!openpower::phal::sbe::isDumpAllowed(proc))
391 {
392 return false;
393 }
394
395 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
396 {
397 return false;
398 }
399 }
400 catch (openpower::phal::exception::SbeError& e)
401 {
402 log<level::INFO>("Failed to query SBE state");
403 }
404
405 // allow the dump in the error case
406 return true;
407}
408
409void Manager::setSBEState(unsigned int instance, enum sbe_state state)
410{
411 struct pdbg_target* proc = getPdbgTarget(instance);
412
413 if (!proc)
414 {
415 return;
416 }
417
418 try
419 {
420 openpower::phal::sbe::setState(proc, state);
421 }
422 catch (const openpower::phal::exception::SbeError& e)
423 {
424 log<level::ERR>("Failed to set SBE state");
425 }
426}
427
428struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
429{
430 if (!pdbgInitialized)
431 {
432 try
433 {
434 openpower::phal::pdbg::init();
435 pdbgInitialized = true;
436 }
437 catch (const openpower::phal::exception::PdbgError& e)
438 {
439 log<level::ERR>("pdbg initialization failed");
440 return nullptr;
441 }
442 }
443
444 struct pdbg_target* proc = nullptr;
445 pdbg_for_each_class_target("proc", proc)
446 {
447 if (pdbg_target_index(proc) == instance)
448 {
449 return proc;
450 }
451 }
452
453 log<level::ERR>("Failed to get pdbg target");
454 return nullptr;
455}
Tom Joseph815f9f52020-07-27 12:12:13 +0530456#endif
457
Chris Caina8857c52021-01-27 11:53:05 -0600458void Manager::pollerTimerExpired()
459{
Chris Caina8857c52021-01-27 11:53:05 -0600460 if (!_pollTimer)
461 {
462 log<level::ERR>(
463 "Manager::pollerTimerExpired() ERROR: Timer not defined");
464 return;
465 }
466
467 for (auto& obj : statusObjects)
468 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600469#ifdef READ_OCC_SENSORS
470 auto id = obj->getOccInstanceID();
471#endif
472 if (!obj->occActive())
473 {
474 // OCC is not running yet
475#ifdef READ_OCC_SENSORS
476 setSensorValueToNaN(id);
477#endif
478 continue;
479 }
480
Chris Caina8857c52021-01-27 11:53:05 -0600481 // Read sysfs to force kernel to poll OCC
482 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800483
484#ifdef READ_OCC_SENSORS
485 // Read occ sensor values
Chicago Duanbb895cb2021-06-18 19:37:16 +0800486 getSensorValues(id, obj->isMasterOcc());
487#endif
Chris Caina8857c52021-01-27 11:53:05 -0600488 }
489
Chris Caina7b74dc2021-11-10 17:03:43 -0600490 if (activeCount > 0)
491 {
492 // Restart OCC poll timer
493 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
494 }
495 else
496 {
497 // No OCCs running, so poll timer will not be restarted
498 log<level::INFO>(
499 fmt::format(
500 "Manager::pollerTimerExpired: poll timer will not be restarted")
501 .c_str());
502 }
Chris Caina8857c52021-01-27 11:53:05 -0600503}
504
Chicago Duanbb895cb2021-06-18 19:37:16 +0800505#ifdef READ_OCC_SENSORS
506void Manager::readTempSensors(const fs::path& path, uint32_t id)
507{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800508 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
509 for (auto& file : fs::directory_iterator(path))
510 {
511 if (!std::regex_search(file.path().string(), expr))
512 {
513 continue;
514 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800515
Matt Spinlera26f1522021-08-25 15:50:20 -0500516 uint32_t labelValue{0};
517
518 try
519 {
520 labelValue = readFile<uint32_t>(file.path());
521 }
522 catch (const std::system_error& e)
523 {
524 log<level::DEBUG>(
525 fmt::format("readTempSensors: Failed reading {}, errno = {}",
526 file.path().string(), e.code().value())
527 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800528 continue;
529 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800530
531 const std::string& tempLabel = "label";
532 const std::string filePathString = file.path().string().substr(
533 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500534
535 uint32_t fruTypeValue{0};
536 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800537 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500538 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
539 }
540 catch (const std::system_error& e)
541 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800542 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500543 fmt::format("readTempSensors: Failed reading {}, errno = {}",
544 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800545 .c_str());
546 continue;
547 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800548
549 std::string sensorPath =
550 OCC_SENSORS_ROOT + std::string("/temperature/");
551
Matt Spinlerace67d82021-10-18 13:41:57 -0500552 std::string dvfsTempPath;
553
Chicago Duanbb895cb2021-06-18 19:37:16 +0800554 if (fruTypeValue == VRMVdd)
555 {
556 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
557 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500558 else if (fruTypeValue == processorIoRing)
559 {
560 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
561 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
562 std::to_string(id) + "_ioring_dvfs_temp";
563 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800564 else
565 {
Matt Spinler14d14022021-08-25 15:38:29 -0500566 uint16_t type = (labelValue & 0xFF000000) >> 24;
567 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800568
569 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
570 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500571 if (fruTypeValue == fruTypeNotAvailable)
572 {
573 // Not all DIMM related temps are available to read
574 // (no _input file in this case)
575 continue;
576 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800577 auto iter = dimmTempSensorName.find(fruTypeValue);
578 if (iter == dimmTempSensorName.end())
579 {
George Liub5ca1012021-09-10 12:53:11 +0800580 log<level::ERR>(
581 fmt::format(
582 "readTempSensors: Fru type error! fruTypeValue = {}) ",
583 fruTypeValue)
584 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800585 continue;
586 }
587
588 sensorPath.append("dimm" + std::to_string(instanceID) +
589 iter->second);
590 }
591 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
592 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500593 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800594 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500595 // The OCC reports small core temps, of which there are
596 // two per big core. All current P10 systems are in big
597 // core mode, so use a big core name.
598 uint16_t coreNum = instanceID / 2;
599 uint16_t tempNum = instanceID % 2;
600 sensorPath.append("proc" + std::to_string(id) + "_core" +
601 std::to_string(coreNum) + "_" +
602 std::to_string(tempNum) + "_temp");
603
604 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
605 "/temperature/proc" + std::to_string(id) +
606 "_core_dvfs_temp";
607 }
608 else
609 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800610 continue;
611 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800612 }
613 else
614 {
615 continue;
616 }
617 }
618
Matt Spinlerace67d82021-10-18 13:41:57 -0500619 // The dvfs temp file only needs to be read once per chip per type.
620 if (!dvfsTempPath.empty() &&
621 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
622 {
623 try
624 {
625 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
626
627 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
628 dvfsTempPath, dvfsValue * std::pow(10, -3));
629 }
630 catch (const std::system_error& e)
631 {
632 log<level::DEBUG>(
633 fmt::format(
634 "readTempSensors: Failed reading {}, errno = {}",
635 filePathString + maxSuffix, e.code().value())
636 .c_str());
637 }
638 }
639
Matt Spinlera26f1522021-08-25 15:50:20 -0500640 uint32_t faultValue{0};
641 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800642 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500643 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
644 }
645 catch (const std::system_error& e)
646 {
647 log<level::DEBUG>(
648 fmt::format("readTempSensors: Failed reading {}, errno = {}",
649 filePathString + faultSuffix, e.code().value())
650 .c_str());
651 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800652 }
653
Matt Spinler5901abd2021-09-23 13:50:03 -0500654 // At this point, the sensor will be created for sure.
655 if (existingSensors.find(sensorPath) == existingSensors.end())
656 {
657 open_power::occ::dbus::OccDBusSensors::getOccDBus()
658 .setChassisAssociation(sensorPath);
659 }
660
Matt Spinlera26f1522021-08-25 15:50:20 -0500661 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800662 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800663 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500664 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800665
666 open_power::occ::dbus::OccDBusSensors::getOccDBus()
Matt Spinlera26f1522021-08-25 15:50:20 -0500667 .setOperationalStatus(sensorPath, false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800668
Matt Spinlera26f1522021-08-25 15:50:20 -0500669 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800670 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500671
672 double tempValue{0};
673
674 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800675 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500676 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800677 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500678 catch (const std::system_error& e)
679 {
680 log<level::DEBUG>(
681 fmt::format("readTempSensors: Failed reading {}, errno = {}",
682 filePathString + inputSuffix, e.code().value())
683 .c_str());
684 continue;
685 }
686
687 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
688 sensorPath, tempValue * std::pow(10, -3));
689
690 open_power::occ::dbus::OccDBusSensors::getOccDBus()
691 .setOperationalStatus(sensorPath, true);
692
693 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800694 }
695 return;
696}
697
698std::optional<std::string>
699 Manager::getPowerLabelFunctionID(const std::string& value)
700{
701 // If the value is "system", then the FunctionID is "system".
702 if (value == "system")
703 {
704 return value;
705 }
706
707 // If the value is not "system", then the label value have 3 numbers, of
708 // which we only care about the middle one:
709 // <sensor id>_<function id>_<apss channel>
710 // eg: The value is "0_10_5" , then the FunctionID is "10".
711 if (value.find("_") == std::string::npos)
712 {
713 return std::nullopt;
714 }
715
716 auto powerLabelValue = value.substr((value.find("_") + 1));
717
718 if (powerLabelValue.find("_") == std::string::npos)
719 {
720 return std::nullopt;
721 }
722
723 return powerLabelValue.substr(0, powerLabelValue.find("_"));
724}
725
726void Manager::readPowerSensors(const fs::path& path, uint32_t id)
727{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800728 std::regex expr{"power\\d+_label$"}; // Example: power5_label
729 for (auto& file : fs::directory_iterator(path))
730 {
731 if (!std::regex_search(file.path().string(), expr))
732 {
733 continue;
734 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800735
Matt Spinlera26f1522021-08-25 15:50:20 -0500736 std::string labelValue;
737 try
738 {
739 labelValue = readFile<std::string>(file.path());
740 }
741 catch (const std::system_error& e)
742 {
743 log<level::DEBUG>(
744 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
745 file.path().string(), e.code().value())
746 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800747 continue;
748 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800749
750 auto functionID = getPowerLabelFunctionID(labelValue);
751 if (functionID == std::nullopt)
752 {
753 continue;
754 }
755
756 const std::string& tempLabel = "label";
757 const std::string filePathString = file.path().string().substr(
758 0, file.path().string().length() - tempLabel.length());
759
760 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
761
762 auto iter = powerSensorName.find(*functionID);
763 if (iter == powerSensorName.end())
764 {
765 continue;
766 }
767 sensorPath.append(iter->second);
768
Matt Spinlera26f1522021-08-25 15:50:20 -0500769 double tempValue{0};
770
771 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800772 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500773 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800774 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500775 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800776 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800777 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500778 fmt::format("readTempSensors: Failed reading {}, errno = {}",
779 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800780 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500781 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800782 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500783
784 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
785 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
786
787 open_power::occ::dbus::OccDBusSensors::getOccDBus()
788 .setOperationalStatus(sensorPath, true);
789
Matt Spinler5901abd2021-09-23 13:50:03 -0500790 if (existingSensors.find(sensorPath) == existingSensors.end())
791 {
792 open_power::occ::dbus::OccDBusSensors::getOccDBus()
793 .setChassisAssociation(sensorPath);
794 }
795
Matt Spinlera26f1522021-08-25 15:50:20 -0500796 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800797 }
798 return;
799}
800
801void Manager::setSensorValueToNaN(uint32_t id)
802{
803 for (const auto& [sensorPath, occId] : existingSensors)
804 {
805 if (occId == id)
806 {
807 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
808 sensorPath, std::numeric_limits<double>::quiet_NaN());
809 }
810 }
811 return;
812}
813
814void Manager::getSensorValues(uint32_t id, bool masterOcc)
815{
816 const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1);
817
818 fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"};
819
820 // Need to get the hwmonXX directory name, there better only be 1 dir
821 assert(std::distance(fs::directory_iterator(fileName),
822 fs::directory_iterator{}) == 1);
823 // Now set our path to this full path, including this hwmonXX directory
824 fileName = fs::path(*fs::directory_iterator(fileName));
825
826 // Read temperature sensors
827 readTempSensors(fileName, id);
828
829 if (masterOcc)
830 {
831 // Read power sensors
832 readPowerSensors(fileName, id);
833 }
834
835 return;
836}
837#endif
Chris Cain17257672021-10-22 13:41:03 -0500838
839// Read the altitude from DBus
840void Manager::readAltitude()
841{
842 static bool traceAltitudeErr = true;
843
844 utils::PropertyValue altitudeProperty{};
845 try
846 {
847 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
848 ALTITUDE_PROP);
849 auto sensorVal = std::get<double>(altitudeProperty);
850 if (sensorVal < 0xFFFF)
851 {
852 if (sensorVal < 0)
853 {
854 altitude = 0;
855 }
856 else
857 {
858 // Round to nearest meter
859 altitude = uint16_t(sensorVal + 0.5);
860 }
861 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
862 sensorVal, altitude)
863 .c_str());
864 traceAltitudeErr = true;
865 }
866 else
867 {
868 if (traceAltitudeErr)
869 {
870 traceAltitudeErr = false;
871 log<level::DEBUG>(
872 fmt::format("Invalid altitude value: {}", sensorVal)
873 .c_str());
874 }
875 }
876 }
877 catch (const sdbusplus::exception::exception& e)
878 {
879 if (traceAltitudeErr)
880 {
881 traceAltitudeErr = false;
882 log<level::INFO>(
883 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
884 }
885 altitude = 0xFFFF; // not available
886 }
887}
888
889// Callback function when ambient temperature changes
890void Manager::ambientCallback(sdbusplus::message::message& msg)
891{
892 double currentTemp = 0;
893 uint8_t truncatedTemp = 0xFF;
894 std::string msgSensor;
895 std::map<std::string, std::variant<double>> msgData;
896 msg.read(msgSensor, msgData);
897
898 auto valPropMap = msgData.find(AMBIENT_PROP);
899 if (valPropMap == msgData.end())
900 {
901 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
902 return;
903 }
904 currentTemp = std::get<double>(valPropMap->second);
905 if (std::isnan(currentTemp))
906 {
907 truncatedTemp = 0xFF;
908 }
909 else
910 {
911 if (currentTemp < 0)
912 {
913 truncatedTemp = 0;
914 }
915 else
916 {
917 // Round to nearest degree C
918 truncatedTemp = uint8_t(currentTemp + 0.5);
919 }
920 }
921
922 // If ambient changes, notify OCCs
923 if (truncatedTemp != ambient)
924 {
925 log<level::DEBUG>(
926 fmt::format("ambientCallback: Ambient change from {} to {}C",
927 ambient, currentTemp)
928 .c_str());
929
930 ambient = truncatedTemp;
931 if (altitude == 0xFFFF)
932 {
933 // No altitude yet, try reading again
934 readAltitude();
935 }
936
937 log<level::DEBUG>(
938 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
939 altitude)
940 .c_str());
941#ifdef POWER10
942 // Send ambient and altitude to all OCCs
943 for (auto& obj : statusObjects)
944 {
945 if (obj->occActive())
946 {
947 obj->sendAmbient(ambient, altitude);
948 }
949 }
950#endif // POWER10
951 }
952}
953
954// return the current ambient and altitude readings
955void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
956 uint16_t& altitudeValue) const
957{
958 ambientValid = true;
959 ambientTemp = ambient;
960 altitudeValue = altitude;
961
962 if (ambient == 0xFF)
963 {
964 ambientValid = false;
965 }
966}
967
Chris Caina7b74dc2021-11-10 17:03:43 -0600968#ifdef POWER10
969void Manager::occsNotAllRunning()
970{
971 // Function will also gets called when occ-control app gets restarted.
972 // (occ active sensors do not change, so the Status object does not
973 // call Manager back for all OCCs)
974
975 if (activeCount != statusObjects.size())
976 {
977 // Not all OCCs went active
978 log<level::WARNING>(
979 fmt::format(
980 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
981 activeCount, statusObjects.size())
982 .c_str());
983 // Procs may be garded, so may not need reset.
984 }
985
986 validateOccMaster();
987}
988#endif // POWER10
989
990// Verify single master OCC and start presence monitor
991void Manager::validateOccMaster()
992{
993 int masterInstance = -1;
994 for (auto& obj : statusObjects)
995 {
996 obj->addPresenceWatchMaster();
997 if (obj->isMasterOcc())
998 {
999 if (masterInstance == -1)
1000 {
1001 masterInstance = obj->getOccInstanceID();
1002 }
1003 else
1004 {
1005 log<level::ERR>(
1006 fmt::format(
1007 "validateOccMaster: Multiple OCC masters! ({} and {})",
1008 masterInstance, obj->getOccInstanceID())
1009 .c_str());
1010 // request reset
1011 obj->deviceError();
1012 }
1013 }
1014 }
1015 if (masterInstance < 0)
1016 {
1017 log<level::ERR>("validateOccMaster: Master OCC not found!");
1018 // request reset
1019 statusObjects.front()->deviceError();
1020 }
1021 else
1022 {
1023 log<level::INFO>(
Chris Cain36f9cde2021-11-22 11:18:21 -06001024 fmt::format("validateOccMaster: OCC{} is master of {} OCCs",
1025 masterInstance, activeCount)
Chris Caina7b74dc2021-11-10 17:03:43 -06001026 .c_str());
1027 }
1028}
1029
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301030} // namespace occ
1031} // namespace open_power