blob: 33711bee8dc51b0cb68a8ad2c16aa3a5e9c8d567 [file] [log] [blame]
#include "config.h"

#include "occ_manager.hpp"

#include "i2c_occ.hpp"
#include "occ_dbus.hpp"
#include "utils.hpp"

#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/log.hpp>
#include <xyz/openbmc_project/Common/error.hpp>

#include <cerrno>
#include <chrono>
#include <cmath>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <regex>
#include <system_error>
Gunnar Mills94df8c92018-09-14 14:50:03 -050017
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053018namespace open_power
19{
20namespace occ
21{
22
// fru_type value for which a DIMM temperature has no reading available.
constexpr uint32_t fruTypeNotAvailable{0xFF};

// Suffixes appended to a sensor's hwmon file prefix (the label file name
// with the trailing "label" removed) to get its sibling files.
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
constexpr const char* maxSuffix = "max";
Matt Spinler8b8abee2021-08-25 15:18:21 -050028
Chris Caina8857c52021-01-27 11:53:05 -060029using namespace phosphor::logging;
Chris Caina7b74dc2021-11-10 17:03:43 -060030using namespace std::literals::chrono_literals;
Chris Caina8857c52021-01-27 11:53:05 -060031
/**
 * @brief Reads a single whitespace-delimited value of type T from a file.
 *
 * @tparam T - Type to extract with operator>> (e.g. uint32_t, double,
 *             std::string)
 * @param[in] path - Path of the file to read
 *
 * @return The value parsed from the start of the file
 *
 * @throws std::system_error if the file cannot be opened or no value can
 *         be extracted. The error code is errno when the C library set
 *         one, otherwise EIO, so the code is never 0.
 */
template <typename T>
T readFile(const std::string& path)
{
    // Turns a possibly-unset errno into a usable std::system_error.
    const auto makeError = [](int err) {
        return std::system_error(err != 0 ? err : EIO,
                                 std::generic_category());
    };

    errno = 0;
    std::ifstream ifs{path};
    if (!ifs.is_open())
    {
        throw makeError(errno);
    }

    // Check the stream state rather than enabling stream exceptions:
    // reaching end-of-file right after a successfully parsed value (a file
    // without a trailing newline) sets eofbit but is not an error.
    T data{};
    errno = 0;
    if (!(ifs >> data))
    {
        throw makeError(errno);
    }

    return data;
}
54
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053055void Manager::findAndCreateObjects()
56{
Matt Spinlerd267cec2021-09-01 14:49:19 -050057#ifndef POWER10
Deepak Kodihalli370f06b2017-10-25 04:26:07 -050058 for (auto id = 0; id < MAX_CPUS; ++id)
59 {
Deepak Kodihalli30417a12017-12-04 00:54:01 -060060 // Create one occ per cpu
61 auto occ = std::string(OCC_NAME) + std::to_string(id);
62 createObjects(occ);
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +053063 }
Matt Spinlerd267cec2021-09-01 14:49:19 -050064#else
65 // Create the OCCs based on on the /dev/occX devices
66 auto occs = findOCCsInDev();
67
68 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
69 {
70 // Something changed or no OCCs yet, try again in 10s.
71 // Note on the first pass prevOCCSearch will be empty,
72 // so there will be at least one delay to give things
73 // a chance to settle.
74 prevOCCSearch = occs;
75
Matt Spinlerd267cec2021-09-01 14:49:19 -050076 discoverTimer->restartOnce(10s);
77 }
78 else
79 {
80 discoverTimer.reset();
81
82 // createObjects requires OCC0 first.
83 std::sort(occs.begin(), occs.end());
84
85 for (auto id : occs)
86 {
87 createObjects(std::string(OCC_NAME) + std::to_string(id));
88 }
89 }
90#endif
91}
92
93std::vector<int> Manager::findOCCsInDev()
94{
95 std::vector<int> occs;
96 std::regex expr{R"(occ(\d+)$)"};
97
98 for (auto& file : fs::directory_iterator("/dev"))
99 {
100 std::smatch match;
101 std::string path{file.path().string()};
102 if (std::regex_search(path, match, expr))
103 {
104 auto num = std::stoi(match[1].str());
105
106 // /dev numbering starts at 1, ours starts at 0.
107 occs.push_back(num - 1);
108 }
109 }
110
111 return occs;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530112}
113
114int Manager::cpuCreated(sdbusplus::message::message& msg)
115{
George Liubcef3b42021-09-10 12:39:02 +0800116 namespace fs = std::filesystem;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530117
118 sdbusplus::message::object_path o;
119 msg.read(o);
120 fs::path cpuPath(std::string(std::move(o)));
121
122 auto name = cpuPath.filename().string();
123 auto index = name.find(CPU_NAME);
124 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
125
126 createObjects(name);
127
128 return 0;
129}
130
131void Manager::createObjects(const std::string& occ)
132{
133 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
134
135 passThroughObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800136 std::make_unique<PassThrough>(path.c_str()));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530137
Gunnar Mills94df8c92018-09-14 14:50:03 -0500138 statusObjects.emplace_back(std::make_unique<Status>(
George Liuf3b75142021-06-10 11:22:50 +0800139 event, path.c_str(), *this,
Gunnar Mills94df8c92018-09-14 14:50:03 -0500140 std::bind(std::mem_fn(&Manager::statusCallBack), this,
Tom Joseph00325232020-07-29 17:51:48 +0530141 std::placeholders::_1)
142#ifdef PLDM
143 ,
144 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
145 std::placeholders::_1)
146#endif
147 ));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530148
149 // Create the power cap monitor object for master occ (0)
150 if (!pcap)
151 {
152 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
George Liuf3b75142021-06-10 11:22:50 +0800153 *statusObjects.front());
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530154 }
Chris Cain78e86012021-03-04 16:15:31 -0600155
156#ifdef POWER10
157 // Create the power mode monitor object for master occ (0)
158 if (!pmode)
159 {
160 pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
161 *statusObjects.front());
162 }
Chris Cain1d51da22021-09-21 14:13:41 -0500163 // Create the idle power saver monitor object for master occ (0)
164 if (!pips)
165 {
166 pips = std::make_unique<open_power::occ::powermode::PowerIPS>(
167 *statusObjects.front());
168 }
Chris Cain78e86012021-03-04 16:15:31 -0600169#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530170}
171
172void Manager::statusCallBack(bool status)
173{
Gunnar Mills94df8c92018-09-14 14:50:03 -0500174 using InternalFailure =
175 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530176
177 // At this time, it won't happen but keeping it
178 // here just in case something changes in the future
179 if ((activeCount == 0) && (!status))
180 {
181 log<level::ERR>("Invalid update on OCCActive");
182 elog<InternalFailure>();
183 }
184
Chris Caina7b74dc2021-11-10 17:03:43 -0600185 if (status == true)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600186 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600187 // OCC went active
188 ++activeCount;
189
190#ifdef POWER10
191 if (activeCount == 1)
Eddie Jamesdae2d942017-12-20 10:50:03 -0600192 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600193 // First OCC went active (allow some time for all OCCs to go active)
194 waitForAllOccsTimer->restartOnce(30s);
Matt Spinler53f68142021-08-25 15:47:31 -0500195 }
196#endif
Chris Caina7b74dc2021-11-10 17:03:43 -0600197
198 if (activeCount == statusObjects.size())
199 {
200#ifdef POWER10
201 // All OCCs are now running
202 if (waitForAllOccsTimer->isEnabled())
203 {
204 // stop occ wait timer
205 waitForAllOccsTimer->setEnabled(false);
206 }
207#endif
208
209 // Verify master OCC and start presence monitor
210 validateOccMaster();
211 }
212
213 // Start poll timer if not already started
214 if (!_pollTimer->isEnabled())
215 {
216 log<level::INFO>(
217 fmt::format(
218 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds",
219 activeCount, pollInterval)
220 .c_str());
221
222 // Send poll and start OCC poll timer
223 pollerTimerExpired();
224 }
225 }
226 else
227 {
228 // OCC went away
229 --activeCount;
230
231 if (activeCount == 0)
232 {
233 // No OCCs are running
234
235 // Stop OCC poll timer
236 if (_pollTimer->isEnabled())
237 {
238 log<level::INFO>(
239 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
240 _pollTimer->setEnabled(false);
241 }
242
243#ifdef POWER10
244 // stop wait timer
245 if (waitForAllOccsTimer->isEnabled())
246 {
247 waitForAllOccsTimer->setEnabled(false);
248 }
249#endif
250
251#ifdef READ_OCC_SENSORS
252 // Clear OCC sensors
253 for (auto& obj : statusObjects)
254 {
255 setSensorValueToNaN(obj->getOccInstanceID());
256 }
257#endif
258 }
Chris Caina8857c52021-01-27 11:53:05 -0600259 }
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530260}
261
262#ifdef I2C_OCC
263void Manager::initStatusObjects()
264{
265 // Make sure we have a valid path string
266 static_assert(sizeof(DEV_PATH) != 0);
267
268 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
Lei YU41470e52017-11-30 16:03:50 +0800269 auto occMasterName = deviceNames.front();
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530270 for (auto& name : deviceNames)
271 {
272 i2c_occ::i2cToDbus(name);
Lei YUb5259a12017-09-01 16:22:40 +0800273 name = std::string(OCC_NAME) + '_' + name;
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530274 auto path = fs::path(OCC_CONTROL_ROOT) / name;
275 statusObjects.emplace_back(
George Liuf3b75142021-06-10 11:22:50 +0800276 std::make_unique<Status>(event, path.c_str(), *this));
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530277 }
Lei YU41470e52017-11-30 16:03:50 +0800278 // The first device is master occ
279 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
George Liuf3b75142021-06-10 11:22:50 +0800280 *statusObjects.front(), occMasterName);
Chris Cain78e86012021-03-04 16:15:31 -0600281#ifdef POWER10
282 pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
283 *statusObjects.front());
Chris Cain1d51da22021-09-21 14:13:41 -0500284 pips = std::make_unique<open_power::occ::powermode::PowerIPS>(
285 *statusObjects.front());
Chris Cain78e86012021-03-04 16:15:31 -0600286#endif
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +0530287}
288#endif
289
Tom Joseph815f9f52020-07-27 12:12:13 +0530290#ifdef PLDM
Eddie Jamescbad2192021-10-07 09:39:39 -0500291void Manager::sbeTimeout(unsigned int instance)
292{
293 log<level::INFO>("SBE timeout, requesting HRESET",
294 entry("SBE=%d", instance));
295
296 setSBEState(instance, SBE_STATE_NOT_USABLE);
297
298 pldmHandle->sendHRESET(instance);
299}
300
Tom Joseph815f9f52020-07-27 12:12:13 +0530301bool Manager::updateOCCActive(instanceID instance, bool status)
302{
303 return (statusObjects[instance])->occActive(status);
304}
Eddie Jamescbad2192021-10-07 09:39:39 -0500305
306void Manager::sbeHRESETResult(instanceID instance, bool success)
307{
308 if (success)
309 {
310 log<level::INFO>("HRESET succeeded", entry("SBE=%d", instance));
311
312 setSBEState(instance, SBE_STATE_BOOTED);
313
314 return;
315 }
316
317 setSBEState(instance, SBE_STATE_FAILED);
318
319 if (sbeCanDump(instance))
320 {
321 constexpr auto path = "/org/openpower/dump";
322 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
323 constexpr auto function = "CreateDump";
324
325 log<level::INFO>("HRESET failed, triggering SBE dump",
326 entry("SBE=%d", instance));
327
328 auto& bus = utils::getBus();
329 uint32_t src6 = instance << 16;
330 uint32_t logId =
331 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
332 src6, "SBE command timeout");
333
334 try
335 {
336 std::string service = utils::getService(path, interface);
337 auto method =
338 bus.new_method_call(service.c_str(), path, interface, function);
339
340 std::map<std::string, std::variant<std::string, uint64_t>>
341 createParams{
342 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
343 uint64_t(logId)},
344 {"com.ibm.Dump.Create.CreateParameters.DumpType",
345 "com.ibm.Dump.Create.DumpType.SBE"},
346 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
347 uint64_t(instance)},
348 };
349
350 method.append(createParams);
351
352 auto response = bus.call(method);
353 }
354 catch (const sdbusplus::exception::exception& e)
355 {
356 constexpr auto ERROR_DUMP_DISABLED =
357 "xyz.openbmc_project.Dump.Create.Error.Disabled";
358 if (e.name() == ERROR_DUMP_DISABLED)
359 {
360 log<level::INFO>("Dump is disabled, skipping");
361 }
362 else
363 {
364 log<level::ERR>("Dump failed");
365 }
366 }
367 }
368}
369
370bool Manager::sbeCanDump(unsigned int instance)
371{
372 struct pdbg_target* proc = getPdbgTarget(instance);
373
374 if (!proc)
375 {
376 // allow the dump in the error case
377 return true;
378 }
379
380 try
381 {
382 if (!openpower::phal::sbe::isDumpAllowed(proc))
383 {
384 return false;
385 }
386
387 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
388 {
389 return false;
390 }
391 }
392 catch (openpower::phal::exception::SbeError& e)
393 {
394 log<level::INFO>("Failed to query SBE state");
395 }
396
397 // allow the dump in the error case
398 return true;
399}
400
401void Manager::setSBEState(unsigned int instance, enum sbe_state state)
402{
403 struct pdbg_target* proc = getPdbgTarget(instance);
404
405 if (!proc)
406 {
407 return;
408 }
409
410 try
411 {
412 openpower::phal::sbe::setState(proc, state);
413 }
414 catch (const openpower::phal::exception::SbeError& e)
415 {
416 log<level::ERR>("Failed to set SBE state");
417 }
418}
419
420struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
421{
422 if (!pdbgInitialized)
423 {
424 try
425 {
426 openpower::phal::pdbg::init();
427 pdbgInitialized = true;
428 }
429 catch (const openpower::phal::exception::PdbgError& e)
430 {
431 log<level::ERR>("pdbg initialization failed");
432 return nullptr;
433 }
434 }
435
436 struct pdbg_target* proc = nullptr;
437 pdbg_for_each_class_target("proc", proc)
438 {
439 if (pdbg_target_index(proc) == instance)
440 {
441 return proc;
442 }
443 }
444
445 log<level::ERR>("Failed to get pdbg target");
446 return nullptr;
447}
Tom Joseph815f9f52020-07-27 12:12:13 +0530448#endif
449
Chris Caina8857c52021-01-27 11:53:05 -0600450void Manager::pollerTimerExpired()
451{
Chris Caina8857c52021-01-27 11:53:05 -0600452 if (!_pollTimer)
453 {
454 log<level::ERR>(
455 "Manager::pollerTimerExpired() ERROR: Timer not defined");
456 return;
457 }
458
459 for (auto& obj : statusObjects)
460 {
Chris Caina7b74dc2021-11-10 17:03:43 -0600461#ifdef READ_OCC_SENSORS
462 auto id = obj->getOccInstanceID();
463#endif
464 if (!obj->occActive())
465 {
466 // OCC is not running yet
467#ifdef READ_OCC_SENSORS
468 setSensorValueToNaN(id);
469#endif
470 continue;
471 }
472
Chris Caina8857c52021-01-27 11:53:05 -0600473 // Read sysfs to force kernel to poll OCC
474 obj->readOccState();
Chicago Duanbb895cb2021-06-18 19:37:16 +0800475
476#ifdef READ_OCC_SENSORS
477 // Read occ sensor values
Chicago Duanbb895cb2021-06-18 19:37:16 +0800478 getSensorValues(id, obj->isMasterOcc());
479#endif
Chris Caina8857c52021-01-27 11:53:05 -0600480 }
481
Chris Caina7b74dc2021-11-10 17:03:43 -0600482 if (activeCount > 0)
483 {
484 // Restart OCC poll timer
485 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
486 }
487 else
488 {
489 // No OCCs running, so poll timer will not be restarted
490 log<level::INFO>(
491 fmt::format(
492 "Manager::pollerTimerExpired: poll timer will not be restarted")
493 .c_str());
494 }
Chris Caina8857c52021-01-27 11:53:05 -0600495}
496
Chicago Duanbb895cb2021-06-18 19:37:16 +0800497#ifdef READ_OCC_SENSORS
498void Manager::readTempSensors(const fs::path& path, uint32_t id)
499{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800500 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
501 for (auto& file : fs::directory_iterator(path))
502 {
503 if (!std::regex_search(file.path().string(), expr))
504 {
505 continue;
506 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800507
Matt Spinlera26f1522021-08-25 15:50:20 -0500508 uint32_t labelValue{0};
509
510 try
511 {
512 labelValue = readFile<uint32_t>(file.path());
513 }
514 catch (const std::system_error& e)
515 {
516 log<level::DEBUG>(
517 fmt::format("readTempSensors: Failed reading {}, errno = {}",
518 file.path().string(), e.code().value())
519 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800520 continue;
521 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800522
523 const std::string& tempLabel = "label";
524 const std::string filePathString = file.path().string().substr(
525 0, file.path().string().length() - tempLabel.length());
Matt Spinlera26f1522021-08-25 15:50:20 -0500526
527 uint32_t fruTypeValue{0};
528 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800529 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500530 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
531 }
532 catch (const std::system_error& e)
533 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800534 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500535 fmt::format("readTempSensors: Failed reading {}, errno = {}",
536 filePathString + fruTypeSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800537 .c_str());
538 continue;
539 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800540
541 std::string sensorPath =
542 OCC_SENSORS_ROOT + std::string("/temperature/");
543
Matt Spinlerace67d82021-10-18 13:41:57 -0500544 std::string dvfsTempPath;
545
Chicago Duanbb895cb2021-06-18 19:37:16 +0800546 if (fruTypeValue == VRMVdd)
547 {
548 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
549 }
Matt Spinlerace67d82021-10-18 13:41:57 -0500550 else if (fruTypeValue == processorIoRing)
551 {
552 sensorPath.append("proc" + std::to_string(id) + "_ioring_temp");
553 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
554 std::to_string(id) + "_ioring_dvfs_temp";
555 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800556 else
557 {
Matt Spinler14d14022021-08-25 15:38:29 -0500558 uint16_t type = (labelValue & 0xFF000000) >> 24;
559 uint16_t instanceID = labelValue & 0x0000FFFF;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800560
561 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
562 {
Matt Spinler8b8abee2021-08-25 15:18:21 -0500563 if (fruTypeValue == fruTypeNotAvailable)
564 {
565 // Not all DIMM related temps are available to read
566 // (no _input file in this case)
567 continue;
568 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800569 auto iter = dimmTempSensorName.find(fruTypeValue);
570 if (iter == dimmTempSensorName.end())
571 {
George Liub5ca1012021-09-10 12:53:11 +0800572 log<level::ERR>(
573 fmt::format(
574 "readTempSensors: Fru type error! fruTypeValue = {}) ",
575 fruTypeValue)
576 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800577 continue;
578 }
579
580 sensorPath.append("dimm" + std::to_string(instanceID) +
581 iter->second);
582 }
583 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
584 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500585 if (fruTypeValue == processorCore)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800586 {
Matt Spinlerace67d82021-10-18 13:41:57 -0500587 // The OCC reports small core temps, of which there are
588 // two per big core. All current P10 systems are in big
589 // core mode, so use a big core name.
590 uint16_t coreNum = instanceID / 2;
591 uint16_t tempNum = instanceID % 2;
592 sensorPath.append("proc" + std::to_string(id) + "_core" +
593 std::to_string(coreNum) + "_" +
594 std::to_string(tempNum) + "_temp");
595
596 dvfsTempPath = std::string{OCC_SENSORS_ROOT} +
597 "/temperature/proc" + std::to_string(id) +
598 "_core_dvfs_temp";
599 }
600 else
601 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800602 continue;
603 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800604 }
605 else
606 {
607 continue;
608 }
609 }
610
Matt Spinlerace67d82021-10-18 13:41:57 -0500611 // The dvfs temp file only needs to be read once per chip per type.
612 if (!dvfsTempPath.empty() &&
613 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
614 {
615 try
616 {
617 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
618
619 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
620 dvfsTempPath, dvfsValue * std::pow(10, -3));
621 }
622 catch (const std::system_error& e)
623 {
624 log<level::DEBUG>(
625 fmt::format(
626 "readTempSensors: Failed reading {}, errno = {}",
627 filePathString + maxSuffix, e.code().value())
628 .c_str());
629 }
630 }
631
Matt Spinlera26f1522021-08-25 15:50:20 -0500632 uint32_t faultValue{0};
633 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800634 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500635 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
636 }
637 catch (const std::system_error& e)
638 {
639 log<level::DEBUG>(
640 fmt::format("readTempSensors: Failed reading {}, errno = {}",
641 filePathString + faultSuffix, e.code().value())
642 .c_str());
643 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800644 }
645
Matt Spinler5901abd2021-09-23 13:50:03 -0500646 // At this point, the sensor will be created for sure.
647 if (existingSensors.find(sensorPath) == existingSensors.end())
648 {
649 open_power::occ::dbus::OccDBusSensors::getOccDBus()
650 .setChassisAssociation(sensorPath);
651 }
652
Matt Spinlera26f1522021-08-25 15:50:20 -0500653 if (faultValue != 0)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800654 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800655 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
Matt Spinlera26f1522021-08-25 15:50:20 -0500656 sensorPath, std::numeric_limits<double>::quiet_NaN());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800657
658 open_power::occ::dbus::OccDBusSensors::getOccDBus()
Matt Spinlera26f1522021-08-25 15:50:20 -0500659 .setOperationalStatus(sensorPath, false);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800660
Matt Spinlera26f1522021-08-25 15:50:20 -0500661 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800662 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500663
664 double tempValue{0};
665
666 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800667 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500668 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800669 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500670 catch (const std::system_error& e)
671 {
672 log<level::DEBUG>(
673 fmt::format("readTempSensors: Failed reading {}, errno = {}",
674 filePathString + inputSuffix, e.code().value())
675 .c_str());
676 continue;
677 }
678
679 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
680 sensorPath, tempValue * std::pow(10, -3));
681
682 open_power::occ::dbus::OccDBusSensors::getOccDBus()
683 .setOperationalStatus(sensorPath, true);
684
685 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800686 }
687 return;
688}
689
690std::optional<std::string>
691 Manager::getPowerLabelFunctionID(const std::string& value)
692{
693 // If the value is "system", then the FunctionID is "system".
694 if (value == "system")
695 {
696 return value;
697 }
698
699 // If the value is not "system", then the label value have 3 numbers, of
700 // which we only care about the middle one:
701 // <sensor id>_<function id>_<apss channel>
702 // eg: The value is "0_10_5" , then the FunctionID is "10".
703 if (value.find("_") == std::string::npos)
704 {
705 return std::nullopt;
706 }
707
708 auto powerLabelValue = value.substr((value.find("_") + 1));
709
710 if (powerLabelValue.find("_") == std::string::npos)
711 {
712 return std::nullopt;
713 }
714
715 return powerLabelValue.substr(0, powerLabelValue.find("_"));
716}
717
718void Manager::readPowerSensors(const fs::path& path, uint32_t id)
719{
Chicago Duanbb895cb2021-06-18 19:37:16 +0800720 std::regex expr{"power\\d+_label$"}; // Example: power5_label
721 for (auto& file : fs::directory_iterator(path))
722 {
723 if (!std::regex_search(file.path().string(), expr))
724 {
725 continue;
726 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800727
Matt Spinlera26f1522021-08-25 15:50:20 -0500728 std::string labelValue;
729 try
730 {
731 labelValue = readFile<std::string>(file.path());
732 }
733 catch (const std::system_error& e)
734 {
735 log<level::DEBUG>(
736 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
737 file.path().string(), e.code().value())
738 .c_str());
Chicago Duanbb895cb2021-06-18 19:37:16 +0800739 continue;
740 }
Chicago Duanbb895cb2021-06-18 19:37:16 +0800741
742 auto functionID = getPowerLabelFunctionID(labelValue);
743 if (functionID == std::nullopt)
744 {
745 continue;
746 }
747
748 const std::string& tempLabel = "label";
749 const std::string filePathString = file.path().string().substr(
750 0, file.path().string().length() - tempLabel.length());
751
752 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
753
754 auto iter = powerSensorName.find(*functionID);
755 if (iter == powerSensorName.end())
756 {
757 continue;
758 }
759 sensorPath.append(iter->second);
760
Matt Spinlera26f1522021-08-25 15:50:20 -0500761 double tempValue{0};
762
763 try
Chicago Duanbb895cb2021-06-18 19:37:16 +0800764 {
Matt Spinlera26f1522021-08-25 15:50:20 -0500765 tempValue = readFile<double>(filePathString + inputSuffix);
Chicago Duanbb895cb2021-06-18 19:37:16 +0800766 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500767 catch (const std::system_error& e)
Chicago Duanbb895cb2021-06-18 19:37:16 +0800768 {
Chicago Duanbb895cb2021-06-18 19:37:16 +0800769 log<level::DEBUG>(
Matt Spinlera26f1522021-08-25 15:50:20 -0500770 fmt::format("readTempSensors: Failed reading {}, errno = {}",
771 filePathString + inputSuffix, e.code().value())
Chicago Duanbb895cb2021-06-18 19:37:16 +0800772 .c_str());
Matt Spinlera26f1522021-08-25 15:50:20 -0500773 continue;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800774 }
Matt Spinlera26f1522021-08-25 15:50:20 -0500775
776 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
777 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
778
779 open_power::occ::dbus::OccDBusSensors::getOccDBus()
780 .setOperationalStatus(sensorPath, true);
781
Matt Spinler5901abd2021-09-23 13:50:03 -0500782 if (existingSensors.find(sensorPath) == existingSensors.end())
783 {
784 open_power::occ::dbus::OccDBusSensors::getOccDBus()
785 .setChassisAssociation(sensorPath);
786 }
787
Matt Spinlera26f1522021-08-25 15:50:20 -0500788 existingSensors[sensorPath] = id;
Chicago Duanbb895cb2021-06-18 19:37:16 +0800789 }
790 return;
791}
792
793void Manager::setSensorValueToNaN(uint32_t id)
794{
795 for (const auto& [sensorPath, occId] : existingSensors)
796 {
797 if (occId == id)
798 {
799 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
800 sensorPath, std::numeric_limits<double>::quiet_NaN());
801 }
802 }
803 return;
804}
805
806void Manager::getSensorValues(uint32_t id, bool masterOcc)
807{
808 const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1);
809
810 fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"};
811
812 // Need to get the hwmonXX directory name, there better only be 1 dir
813 assert(std::distance(fs::directory_iterator(fileName),
814 fs::directory_iterator{}) == 1);
815 // Now set our path to this full path, including this hwmonXX directory
816 fileName = fs::path(*fs::directory_iterator(fileName));
817
818 // Read temperature sensors
819 readTempSensors(fileName, id);
820
821 if (masterOcc)
822 {
823 // Read power sensors
824 readPowerSensors(fileName, id);
825 }
826
827 return;
828}
829#endif
Chris Cain17257672021-10-22 13:41:03 -0500830
831// Read the altitude from DBus
832void Manager::readAltitude()
833{
834 static bool traceAltitudeErr = true;
835
836 utils::PropertyValue altitudeProperty{};
837 try
838 {
839 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
840 ALTITUDE_PROP);
841 auto sensorVal = std::get<double>(altitudeProperty);
842 if (sensorVal < 0xFFFF)
843 {
844 if (sensorVal < 0)
845 {
846 altitude = 0;
847 }
848 else
849 {
850 // Round to nearest meter
851 altitude = uint16_t(sensorVal + 0.5);
852 }
853 log<level::DEBUG>(fmt::format("readAltitude: sensor={} ({}m)",
854 sensorVal, altitude)
855 .c_str());
856 traceAltitudeErr = true;
857 }
858 else
859 {
860 if (traceAltitudeErr)
861 {
862 traceAltitudeErr = false;
863 log<level::DEBUG>(
864 fmt::format("Invalid altitude value: {}", sensorVal)
865 .c_str());
866 }
867 }
868 }
869 catch (const sdbusplus::exception::exception& e)
870 {
871 if (traceAltitudeErr)
872 {
873 traceAltitudeErr = false;
874 log<level::INFO>(
875 fmt::format("Unable to read Altitude: {}", e.what()).c_str());
876 }
877 altitude = 0xFFFF; // not available
878 }
879}
880
881// Callback function when ambient temperature changes
882void Manager::ambientCallback(sdbusplus::message::message& msg)
883{
884 double currentTemp = 0;
885 uint8_t truncatedTemp = 0xFF;
886 std::string msgSensor;
887 std::map<std::string, std::variant<double>> msgData;
888 msg.read(msgSensor, msgData);
889
890 auto valPropMap = msgData.find(AMBIENT_PROP);
891 if (valPropMap == msgData.end())
892 {
893 log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
894 return;
895 }
896 currentTemp = std::get<double>(valPropMap->second);
897 if (std::isnan(currentTemp))
898 {
899 truncatedTemp = 0xFF;
900 }
901 else
902 {
903 if (currentTemp < 0)
904 {
905 truncatedTemp = 0;
906 }
907 else
908 {
909 // Round to nearest degree C
910 truncatedTemp = uint8_t(currentTemp + 0.5);
911 }
912 }
913
914 // If ambient changes, notify OCCs
915 if (truncatedTemp != ambient)
916 {
917 log<level::DEBUG>(
918 fmt::format("ambientCallback: Ambient change from {} to {}C",
919 ambient, currentTemp)
920 .c_str());
921
922 ambient = truncatedTemp;
923 if (altitude == 0xFFFF)
924 {
925 // No altitude yet, try reading again
926 readAltitude();
927 }
928
929 log<level::DEBUG>(
930 fmt::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient,
931 altitude)
932 .c_str());
933#ifdef POWER10
934 // Send ambient and altitude to all OCCs
935 for (auto& obj : statusObjects)
936 {
937 if (obj->occActive())
938 {
939 obj->sendAmbient(ambient, altitude);
940 }
941 }
942#endif // POWER10
943 }
944}
945
946// return the current ambient and altitude readings
947void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
948 uint16_t& altitudeValue) const
949{
950 ambientValid = true;
951 ambientTemp = ambient;
952 altitudeValue = altitude;
953
954 if (ambient == 0xFF)
955 {
956 ambientValid = false;
957 }
958}
959
Chris Caina7b74dc2021-11-10 17:03:43 -0600960#ifdef POWER10
961void Manager::occsNotAllRunning()
962{
963 // Function will also gets called when occ-control app gets restarted.
964 // (occ active sensors do not change, so the Status object does not
965 // call Manager back for all OCCs)
966
967 if (activeCount != statusObjects.size())
968 {
969 // Not all OCCs went active
970 log<level::WARNING>(
971 fmt::format(
972 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})",
973 activeCount, statusObjects.size())
974 .c_str());
975 // Procs may be garded, so may not need reset.
976 }
977
978 validateOccMaster();
979}
980#endif // POWER10
981
982// Verify single master OCC and start presence monitor
983void Manager::validateOccMaster()
984{
985 int masterInstance = -1;
986 for (auto& obj : statusObjects)
987 {
988 obj->addPresenceWatchMaster();
989 if (obj->isMasterOcc())
990 {
991 if (masterInstance == -1)
992 {
993 masterInstance = obj->getOccInstanceID();
994 }
995 else
996 {
997 log<level::ERR>(
998 fmt::format(
999 "validateOccMaster: Multiple OCC masters! ({} and {})",
1000 masterInstance, obj->getOccInstanceID())
1001 .c_str());
1002 // request reset
1003 obj->deviceError();
1004 }
1005 }
1006 }
1007 if (masterInstance < 0)
1008 {
1009 log<level::ERR>("validateOccMaster: Master OCC not found!");
1010 // request reset
1011 statusObjects.front()->deviceError();
1012 }
1013 else
1014 {
1015 log<level::INFO>(
1016 fmt::format("validateOccMaster: OCC{} is master", masterInstance)
1017 .c_str());
1018 }
1019}
1020
Vishwanatha Subbannadfc7ec72017-09-07 18:18:01 +05301021} // namespace occ
1022} // namespace open_power