blob: 220fcddb26f6e64fabc9fa5231aa206dc5eeea48 [file] [log] [blame]
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +05301#include "occ_status.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05002
Chris Cain17257672021-10-22 13:41:03 -05003#include "occ_manager.hpp"
Vishwanatha Subbanna6add0b82017-07-21 19:02:37 +05304#include "occ_sensor.hpp"
Chris Cain78e86012021-03-04 16:15:31 -06005#include "powermode.hpp"
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +05306#include "utils.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007
Chris Cain37abe9b2024-10-31 17:20:31 -05008#include <phosphor-logging/lg2.hpp>
Chris Cain78e86012021-03-04 16:15:31 -06009
Chris Caine2d0a432022-03-28 11:08:49 -050010#include <filesystem>
11
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +053012namespace open_power
13{
14namespace occ
15{
Chris Cain78e86012021-03-04 16:15:31 -060016
Chris Caina8857c52021-01-27 11:53:05 -060017using namespace phosphor::logging;
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +053018
Chris Cainc86d80f2023-05-04 15:49:18 -050019using ThrottleObj =
20 sdbusplus::xyz::openbmc_project::Control::Power::server::Throttle;
21
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +053022// Handles updates to occActive property
23bool Status::occActive(bool value)
24{
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053025 if (value != this->occActive())
26 {
Chris Cain37abe9b2024-10-31 17:20:31 -050027 lg2::info("Status::occActive OCC{INST} changed to {STATE}", "INST",
28 instance, "STATE", value);
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053029 if (value)
30 {
Chris Cainc86d80f2023-05-04 15:49:18 -050031 // Clear prior throttle reason (before setting device active)
32 updateThrottle(false, THROTTLED_ALL);
33
Eddie Jamesaced3092022-04-22 16:19:30 -050034 // Set the device active
35 device.setActive(true);
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +053036
Chris Caina8857c52021-01-27 11:53:05 -060037 // Reset last OCC state
38 lastState = 0;
39
Chris Cainf0295f52024-09-12 15:41:14 -050040 // Start watching for errors (throttles, etc)
41 try
42 {
43 addErrorWatch();
44 }
45 catch (const OpenFailure& e)
46 {
47 // Failed to add watch for throttle events, request reset to try
48 // to recover comm
Chris Cain37abe9b2024-10-31 17:20:31 -050049 lg2::error(
50 "Status::occActive: Unable to add error watch(s) for OCC{INST} watch: {ERROR}",
51 "INST", instance, "ERROR", e.what());
Chris Cainf0295f52024-09-12 15:41:14 -050052 deviceError(Error::Descriptor(OCC_COMM_ERROR_PATH));
53 return Base::Status::occActive(false);
54 }
55
56 // Update the OCC active sensor
57 Base::Status::occActive(value);
58
Chris Cain5d66a0a2022-02-09 08:52:10 -060059 if (device.master())
60 {
Chris Cain5d66a0a2022-02-09 08:52:10 -060061 // Update powercap bounds from OCC
Chris Cain40501a22022-03-14 17:33:27 -050062 manager.updatePcapBounds();
Chris Cain5d66a0a2022-02-09 08:52:10 -060063 }
64
Vishwanatha Subbanna2dc9b1a2017-08-18 18:29:41 +053065 // Call into Manager to let know that we have bound
Chris Cain1be43372021-12-09 19:29:37 -060066 if (this->managerCallBack)
Vishwanatha Subbanna2dc9b1a2017-08-18 18:29:41 +053067 {
Sheldon Bailey373af752022-02-21 15:14:00 -060068 this->managerCallBack(instance, value);
Edward A. James9fd2bdc2017-11-08 16:18:57 -060069 }
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053070 }
71 else
72 {
Chris Caina7b74dc2021-11-10 17:03:43 -060073#ifdef POWER10
Chris Cain1be43372021-12-09 19:29:37 -060074 if (pmode && device.master())
Chris Cain36f9cde2021-11-22 11:18:21 -060075 {
76 // Prevent mode changes
77 pmode->setMasterActive(false);
78 }
Chris Caina7b74dc2021-11-10 17:03:43 -060079 if (safeStateDelayTimer.isEnabled())
80 {
81 // stop safe delay timer
82 safeStateDelayTimer.setEnabled(false);
83 }
84#endif
Chris Cain36f9cde2021-11-22 11:18:21 -060085 // Call into Manager to let know that we will unbind.
Chris Cain1be43372021-12-09 19:29:37 -060086 if (this->managerCallBack)
Chris Cain36f9cde2021-11-22 11:18:21 -060087 {
Sheldon Bailey373af752022-02-21 15:14:00 -060088 this->managerCallBack(instance, value);
Chris Cain36f9cde2021-11-22 11:18:21 -060089 }
90
Edward A. James9fd2bdc2017-11-08 16:18:57 -060091 // Stop watching for errors
92 removeErrorWatch();
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +053093
Eddie Jamesaced3092022-04-22 16:19:30 -050094 // Set the device inactive
95 device.setActive(false);
Chris Cainc86d80f2023-05-04 15:49:18 -050096
97 // Clear throttles (OCC not active after disabling device)
98 updateThrottle(false, THROTTLED_ALL);
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053099 }
100 }
Eddie Jamesaced3092022-04-22 16:19:30 -0500101 else if (value && !device.active())
Edward A. James5e177972017-10-25 15:50:31 -0500102 {
103 // Existing error watch is on a dead file descriptor.
Edward A. James9fd2bdc2017-11-08 16:18:57 -0600104 removeErrorWatch();
Edward A. James5e177972017-10-25 15:50:31 -0500105
106 /*
107 * In it's constructor, Status checks Device::bound() to see if OCC is
108 * active or not.
109 * Device::bound() checks for occX-dev0 directory.
110 * We will lose occX-dev0 directories during FSI rescan.
111 * So, if we start this application (and construct Status), and then
112 * later do FSI rescan, we will end up with occActive = true and device
113 * NOT bound. Lets correct that situation here.
114 */
Eddie Jamesaced3092022-04-22 16:19:30 -0500115 device.setActive(true);
Edward A. James5e177972017-10-25 15:50:31 -0500116
117 // Add error watch again
Chris Cainf0295f52024-09-12 15:41:14 -0500118 try
119 {
120 addErrorWatch();
121 }
122 catch (const OpenFailure& e)
123 {
124 // Failed to add watch for throttle events, request reset to try to
125 // recover comm
Chris Cain37abe9b2024-10-31 17:20:31 -0500126 lg2::error(
127 "Status::occActive: Unable to add error watch(s) again for OCC{INST} watch: {ERROR}",
128 "INST", instance, "ERROR", e.what());
Chris Cainf0295f52024-09-12 15:41:14 -0500129 deviceError(Error::Descriptor(OCC_COMM_ERROR_PATH));
130 return Base::Status::occActive(false);
131 }
Edward A. James5e177972017-10-25 15:50:31 -0500132 }
Eddie Jamesaced3092022-04-22 16:19:30 -0500133 else if (!value && device.active())
Eddie James6d6d1b32019-04-22 10:45:08 -0500134 {
135 removeErrorWatch();
136
137 // In the event that the application never receives the active signal
138 // even though the OCC is active (this can occur if the BMC is rebooted
139 // with the host on, since the initial OCC driver probe will discover
140 // the OCCs), this application needs to be able to unbind the device
141 // when we get the OCC inactive signal.
Eddie Jamesaced3092022-04-22 16:19:30 -0500142 device.setActive(false);
Eddie James6d6d1b32019-04-22 10:45:08 -0500143 }
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +0530144 return Base::Status::occActive(value);
145}
146
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +0530147// Callback handler when a device error is reported.
Eddie James9789e712022-05-25 15:43:40 -0500148void Status::deviceError(Error::Descriptor d)
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +0530149{
Chris Cain36f9cde2021-11-22 11:18:21 -0600150#ifdef POWER10
Chris Cain1be43372021-12-09 19:29:37 -0600151 if (pmode && device.master())
152 {
153 // Prevent mode changes
154 pmode->setMasterActive(false);
155 }
Chris Cain36f9cde2021-11-22 11:18:21 -0600156#endif
157
Eddie James9789e712022-05-25 15:43:40 -0500158 if (d.log)
159 {
Chris Cain3ece5b92025-01-10 16:06:31 -0600160 FFDC::createOCCResetPEL(instance, d.path, d.err, d.callout,
161 d.isInventoryCallout);
Eddie James9789e712022-05-25 15:43:40 -0500162 }
163
Eddie Jamescbad2192021-10-07 09:39:39 -0500164 // This would deem OCC inactive
165 this->occActive(false);
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530166
Eddie Jamescbad2192021-10-07 09:39:39 -0500167 // Reset the OCC
168 this->resetOCC();
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530169}
170
171// Sends message to host control command handler to reset OCC
172void Status::resetOCC()
173{
Chris Cain37abe9b2024-10-31 17:20:31 -0500174 lg2::info(">>Status::resetOCC() - requesting reset for OCC{INST}", "INST",
175 instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500176 this->occActive(false);
Tom Joseph00325232020-07-29 17:51:48 +0530177#ifdef PLDM
178 if (resetCallBack)
179 {
180 this->resetCallBack(instance);
181 }
182#else
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530183 constexpr auto CONTROL_HOST_PATH = "/org/open_power/control/host0";
184 constexpr auto CONTROL_HOST_INTF = "org.open_power.Control.Host";
185
186 // This will throw exception on failure
George Liuf3b75142021-06-10 11:22:50 +0800187 auto service = utils::getService(CONTROL_HOST_PATH, CONTROL_HOST_INTF);
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530188
George Liuf3b75142021-06-10 11:22:50 +0800189 auto& bus = utils::getBus();
Gunnar Mills94df8c92018-09-14 14:50:03 -0500190 auto method = bus.new_method_call(service.c_str(), CONTROL_HOST_PATH,
191 CONTROL_HOST_INTF, "Execute");
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530192 // OCC Reset control command
Gunnar Mills94df8c92018-09-14 14:50:03 -0500193 method.append(convertForMessage(Control::Host::Command::OCCReset).c_str());
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530194
195 // OCC Sensor ID for callout reasons
Patrick Williamse0962702020-05-13 17:50:22 -0500196 method.append(std::variant<uint8_t>(std::get<0>(sensorMap.at(instance))));
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530197 bus.call_noreply(method);
198 return;
Tom Joseph00325232020-07-29 17:51:48 +0530199#endif
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530200}
201
202// Handler called by Host control command handler to convey the
203// status of the executed command
Patrick Williamsaf408082022-07-22 19:26:54 -0500204void Status::hostControlEvent(sdbusplus::message_t& msg)
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530205{
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530206 std::string cmdCompleted{};
207 std::string cmdStatus{};
208
209 msg.read(cmdCompleted, cmdStatus);
210
Chris Cain37abe9b2024-10-31 17:20:31 -0500211 lg2::debug("Host control signal values: command={CMD}, status={STATUS}",
212 "CMD", cmdCompleted, "STATUS", cmdStatus);
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530213
Gunnar Mills94df8c92018-09-14 14:50:03 -0500214 if (Control::Host::convertResultFromString(cmdStatus) !=
215 Control::Host::Result::Success)
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530216 {
Gunnar Mills94df8c92018-09-14 14:50:03 -0500217 if (Control::Host::convertCommandFromString(cmdCompleted) ==
218 Control::Host::Command::OCCReset)
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530219 {
Gunnar Mills85e65202018-04-08 15:01:54 -0500220 // Must be a Timeout. Log an Error trace
Chris Cain37abe9b2024-10-31 17:20:31 -0500221 lg2::error(
222 "Error resetting the OCC: path={PATH}, sensorid={SENSOR}",
223 "PATH", path, "SENSOR", std::get<0>(sensorMap.at(instance)));
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530224 }
225 }
226 return;
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +0530227}
228
Sheldon Bailey373af752022-02-21 15:14:00 -0600229// Called from Manager::pollerTimerExpired() in preperation to POLL OCC.
Chris Caina8857c52021-01-27 11:53:05 -0600230void Status::readOccState()
231{
Chris Cainf0295f52024-09-12 15:41:14 -0500232 if (stateValid)
233 {
234 // Reset retry count (since state is good)
235 currentOccReadRetriesCount = occReadRetries;
236 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600237 occReadStateNow();
Chris Caina8857c52021-01-27 11:53:05 -0600238}
239
Chris Cain78e86012021-03-04 16:15:31 -0600240#ifdef POWER10
Chris Cain78e86012021-03-04 16:15:31 -0600241// Special processing that needs to happen once the OCCs change to ACTIVE state
242void Status::occsWentActive()
243{
244 CmdStatus status = CmdStatus::SUCCESS;
245
Chris Cain1fe436d2024-10-10 09:41:03 -0500246 // IPS data will get sent automatically after a mode change if the mode
247 // supports it.
248 pmode->needToSendIPS();
249
Chris Cain36f9cde2021-11-22 11:18:21 -0600250 status = pmode->sendModeChange();
Chris Cain78e86012021-03-04 16:15:31 -0600251 if (status != CmdStatus::SUCCESS)
252 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500253 lg2::error(
254 "Status::occsWentActive: OCC mode change failed with status {STATUS}",
255 "STATUS", status);
Chris Cainc567dc82022-04-01 15:09:17 -0500256
257 // Disable and reset to try recovering
258 deviceError();
Chris Cain78e86012021-03-04 16:15:31 -0600259 }
Chris Cain78e86012021-03-04 16:15:31 -0600260}
261
Chris Cain17257672021-10-22 13:41:03 -0500262// Send Ambient and Altitude to the OCC
263CmdStatus Status::sendAmbient(const uint8_t inTemp, const uint16_t inAltitude)
264{
265 CmdStatus status = CmdStatus::FAILURE;
266 bool ambientValid = true;
267 uint8_t ambientTemp = inTemp;
268 uint16_t altitude = inAltitude;
269
270 if (ambientTemp == 0xFF)
271 {
272 // Get latest readings from manager
273 manager.getAmbientData(ambientValid, ambientTemp, altitude);
Chris Cain37abe9b2024-10-31 17:20:31 -0500274 lg2::debug(
275 "sendAmbient: valid: {VALID}, Ambient: {TEMP}C, altitude: {ALT}m",
276 "VALID", ambientValid, "TEMP", ambientTemp, "ALT", altitude);
Chris Cain17257672021-10-22 13:41:03 -0500277 }
278
279 std::vector<std::uint8_t> cmd, rsp;
280 cmd.reserve(11);
281 cmd.push_back(uint8_t(CmdType::SEND_AMBIENT));
282 cmd.push_back(0x00); // Data Length (2 bytes)
283 cmd.push_back(0x08); //
284 cmd.push_back(0x00); // Version
285 cmd.push_back(ambientValid ? 0 : 0xFF); // Ambient Status
286 cmd.push_back(ambientTemp); // Ambient Temperature
287 cmd.push_back(altitude >> 8); // Altitude in meters (2 bytes)
288 cmd.push_back(altitude & 0xFF); //
289 cmd.push_back(0x00); // Reserved (3 bytes)
290 cmd.push_back(0x00);
291 cmd.push_back(0x00);
Chris Cain37abe9b2024-10-31 17:20:31 -0500292 lg2::debug("sendAmbient: SEND_AMBIENT "
293 "command to OCC{INST} ({SIZE} bytes)",
294 "INST", instance, "SIZE", cmd.size());
Chris Cain17257672021-10-22 13:41:03 -0500295 status = occCmd.send(cmd, rsp);
296 if (status == CmdStatus::SUCCESS)
297 {
298 if (rsp.size() == 5)
299 {
300 if (RspStatus::SUCCESS != RspStatus(rsp[2]))
301 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500302 lg2::error(
303 "sendAmbient: SEND_AMBIENT failed with rspStatus {STATUS}",
304 "STATUS", lg2::hex, rsp[2]);
Chris Cain17257672021-10-22 13:41:03 -0500305 dump_hex(rsp);
306 status = CmdStatus::FAILURE;
307 }
308 }
309 else
310 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500311 lg2::error(
312 "sendAmbient: INVALID SEND_AMBIENT response length:{SIZE}",
313 "SIZE", rsp.size());
Chris Cain17257672021-10-22 13:41:03 -0500314 dump_hex(rsp);
315 status = CmdStatus::FAILURE;
316 }
317 }
318 else
319 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500320 lg2::error("sendAmbient: SEND_AMBIENT FAILED! with status {STATUS}",
321 "STATUS", lg2::hex, uint8_t(status));
Chris Cainc567dc82022-04-01 15:09:17 -0500322
323 if (status == CmdStatus::COMM_FAILURE)
Chris Cain17257672021-10-22 13:41:03 -0500324 {
Chris Cainf0295f52024-09-12 15:41:14 -0500325 // Disable due to OCC comm failure and reset to try recovering
326 deviceError(Error::Descriptor(OCC_COMM_ERROR_PATH));
Chris Cain17257672021-10-22 13:41:03 -0500327 }
328 }
329
330 return status;
331}
Chris Caina7b74dc2021-11-10 17:03:43 -0600332
333// Called when safe timer expires to determine if OCCs need to be reset
334void Status::safeStateDelayExpired()
335{
336 if (this->occActive())
337 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500338 lg2::info(
339 "safeStateDelayExpired: OCC{INST} state missing or not valid, requesting reset",
340 "INST", instance);
Chris Caina7b74dc2021-11-10 17:03:43 -0600341 // Disable and reset to try recovering
Eddie James9789e712022-05-25 15:43:40 -0500342 deviceError(Error::Descriptor(SAFE_ERROR_PATH));
Chris Caina7b74dc2021-11-10 17:03:43 -0600343 }
344}
Chris Cain78e86012021-03-04 16:15:31 -0600345#endif // POWER10
346
Chris Caine2d0a432022-03-28 11:08:49 -0500347fs::path Status::getHwmonPath()
Chris Cain5d66a0a2022-02-09 08:52:10 -0600348{
349 using namespace std::literals::string_literals;
350
Chris Caine2d0a432022-03-28 11:08:49 -0500351 if (!fs::exists(hwmonPath))
352 {
353 static bool tracedFail[8] = {0};
Chris Cain5d66a0a2022-02-09 08:52:10 -0600354
Chris Caine2d0a432022-03-28 11:08:49 -0500355 if (!hwmonPath.empty())
356 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500357 lg2::warning(
358 "Status::getHwmonPath(): path no longer exists: {PATH}", "PATH",
359 hwmonPath);
Chris Caine2d0a432022-03-28 11:08:49 -0500360 hwmonPath.clear();
361 }
362
363 // Build the base HWMON path
Patrick Williamsd7542c82024-08-16 15:20:28 -0400364 fs::path prefixPath =
365 fs::path{OCC_HWMON_PATH + "occ-hwmon."s +
366 std::to_string(instance + 1) + "/hwmon/"s};
Chris Caine2d0a432022-03-28 11:08:49 -0500367
368 // Get the hwmonXX directory name
369 try
370 {
371 // there should only be one directory
372 const int numDirs = std::distance(
373 fs::directory_iterator(prefixPath), fs::directory_iterator{});
374 if (numDirs == 1)
375 {
376 hwmonPath = *fs::directory_iterator(prefixPath);
377 tracedFail[instance] = false;
378 }
379 else
380 {
381 if (!tracedFail[instance])
382 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500383 lg2::error(
384 "Status::getHwmonPath(): Found multiple ({NUM}) hwmon paths!",
385 "NUM", numDirs);
Chris Caine2d0a432022-03-28 11:08:49 -0500386 tracedFail[instance] = true;
387 }
388 }
389 }
390 catch (const fs::filesystem_error& e)
391 {
392 if (!tracedFail[instance])
393 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500394 lg2::error(
395 "Status::getHwmonPath(): error accessing {PATH}: {ERROR}",
396 "PATH", prefixPath, "ERROR", e.what());
Chris Caine2d0a432022-03-28 11:08:49 -0500397 tracedFail[instance] = true;
398 }
399 }
400 }
401
402 return hwmonPath;
Chris Cain5d66a0a2022-02-09 08:52:10 -0600403}
404
Chris Cainf0295f52024-09-12 15:41:14 -0500405// Called to read state and handle any errors
Sheldon Bailey373af752022-02-21 15:14:00 -0600406void Status::occReadStateNow()
407{
408 unsigned int state;
409 const fs::path filename =
410 fs::path(DEV_PATH) /
411 fs::path(sysfsName + "." + std::to_string(instance + 1)) / "occ_state";
412
413 std::ifstream file;
414 bool goodFile = false;
415
416 // open file.
417 file.open(filename, std::ios::in);
418 const int openErrno = errno;
419
420 // File is open and state can be used.
421 if (file.is_open() && file.good())
422 {
423 goodFile = true;
424 file >> state;
Chris Cainf0295f52024-09-12 15:41:14 -0500425 // Read the error code (if any) to check status of the read
426 std::ios_base::iostate readState = file.rdstate();
427 if (readState)
428 {
429 // There was a failure reading the file
430 if (lastOccReadStatus != -1)
431 {
432 // Trace error bits
433 std::string errorBits = "";
434 if (readState & std::ios_base::eofbit)
435 {
436 errorBits += " EOF";
437 }
438 if (readState & std::ios_base::failbit)
439 {
440 errorBits += " failbit";
441 }
442 if (readState & std::ios_base::badbit)
443 {
444 errorBits += " badbit";
445 }
Chris Cain37abe9b2024-10-31 17:20:31 -0500446 lg2::error(
447 "readOccState: Failed to read OCC{INST} state: Read error on I/O operation - {ERROR}",
448 "INST", instance, "ERROR", errorBits);
Chris Cainf0295f52024-09-12 15:41:14 -0500449 lastOccReadStatus = -1;
450 }
451 goodFile = false;
452 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600453
Chris Cainf0295f52024-09-12 15:41:14 -0500454 if (goodFile && (state != lastState))
Sheldon Bailey373af752022-02-21 15:14:00 -0600455 {
456 // Trace OCC state changes
Chris Cain37abe9b2024-10-31 17:20:31 -0500457 lg2::info(
458 "Status::readOccState: OCC{INST} state {STATE} (lastState: {PRIOR})",
459 "INST", instance, "STATE", lg2::hex, state, "PRIOR", lg2::hex,
460 lastState);
Sheldon Bailey373af752022-02-21 15:14:00 -0600461 lastState = state;
462#ifdef POWER10
463 if (OccState(state) == OccState::ACTIVE)
464 {
465 if (pmode && device.master())
466 {
467 // Set the master OCC on the PowerMode object
468 pmode->setMasterOcc(path);
469 // Enable mode changes
470 pmode->setMasterActive();
471
472 // Special processing by master OCC when it goes active
473 occsWentActive();
474 }
475
476 CmdStatus status = sendAmbient();
477 if (status != CmdStatus::SUCCESS)
478 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500479 lg2::error(
480 "readOccState: Sending Ambient failed with status {STATUS}",
481 "STATUS", status);
Sheldon Bailey373af752022-02-21 15:14:00 -0600482 }
483 }
484
485 // If OCC in known Good State.
486 if ((OccState(state) == OccState::ACTIVE) ||
487 (OccState(state) == OccState::CHARACTERIZATION) ||
488 (OccState(state) == OccState::OBSERVATION))
489 {
490 // Good OCC State then sensors valid again
491 stateValid = true;
492
493 if (safeStateDelayTimer.isEnabled())
494 {
495 // stop safe delay timer (no longer in SAFE state)
496 safeStateDelayTimer.setEnabled(false);
497 }
498 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600499 else
500 {
Chris Cainf0295f52024-09-12 15:41:14 -0500501 // OCC is in SAFE or some other unsupported state
Sheldon Bailey373af752022-02-21 15:14:00 -0600502 if (!safeStateDelayTimer.isEnabled())
503 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500504 lg2::error(
505 "readOccState: Invalid OCC{INST} state of {STATE}, starting safe state delay timer",
506 "INST", instance, "STATE", state);
Sheldon Bailey373af752022-02-21 15:14:00 -0600507 // start safe delay timer (before requesting reset)
508 using namespace std::literals::chrono_literals;
509 safeStateDelayTimer.restartOnce(60s);
510 }
Chris Cainf0295f52024-09-12 15:41:14 -0500511 // Not a supported state (update sensors to NaN and not
512 // functional)
Sheldon Bailey373af752022-02-21 15:14:00 -0600513 stateValid = false;
514 }
515#else
516 // Before P10 state not checked, only used good file open.
517 stateValid = true;
518#endif
519 }
520 }
Chris Cainf0295f52024-09-12 15:41:14 -0500521#ifdef POWER10
522 else
523 {
524 // Unable to read state
525 stateValid = false;
526 }
527#endif
Sheldon Bailey373af752022-02-21 15:14:00 -0600528 file.close();
529
530 // if failed to Read a state or not a valid state -> Attempt retry
531 // after 1 Second delay if allowed.
532 if ((!goodFile) || (!stateValid))
533 {
534 if (!goodFile)
535 {
536 // If not able to read, OCC may be offline
Chris Cainf0295f52024-09-12 15:41:14 -0500537 if (openErrno != lastOccReadStatus)
538 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500539 lg2::error(
540 "Status::readOccState: open/read failed trying to read OCC{INST} state (open errno={ERROR})",
541 "INST", instance, "ERROR", openErrno);
Chris Cainf0295f52024-09-12 15:41:14 -0500542 lastOccReadStatus = openErrno;
543 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600544 }
545 else
546 {
547 // else this failed due to state not valid.
Chris Cainbd551de2022-04-26 13:41:16 -0500548 if (state != lastState)
549 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500550 lg2::error(
551 "Status::readOccState: OCC{INST} Invalid state {STATE} (last state: {PRIOR})",
552 "INST", instance, "STATE", lg2::hex, state, "PRIOR",
553 lg2::hex, lastState);
Chris Cainbd551de2022-04-26 13:41:16 -0500554 }
Sheldon Bailey373af752022-02-21 15:14:00 -0600555 }
556
557#ifdef READ_OCC_SENSORS
Sheldon Baileyc8dd4592022-05-12 10:15:14 -0500558 manager.setSensorValueToNaN(instance);
Sheldon Bailey373af752022-02-21 15:14:00 -0600559#endif
560
561 // See occReadRetries for number of retry attempts.
562 if (currentOccReadRetriesCount > 0)
563 {
564 --currentOccReadRetriesCount;
Sheldon Bailey373af752022-02-21 15:14:00 -0600565 }
566 else
567 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500568 lg2::error("readOccState: failed to read OCC{INST} state!", "INST",
569 instance);
Chris Cainf0295f52024-09-12 15:41:14 -0500570
Sheldon Bailey373af752022-02-21 15:14:00 -0600571 // State could not be determined, set it to NO State.
572 lastState = 0;
573
574 // Disable the ability to send Failed actions until OCC is
575 // Active again.
576 stateValid = false;
577
Chris Cainf0295f52024-09-12 15:41:14 -0500578 // Disable due to OCC comm failure and reset to try recovering
Chris Cain3ece5b92025-01-10 16:06:31 -0600579 // (processor callout will be added)
580 deviceError(Error::Descriptor(OCC_COMM_ERROR_PATH, ECOMM,
581 procPath.c_str(), true));
Chris Cainf0295f52024-09-12 15:41:14 -0500582
583 // Reset retry count (for next attempt after recovery)
584 currentOccReadRetriesCount = occReadRetries;
585 }
586 }
587 else
588 {
589 if (lastOccReadStatus != 0)
590 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500591 lg2::info(
592 "Status::readOccState: successfully read OCC{INST} state: {STATE}",
593 "INST", instance, "STATE", state);
Chris Cainf0295f52024-09-12 15:41:14 -0500594 lastOccReadStatus = 0; // no error
Sheldon Bailey373af752022-02-21 15:14:00 -0600595 }
596 }
597}
598
Chris Cainc86d80f2023-05-04 15:49:18 -0500599// Update processor throttle status on dbus
600void Status::updateThrottle(const bool isThrottled, const uint8_t newReason)
601{
602 if (!throttleHandle)
603 {
604 return;
605 }
606
607 uint8_t newThrottleCause = throttleCause;
608
609 if (isThrottled) // throttled due to newReason
610 {
611 if ((newReason & throttleCause) == 0)
612 {
613 // set the bit(s) for passed in reason
614 newThrottleCause |= newReason;
615 }
616 // else no change
617 }
618 else // no longer throttled due to newReason
619 {
620 if ((newReason & throttleCause) != 0)
621 {
622 // clear the bit(s) for passed in reason
623 newThrottleCause &= ~newReason;
624 }
625 // else no change
626 }
627
628 if (newThrottleCause != throttleCause)
629 {
630 if (newThrottleCause == THROTTLED_NONE)
631 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500632 lg2::debug(
633 "updateThrottle: OCC{INST} no longer throttled (prior reason: {REASON})",
634 "INST", instance, "REASON", throttleCause);
Chris Cainc86d80f2023-05-04 15:49:18 -0500635 throttleCause = THROTTLED_NONE;
636 throttleHandle->throttled(false);
637 throttleHandle->throttleCauses({});
638 }
639 else
640 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500641 lg2::debug(
642 "updateThrottle: OCC{INST} is throttled with reason {REASON} (prior reason: {PRIOR})",
643 "INST", instance, "REASON", newThrottleCause, "PRIOR",
644 throttleCause);
Chris Cainc86d80f2023-05-04 15:49:18 -0500645 throttleCause = newThrottleCause;
646
647 std::vector<ThrottleObj::ThrottleReasons> updatedCauses;
648 if (throttleCause & THROTTLED_POWER)
649 {
650 updatedCauses.push_back(
651 throttleHandle->ThrottleReasons::PowerLimit);
652 }
653 if (throttleCause & THROTTLED_THERMAL)
654 {
655 updatedCauses.push_back(
656 throttleHandle->ThrottleReasons::ThermalLimit);
657 }
658 if (throttleCause & THROTTLED_SAFE)
659 {
660 updatedCauses.push_back(
661 throttleHandle->ThrottleReasons::ManagementDetectedFault);
662 }
663 throttleHandle->throttleCauses(updatedCauses);
664 throttleHandle->throttled(true);
665 }
666 }
667 // else no change to throttle status
668}
669
670// Get processor path associated with this OCC
671void Status::readProcAssociation()
672{
673 std::string managingPath = path + "/power_managing";
Chris Cain37abe9b2024-10-31 17:20:31 -0500674 lg2::debug("readProcAssociation: getting endpoints for {MANAGE} ({PATH})",
675 "MANAGE", managingPath, "PATH", path);
Chris Cainc86d80f2023-05-04 15:49:18 -0500676 try
677 {
678 utils::PropertyValue procPathProperty{};
679 procPathProperty = utils::getProperty(
680 managingPath, "xyz.openbmc_project.Association", "endpoints");
681 auto result = std::get<std::vector<std::string>>(procPathProperty);
682 if (result.size() > 0)
683 {
684 procPath = result[0];
Chris Cain37abe9b2024-10-31 17:20:31 -0500685 lg2::info("readProcAssociation: OCC{INST} has proc={PATH}", "INST",
686 instance, "PATH", procPath);
Chris Cainc86d80f2023-05-04 15:49:18 -0500687 }
688 else
689 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500690 lg2::error(
691 "readProcAssociation: No processor associated with OCC{INST} / {PATH}",
692 "INST", instance, "PATH", path);
Chris Cainc86d80f2023-05-04 15:49:18 -0500693 }
694 }
695 catch (const sdbusplus::exception_t& e)
696 {
Chris Cain37abe9b2024-10-31 17:20:31 -0500697 lg2::error(
698 "readProcAssociation: Unable to get proc assocated with {PATH} - {ERROR}",
699 "PATH", path, "ERROR", e.what());
Chris Cainc86d80f2023-05-04 15:49:18 -0500700 procPath = {};
701 }
702}
703
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +0530704} // namespace occ
705} // namespace open_power