blob: 7c3658c0ac6c6289517a227061d6c0fff8108b73 [file] [log] [blame]
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +05301#include "occ_status.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05002
Chris Cain17257672021-10-22 13:41:03 -05003#include "occ_manager.hpp"
Vishwanatha Subbanna6add0b82017-07-21 19:02:37 +05304#include "occ_sensor.hpp"
Chris Cain78e86012021-03-04 16:15:31 -06005#include "powermode.hpp"
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +05306#include "utils.hpp"
Gunnar Mills94df8c92018-09-14 14:50:03 -05007
Chris Caina8857c52021-01-27 11:53:05 -06008#include <fmt/core.h>
9
Chris Cain78e86012021-03-04 16:15:31 -060010#ifdef POWER10
11#include <com/ibm/Host/Target/server.hpp>
12#endif
Gunnar Mills94df8c92018-09-14 14:50:03 -050013#include <phosphor-logging/log.hpp>
Chris Cain78e86012021-03-04 16:15:31 -060014
Chris Caine2d0a432022-03-28 11:08:49 -050015#include <filesystem>
16
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +053017namespace open_power
18{
19namespace occ
20{
Chris Cain78e86012021-03-04 16:15:31 -060021
Chris Caina8857c52021-01-27 11:53:05 -060022using namespace phosphor::logging;
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +053023
24// Handles updates to occActive property
25bool Status::occActive(bool value)
26{
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053027 if (value != this->occActive())
28 {
Chris Caina8857c52021-01-27 11:53:05 -060029 log<level::INFO>(fmt::format("Status::occActive OCC{} changed to {}",
30 instance, value)
31 .c_str());
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053032 if (value)
33 {
34 // Bind the device
35 device.bind();
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +053036
Edward A. James9fd2bdc2017-11-08 16:18:57 -060037 // Start watching for errors
38 addErrorWatch();
39
Chris Caina8857c52021-01-27 11:53:05 -060040 // Reset last OCC state
41 lastState = 0;
42
Chris Cain5d66a0a2022-02-09 08:52:10 -060043 if (device.master())
44 {
Chris Cain5d66a0a2022-02-09 08:52:10 -060045 // Update powercap bounds from OCC
Chris Cain40501a22022-03-14 17:33:27 -050046 manager.updatePcapBounds();
Chris Cain5d66a0a2022-02-09 08:52:10 -060047 }
48
Vishwanatha Subbanna2dc9b1a2017-08-18 18:29:41 +053049 // Call into Manager to let know that we have bound
Chris Cain1be43372021-12-09 19:29:37 -060050 if (this->managerCallBack)
Vishwanatha Subbanna2dc9b1a2017-08-18 18:29:41 +053051 {
Chris Cain1be43372021-12-09 19:29:37 -060052 this->managerCallBack(value);
Edward A. James9fd2bdc2017-11-08 16:18:57 -060053 }
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053054 }
55 else
56 {
Chris Caina7b74dc2021-11-10 17:03:43 -060057#ifdef POWER10
Chris Cain1be43372021-12-09 19:29:37 -060058 if (pmode && device.master())
Chris Cain36f9cde2021-11-22 11:18:21 -060059 {
60 // Prevent mode changes
61 pmode->setMasterActive(false);
62 }
Chris Caina7b74dc2021-11-10 17:03:43 -060063 if (safeStateDelayTimer.isEnabled())
64 {
65 // stop safe delay timer
66 safeStateDelayTimer.setEnabled(false);
67 }
68#endif
69
Chris Cain36f9cde2021-11-22 11:18:21 -060070 // Call into Manager to let know that we will unbind.
Chris Cain1be43372021-12-09 19:29:37 -060071 if (this->managerCallBack)
Chris Cain36f9cde2021-11-22 11:18:21 -060072 {
Chris Cain1be43372021-12-09 19:29:37 -060073 this->managerCallBack(value);
Chris Cain36f9cde2021-11-22 11:18:21 -060074 }
75
Edward A. James9fd2bdc2017-11-08 16:18:57 -060076 // Stop watching for errors
77 removeErrorWatch();
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +053078
79 // Do the unbind.
Vishwanatha Subbanna32e84e92017-06-28 19:17:28 +053080 device.unBind();
81 }
82 }
Edward A. James5e177972017-10-25 15:50:31 -050083 else if (value && !device.bound())
84 {
85 // Existing error watch is on a dead file descriptor.
Edward A. James9fd2bdc2017-11-08 16:18:57 -060086 removeErrorWatch();
Edward A. James5e177972017-10-25 15:50:31 -050087
88 /*
89 * In it's constructor, Status checks Device::bound() to see if OCC is
90 * active or not.
91 * Device::bound() checks for occX-dev0 directory.
92 * We will lose occX-dev0 directories during FSI rescan.
93 * So, if we start this application (and construct Status), and then
94 * later do FSI rescan, we will end up with occActive = true and device
95 * NOT bound. Lets correct that situation here.
96 */
97 device.bind();
98
99 // Add error watch again
Edward A. James9fd2bdc2017-11-08 16:18:57 -0600100 addErrorWatch();
Edward A. James5e177972017-10-25 15:50:31 -0500101 }
Eddie James6d6d1b32019-04-22 10:45:08 -0500102 else if (!value && device.bound())
103 {
104 removeErrorWatch();
105
106 // In the event that the application never receives the active signal
107 // even though the OCC is active (this can occur if the BMC is rebooted
108 // with the host on, since the initial OCC driver probe will discover
109 // the OCCs), this application needs to be able to unbind the device
110 // when we get the OCC inactive signal.
111 device.unBind();
112 }
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +0530113 return Base::Status::occActive(value);
114}
115
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +0530116// Callback handler when a device error is reported.
Eddie Jamescbad2192021-10-07 09:39:39 -0500117void Status::deviceError()
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +0530118{
Chris Cain36f9cde2021-11-22 11:18:21 -0600119#ifdef POWER10
Chris Cain1be43372021-12-09 19:29:37 -0600120 if (pmode && device.master())
121 {
122 // Prevent mode changes
123 pmode->setMasterActive(false);
124 }
Chris Cain36f9cde2021-11-22 11:18:21 -0600125#endif
126
Eddie Jamescbad2192021-10-07 09:39:39 -0500127 // This would deem OCC inactive
128 this->occActive(false);
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530129
Eddie Jamescbad2192021-10-07 09:39:39 -0500130 // Reset the OCC
131 this->resetOCC();
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530132}
133
134// Sends message to host control command handler to reset OCC
135void Status::resetOCC()
136{
Chris Caina8857c52021-01-27 11:53:05 -0600137 log<level::INFO>(
138 fmt::format(">>Status::resetOCC() - requesting reset for OCC{}",
139 instance)
140 .c_str());
Tom Joseph00325232020-07-29 17:51:48 +0530141#ifdef PLDM
142 if (resetCallBack)
143 {
144 this->resetCallBack(instance);
145 }
146#else
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530147 constexpr auto CONTROL_HOST_PATH = "/org/open_power/control/host0";
148 constexpr auto CONTROL_HOST_INTF = "org.open_power.Control.Host";
149
150 // This will throw exception on failure
George Liuf3b75142021-06-10 11:22:50 +0800151 auto service = utils::getService(CONTROL_HOST_PATH, CONTROL_HOST_INTF);
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530152
George Liuf3b75142021-06-10 11:22:50 +0800153 auto& bus = utils::getBus();
Gunnar Mills94df8c92018-09-14 14:50:03 -0500154 auto method = bus.new_method_call(service.c_str(), CONTROL_HOST_PATH,
155 CONTROL_HOST_INTF, "Execute");
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530156 // OCC Reset control command
Gunnar Mills94df8c92018-09-14 14:50:03 -0500157 method.append(convertForMessage(Control::Host::Command::OCCReset).c_str());
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530158
159 // OCC Sensor ID for callout reasons
Patrick Williamse0962702020-05-13 17:50:22 -0500160 method.append(std::variant<uint8_t>(std::get<0>(sensorMap.at(instance))));
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530161 bus.call_noreply(method);
162 return;
Tom Joseph00325232020-07-29 17:51:48 +0530163#endif
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530164}
165
166// Handler called by Host control command handler to convey the
167// status of the executed command
168void Status::hostControlEvent(sdbusplus::message::message& msg)
169{
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530170 std::string cmdCompleted{};
171 std::string cmdStatus{};
172
173 msg.read(cmdCompleted, cmdStatus);
174
175 log<level::DEBUG>("Host control signal values",
Gunnar Mills94df8c92018-09-14 14:50:03 -0500176 entry("COMMAND=%s", cmdCompleted.c_str()),
177 entry("STATUS=%s", cmdStatus.c_str()));
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530178
Gunnar Mills94df8c92018-09-14 14:50:03 -0500179 if (Control::Host::convertResultFromString(cmdStatus) !=
180 Control::Host::Result::Success)
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530181 {
Gunnar Mills94df8c92018-09-14 14:50:03 -0500182 if (Control::Host::convertCommandFromString(cmdCompleted) ==
183 Control::Host::Command::OCCReset)
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530184 {
Gunnar Mills85e65202018-04-08 15:01:54 -0500185 // Must be a Timeout. Log an Error trace
Alexander Filippov1d69e192019-03-21 18:12:07 +0300186 log<level::ERR>(
187 "Error resetting the OCC.", entry("PATH=%s", path.c_str()),
188 entry("SENSORID=0x%X", std::get<0>(sensorMap.at(instance))));
Vishwanatha Subbanna30e329a2017-07-24 23:13:14 +0530189 }
190 }
191 return;
Vishwanatha Subbannaee4d83d2017-06-29 18:35:00 +0530192}
193
Chris Caina8857c52021-01-27 11:53:05 -0600194void Status::readOccState()
195{
196 unsigned int state;
197 const fs::path filename =
198 fs::path(DEV_PATH) /
199 fs::path(sysfsName + "." + std::to_string(instance + 1)) / "occ_state";
200
Chris Caina8857c52021-01-27 11:53:05 -0600201 std::ifstream file(filename, std::ios::in);
202 const int open_errno = errno;
203 if (file)
204 {
205 file >> state;
206 if (state != lastState)
207 {
208 // Trace OCC state changes
209 log<level::INFO>(
210 fmt::format("Status::readOccState: OCC{} state 0x{:02X}",
211 instance, state)
212 .c_str());
Chris Cain5d66a0a2022-02-09 08:52:10 -0600213 if (state & 0xFFFFFFF8)
214 {
215 log<level::ERR>(
216 fmt::format("Status::readOccState: INVALID STATE from {}!!",
217 filename.c_str())
218 .c_str());
219 }
Chris Caina8857c52021-01-27 11:53:05 -0600220 lastState = state;
Chris Cain78e86012021-03-04 16:15:31 -0600221
222#ifdef POWER10
Chris Cain17257672021-10-22 13:41:03 -0500223 if (OccState(state) == OccState::ACTIVE)
224 {
Chris Cain1be43372021-12-09 19:29:37 -0600225 if (pmode && device.master())
Chris Caina7b74dc2021-11-10 17:03:43 -0600226 {
Chris Cain6fa848a2022-01-24 14:54:38 -0600227 // Set the master OCC on the PowerMode object
228 pmode->setMasterOcc(path);
229 // Enable mode changes
Chris Cain36f9cde2021-11-22 11:18:21 -0600230 pmode->setMasterActive();
231
Chris Caina7b74dc2021-11-10 17:03:43 -0600232 // Special processing by master OCC when it goes active
233 occsWentActive();
234 }
235
Chris Cain17257672021-10-22 13:41:03 -0500236 CmdStatus status = sendAmbient();
237 if (status != CmdStatus::SUCCESS)
238 {
239 log<level::ERR>(
240 fmt::format(
241 "readOccState: Sending Ambient failed with status {}",
242 status)
243 .c_str());
244 }
245 }
Chris Caina7b74dc2021-11-10 17:03:43 -0600246
247 if (OccState(state) == OccState::SAFE)
248 {
249 // start safe delay timer (before requesting reset)
250 using namespace std::literals::chrono_literals;
251 safeStateDelayTimer.restartOnce(60s);
252 }
253 else if (safeStateDelayTimer.isEnabled())
254 {
255 // stop safe delay timer (no longer in SAFE state)
256 safeStateDelayTimer.setEnabled(false);
257 }
Chris Cain78e86012021-03-04 16:15:31 -0600258#endif
Chris Caina8857c52021-01-27 11:53:05 -0600259 }
260 file.close();
261 }
262 else
263 {
264 // If not able to read, OCC may be offline
265 log<level::DEBUG>(
266 fmt::format("Status::readOccState: open failed (errno={})",
267 open_errno)
268 .c_str());
269 lastState = 0;
270 }
271}
272
Chris Cain78e86012021-03-04 16:15:31 -0600273#ifdef POWER10
Chris Cain78e86012021-03-04 16:15:31 -0600274// Special processing that needs to happen once the OCCs change to ACTIVE state
275void Status::occsWentActive()
276{
277 CmdStatus status = CmdStatus::SUCCESS;
278
Chris Cain36f9cde2021-11-22 11:18:21 -0600279 status = pmode->sendModeChange();
Chris Cain78e86012021-03-04 16:15:31 -0600280 if (status != CmdStatus::SUCCESS)
281 {
George Liub5ca1012021-09-10 12:53:11 +0800282 log<level::ERR>(
283 fmt::format(
284 "Status::occsWentActive: OCC mode change failed with status {}",
285 status)
286 .c_str());
Chris Cain78e86012021-03-04 16:15:31 -0600287 }
288
Chris Cain36f9cde2021-11-22 11:18:21 -0600289 status = pmode->sendIpsData();
Chris Cain78e86012021-03-04 16:15:31 -0600290 if (status != CmdStatus::SUCCESS)
291 {
292 log<level::ERR>(
293 fmt::format(
George Liub5ca1012021-09-10 12:53:11 +0800294 "Status::occsWentActive: Sending Idle Power Save Config data failed with status {}",
Chris Cain78e86012021-03-04 16:15:31 -0600295 status)
296 .c_str());
297 }
298}
299
Chris Cain17257672021-10-22 13:41:03 -0500300// Send Ambient and Altitude to the OCC
301CmdStatus Status::sendAmbient(const uint8_t inTemp, const uint16_t inAltitude)
302{
303 CmdStatus status = CmdStatus::FAILURE;
304 bool ambientValid = true;
305 uint8_t ambientTemp = inTemp;
306 uint16_t altitude = inAltitude;
307
308 if (ambientTemp == 0xFF)
309 {
310 // Get latest readings from manager
311 manager.getAmbientData(ambientValid, ambientTemp, altitude);
312 log<level::DEBUG>(
313 fmt::format("sendAmbient: valid: {}, Ambient: {}C, altitude: {}m",
314 ambientValid, ambientTemp, altitude)
315 .c_str());
316 }
317
318 std::vector<std::uint8_t> cmd, rsp;
319 cmd.reserve(11);
320 cmd.push_back(uint8_t(CmdType::SEND_AMBIENT));
321 cmd.push_back(0x00); // Data Length (2 bytes)
322 cmd.push_back(0x08); //
323 cmd.push_back(0x00); // Version
324 cmd.push_back(ambientValid ? 0 : 0xFF); // Ambient Status
325 cmd.push_back(ambientTemp); // Ambient Temperature
326 cmd.push_back(altitude >> 8); // Altitude in meters (2 bytes)
327 cmd.push_back(altitude & 0xFF); //
328 cmd.push_back(0x00); // Reserved (3 bytes)
329 cmd.push_back(0x00);
330 cmd.push_back(0x00);
331 log<level::DEBUG>(fmt::format("sendAmbient: SEND_AMBIENT "
332 "command to OCC{} ({} bytes)",
333 instance, cmd.size())
334 .c_str());
335 status = occCmd.send(cmd, rsp);
336 if (status == CmdStatus::SUCCESS)
337 {
338 if (rsp.size() == 5)
339 {
340 if (RspStatus::SUCCESS != RspStatus(rsp[2]))
341 {
342 log<level::ERR>(
343 fmt::format(
344 "sendAmbient: SEND_AMBIENT failed with status 0x{:02X}",
345 rsp[2])
346 .c_str());
347 dump_hex(rsp);
348 status = CmdStatus::FAILURE;
349 }
350 }
351 else
352 {
353 log<level::ERR>("sendAmbient: INVALID SEND_AMBIENT response");
354 dump_hex(rsp);
355 status = CmdStatus::FAILURE;
356 }
357 }
358 else
359 {
360 if (status == CmdStatus::OPEN_FAILURE)
361 {
362 // OCC not active yet
363 status = CmdStatus::SUCCESS;
364 }
365 else
366 {
367 log<level::ERR>("sendAmbient: SEND_AMBIENT FAILED!");
368 }
369 }
370
371 return status;
372}
Chris Caina7b74dc2021-11-10 17:03:43 -0600373
374// Called when safe timer expires to determine if OCCs need to be reset
375void Status::safeStateDelayExpired()
376{
377 if (this->occActive())
378 {
379 log<level::INFO>(
380 fmt::format(
381 "safeStateDelayExpired: OCC{} is in SAFE state, requesting reset",
382 instance)
383 .c_str());
384 // Disable and reset to try recovering
385 deviceError();
386 }
387}
Chris Cain78e86012021-03-04 16:15:31 -0600388#endif // POWER10
389
Chris Caine2d0a432022-03-28 11:08:49 -0500390fs::path Status::getHwmonPath()
Chris Cain5d66a0a2022-02-09 08:52:10 -0600391{
392 using namespace std::literals::string_literals;
393
Chris Caine2d0a432022-03-28 11:08:49 -0500394 if (!fs::exists(hwmonPath))
395 {
396 static bool tracedFail[8] = {0};
Chris Cain5d66a0a2022-02-09 08:52:10 -0600397
Chris Caine2d0a432022-03-28 11:08:49 -0500398 if (!hwmonPath.empty())
399 {
400 log<level::ERR>(
401 fmt::format("Status::getHwmonPath(): path no longer exists: {}",
402 hwmonPath.c_str())
403 .c_str());
404 hwmonPath.clear();
405 }
406
407 // Build the base HWMON path
408 fs::path prefixPath =
409 fs::path{OCC_HWMON_PATH + "occ-hwmon."s +
410 std::to_string(instance + 1) + "/hwmon/"s};
411
412 // Get the hwmonXX directory name
413 try
414 {
415 // there should only be one directory
416 const int numDirs = std::distance(
417 fs::directory_iterator(prefixPath), fs::directory_iterator{});
418 if (numDirs == 1)
419 {
420 hwmonPath = *fs::directory_iterator(prefixPath);
421 tracedFail[instance] = false;
422 }
423 else
424 {
425 if (!tracedFail[instance])
426 {
427 log<level::ERR>(
428 fmt::format(
429 "Status::getHwmonPath(): Found multiple ({}) hwmon paths!",
430 numDirs)
431 .c_str());
432 tracedFail[instance] = true;
433 }
434 }
435 }
436 catch (const fs::filesystem_error& e)
437 {
438 if (!tracedFail[instance])
439 {
440 log<level::ERR>(
441 fmt::format(
442 "Status::getHwmonPath(): error accessing {}: {}",
443 prefixPath.c_str(), e.what())
444 .c_str());
445 tracedFail[instance] = true;
446 }
447 }
448 }
449
450 return hwmonPath;
Chris Cain5d66a0a2022-02-09 08:52:10 -0600451}
452
Vishwanatha Subbanna307d80b2017-06-28 15:56:09 +0530453} // namespace occ
454} // namespace open_power