/**
 * Copyright © 2017 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#include <map>
17#include <memory>
18#include <phosphor-logging/elog.hpp>
19#include <phosphor-logging/log.hpp>
20#include <elog-errors.hpp>
Brandon Wymane0eb45c2017-10-06 12:58:42 -050021#include <org/open_power/Witherspoon/Fault/error.hpp>
Matt Spinlerceacf942017-10-05 13:55:02 -050022#include <xyz/openbmc_project/Common/Device/error.hpp>
Matt Spinlere7e432b2017-08-21 15:01:40 -050023#include "names_values.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050024#include "ucd90160.hpp"
Matt Spinlera8269652017-09-19 15:13:28 -050025#include "utility.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050026
27namespace witherspoon
28{
29namespace power
30{
31
32using namespace std::string_literals;
33
Matt Spinlere7e432b2017-08-21 15:01:40 -050034const auto MFR_STATUS = "mfr_status"s;
Matt Spinler1e365692017-08-21 14:43:55 -050035
Matt Spinlerb54357f2017-08-21 14:38:54 -050036const auto DEVICE_NAME = "UCD90160"s;
37const auto DRIVER_NAME = "ucd9000"s;
Matt Spinlere7e432b2017-08-21 15:01:40 -050038constexpr auto NUM_PAGES = 16;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinler110b2842017-08-21 15:23:27 -050040namespace fs = std::experimental::filesystem;
Matt Spinlerd998b732017-08-21 15:35:54 -050041using namespace gpio;
Matt Spinlerb54357f2017-08-21 14:38:54 -050042using namespace pmbus;
43using namespace phosphor::logging;
Matt Spinlerceacf942017-10-05 13:55:02 -050044
45namespace device_error = sdbusplus::xyz::openbmc_project::
46 Common::Device::Error;
47namespace power_error = sdbusplus::org::open_power::
48 Witherspoon::Fault::Error;
Matt Spinlerb54357f2017-08-21 14:38:54 -050049
Matt Spinlera8269652017-09-19 15:13:28 -050050UCD90160::UCD90160(size_t instance, sdbusplus::bus::bus& bus) :
Matt Spinlerfcd4a712017-09-19 10:45:07 -050051 Device(DEVICE_NAME, instance),
52 interface(std::get<ucd90160::pathField>(
53 deviceMap.find(instance)->second),
54 DRIVER_NAME,
55 instance),
Matt Spinlera8269652017-09-19 15:13:28 -050056 gpioDevice(findGPIODevice(interface.path())),
57 bus(bus)
Matt Spinlerb54357f2017-08-21 14:38:54 -050058{
59}
60
61void UCD90160::onFailure()
62{
63 try
64 {
65 auto voutError = checkVOUTFaults();
66
67 auto pgoodError = checkPGOODFaults(false);
68
69 //Not a voltage or PGOOD fault, but we know something
70 //failed so still create an error log.
71 if (!voutError && !pgoodError)
72 {
73 createPowerFaultLog();
74 }
75 }
Matt Spinlerceacf942017-10-05 13:55:02 -050076 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050077 {
78 if (!accessError)
79 {
Matt Spinlerceacf942017-10-05 13:55:02 -050080 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -050081 accessError = true;
82 }
83 }
84}
85
86void UCD90160::analyze()
87{
88 try
89 {
90 //Note: Voltage faults are always fatal, so they just
91 //need to be analyzed in onFailure().
92
93 checkPGOODFaults(true);
94 }
Matt Spinlerceacf942017-10-05 13:55:02 -050095 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050096 {
97 if (!accessError)
98 {
Matt Spinlerceacf942017-10-05 13:55:02 -050099 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500100 accessError = true;
101 }
102 }
103}
104
Matt Spinlere7e432b2017-08-21 15:01:40 -0500105uint16_t UCD90160::readStatusWord()
106{
107 return interface.read(STATUS_WORD, Type::Debug);
108}
109
110uint32_t UCD90160::readMFRStatus()
111{
112 return interface.read(MFR_STATUS, Type::DeviceDebug);
113}
114
Matt Spinlerb54357f2017-08-21 14:38:54 -0500115bool UCD90160::checkVOUTFaults()
116{
Matt Spinlere7e432b2017-08-21 15:01:40 -0500117 bool errorCreated = false;
118 auto statusWord = readStatusWord();
119
120 //The status_word register has a summary bit to tell us
121 //if each page even needs to be checked
122 if (!(statusWord & status_word::VOUT_FAULT))
123 {
124 return errorCreated;
125 }
126
127 for (size_t page = 0; page < NUM_PAGES; page++)
128 {
129 if (isVoutFaultLogged(page))
130 {
131 continue;
132 }
133
134 auto statusVout = interface.insertPageNum(STATUS_VOUT, page);
135 uint8_t vout = interface.read(statusVout, Type::Debug);
136
137 //Any bit on is an error
138 if (vout)
139 {
140 auto& railNames = std::get<ucd90160::railNamesField>(
141 deviceMap.find(getInstance())->second);
142 auto railName = railNames.at(page);
143
144 util::NamesValues nv;
145 nv.add("STATUS_WORD", statusWord);
146 nv.add("STATUS_VOUT", vout);
147 nv.add("MFR_STATUS", readMFRStatus());
148
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500149 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlere7e432b2017-08-21 15:01:40 -0500150 PowerSequencerVoltageFault;
151
Matt Spinlerceacf942017-10-05 13:55:02 -0500152 report<power_error::PowerSequencerVoltageFault>(
Matt Spinlere7e432b2017-08-21 15:01:40 -0500153 metadata::RAIL(page),
154 metadata::RAIL_NAME(railName.c_str()),
155 metadata::RAW_STATUS(nv.get().c_str()));
156
157 setVoutFaultLogged(page);
158 errorCreated = true;
159 }
160 }
161
162 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500163}
164
165bool UCD90160::checkPGOODFaults(bool polling)
166{
Matt Spinlerd998b732017-08-21 15:35:54 -0500167 bool errorCreated = false;
168
169 //While PGOOD faults could show up in MFR_STATUS (and we could then
170 //check the summary bit in STATUS_WORD first), they are edge triggered,
171 //and as the device driver sends a clear faults command every time we
172 //do a read, we will never see them. So, we'll have to just read the
173 //real time GPI status GPIO.
174
175 //Check only the GPIs configured on this system.
176 auto& gpiConfigs = std::get<ucd90160::gpiConfigField>(
177 deviceMap.find(getInstance())->second);
178
179 for (const auto& gpiConfig : gpiConfigs)
180 {
181 auto gpiNum = std::get<ucd90160::gpiNumField>(gpiConfig);
182 auto doPoll = std::get<ucd90160::pollField>(gpiConfig);
183
184 //Can skip this one if there is already an error on this input,
185 //or we are polling and these inputs don't need to be polled
186 //(because errors on them are fatal).
187 if (isPGOODFaultLogged(gpiNum) || (polling && !doPoll))
188 {
189 continue;
190 }
191
192 //The real time status is read via the pin ID
193 auto pinID = std::get<ucd90160::pinIDField>(gpiConfig);
194 auto gpio = gpios.find(pinID);
195 Value gpiStatus;
196
197 try
198 {
199 //The first time through, create the GPIO objects
200 if (gpio == gpios.end())
201 {
202 gpios.emplace(
203 pinID,
204 std::make_unique<GPIO>(
205 gpioDevice, pinID, Direction::input));
206 gpio = gpios.find(pinID);
207 }
208
209 gpiStatus = gpio->second->read();
210 }
211 catch (std::exception& e)
212 {
213 if (!accessError)
214 {
215 log<level::ERR>(e.what());
216 accessError = true;
217 }
218 continue;
219 }
220
221 if (gpiStatus == Value::low)
222 {
Matt Spinler8bc12832017-09-19 11:17:54 -0500223 //There may be some extra analysis we can do to narrow the
224 //error down further. Note that finding an error here won't
225 //prevent us from checking this GPI again.
226 errorCreated = doExtraAnalysis(gpiConfig);
227
228 if (errorCreated)
229 {
230 continue;
231 }
232
Matt Spinlerd998b732017-08-21 15:35:54 -0500233 auto& gpiName = std::get<ucd90160::gpiNameField>(gpiConfig);
234 auto status = (gpiStatus == Value::low) ? 0 : 1;
235
236 util::NamesValues nv;
237 nv.add("STATUS_WORD", readStatusWord());
238 nv.add("MFR_STATUS", readMFRStatus());
239 nv.add("INPUT_STATUS", status);
240
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500241 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlerd998b732017-08-21 15:35:54 -0500242 PowerSequencerPGOODFault;
243
Matt Spinlerceacf942017-10-05 13:55:02 -0500244 report<power_error::PowerSequencerPGOODFault>(
Matt Spinlerd998b732017-08-21 15:35:54 -0500245 metadata::INPUT_NUM(gpiNum),
246 metadata::INPUT_NAME(gpiName.c_str()),
247 metadata::RAW_STATUS(nv.get().c_str()));
248
249 setPGOODFaultLogged(gpiNum);
250 errorCreated = true;
251 }
252 }
253
254 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500255}
256
257void UCD90160::createPowerFaultLog()
258{
Matt Spinler9efb3082017-08-21 15:43:43 -0500259 util::NamesValues nv;
260 nv.add("STATUS_WORD", readStatusWord());
261 nv.add("MFR_STATUS", readMFRStatus());
Matt Spinlerb54357f2017-08-21 14:38:54 -0500262
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500263 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinler9efb3082017-08-21 15:43:43 -0500264 PowerSequencerFault;
265
Matt Spinlerceacf942017-10-05 13:55:02 -0500266 report<power_error::PowerSequencerFault>(
Matt Spinler9efb3082017-08-21 15:43:43 -0500267 metadata::RAW_STATUS(nv.get().c_str()));
Matt Spinlerb54357f2017-08-21 14:38:54 -0500268}
269
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500270fs::path UCD90160::findGPIODevice(const fs::path& path)
Matt Spinler110b2842017-08-21 15:23:27 -0500271{
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500272 fs::path gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500273
274 //In the driver directory, look for a subdirectory
275 //named gpiochipX, where X is some number. Then
276 //we'll access the GPIO at /dev/gpiochipX.
277 if (fs::is_directory(path))
278 {
279 for (auto& f : fs::directory_iterator(path))
280 {
281 if (f.path().filename().string().find("gpiochip") !=
282 std::string::npos)
283 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500284 gpioDevicePath = "/dev" / f.path().filename();
Matt Spinler110b2842017-08-21 15:23:27 -0500285 break;
286 }
287 }
288 }
289
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500290 if (gpioDevicePath.empty())
Matt Spinler110b2842017-08-21 15:23:27 -0500291 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500292 log<level::ERR>("Could not find GPIO device path",
Matt Spinler110b2842017-08-21 15:23:27 -0500293 entry("BASE_PATH=%s", path.c_str()));
294 }
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500295
296 return gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500297}
298
Matt Spinler8bc12832017-09-19 11:17:54 -0500299bool UCD90160::doExtraAnalysis(const ucd90160::GPIConfig& config)
300{
301
302 auto type = std::get<ucd90160::extraAnalysisField>(config);
303 if (type == ucd90160::extraAnalysisType::none)
304 {
305 return false;
306 }
307
308 //Currently the only extra analysis to do is to check other GPIOs.
309 return doGPIOAnalysis(type);
310}
311
312bool UCD90160::doGPIOAnalysis(ucd90160::extraAnalysisType type)
313{
314 bool errorFound = false;
Matt Spinlera8269652017-09-19 15:13:28 -0500315 bool shutdown = false;
Matt Spinler8bc12832017-09-19 11:17:54 -0500316
317 const auto& analysisConfig = std::get<ucd90160::gpioAnalysisField>(
318 deviceMap.find(getInstance())->second);
319
320 auto gpioConfig = analysisConfig.find(type);
321 if (gpioConfig == analysisConfig.end())
322 {
323 return errorFound;
324 }
325
326 auto path = std::get<ucd90160::gpioDevicePathField>(
327 gpioConfig->second);
328
329 //The /dev/gpiochipX device
330 auto device = findGPIODevice(path);
331
332 //The GPIO value of the fault condition
333 auto polarity = std::get<ucd90160::gpioPolarityField>(
334 gpioConfig->second);
335
336 //The GPIOs to check
337 auto& gpios = std::get<ucd90160::gpioDefinitionField>(
338 gpioConfig->second);
339
340 for (const auto& gpio : gpios)
341 {
342 gpio::Value value;
343
344 try
345 {
346 GPIO g{device,
347 std::get<ucd90160::gpioNumField>(gpio),
348 Direction::input};
349
350 value = g.read();
351 }
352 catch (std::exception& e)
353 {
354 if (!gpioAccessError)
355 {
356 //GPIO only throws InternalErrors - not worth committing.
357 log<level::ERR>(
358 "GPIO read failed while analyzing a power fault",
359 entry("CHIP_PATH=%s", path.c_str()));
360
361 gpioAccessError = true;
362 }
363 continue;
364 }
365
366 if (value == polarity)
367 {
368 errorFound = true;
369
370 auto part = std::get<ucd90160::gpioCalloutField>(gpio);
371 PartCallout callout{type, part};
372
373 if (isPartCalledOut(callout))
374 {
375 continue;
376 }
377
378 //Look up and call the error creation function
379 auto logError = std::get<ucd90160::errorFunctionField>(
380 gpioConfig->second);
381
382 logError(*this, part);
383
384 //Save the part callout so we don't call it out again
385 setPartCallout(callout);
Matt Spinlera8269652017-09-19 15:13:28 -0500386
387 //Some errors (like overtemps) require a shutdown
388 auto actions = static_cast<uint32_t>(
389 std::get<ucd90160::optionFlagsField>(gpioConfig->second));
390
391 if (actions & static_cast<decltype(actions)>(
392 ucd90160::optionFlags::shutdownOnFault))
393 {
394 shutdown = true;
395 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500396 }
397 }
398
Matt Spinlera8269652017-09-19 15:13:28 -0500399 if (shutdown)
400 {
401 util::powerOff(bus);
402 }
403
Matt Spinler8bc12832017-09-19 11:17:54 -0500404 return errorFound;
405}
406
Matt Spinler7b14db22017-09-19 10:57:54 -0500407void UCD90160::gpuPGOODError(const std::string& callout)
408{
409 util::NamesValues nv;
410 nv.add("STATUS_WORD", readStatusWord());
411 nv.add("MFR_STATUS", readMFRStatus());
412
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500413 using metadata = org::open_power::Witherspoon::Fault::GPUPowerFault;
Matt Spinler7b14db22017-09-19 10:57:54 -0500414
Matt Spinlerceacf942017-10-05 13:55:02 -0500415 report<power_error::GPUPowerFault>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500416 metadata::RAW_STATUS(nv.get().c_str()),
417 metadata::GPU(callout.c_str()));
418}
419
420void UCD90160::gpuOverTempError(const std::string& callout)
421{
422 util::NamesValues nv;
423 nv.add("STATUS_WORD", readStatusWord());
424 nv.add("MFR_STATUS", readMFRStatus());
425
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500426 using metadata = org::open_power::Witherspoon::Fault::GPUOverTemp;
Matt Spinler7b14db22017-09-19 10:57:54 -0500427
Matt Spinlerceacf942017-10-05 13:55:02 -0500428 report<power_error::GPUOverTemp>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500429 metadata::RAW_STATUS(nv.get().c_str()),
430 metadata::GPU(callout.c_str()));
431}
432
Matt Spinlerb54357f2017-08-21 14:38:54 -0500433}
434}