blob: bc1177cb588602c82661aacccce9fe76c1a93041 [file] [log] [blame]
/**
 * Copyright © 2017 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#include <map>
17#include <memory>
18#include <phosphor-logging/elog.hpp>
19#include <phosphor-logging/log.hpp>
20#include <elog-errors.hpp>
Brandon Wymane0eb45c2017-10-06 12:58:42 -050021#include <org/open_power/Witherspoon/Fault/error.hpp>
Matt Spinlerceacf942017-10-05 13:55:02 -050022#include <xyz/openbmc_project/Common/Device/error.hpp>
Matt Spinlere7e432b2017-08-21 15:01:40 -050023#include "names_values.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050024#include "ucd90160.hpp"
Matt Spinlera8269652017-09-19 15:13:28 -050025#include "utility.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050026
27namespace witherspoon
28{
29namespace power
30{
31
32using namespace std::string_literals;
33
Matt Spinlere7e432b2017-08-21 15:01:40 -050034const auto MFR_STATUS = "mfr_status"s;
Matt Spinler1e365692017-08-21 14:43:55 -050035
Matt Spinlerb54357f2017-08-21 14:38:54 -050036const auto DEVICE_NAME = "UCD90160"s;
37const auto DRIVER_NAME = "ucd9000"s;
Matt Spinlere7e432b2017-08-21 15:01:40 -050038constexpr auto NUM_PAGES = 16;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinler0e45ced2018-02-12 14:36:07 -060040constexpr auto INVENTORY_OBJ_PATH = "/xyz/openbmc_project/inventory";
41
Matt Spinler110b2842017-08-21 15:23:27 -050042namespace fs = std::experimental::filesystem;
Matt Spinlerd998b732017-08-21 15:35:54 -050043using namespace gpio;
Matt Spinlerb54357f2017-08-21 14:38:54 -050044using namespace pmbus;
45using namespace phosphor::logging;
Matt Spinlerceacf942017-10-05 13:55:02 -050046
47namespace device_error = sdbusplus::xyz::openbmc_project::
48 Common::Device::Error;
49namespace power_error = sdbusplus::org::open_power::
50 Witherspoon::Fault::Error;
Matt Spinlerb54357f2017-08-21 14:38:54 -050051
Matt Spinlera8269652017-09-19 15:13:28 -050052UCD90160::UCD90160(size_t instance, sdbusplus::bus::bus& bus) :
Matt Spinlerfcd4a712017-09-19 10:45:07 -050053 Device(DEVICE_NAME, instance),
54 interface(std::get<ucd90160::pathField>(
55 deviceMap.find(instance)->second),
56 DRIVER_NAME,
57 instance),
Matt Spinlera8269652017-09-19 15:13:28 -050058 gpioDevice(findGPIODevice(interface.path())),
59 bus(bus)
Matt Spinlerb54357f2017-08-21 14:38:54 -050060{
61}
62
63void UCD90160::onFailure()
64{
65 try
66 {
67 auto voutError = checkVOUTFaults();
68
69 auto pgoodError = checkPGOODFaults(false);
70
71 //Not a voltage or PGOOD fault, but we know something
72 //failed so still create an error log.
73 if (!voutError && !pgoodError)
74 {
75 createPowerFaultLog();
76 }
77 }
Matt Spinlerceacf942017-10-05 13:55:02 -050078 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050079 {
80 if (!accessError)
81 {
Matt Spinlerceacf942017-10-05 13:55:02 -050082 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -050083 accessError = true;
84 }
85 }
86}
87
88void UCD90160::analyze()
89{
90 try
91 {
92 //Note: Voltage faults are always fatal, so they just
93 //need to be analyzed in onFailure().
94
95 checkPGOODFaults(true);
96 }
Matt Spinlerceacf942017-10-05 13:55:02 -050097 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050098 {
99 if (!accessError)
100 {
Matt Spinlerceacf942017-10-05 13:55:02 -0500101 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500102 accessError = true;
103 }
104 }
105}
106
Matt Spinlere7e432b2017-08-21 15:01:40 -0500107uint16_t UCD90160::readStatusWord()
108{
109 return interface.read(STATUS_WORD, Type::Debug);
110}
111
112uint32_t UCD90160::readMFRStatus()
113{
Matt Spinler233a2522018-03-30 14:40:04 -0500114 return interface.read(MFR_STATUS, Type::HwmonDeviceDebug);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500115}
116
Matt Spinlerb54357f2017-08-21 14:38:54 -0500117bool UCD90160::checkVOUTFaults()
118{
Matt Spinlere7e432b2017-08-21 15:01:40 -0500119 bool errorCreated = false;
120 auto statusWord = readStatusWord();
121
122 //The status_word register has a summary bit to tell us
123 //if each page even needs to be checked
124 if (!(statusWord & status_word::VOUT_FAULT))
125 {
126 return errorCreated;
127 }
128
129 for (size_t page = 0; page < NUM_PAGES; page++)
130 {
131 if (isVoutFaultLogged(page))
132 {
133 continue;
134 }
135
136 auto statusVout = interface.insertPageNum(STATUS_VOUT, page);
137 uint8_t vout = interface.read(statusVout, Type::Debug);
138
Matt Spinlerde16d052017-12-13 13:22:14 -0600139 //If any bits are on log them, though some are just
140 //warnings so they won't cause errors
Matt Spinlere7e432b2017-08-21 15:01:40 -0500141 if (vout)
142 {
Matt Spinlerde16d052017-12-13 13:22:14 -0600143 log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
144 entry("STATUS_VOUT=0x%X", vout),
145 entry("PAGE=%d", page));
146 }
147
148 //Log errors if any non-warning bits on
149 if (vout & ~status_vout::WARNING_MASK)
150 {
Matt Spinlere7e432b2017-08-21 15:01:40 -0500151 auto& railNames = std::get<ucd90160::railNamesField>(
152 deviceMap.find(getInstance())->second);
153 auto railName = railNames.at(page);
154
155 util::NamesValues nv;
Matt Spinler6def9092018-02-27 14:22:59 -0600156 try
157 {
158 nv.add("STATUS_WORD", statusWord);
159 nv.add("STATUS_VOUT", vout);
160 nv.add("MFR_STATUS", readMFRStatus());
161 }
162 catch (device_error::ReadFailure& e)
163 {
164 log<level::ERR>("ReadFailure when collecting metadata");
165 commit<device_error::ReadFailure>();
166 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500167
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500168 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlere7e432b2017-08-21 15:01:40 -0500169 PowerSequencerVoltageFault;
170
Matt Spinlerceacf942017-10-05 13:55:02 -0500171 report<power_error::PowerSequencerVoltageFault>(
Matt Spinlere7e432b2017-08-21 15:01:40 -0500172 metadata::RAIL(page),
173 metadata::RAIL_NAME(railName.c_str()),
174 metadata::RAW_STATUS(nv.get().c_str()));
175
176 setVoutFaultLogged(page);
177 errorCreated = true;
178 }
179 }
180
181 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500182}
183
184bool UCD90160::checkPGOODFaults(bool polling)
185{
Matt Spinlerd998b732017-08-21 15:35:54 -0500186 bool errorCreated = false;
187
188 //While PGOOD faults could show up in MFR_STATUS (and we could then
189 //check the summary bit in STATUS_WORD first), they are edge triggered,
190 //and as the device driver sends a clear faults command every time we
191 //do a read, we will never see them. So, we'll have to just read the
192 //real time GPI status GPIO.
193
194 //Check only the GPIs configured on this system.
195 auto& gpiConfigs = std::get<ucd90160::gpiConfigField>(
196 deviceMap.find(getInstance())->second);
197
198 for (const auto& gpiConfig : gpiConfigs)
199 {
200 auto gpiNum = std::get<ucd90160::gpiNumField>(gpiConfig);
201 auto doPoll = std::get<ucd90160::pollField>(gpiConfig);
202
203 //Can skip this one if there is already an error on this input,
204 //or we are polling and these inputs don't need to be polled
205 //(because errors on them are fatal).
206 if (isPGOODFaultLogged(gpiNum) || (polling && !doPoll))
207 {
208 continue;
209 }
210
211 //The real time status is read via the pin ID
212 auto pinID = std::get<ucd90160::pinIDField>(gpiConfig);
213 auto gpio = gpios.find(pinID);
214 Value gpiStatus;
215
216 try
217 {
218 //The first time through, create the GPIO objects
219 if (gpio == gpios.end())
220 {
221 gpios.emplace(
222 pinID,
223 std::make_unique<GPIO>(
224 gpioDevice, pinID, Direction::input));
225 gpio = gpios.find(pinID);
226 }
227
228 gpiStatus = gpio->second->read();
229 }
230 catch (std::exception& e)
231 {
232 if (!accessError)
233 {
234 log<level::ERR>(e.what());
235 accessError = true;
236 }
237 continue;
238 }
239
240 if (gpiStatus == Value::low)
241 {
Matt Spinler8bc12832017-09-19 11:17:54 -0500242 //There may be some extra analysis we can do to narrow the
243 //error down further. Note that finding an error here won't
244 //prevent us from checking this GPI again.
245 errorCreated = doExtraAnalysis(gpiConfig);
246
247 if (errorCreated)
248 {
249 continue;
250 }
251
Matt Spinlerd998b732017-08-21 15:35:54 -0500252 auto& gpiName = std::get<ucd90160::gpiNameField>(gpiConfig);
253 auto status = (gpiStatus == Value::low) ? 0 : 1;
254
255 util::NamesValues nv;
Matt Spinler6def9092018-02-27 14:22:59 -0600256
257 try
258 {
259 nv.add("STATUS_WORD", readStatusWord());
260 nv.add("MFR_STATUS", readMFRStatus());
261 nv.add("INPUT_STATUS", status);
262 }
263 catch (device_error::ReadFailure& e)
264 {
265 log<level::ERR>("ReadFailure when collecting metadata");
266 commit<device_error::ReadFailure>();
267 }
Matt Spinlerd998b732017-08-21 15:35:54 -0500268
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500269 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlerd998b732017-08-21 15:35:54 -0500270 PowerSequencerPGOODFault;
271
Matt Spinlerceacf942017-10-05 13:55:02 -0500272 report<power_error::PowerSequencerPGOODFault>(
Matt Spinlerd998b732017-08-21 15:35:54 -0500273 metadata::INPUT_NUM(gpiNum),
274 metadata::INPUT_NAME(gpiName.c_str()),
275 metadata::RAW_STATUS(nv.get().c_str()));
276
277 setPGOODFaultLogged(gpiNum);
278 errorCreated = true;
279 }
280 }
281
282 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500283}
284
285void UCD90160::createPowerFaultLog()
286{
Matt Spinler9efb3082017-08-21 15:43:43 -0500287 util::NamesValues nv;
Matt Spinler6def9092018-02-27 14:22:59 -0600288
289 try
290 {
291 nv.add("STATUS_WORD", readStatusWord());
292 nv.add("MFR_STATUS", readMFRStatus());
293 }
294 catch (device_error::ReadFailure& e)
295 {
296 log<level::ERR>("ReadFailure when collecting metadata");
297 commit<device_error::ReadFailure>();
298 }
Matt Spinlerb54357f2017-08-21 14:38:54 -0500299
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500300 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinler9efb3082017-08-21 15:43:43 -0500301 PowerSequencerFault;
302
Matt Spinlerceacf942017-10-05 13:55:02 -0500303 report<power_error::PowerSequencerFault>(
Matt Spinler9efb3082017-08-21 15:43:43 -0500304 metadata::RAW_STATUS(nv.get().c_str()));
Matt Spinlerb54357f2017-08-21 14:38:54 -0500305}
306
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500307fs::path UCD90160::findGPIODevice(const fs::path& path)
Matt Spinler110b2842017-08-21 15:23:27 -0500308{
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500309 fs::path gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500310
311 //In the driver directory, look for a subdirectory
312 //named gpiochipX, where X is some number. Then
313 //we'll access the GPIO at /dev/gpiochipX.
314 if (fs::is_directory(path))
315 {
316 for (auto& f : fs::directory_iterator(path))
317 {
318 if (f.path().filename().string().find("gpiochip") !=
319 std::string::npos)
320 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500321 gpioDevicePath = "/dev" / f.path().filename();
Matt Spinler110b2842017-08-21 15:23:27 -0500322 break;
323 }
324 }
325 }
326
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500327 if (gpioDevicePath.empty())
Matt Spinler110b2842017-08-21 15:23:27 -0500328 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500329 log<level::ERR>("Could not find GPIO device path",
Matt Spinler110b2842017-08-21 15:23:27 -0500330 entry("BASE_PATH=%s", path.c_str()));
331 }
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500332
333 return gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500334}
335
Matt Spinler8bc12832017-09-19 11:17:54 -0500336bool UCD90160::doExtraAnalysis(const ucd90160::GPIConfig& config)
337{
338
339 auto type = std::get<ucd90160::extraAnalysisField>(config);
340 if (type == ucd90160::extraAnalysisType::none)
341 {
342 return false;
343 }
344
345 //Currently the only extra analysis to do is to check other GPIOs.
346 return doGPIOAnalysis(type);
347}
348
349bool UCD90160::doGPIOAnalysis(ucd90160::extraAnalysisType type)
350{
351 bool errorFound = false;
Matt Spinlera8269652017-09-19 15:13:28 -0500352 bool shutdown = false;
Matt Spinler8bc12832017-09-19 11:17:54 -0500353
354 const auto& analysisConfig = std::get<ucd90160::gpioAnalysisField>(
355 deviceMap.find(getInstance())->second);
356
357 auto gpioConfig = analysisConfig.find(type);
358 if (gpioConfig == analysisConfig.end())
359 {
360 return errorFound;
361 }
362
363 auto path = std::get<ucd90160::gpioDevicePathField>(
364 gpioConfig->second);
365
366 //The /dev/gpiochipX device
367 auto device = findGPIODevice(path);
368
369 //The GPIO value of the fault condition
370 auto polarity = std::get<ucd90160::gpioPolarityField>(
371 gpioConfig->second);
372
373 //The GPIOs to check
374 auto& gpios = std::get<ucd90160::gpioDefinitionField>(
375 gpioConfig->second);
376
377 for (const auto& gpio : gpios)
378 {
379 gpio::Value value;
380
381 try
382 {
383 GPIO g{device,
384 std::get<ucd90160::gpioNumField>(gpio),
385 Direction::input};
386
387 value = g.read();
388 }
389 catch (std::exception& e)
390 {
391 if (!gpioAccessError)
392 {
393 //GPIO only throws InternalErrors - not worth committing.
394 log<level::ERR>(
395 "GPIO read failed while analyzing a power fault",
396 entry("CHIP_PATH=%s", path.c_str()));
397
398 gpioAccessError = true;
399 }
400 continue;
401 }
402
403 if (value == polarity)
404 {
405 errorFound = true;
406
Matt Spinler0e45ced2018-02-12 14:36:07 -0600407 std::string part{INVENTORY_OBJ_PATH};
408 part = part + std::get<ucd90160::gpioCalloutField>(gpio);
Matt Spinler8bc12832017-09-19 11:17:54 -0500409 PartCallout callout{type, part};
410
411 if (isPartCalledOut(callout))
412 {
413 continue;
414 }
415
416 //Look up and call the error creation function
417 auto logError = std::get<ucd90160::errorFunctionField>(
418 gpioConfig->second);
419
420 logError(*this, part);
421
422 //Save the part callout so we don't call it out again
423 setPartCallout(callout);
Matt Spinlera8269652017-09-19 15:13:28 -0500424
425 //Some errors (like overtemps) require a shutdown
426 auto actions = static_cast<uint32_t>(
427 std::get<ucd90160::optionFlagsField>(gpioConfig->second));
428
429 if (actions & static_cast<decltype(actions)>(
430 ucd90160::optionFlags::shutdownOnFault))
431 {
432 shutdown = true;
433 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500434 }
435 }
436
Matt Spinlera8269652017-09-19 15:13:28 -0500437 if (shutdown)
438 {
Matt Spinler882ce952017-10-05 16:12:41 -0500439 //Will be replaced with a GPU specific error in a future commit
440 util::powerOff<power_error::Shutdown>(bus);
Matt Spinlera8269652017-09-19 15:13:28 -0500441 }
442
Matt Spinler8bc12832017-09-19 11:17:54 -0500443 return errorFound;
444}
445
Matt Spinler7b14db22017-09-19 10:57:54 -0500446void UCD90160::gpuPGOODError(const std::string& callout)
447{
448 util::NamesValues nv;
Matt Spinler6def9092018-02-27 14:22:59 -0600449
450 try
451 {
452 nv.add("STATUS_WORD", readStatusWord());
453 nv.add("MFR_STATUS", readMFRStatus());
454 }
455 catch (device_error::ReadFailure& e)
456 {
457 log<level::ERR>("ReadFailure when collecting metadata");
458 commit<device_error::ReadFailure>();
459 }
Matt Spinler7b14db22017-09-19 10:57:54 -0500460
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500461 using metadata = org::open_power::Witherspoon::Fault::GPUPowerFault;
Matt Spinler7b14db22017-09-19 10:57:54 -0500462
Matt Spinlerceacf942017-10-05 13:55:02 -0500463 report<power_error::GPUPowerFault>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500464 metadata::RAW_STATUS(nv.get().c_str()),
Matt Spinler0e45ced2018-02-12 14:36:07 -0600465 metadata::CALLOUT_INVENTORY_PATH(callout.c_str()));
Matt Spinler7b14db22017-09-19 10:57:54 -0500466}
467
468void UCD90160::gpuOverTempError(const std::string& callout)
469{
470 util::NamesValues nv;
Matt Spinler6def9092018-02-27 14:22:59 -0600471
472 try
473 {
474 nv.add("STATUS_WORD", readStatusWord());
475 nv.add("MFR_STATUS", readMFRStatus());
476 }
477 catch (device_error::ReadFailure& e)
478 {
479 log<level::ERR>("ReadFailure when collecting metadata");
480 commit<device_error::ReadFailure>();
481 }
Matt Spinler7b14db22017-09-19 10:57:54 -0500482
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500483 using metadata = org::open_power::Witherspoon::Fault::GPUOverTemp;
Matt Spinler7b14db22017-09-19 10:57:54 -0500484
Matt Spinlerceacf942017-10-05 13:55:02 -0500485 report<power_error::GPUOverTemp>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500486 metadata::RAW_STATUS(nv.get().c_str()),
Matt Spinler0e45ced2018-02-12 14:36:07 -0600487 metadata::CALLOUT_INVENTORY_PATH(callout.c_str()));
Matt Spinler7b14db22017-09-19 10:57:54 -0500488}
489
Matt Spinlerb54357f2017-08-21 14:38:54 -0500490}
491}