/**
 * Copyright © 2017 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#include <map>
17#include <memory>
18#include <phosphor-logging/elog.hpp>
19#include <phosphor-logging/log.hpp>
20#include <elog-errors.hpp>
Brandon Wymane0eb45c2017-10-06 12:58:42 -050021#include <org/open_power/Witherspoon/Fault/error.hpp>
Matt Spinlerceacf942017-10-05 13:55:02 -050022#include <xyz/openbmc_project/Common/Device/error.hpp>
Matt Spinlere7e432b2017-08-21 15:01:40 -050023#include "names_values.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050024#include "ucd90160.hpp"
Matt Spinlera8269652017-09-19 15:13:28 -050025#include "utility.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050026
27namespace witherspoon
28{
29namespace power
30{
31
32using namespace std::string_literals;
33
Matt Spinlere7e432b2017-08-21 15:01:40 -050034const auto MFR_STATUS = "mfr_status"s;
Matt Spinler1e365692017-08-21 14:43:55 -050035
Matt Spinlerb54357f2017-08-21 14:38:54 -050036const auto DEVICE_NAME = "UCD90160"s;
37const auto DRIVER_NAME = "ucd9000"s;
Matt Spinlere7e432b2017-08-21 15:01:40 -050038constexpr auto NUM_PAGES = 16;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinler0e45ced2018-02-12 14:36:07 -060040constexpr auto INVENTORY_OBJ_PATH = "/xyz/openbmc_project/inventory";
41
Matt Spinler110b2842017-08-21 15:23:27 -050042namespace fs = std::experimental::filesystem;
Matt Spinlerd998b732017-08-21 15:35:54 -050043using namespace gpio;
Matt Spinlerb54357f2017-08-21 14:38:54 -050044using namespace pmbus;
45using namespace phosphor::logging;
Matt Spinlerceacf942017-10-05 13:55:02 -050046
47namespace device_error = sdbusplus::xyz::openbmc_project::
48 Common::Device::Error;
49namespace power_error = sdbusplus::org::open_power::
50 Witherspoon::Fault::Error;
Matt Spinlerb54357f2017-08-21 14:38:54 -050051
Matt Spinlera8269652017-09-19 15:13:28 -050052UCD90160::UCD90160(size_t instance, sdbusplus::bus::bus& bus) :
Matt Spinlerfcd4a712017-09-19 10:45:07 -050053 Device(DEVICE_NAME, instance),
54 interface(std::get<ucd90160::pathField>(
55 deviceMap.find(instance)->second),
56 DRIVER_NAME,
57 instance),
Matt Spinlera8269652017-09-19 15:13:28 -050058 gpioDevice(findGPIODevice(interface.path())),
59 bus(bus)
Matt Spinlerb54357f2017-08-21 14:38:54 -050060{
61}
62
63void UCD90160::onFailure()
64{
65 try
66 {
67 auto voutError = checkVOUTFaults();
68
69 auto pgoodError = checkPGOODFaults(false);
70
71 //Not a voltage or PGOOD fault, but we know something
72 //failed so still create an error log.
73 if (!voutError && !pgoodError)
74 {
75 createPowerFaultLog();
76 }
77 }
Matt Spinlerceacf942017-10-05 13:55:02 -050078 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050079 {
80 if (!accessError)
81 {
Matt Spinlerceacf942017-10-05 13:55:02 -050082 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -050083 accessError = true;
84 }
85 }
86}
87
88void UCD90160::analyze()
89{
90 try
91 {
92 //Note: Voltage faults are always fatal, so they just
93 //need to be analyzed in onFailure().
94
95 checkPGOODFaults(true);
96 }
Matt Spinlerceacf942017-10-05 13:55:02 -050097 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050098 {
99 if (!accessError)
100 {
Matt Spinlerceacf942017-10-05 13:55:02 -0500101 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500102 accessError = true;
103 }
104 }
105}
106
Matt Spinlere7e432b2017-08-21 15:01:40 -0500107uint16_t UCD90160::readStatusWord()
108{
109 return interface.read(STATUS_WORD, Type::Debug);
110}
111
112uint32_t UCD90160::readMFRStatus()
113{
114 return interface.read(MFR_STATUS, Type::DeviceDebug);
115}
116
Matt Spinlerb54357f2017-08-21 14:38:54 -0500117bool UCD90160::checkVOUTFaults()
118{
Matt Spinlere7e432b2017-08-21 15:01:40 -0500119 bool errorCreated = false;
120 auto statusWord = readStatusWord();
121
122 //The status_word register has a summary bit to tell us
123 //if each page even needs to be checked
124 if (!(statusWord & status_word::VOUT_FAULT))
125 {
126 return errorCreated;
127 }
128
129 for (size_t page = 0; page < NUM_PAGES; page++)
130 {
131 if (isVoutFaultLogged(page))
132 {
133 continue;
134 }
135
136 auto statusVout = interface.insertPageNum(STATUS_VOUT, page);
137 uint8_t vout = interface.read(statusVout, Type::Debug);
138
Matt Spinlerde16d052017-12-13 13:22:14 -0600139 //If any bits are on log them, though some are just
140 //warnings so they won't cause errors
Matt Spinlere7e432b2017-08-21 15:01:40 -0500141 if (vout)
142 {
Matt Spinlerde16d052017-12-13 13:22:14 -0600143 log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
144 entry("STATUS_VOUT=0x%X", vout),
145 entry("PAGE=%d", page));
146 }
147
148 //Log errors if any non-warning bits on
149 if (vout & ~status_vout::WARNING_MASK)
150 {
Matt Spinlere7e432b2017-08-21 15:01:40 -0500151 auto& railNames = std::get<ucd90160::railNamesField>(
152 deviceMap.find(getInstance())->second);
153 auto railName = railNames.at(page);
154
155 util::NamesValues nv;
156 nv.add("STATUS_WORD", statusWord);
157 nv.add("STATUS_VOUT", vout);
158 nv.add("MFR_STATUS", readMFRStatus());
159
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500160 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlere7e432b2017-08-21 15:01:40 -0500161 PowerSequencerVoltageFault;
162
Matt Spinlerceacf942017-10-05 13:55:02 -0500163 report<power_error::PowerSequencerVoltageFault>(
Matt Spinlere7e432b2017-08-21 15:01:40 -0500164 metadata::RAIL(page),
165 metadata::RAIL_NAME(railName.c_str()),
166 metadata::RAW_STATUS(nv.get().c_str()));
167
168 setVoutFaultLogged(page);
169 errorCreated = true;
170 }
171 }
172
173 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500174}
175
176bool UCD90160::checkPGOODFaults(bool polling)
177{
Matt Spinlerd998b732017-08-21 15:35:54 -0500178 bool errorCreated = false;
179
180 //While PGOOD faults could show up in MFR_STATUS (and we could then
181 //check the summary bit in STATUS_WORD first), they are edge triggered,
182 //and as the device driver sends a clear faults command every time we
183 //do a read, we will never see them. So, we'll have to just read the
184 //real time GPI status GPIO.
185
186 //Check only the GPIs configured on this system.
187 auto& gpiConfigs = std::get<ucd90160::gpiConfigField>(
188 deviceMap.find(getInstance())->second);
189
190 for (const auto& gpiConfig : gpiConfigs)
191 {
192 auto gpiNum = std::get<ucd90160::gpiNumField>(gpiConfig);
193 auto doPoll = std::get<ucd90160::pollField>(gpiConfig);
194
195 //Can skip this one if there is already an error on this input,
196 //or we are polling and these inputs don't need to be polled
197 //(because errors on them are fatal).
198 if (isPGOODFaultLogged(gpiNum) || (polling && !doPoll))
199 {
200 continue;
201 }
202
203 //The real time status is read via the pin ID
204 auto pinID = std::get<ucd90160::pinIDField>(gpiConfig);
205 auto gpio = gpios.find(pinID);
206 Value gpiStatus;
207
208 try
209 {
210 //The first time through, create the GPIO objects
211 if (gpio == gpios.end())
212 {
213 gpios.emplace(
214 pinID,
215 std::make_unique<GPIO>(
216 gpioDevice, pinID, Direction::input));
217 gpio = gpios.find(pinID);
218 }
219
220 gpiStatus = gpio->second->read();
221 }
222 catch (std::exception& e)
223 {
224 if (!accessError)
225 {
226 log<level::ERR>(e.what());
227 accessError = true;
228 }
229 continue;
230 }
231
232 if (gpiStatus == Value::low)
233 {
Matt Spinler8bc12832017-09-19 11:17:54 -0500234 //There may be some extra analysis we can do to narrow the
235 //error down further. Note that finding an error here won't
236 //prevent us from checking this GPI again.
237 errorCreated = doExtraAnalysis(gpiConfig);
238
239 if (errorCreated)
240 {
241 continue;
242 }
243
Matt Spinlerd998b732017-08-21 15:35:54 -0500244 auto& gpiName = std::get<ucd90160::gpiNameField>(gpiConfig);
245 auto status = (gpiStatus == Value::low) ? 0 : 1;
246
247 util::NamesValues nv;
248 nv.add("STATUS_WORD", readStatusWord());
249 nv.add("MFR_STATUS", readMFRStatus());
250 nv.add("INPUT_STATUS", status);
251
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500252 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlerd998b732017-08-21 15:35:54 -0500253 PowerSequencerPGOODFault;
254
Matt Spinlerceacf942017-10-05 13:55:02 -0500255 report<power_error::PowerSequencerPGOODFault>(
Matt Spinlerd998b732017-08-21 15:35:54 -0500256 metadata::INPUT_NUM(gpiNum),
257 metadata::INPUT_NAME(gpiName.c_str()),
258 metadata::RAW_STATUS(nv.get().c_str()));
259
260 setPGOODFaultLogged(gpiNum);
261 errorCreated = true;
262 }
263 }
264
265 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500266}
267
268void UCD90160::createPowerFaultLog()
269{
Matt Spinler9efb3082017-08-21 15:43:43 -0500270 util::NamesValues nv;
271 nv.add("STATUS_WORD", readStatusWord());
272 nv.add("MFR_STATUS", readMFRStatus());
Matt Spinlerb54357f2017-08-21 14:38:54 -0500273
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500274 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinler9efb3082017-08-21 15:43:43 -0500275 PowerSequencerFault;
276
Matt Spinlerceacf942017-10-05 13:55:02 -0500277 report<power_error::PowerSequencerFault>(
Matt Spinler9efb3082017-08-21 15:43:43 -0500278 metadata::RAW_STATUS(nv.get().c_str()));
Matt Spinlerb54357f2017-08-21 14:38:54 -0500279}
280
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500281fs::path UCD90160::findGPIODevice(const fs::path& path)
Matt Spinler110b2842017-08-21 15:23:27 -0500282{
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500283 fs::path gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500284
285 //In the driver directory, look for a subdirectory
286 //named gpiochipX, where X is some number. Then
287 //we'll access the GPIO at /dev/gpiochipX.
288 if (fs::is_directory(path))
289 {
290 for (auto& f : fs::directory_iterator(path))
291 {
292 if (f.path().filename().string().find("gpiochip") !=
293 std::string::npos)
294 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500295 gpioDevicePath = "/dev" / f.path().filename();
Matt Spinler110b2842017-08-21 15:23:27 -0500296 break;
297 }
298 }
299 }
300
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500301 if (gpioDevicePath.empty())
Matt Spinler110b2842017-08-21 15:23:27 -0500302 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500303 log<level::ERR>("Could not find GPIO device path",
Matt Spinler110b2842017-08-21 15:23:27 -0500304 entry("BASE_PATH=%s", path.c_str()));
305 }
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500306
307 return gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500308}
309
Matt Spinler8bc12832017-09-19 11:17:54 -0500310bool UCD90160::doExtraAnalysis(const ucd90160::GPIConfig& config)
311{
312
313 auto type = std::get<ucd90160::extraAnalysisField>(config);
314 if (type == ucd90160::extraAnalysisType::none)
315 {
316 return false;
317 }
318
319 //Currently the only extra analysis to do is to check other GPIOs.
320 return doGPIOAnalysis(type);
321}
322
323bool UCD90160::doGPIOAnalysis(ucd90160::extraAnalysisType type)
324{
325 bool errorFound = false;
Matt Spinlera8269652017-09-19 15:13:28 -0500326 bool shutdown = false;
Matt Spinler8bc12832017-09-19 11:17:54 -0500327
328 const auto& analysisConfig = std::get<ucd90160::gpioAnalysisField>(
329 deviceMap.find(getInstance())->second);
330
331 auto gpioConfig = analysisConfig.find(type);
332 if (gpioConfig == analysisConfig.end())
333 {
334 return errorFound;
335 }
336
337 auto path = std::get<ucd90160::gpioDevicePathField>(
338 gpioConfig->second);
339
340 //The /dev/gpiochipX device
341 auto device = findGPIODevice(path);
342
343 //The GPIO value of the fault condition
344 auto polarity = std::get<ucd90160::gpioPolarityField>(
345 gpioConfig->second);
346
347 //The GPIOs to check
348 auto& gpios = std::get<ucd90160::gpioDefinitionField>(
349 gpioConfig->second);
350
351 for (const auto& gpio : gpios)
352 {
353 gpio::Value value;
354
355 try
356 {
357 GPIO g{device,
358 std::get<ucd90160::gpioNumField>(gpio),
359 Direction::input};
360
361 value = g.read();
362 }
363 catch (std::exception& e)
364 {
365 if (!gpioAccessError)
366 {
367 //GPIO only throws InternalErrors - not worth committing.
368 log<level::ERR>(
369 "GPIO read failed while analyzing a power fault",
370 entry("CHIP_PATH=%s", path.c_str()));
371
372 gpioAccessError = true;
373 }
374 continue;
375 }
376
377 if (value == polarity)
378 {
379 errorFound = true;
380
Matt Spinler0e45ced2018-02-12 14:36:07 -0600381 std::string part{INVENTORY_OBJ_PATH};
382 part = part + std::get<ucd90160::gpioCalloutField>(gpio);
Matt Spinler8bc12832017-09-19 11:17:54 -0500383 PartCallout callout{type, part};
384
385 if (isPartCalledOut(callout))
386 {
387 continue;
388 }
389
390 //Look up and call the error creation function
391 auto logError = std::get<ucd90160::errorFunctionField>(
392 gpioConfig->second);
393
394 logError(*this, part);
395
396 //Save the part callout so we don't call it out again
397 setPartCallout(callout);
Matt Spinlera8269652017-09-19 15:13:28 -0500398
399 //Some errors (like overtemps) require a shutdown
400 auto actions = static_cast<uint32_t>(
401 std::get<ucd90160::optionFlagsField>(gpioConfig->second));
402
403 if (actions & static_cast<decltype(actions)>(
404 ucd90160::optionFlags::shutdownOnFault))
405 {
406 shutdown = true;
407 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500408 }
409 }
410
Matt Spinlera8269652017-09-19 15:13:28 -0500411 if (shutdown)
412 {
Matt Spinler882ce952017-10-05 16:12:41 -0500413 //Will be replaced with a GPU specific error in a future commit
414 util::powerOff<power_error::Shutdown>(bus);
Matt Spinlera8269652017-09-19 15:13:28 -0500415 }
416
Matt Spinler8bc12832017-09-19 11:17:54 -0500417 return errorFound;
418}
419
Matt Spinler7b14db22017-09-19 10:57:54 -0500420void UCD90160::gpuPGOODError(const std::string& callout)
421{
422 util::NamesValues nv;
423 nv.add("STATUS_WORD", readStatusWord());
424 nv.add("MFR_STATUS", readMFRStatus());
425
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500426 using metadata = org::open_power::Witherspoon::Fault::GPUPowerFault;
Matt Spinler7b14db22017-09-19 10:57:54 -0500427
Matt Spinlerceacf942017-10-05 13:55:02 -0500428 report<power_error::GPUPowerFault>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500429 metadata::RAW_STATUS(nv.get().c_str()),
Matt Spinler0e45ced2018-02-12 14:36:07 -0600430 metadata::CALLOUT_INVENTORY_PATH(callout.c_str()));
Matt Spinler7b14db22017-09-19 10:57:54 -0500431}
432
433void UCD90160::gpuOverTempError(const std::string& callout)
434{
435 util::NamesValues nv;
436 nv.add("STATUS_WORD", readStatusWord());
437 nv.add("MFR_STATUS", readMFRStatus());
438
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500439 using metadata = org::open_power::Witherspoon::Fault::GPUOverTemp;
Matt Spinler7b14db22017-09-19 10:57:54 -0500440
Matt Spinlerceacf942017-10-05 13:55:02 -0500441 report<power_error::GPUOverTemp>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500442 metadata::RAW_STATUS(nv.get().c_str()),
Matt Spinler0e45ced2018-02-12 14:36:07 -0600443 metadata::CALLOUT_INVENTORY_PATH(callout.c_str()));
Matt Spinler7b14db22017-09-19 10:57:54 -0500444}
445
Matt Spinlerb54357f2017-08-21 14:38:54 -0500446}
447}