blob: 79f2915c9ccb33a22d831fcae7ef411923973130 [file] [log] [blame]
/**
 * Copyright © 2017 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#include <map>
17#include <memory>
18#include <phosphor-logging/elog.hpp>
19#include <phosphor-logging/log.hpp>
20#include <elog-errors.hpp>
Brandon Wymane0eb45c2017-10-06 12:58:42 -050021#include <org/open_power/Witherspoon/Fault/error.hpp>
Matt Spinlerceacf942017-10-05 13:55:02 -050022#include <xyz/openbmc_project/Common/Device/error.hpp>
Matt Spinlere7e432b2017-08-21 15:01:40 -050023#include "names_values.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050024#include "ucd90160.hpp"
Matt Spinlera8269652017-09-19 15:13:28 -050025#include "utility.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050026
27namespace witherspoon
28{
29namespace power
30{
31
32using namespace std::string_literals;
33
Matt Spinlere7e432b2017-08-21 15:01:40 -050034const auto MFR_STATUS = "mfr_status"s;
Matt Spinler1e365692017-08-21 14:43:55 -050035
Matt Spinlerb54357f2017-08-21 14:38:54 -050036const auto DEVICE_NAME = "UCD90160"s;
37const auto DRIVER_NAME = "ucd9000"s;
Matt Spinlere7e432b2017-08-21 15:01:40 -050038constexpr auto NUM_PAGES = 16;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinler110b2842017-08-21 15:23:27 -050040namespace fs = std::experimental::filesystem;
Matt Spinlerd998b732017-08-21 15:35:54 -050041using namespace gpio;
Matt Spinlerb54357f2017-08-21 14:38:54 -050042using namespace pmbus;
43using namespace phosphor::logging;
Matt Spinlerceacf942017-10-05 13:55:02 -050044
45namespace device_error = sdbusplus::xyz::openbmc_project::
46 Common::Device::Error;
47namespace power_error = sdbusplus::org::open_power::
48 Witherspoon::Fault::Error;
Matt Spinlerb54357f2017-08-21 14:38:54 -050049
Matt Spinlera8269652017-09-19 15:13:28 -050050UCD90160::UCD90160(size_t instance, sdbusplus::bus::bus& bus) :
Matt Spinlerfcd4a712017-09-19 10:45:07 -050051 Device(DEVICE_NAME, instance),
52 interface(std::get<ucd90160::pathField>(
53 deviceMap.find(instance)->second),
54 DRIVER_NAME,
55 instance),
Matt Spinlera8269652017-09-19 15:13:28 -050056 gpioDevice(findGPIODevice(interface.path())),
57 bus(bus)
Matt Spinlerb54357f2017-08-21 14:38:54 -050058{
59}
60
61void UCD90160::onFailure()
62{
63 try
64 {
65 auto voutError = checkVOUTFaults();
66
67 auto pgoodError = checkPGOODFaults(false);
68
69 //Not a voltage or PGOOD fault, but we know something
70 //failed so still create an error log.
71 if (!voutError && !pgoodError)
72 {
73 createPowerFaultLog();
74 }
75 }
Matt Spinlerceacf942017-10-05 13:55:02 -050076 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050077 {
78 if (!accessError)
79 {
Matt Spinlerceacf942017-10-05 13:55:02 -050080 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -050081 accessError = true;
82 }
83 }
84}
85
86void UCD90160::analyze()
87{
88 try
89 {
90 //Note: Voltage faults are always fatal, so they just
91 //need to be analyzed in onFailure().
92
93 checkPGOODFaults(true);
94 }
Matt Spinlerceacf942017-10-05 13:55:02 -050095 catch (device_error::ReadFailure& e)
Matt Spinlerb54357f2017-08-21 14:38:54 -050096 {
97 if (!accessError)
98 {
Matt Spinlerceacf942017-10-05 13:55:02 -050099 commit<device_error::ReadFailure>();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500100 accessError = true;
101 }
102 }
103}
104
Matt Spinlere7e432b2017-08-21 15:01:40 -0500105uint16_t UCD90160::readStatusWord()
106{
107 return interface.read(STATUS_WORD, Type::Debug);
108}
109
110uint32_t UCD90160::readMFRStatus()
111{
112 return interface.read(MFR_STATUS, Type::DeviceDebug);
113}
114
Matt Spinlerb54357f2017-08-21 14:38:54 -0500115bool UCD90160::checkVOUTFaults()
116{
Matt Spinlere7e432b2017-08-21 15:01:40 -0500117 bool errorCreated = false;
118 auto statusWord = readStatusWord();
119
120 //The status_word register has a summary bit to tell us
121 //if each page even needs to be checked
122 if (!(statusWord & status_word::VOUT_FAULT))
123 {
124 return errorCreated;
125 }
126
127 for (size_t page = 0; page < NUM_PAGES; page++)
128 {
129 if (isVoutFaultLogged(page))
130 {
131 continue;
132 }
133
134 auto statusVout = interface.insertPageNum(STATUS_VOUT, page);
135 uint8_t vout = interface.read(statusVout, Type::Debug);
136
Matt Spinlerde16d052017-12-13 13:22:14 -0600137 //If any bits are on log them, though some are just
138 //warnings so they won't cause errors
Matt Spinlere7e432b2017-08-21 15:01:40 -0500139 if (vout)
140 {
Matt Spinlerde16d052017-12-13 13:22:14 -0600141 log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
142 entry("STATUS_VOUT=0x%X", vout),
143 entry("PAGE=%d", page));
144 }
145
146 //Log errors if any non-warning bits on
147 if (vout & ~status_vout::WARNING_MASK)
148 {
Matt Spinlere7e432b2017-08-21 15:01:40 -0500149 auto& railNames = std::get<ucd90160::railNamesField>(
150 deviceMap.find(getInstance())->second);
151 auto railName = railNames.at(page);
152
153 util::NamesValues nv;
154 nv.add("STATUS_WORD", statusWord);
155 nv.add("STATUS_VOUT", vout);
156 nv.add("MFR_STATUS", readMFRStatus());
157
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500158 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlere7e432b2017-08-21 15:01:40 -0500159 PowerSequencerVoltageFault;
160
Matt Spinlerceacf942017-10-05 13:55:02 -0500161 report<power_error::PowerSequencerVoltageFault>(
Matt Spinlere7e432b2017-08-21 15:01:40 -0500162 metadata::RAIL(page),
163 metadata::RAIL_NAME(railName.c_str()),
164 metadata::RAW_STATUS(nv.get().c_str()));
165
166 setVoutFaultLogged(page);
167 errorCreated = true;
168 }
169 }
170
171 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500172}
173
174bool UCD90160::checkPGOODFaults(bool polling)
175{
Matt Spinlerd998b732017-08-21 15:35:54 -0500176 bool errorCreated = false;
177
178 //While PGOOD faults could show up in MFR_STATUS (and we could then
179 //check the summary bit in STATUS_WORD first), they are edge triggered,
180 //and as the device driver sends a clear faults command every time we
181 //do a read, we will never see them. So, we'll have to just read the
182 //real time GPI status GPIO.
183
184 //Check only the GPIs configured on this system.
185 auto& gpiConfigs = std::get<ucd90160::gpiConfigField>(
186 deviceMap.find(getInstance())->second);
187
188 for (const auto& gpiConfig : gpiConfigs)
189 {
190 auto gpiNum = std::get<ucd90160::gpiNumField>(gpiConfig);
191 auto doPoll = std::get<ucd90160::pollField>(gpiConfig);
192
193 //Can skip this one if there is already an error on this input,
194 //or we are polling and these inputs don't need to be polled
195 //(because errors on them are fatal).
196 if (isPGOODFaultLogged(gpiNum) || (polling && !doPoll))
197 {
198 continue;
199 }
200
201 //The real time status is read via the pin ID
202 auto pinID = std::get<ucd90160::pinIDField>(gpiConfig);
203 auto gpio = gpios.find(pinID);
204 Value gpiStatus;
205
206 try
207 {
208 //The first time through, create the GPIO objects
209 if (gpio == gpios.end())
210 {
211 gpios.emplace(
212 pinID,
213 std::make_unique<GPIO>(
214 gpioDevice, pinID, Direction::input));
215 gpio = gpios.find(pinID);
216 }
217
218 gpiStatus = gpio->second->read();
219 }
220 catch (std::exception& e)
221 {
222 if (!accessError)
223 {
224 log<level::ERR>(e.what());
225 accessError = true;
226 }
227 continue;
228 }
229
230 if (gpiStatus == Value::low)
231 {
Matt Spinler8bc12832017-09-19 11:17:54 -0500232 //There may be some extra analysis we can do to narrow the
233 //error down further. Note that finding an error here won't
234 //prevent us from checking this GPI again.
235 errorCreated = doExtraAnalysis(gpiConfig);
236
237 if (errorCreated)
238 {
239 continue;
240 }
241
Matt Spinlerd998b732017-08-21 15:35:54 -0500242 auto& gpiName = std::get<ucd90160::gpiNameField>(gpiConfig);
243 auto status = (gpiStatus == Value::low) ? 0 : 1;
244
245 util::NamesValues nv;
246 nv.add("STATUS_WORD", readStatusWord());
247 nv.add("MFR_STATUS", readMFRStatus());
248 nv.add("INPUT_STATUS", status);
249
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500250 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlerd998b732017-08-21 15:35:54 -0500251 PowerSequencerPGOODFault;
252
Matt Spinlerceacf942017-10-05 13:55:02 -0500253 report<power_error::PowerSequencerPGOODFault>(
Matt Spinlerd998b732017-08-21 15:35:54 -0500254 metadata::INPUT_NUM(gpiNum),
255 metadata::INPUT_NAME(gpiName.c_str()),
256 metadata::RAW_STATUS(nv.get().c_str()));
257
258 setPGOODFaultLogged(gpiNum);
259 errorCreated = true;
260 }
261 }
262
263 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500264}
265
266void UCD90160::createPowerFaultLog()
267{
Matt Spinler9efb3082017-08-21 15:43:43 -0500268 util::NamesValues nv;
269 nv.add("STATUS_WORD", readStatusWord());
270 nv.add("MFR_STATUS", readMFRStatus());
Matt Spinlerb54357f2017-08-21 14:38:54 -0500271
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500272 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinler9efb3082017-08-21 15:43:43 -0500273 PowerSequencerFault;
274
Matt Spinlerceacf942017-10-05 13:55:02 -0500275 report<power_error::PowerSequencerFault>(
Matt Spinler9efb3082017-08-21 15:43:43 -0500276 metadata::RAW_STATUS(nv.get().c_str()));
Matt Spinlerb54357f2017-08-21 14:38:54 -0500277}
278
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500279fs::path UCD90160::findGPIODevice(const fs::path& path)
Matt Spinler110b2842017-08-21 15:23:27 -0500280{
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500281 fs::path gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500282
283 //In the driver directory, look for a subdirectory
284 //named gpiochipX, where X is some number. Then
285 //we'll access the GPIO at /dev/gpiochipX.
286 if (fs::is_directory(path))
287 {
288 for (auto& f : fs::directory_iterator(path))
289 {
290 if (f.path().filename().string().find("gpiochip") !=
291 std::string::npos)
292 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500293 gpioDevicePath = "/dev" / f.path().filename();
Matt Spinler110b2842017-08-21 15:23:27 -0500294 break;
295 }
296 }
297 }
298
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500299 if (gpioDevicePath.empty())
Matt Spinler110b2842017-08-21 15:23:27 -0500300 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500301 log<level::ERR>("Could not find GPIO device path",
Matt Spinler110b2842017-08-21 15:23:27 -0500302 entry("BASE_PATH=%s", path.c_str()));
303 }
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500304
305 return gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500306}
307
Matt Spinler8bc12832017-09-19 11:17:54 -0500308bool UCD90160::doExtraAnalysis(const ucd90160::GPIConfig& config)
309{
310
311 auto type = std::get<ucd90160::extraAnalysisField>(config);
312 if (type == ucd90160::extraAnalysisType::none)
313 {
314 return false;
315 }
316
317 //Currently the only extra analysis to do is to check other GPIOs.
318 return doGPIOAnalysis(type);
319}
320
321bool UCD90160::doGPIOAnalysis(ucd90160::extraAnalysisType type)
322{
323 bool errorFound = false;
Matt Spinlera8269652017-09-19 15:13:28 -0500324 bool shutdown = false;
Matt Spinler8bc12832017-09-19 11:17:54 -0500325
326 const auto& analysisConfig = std::get<ucd90160::gpioAnalysisField>(
327 deviceMap.find(getInstance())->second);
328
329 auto gpioConfig = analysisConfig.find(type);
330 if (gpioConfig == analysisConfig.end())
331 {
332 return errorFound;
333 }
334
335 auto path = std::get<ucd90160::gpioDevicePathField>(
336 gpioConfig->second);
337
338 //The /dev/gpiochipX device
339 auto device = findGPIODevice(path);
340
341 //The GPIO value of the fault condition
342 auto polarity = std::get<ucd90160::gpioPolarityField>(
343 gpioConfig->second);
344
345 //The GPIOs to check
346 auto& gpios = std::get<ucd90160::gpioDefinitionField>(
347 gpioConfig->second);
348
349 for (const auto& gpio : gpios)
350 {
351 gpio::Value value;
352
353 try
354 {
355 GPIO g{device,
356 std::get<ucd90160::gpioNumField>(gpio),
357 Direction::input};
358
359 value = g.read();
360 }
361 catch (std::exception& e)
362 {
363 if (!gpioAccessError)
364 {
365 //GPIO only throws InternalErrors - not worth committing.
366 log<level::ERR>(
367 "GPIO read failed while analyzing a power fault",
368 entry("CHIP_PATH=%s", path.c_str()));
369
370 gpioAccessError = true;
371 }
372 continue;
373 }
374
375 if (value == polarity)
376 {
377 errorFound = true;
378
379 auto part = std::get<ucd90160::gpioCalloutField>(gpio);
380 PartCallout callout{type, part};
381
382 if (isPartCalledOut(callout))
383 {
384 continue;
385 }
386
387 //Look up and call the error creation function
388 auto logError = std::get<ucd90160::errorFunctionField>(
389 gpioConfig->second);
390
391 logError(*this, part);
392
393 //Save the part callout so we don't call it out again
394 setPartCallout(callout);
Matt Spinlera8269652017-09-19 15:13:28 -0500395
396 //Some errors (like overtemps) require a shutdown
397 auto actions = static_cast<uint32_t>(
398 std::get<ucd90160::optionFlagsField>(gpioConfig->second));
399
400 if (actions & static_cast<decltype(actions)>(
401 ucd90160::optionFlags::shutdownOnFault))
402 {
403 shutdown = true;
404 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500405 }
406 }
407
Matt Spinlera8269652017-09-19 15:13:28 -0500408 if (shutdown)
409 {
Matt Spinler882ce952017-10-05 16:12:41 -0500410 //Will be replaced with a GPU specific error in a future commit
411 util::powerOff<power_error::Shutdown>(bus);
Matt Spinlera8269652017-09-19 15:13:28 -0500412 }
413
Matt Spinler8bc12832017-09-19 11:17:54 -0500414 return errorFound;
415}
416
Matt Spinler7b14db22017-09-19 10:57:54 -0500417void UCD90160::gpuPGOODError(const std::string& callout)
418{
419 util::NamesValues nv;
420 nv.add("STATUS_WORD", readStatusWord());
421 nv.add("MFR_STATUS", readMFRStatus());
422
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500423 using metadata = org::open_power::Witherspoon::Fault::GPUPowerFault;
Matt Spinler7b14db22017-09-19 10:57:54 -0500424
Matt Spinlerceacf942017-10-05 13:55:02 -0500425 report<power_error::GPUPowerFault>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500426 metadata::RAW_STATUS(nv.get().c_str()),
427 metadata::GPU(callout.c_str()));
428}
429
430void UCD90160::gpuOverTempError(const std::string& callout)
431{
432 util::NamesValues nv;
433 nv.add("STATUS_WORD", readStatusWord());
434 nv.add("MFR_STATUS", readMFRStatus());
435
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500436 using metadata = org::open_power::Witherspoon::Fault::GPUOverTemp;
Matt Spinler7b14db22017-09-19 10:57:54 -0500437
Matt Spinlerceacf942017-10-05 13:55:02 -0500438 report<power_error::GPUOverTemp>(
Matt Spinler7b14db22017-09-19 10:57:54 -0500439 metadata::RAW_STATUS(nv.get().c_str()),
440 metadata::GPU(callout.c_str()));
441}
442
Matt Spinlerb54357f2017-08-21 14:38:54 -0500443}
444}