blob: e7fc8cf19c33d21cb438fd28a4994dd1302df2fa [file] [log] [blame]
/**
 * Copyright © 2017 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#include <map>
17#include <memory>
18#include <phosphor-logging/elog.hpp>
19#include <phosphor-logging/log.hpp>
20#include <elog-errors.hpp>
21#include <xyz/openbmc_project/Sensor/Device/error.hpp>
22#include <xyz/openbmc_project/Control/Device/error.hpp>
Brandon Wymane0eb45c2017-10-06 12:58:42 -050023#include <org/open_power/Witherspoon/Fault/error.hpp>
Matt Spinlere7e432b2017-08-21 15:01:40 -050024#include "names_values.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050025#include "ucd90160.hpp"
Matt Spinlera8269652017-09-19 15:13:28 -050026#include "utility.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -050027
28namespace witherspoon
29{
30namespace power
31{
32
33using namespace std::string_literals;
34
Matt Spinlere7e432b2017-08-21 15:01:40 -050035const auto MFR_STATUS = "mfr_status"s;
Matt Spinler1e365692017-08-21 14:43:55 -050036
Matt Spinlerb54357f2017-08-21 14:38:54 -050037const auto DEVICE_NAME = "UCD90160"s;
38const auto DRIVER_NAME = "ucd9000"s;
Matt Spinlere7e432b2017-08-21 15:01:40 -050039constexpr auto NUM_PAGES = 16;
Matt Spinlerb54357f2017-08-21 14:38:54 -050040
Matt Spinler110b2842017-08-21 15:23:27 -050041namespace fs = std::experimental::filesystem;
Matt Spinlerd998b732017-08-21 15:35:54 -050042using namespace gpio;
Matt Spinlerb54357f2017-08-21 14:38:54 -050043using namespace pmbus;
44using namespace phosphor::logging;
45using namespace sdbusplus::xyz::openbmc_project::Control::Device::Error;
46using namespace sdbusplus::xyz::openbmc_project::Sensor::Device::Error;
Brandon Wymane0eb45c2017-10-06 12:58:42 -050047using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error;
Matt Spinlerb54357f2017-08-21 14:38:54 -050048
Matt Spinlera8269652017-09-19 15:13:28 -050049UCD90160::UCD90160(size_t instance, sdbusplus::bus::bus& bus) :
Matt Spinlerfcd4a712017-09-19 10:45:07 -050050 Device(DEVICE_NAME, instance),
51 interface(std::get<ucd90160::pathField>(
52 deviceMap.find(instance)->second),
53 DRIVER_NAME,
54 instance),
Matt Spinlera8269652017-09-19 15:13:28 -050055 gpioDevice(findGPIODevice(interface.path())),
56 bus(bus)
Matt Spinlerb54357f2017-08-21 14:38:54 -050057{
58}
59
60void UCD90160::onFailure()
61{
62 try
63 {
64 auto voutError = checkVOUTFaults();
65
66 auto pgoodError = checkPGOODFaults(false);
67
68 //Not a voltage or PGOOD fault, but we know something
69 //failed so still create an error log.
70 if (!voutError && !pgoodError)
71 {
72 createPowerFaultLog();
73 }
74 }
75 catch (ReadFailure& e)
76 {
77 if (!accessError)
78 {
79 commit<ReadFailure>();
80 accessError = true;
81 }
82 }
83}
84
85void UCD90160::analyze()
86{
87 try
88 {
89 //Note: Voltage faults are always fatal, so they just
90 //need to be analyzed in onFailure().
91
92 checkPGOODFaults(true);
93 }
94 catch (ReadFailure& e)
95 {
96 if (!accessError)
97 {
98 commit<ReadFailure>();
99 accessError = true;
100 }
101 }
102}
103
Matt Spinlere7e432b2017-08-21 15:01:40 -0500104uint16_t UCD90160::readStatusWord()
105{
106 return interface.read(STATUS_WORD, Type::Debug);
107}
108
109uint32_t UCD90160::readMFRStatus()
110{
111 return interface.read(MFR_STATUS, Type::DeviceDebug);
112}
113
Matt Spinlerb54357f2017-08-21 14:38:54 -0500114bool UCD90160::checkVOUTFaults()
115{
Matt Spinlere7e432b2017-08-21 15:01:40 -0500116 bool errorCreated = false;
117 auto statusWord = readStatusWord();
118
119 //The status_word register has a summary bit to tell us
120 //if each page even needs to be checked
121 if (!(statusWord & status_word::VOUT_FAULT))
122 {
123 return errorCreated;
124 }
125
126 for (size_t page = 0; page < NUM_PAGES; page++)
127 {
128 if (isVoutFaultLogged(page))
129 {
130 continue;
131 }
132
133 auto statusVout = interface.insertPageNum(STATUS_VOUT, page);
134 uint8_t vout = interface.read(statusVout, Type::Debug);
135
136 //Any bit on is an error
137 if (vout)
138 {
139 auto& railNames = std::get<ucd90160::railNamesField>(
140 deviceMap.find(getInstance())->second);
141 auto railName = railNames.at(page);
142
143 util::NamesValues nv;
144 nv.add("STATUS_WORD", statusWord);
145 nv.add("STATUS_VOUT", vout);
146 nv.add("MFR_STATUS", readMFRStatus());
147
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500148 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlere7e432b2017-08-21 15:01:40 -0500149 PowerSequencerVoltageFault;
150
151 report<PowerSequencerVoltageFault>(
152 metadata::RAIL(page),
153 metadata::RAIL_NAME(railName.c_str()),
154 metadata::RAW_STATUS(nv.get().c_str()));
155
156 setVoutFaultLogged(page);
157 errorCreated = true;
158 }
159 }
160
161 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500162}
163
164bool UCD90160::checkPGOODFaults(bool polling)
165{
Matt Spinlerd998b732017-08-21 15:35:54 -0500166 bool errorCreated = false;
167
168 //While PGOOD faults could show up in MFR_STATUS (and we could then
169 //check the summary bit in STATUS_WORD first), they are edge triggered,
170 //and as the device driver sends a clear faults command every time we
171 //do a read, we will never see them. So, we'll have to just read the
172 //real time GPI status GPIO.
173
174 //Check only the GPIs configured on this system.
175 auto& gpiConfigs = std::get<ucd90160::gpiConfigField>(
176 deviceMap.find(getInstance())->second);
177
178 for (const auto& gpiConfig : gpiConfigs)
179 {
180 auto gpiNum = std::get<ucd90160::gpiNumField>(gpiConfig);
181 auto doPoll = std::get<ucd90160::pollField>(gpiConfig);
182
183 //Can skip this one if there is already an error on this input,
184 //or we are polling and these inputs don't need to be polled
185 //(because errors on them are fatal).
186 if (isPGOODFaultLogged(gpiNum) || (polling && !doPoll))
187 {
188 continue;
189 }
190
191 //The real time status is read via the pin ID
192 auto pinID = std::get<ucd90160::pinIDField>(gpiConfig);
193 auto gpio = gpios.find(pinID);
194 Value gpiStatus;
195
196 try
197 {
198 //The first time through, create the GPIO objects
199 if (gpio == gpios.end())
200 {
201 gpios.emplace(
202 pinID,
203 std::make_unique<GPIO>(
204 gpioDevice, pinID, Direction::input));
205 gpio = gpios.find(pinID);
206 }
207
208 gpiStatus = gpio->second->read();
209 }
210 catch (std::exception& e)
211 {
212 if (!accessError)
213 {
214 log<level::ERR>(e.what());
215 accessError = true;
216 }
217 continue;
218 }
219
220 if (gpiStatus == Value::low)
221 {
Matt Spinler8bc12832017-09-19 11:17:54 -0500222 //There may be some extra analysis we can do to narrow the
223 //error down further. Note that finding an error here won't
224 //prevent us from checking this GPI again.
225 errorCreated = doExtraAnalysis(gpiConfig);
226
227 if (errorCreated)
228 {
229 continue;
230 }
231
Matt Spinlerd998b732017-08-21 15:35:54 -0500232 auto& gpiName = std::get<ucd90160::gpiNameField>(gpiConfig);
233 auto status = (gpiStatus == Value::low) ? 0 : 1;
234
235 util::NamesValues nv;
236 nv.add("STATUS_WORD", readStatusWord());
237 nv.add("MFR_STATUS", readMFRStatus());
238 nv.add("INPUT_STATUS", status);
239
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500240 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinlerd998b732017-08-21 15:35:54 -0500241 PowerSequencerPGOODFault;
242
243 report<PowerSequencerPGOODFault>(
244 metadata::INPUT_NUM(gpiNum),
245 metadata::INPUT_NAME(gpiName.c_str()),
246 metadata::RAW_STATUS(nv.get().c_str()));
247
248 setPGOODFaultLogged(gpiNum);
249 errorCreated = true;
250 }
251 }
252
253 return errorCreated;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500254}
255
256void UCD90160::createPowerFaultLog()
257{
Matt Spinler9efb3082017-08-21 15:43:43 -0500258 util::NamesValues nv;
259 nv.add("STATUS_WORD", readStatusWord());
260 nv.add("MFR_STATUS", readMFRStatus());
Matt Spinlerb54357f2017-08-21 14:38:54 -0500261
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500262 using metadata = org::open_power::Witherspoon::Fault::
Matt Spinler9efb3082017-08-21 15:43:43 -0500263 PowerSequencerFault;
264
265 report<PowerSequencerFault>(
266 metadata::RAW_STATUS(nv.get().c_str()));
Matt Spinlerb54357f2017-08-21 14:38:54 -0500267}
268
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500269fs::path UCD90160::findGPIODevice(const fs::path& path)
Matt Spinler110b2842017-08-21 15:23:27 -0500270{
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500271 fs::path gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500272
273 //In the driver directory, look for a subdirectory
274 //named gpiochipX, where X is some number. Then
275 //we'll access the GPIO at /dev/gpiochipX.
276 if (fs::is_directory(path))
277 {
278 for (auto& f : fs::directory_iterator(path))
279 {
280 if (f.path().filename().string().find("gpiochip") !=
281 std::string::npos)
282 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500283 gpioDevicePath = "/dev" / f.path().filename();
Matt Spinler110b2842017-08-21 15:23:27 -0500284 break;
285 }
286 }
287 }
288
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500289 if (gpioDevicePath.empty())
Matt Spinler110b2842017-08-21 15:23:27 -0500290 {
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500291 log<level::ERR>("Could not find GPIO device path",
Matt Spinler110b2842017-08-21 15:23:27 -0500292 entry("BASE_PATH=%s", path.c_str()));
293 }
Matt Spinlerfcd4a712017-09-19 10:45:07 -0500294
295 return gpioDevicePath;
Matt Spinler110b2842017-08-21 15:23:27 -0500296}
297
Matt Spinler8bc12832017-09-19 11:17:54 -0500298bool UCD90160::doExtraAnalysis(const ucd90160::GPIConfig& config)
299{
300
301 auto type = std::get<ucd90160::extraAnalysisField>(config);
302 if (type == ucd90160::extraAnalysisType::none)
303 {
304 return false;
305 }
306
307 //Currently the only extra analysis to do is to check other GPIOs.
308 return doGPIOAnalysis(type);
309}
310
311bool UCD90160::doGPIOAnalysis(ucd90160::extraAnalysisType type)
312{
313 bool errorFound = false;
Matt Spinlera8269652017-09-19 15:13:28 -0500314 bool shutdown = false;
Matt Spinler8bc12832017-09-19 11:17:54 -0500315
316 const auto& analysisConfig = std::get<ucd90160::gpioAnalysisField>(
317 deviceMap.find(getInstance())->second);
318
319 auto gpioConfig = analysisConfig.find(type);
320 if (gpioConfig == analysisConfig.end())
321 {
322 return errorFound;
323 }
324
325 auto path = std::get<ucd90160::gpioDevicePathField>(
326 gpioConfig->second);
327
328 //The /dev/gpiochipX device
329 auto device = findGPIODevice(path);
330
331 //The GPIO value of the fault condition
332 auto polarity = std::get<ucd90160::gpioPolarityField>(
333 gpioConfig->second);
334
335 //The GPIOs to check
336 auto& gpios = std::get<ucd90160::gpioDefinitionField>(
337 gpioConfig->second);
338
339 for (const auto& gpio : gpios)
340 {
341 gpio::Value value;
342
343 try
344 {
345 GPIO g{device,
346 std::get<ucd90160::gpioNumField>(gpio),
347 Direction::input};
348
349 value = g.read();
350 }
351 catch (std::exception& e)
352 {
353 if (!gpioAccessError)
354 {
355 //GPIO only throws InternalErrors - not worth committing.
356 log<level::ERR>(
357 "GPIO read failed while analyzing a power fault",
358 entry("CHIP_PATH=%s", path.c_str()));
359
360 gpioAccessError = true;
361 }
362 continue;
363 }
364
365 if (value == polarity)
366 {
367 errorFound = true;
368
369 auto part = std::get<ucd90160::gpioCalloutField>(gpio);
370 PartCallout callout{type, part};
371
372 if (isPartCalledOut(callout))
373 {
374 continue;
375 }
376
377 //Look up and call the error creation function
378 auto logError = std::get<ucd90160::errorFunctionField>(
379 gpioConfig->second);
380
381 logError(*this, part);
382
383 //Save the part callout so we don't call it out again
384 setPartCallout(callout);
Matt Spinlera8269652017-09-19 15:13:28 -0500385
386 //Some errors (like overtemps) require a shutdown
387 auto actions = static_cast<uint32_t>(
388 std::get<ucd90160::optionFlagsField>(gpioConfig->second));
389
390 if (actions & static_cast<decltype(actions)>(
391 ucd90160::optionFlags::shutdownOnFault))
392 {
393 shutdown = true;
394 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500395 }
396 }
397
Matt Spinlera8269652017-09-19 15:13:28 -0500398 if (shutdown)
399 {
400 util::powerOff(bus);
401 }
402
Matt Spinler8bc12832017-09-19 11:17:54 -0500403 return errorFound;
404}
405
Matt Spinler7b14db22017-09-19 10:57:54 -0500406void UCD90160::gpuPGOODError(const std::string& callout)
407{
408 util::NamesValues nv;
409 nv.add("STATUS_WORD", readStatusWord());
410 nv.add("MFR_STATUS", readMFRStatus());
411
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500412 using metadata = org::open_power::Witherspoon::Fault::GPUPowerFault;
Matt Spinler7b14db22017-09-19 10:57:54 -0500413
414 report<GPUPowerFault>(
415 metadata::RAW_STATUS(nv.get().c_str()),
416 metadata::GPU(callout.c_str()));
417}
418
419void UCD90160::gpuOverTempError(const std::string& callout)
420{
421 util::NamesValues nv;
422 nv.add("STATUS_WORD", readStatusWord());
423 nv.add("MFR_STATUS", readMFRStatus());
424
Brandon Wymane0eb45c2017-10-06 12:58:42 -0500425 using metadata = org::open_power::Witherspoon::Fault::GPUOverTemp;
Matt Spinler7b14db22017-09-19 10:57:54 -0500426
427 report<GPUOverTemp>(
428 metadata::RAW_STATUS(nv.get().c_str()),
429 metadata::GPU(callout.c_str()));
430}
431
Matt Spinlerb54357f2017-08-21 14:38:54 -0500432}
433}