blob: 7cfd809b8c784b607b69d32806d524ef0efef222 [file] [log] [blame]
#pragma once

#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"

#include <sdbusplus/bus.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

Lei YUab093322019-10-09 16:43:22 +080015namespace phosphor
Matt Spinlerb54357f2017-08-21 14:38:54 -050016{
17namespace power
18{
19
Matt Spinlerf0f02b92018-10-25 16:12:43 -050020// Error type, callout
21using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
Matt Spinler8bc12832017-09-19 11:17:54 -050022
Matt Spinlerb54357f2017-08-21 14:38:54 -050023/**
24 * @class UCD90160
25 *
26 * This class implements fault analysis for the UCD90160
27 * power sequencer device.
28 *
29 */
30class UCD90160 : public Device
31{
Matt Spinlerf0f02b92018-10-25 16:12:43 -050032 public:
33 UCD90160() = delete;
34 ~UCD90160() = default;
35 UCD90160(const UCD90160&) = delete;
36 UCD90160& operator=(const UCD90160&) = delete;
37 UCD90160(UCD90160&&) = default;
38 UCD90160& operator=(UCD90160&&) = default;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinlerf0f02b92018-10-25 16:12:43 -050040 /**
41 * Constructor
42 *
43 * @param[in] instance - the device instance number
44 * @param[in] bus - D-Bus bus object
45 */
46 UCD90160(size_t instance, sdbusplus::bus::bus& bus);
Matt Spinlerb54357f2017-08-21 14:38:54 -050047
Matt Spinlerf0f02b92018-10-25 16:12:43 -050048 /**
49 * Analyzes the device for errors when the device is
50 * known to be in an error state. A log will be created.
51 */
52 void onFailure() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050053
Matt Spinlerf0f02b92018-10-25 16:12:43 -050054 /**
55 * Checks the device for errors and only creates a log
56 * if one is found.
57 */
58 void analyze() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050059
Matt Spinlerf0f02b92018-10-25 16:12:43 -050060 /**
61 * Clears faults in the device
62 */
63 void clearFaults() override
64 {
65 }
Matt Spinlerb54357f2017-08-21 14:38:54 -050066
Matt Spinlerf0f02b92018-10-25 16:12:43 -050067 private:
68 /**
69 * Reports an error for a GPU PGOOD failure
70 *
71 * @param[in] callout - the GPU callout string
72 */
73 void gpuPGOODError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050074
Matt Spinlerf0f02b92018-10-25 16:12:43 -050075 /**
76 * Reports an error for a GPU OverTemp failure
77 *
78 * @param[in] callout - the GPU callout string
79 */
80 void gpuOverTempError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050081
Matt Spinlerf0f02b92018-10-25 16:12:43 -050082 /**
Brandon Wyman03c19db2019-05-10 17:46:41 -050083 * Reports an error for a MEM_GOODx failure.
84 *
85 * @param[in] callout - The MEM callout string
86 */
87 void memGoodError(const std::string& callout);
88
89 /**
Matt Spinlerf0f02b92018-10-25 16:12:43 -050090 * Given the device path for a chip, find its gpiochip
91 * path
92 *
93 * @param[in] path - device path, like
94 * /sys/devices/.../i2c-11/11-0064
95 *
96 * @return fs::path - The gpiochip path, like
97 * /dev/gpiochip1
98 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -050099 static std::filesystem::path
100 findGPIODevice(const std::filesystem::path& path);
Matt Spinler7b14db22017-09-19 10:57:54 -0500101
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500102 /**
103 * Checks for VOUT faults on the device.
104 *
105 * This device can monitor voltages of its dependent
106 * devices, and VOUT faults are voltage faults
107 * on these devices.
108 *
109 * @return bool - true if an error log was created
110 */
111 bool checkVOUTFaults();
Matt Spinler7b14db22017-09-19 10:57:54 -0500112
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500113 /**
114 * Checks for PGOOD faults on the device.
115 *
116 * This device can monitor the PGOOD signals of its dependent
117 * devices, and this check will look for faults of
118 * those PGOODs.
119 *
120 * @param[in] polling - If this is running while polling for errors,
121 * as opposing to analyzing a fail condition.
122 *
123 * @return bool - true if an error log was created
124 */
125 bool checkPGOODFaults(bool polling);
Matt Spinler110b2842017-08-21 15:23:27 -0500126
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500127 /**
128 * Creates an error log when the device has an error
129 * but it isn't a PGOOD or voltage failure.
130 */
131 void createPowerFaultLog();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500132
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500133 /**
134 * Reads the status_word register
135 *
136 * @return uint16_t - the register contents
137 */
138 uint16_t readStatusWord();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500139
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500140 /**
141 * Reads the mfr_status register
142 *
143 * @return uint32_t - the register contents
144 */
145 uint32_t readMFRStatus();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500146
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500147 /**
148 * Does any additional fault analysis based on the
149 * value of the extraAnalysisType field in the GPIOConfig
150 * entry.
151 *
152 * Used to get better callouts.
153 *
154 * @param[in] config - the GPIOConfig entry to use
155 *
156 * @return bool - true if a HW error was found, false else
157 */
158 bool doExtraAnalysis(const ucd90160::GPIConfig& config);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500159
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500160 /**
161 * Does additional fault analysis using GPIOs to
162 * specifically identify the failing part.
163 *
164 * Used when there are too many PGOOD inputs for
165 * the UCD90160 to handle, so just a summary bit
166 * is wired into the chip, and then the specific
167 * fault GPIOs are off of a different GPIO device,
168 * like an IO expander.
169 *
170 * @param[in] type - the type of analysis to do
171 *
172 * @return bool - true if a HW error was found, false else
173 */
174 bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500175
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500176 /**
177 * Says if we've already logged a Vout fault
178 *
179 * The policy is only 1 of the same error will
180 * be logged for the duration of a class instance.
181 *
182 * @param[in] page - the page to check
183 *
184 * @return bool - if we've already logged a fault against
185 * this page
186 */
187 inline bool isVoutFaultLogged(uint32_t page) const
188 {
189 return std::find(voutErrors.begin(), voutErrors.end(), page) !=
190 voutErrors.end();
191 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500192
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500193 /**
194 * Saves that a Vout fault has been logged
195 *
196 * @param[in] page - the page the error was logged against
197 */
198 inline void setVoutFaultLogged(uint32_t page)
199 {
200 voutErrors.push_back(page);
201 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500202
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500203 /**
204 * Says if we've already logged a PGOOD fault
205 *
206 * The policy is only 1 of the same errors will
207 * be logged for the duration of a class instance.
208 *
209 * @param[in] input - the input to check
210 *
211 * @return bool - if we've already logged a fault against
212 * this input
213 */
214 inline bool isPGOODFaultLogged(uint32_t input) const
215 {
216 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
217 pgoodErrors.end();
218 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500219
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500220 /**
221 * Says if we've already logged a specific fault
222 * against a specific part
223 *
224 * @param[in] callout - error type and name tuple
225 *
226 * @return bool - if we've already logged this fault
227 * against this part
228 */
229 inline bool isPartCalledOut(const PartCallout& callout) const
230 {
231 return std::find(callouts.begin(), callouts.end(), callout) !=
232 callouts.end();
233 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500234
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500235 /**
236 * Saves that a PGOOD fault has been logged
237 *
238 * @param[in] input - the input the error was logged against
239 */
240 inline void setPGOODFaultLogged(uint32_t input)
241 {
242 pgoodErrors.push_back(input);
243 }
Matt Spinlerd998b732017-08-21 15:35:54 -0500244
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500245 /**
246 * Saves that a specific fault on a specific part has been done
247 *
248 * @param[in] callout - error type and name tuple
249 */
250 inline void setPartCallout(const PartCallout& callout)
251 {
252 callouts.push_back(callout);
253 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500254
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500255 /**
256 * List of pages that Vout errors have
257 * already been logged against
258 */
259 std::vector<uint32_t> voutErrors;
Matt Spinlerd998b732017-08-21 15:35:54 -0500260
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500261 /**
262 * List of inputs that PGOOD errors have
263 * already been logged against
264 */
265 std::vector<uint32_t> pgoodErrors;
Matt Spinler8bc12832017-09-19 11:17:54 -0500266
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500267 /**
268 * List of callouts that already been done
269 */
270 std::vector<PartCallout> callouts;
Matt Spinlere7e432b2017-08-21 15:01:40 -0500271
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500272 /**
273 * The read/write interface to this hardware
274 */
275 pmbus::PMBus interface;
Matt Spinlerd998b732017-08-21 15:35:54 -0500276
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500277 /**
278 * A map of GPI pin IDs to the GPIO object
279 * used to access them
280 */
281 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
Matt Spinler8bc12832017-09-19 11:17:54 -0500282
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500283 /**
284 * Keeps track of device access errors to avoid repeatedly
285 * logging errors for bad hardware
286 */
287 bool accessError = false;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500288
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500289 /**
290 * Keeps track of GPIO access errors when doing the in depth
291 * PGOOD fault analysis to avoid repeatedly logging errors
292 * for bad hardware
293 */
294 bool gpioAccessError = false;
Matt Spinlerd998b732017-08-21 15:35:54 -0500295
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500296 /**
297 * The path to the GPIO device used to read
298 * the GPI (PGOOD) status
299 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -0500300 std::filesystem::path gpioDevice;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500301
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500302 /**
303 * The D-Bus bus object
304 */
305 sdbusplus::bus::bus& bus;
Matt Spinler8bc12832017-09-19 11:17:54 -0500306
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500307 /**
308 * Map of device instance to the instance specific data
309 */
310 static const ucd90160::DeviceMap deviceMap;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500311};
312
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500313} // namespace power
Lei YUab093322019-10-09 16:43:22 +0800314} // namespace phosphor