blob: b82711d461ab4b2982c72828c7cc1b68bbbe913e [file] [log] [blame]
Matt Spinlerb54357f2017-08-21 14:38:54 -05001#pragma once
2
3#include <algorithm>
Matt Spinler110b2842017-08-21 15:23:27 -05004#include <experimental/filesystem>
Matt Spinlerb54357f2017-08-21 14:38:54 -05005#include <map>
6#include <vector>
7#include "device.hpp"
Matt Spinlerd998b732017-08-21 15:35:54 -05008#include "gpio.hpp"
Matt Spinlerb54357f2017-08-21 14:38:54 -05009#include "pmbus.hpp"
10#include "types.hpp"
11
12namespace witherspoon
13{
14namespace power
15{
16
Matt Spinler8bc12832017-09-19 11:17:54 -050017//Error type, callout
18using PartCallout =
19 std::tuple<ucd90160::extraAnalysisType, std::string>;
20
Matt Spinlerb54357f2017-08-21 14:38:54 -050021/**
22 * @class UCD90160
23 *
24 * This class implements fault analysis for the UCD90160
25 * power sequencer device.
26 *
27 */
28class UCD90160 : public Device
29{
30 public:
31
32 UCD90160() = delete;
33 ~UCD90160() = default;
34 UCD90160(const UCD90160&) = delete;
35 UCD90160& operator=(const UCD90160&) = delete;
36 UCD90160(UCD90160&&) = default;
37 UCD90160& operator=(UCD90160&&) = default;
38
39 /**
40 * Constructor
41 *
42 * @param[in] instance - the device instance number
43 */
44 UCD90160(size_t instance);
45
46 /**
47 * Analyzes the device for errors when the device is
48 * known to be in an error state. A log will be created.
49 */
50 void onFailure() override;
51
52 /**
53 * Checks the device for errors and only creates a log
54 * if one is found.
55 */
56 void analyze() override;
57
58 /**
59 * Clears faults in the device
60 */
Matt Spinler81be00b2017-09-07 15:28:38 -050061 void clearFaults() override
62 {
63 }
Matt Spinlerb54357f2017-08-21 14:38:54 -050064
65 private:
66
67 /**
Matt Spinler7b14db22017-09-19 10:57:54 -050068 * Reports an error for a GPU PGOOD failure
69 *
70 * @param[in] callout - the GPU callout string
71 */
72 void gpuPGOODError(const std::string& callout);
73
74 /**
75 * Reports an error for a GPU OverTemp failure
76 *
77 * @param[in] callout - the GPU callout string
78 */
79 void gpuOverTempError(const std::string& callout);
80
81 /**
Matt Spinlerfcd4a712017-09-19 10:45:07 -050082 * Given the device path for a chip, find its gpiochip
83 * path
84 *
85 * @param[in] path - device path, like
86 * /sys/devices/.../i2c-11/11-0064
87 *
88 * @return fs::path - The gpiochip path, like
89 * /dev/gpiochip1
Matt Spinler110b2842017-08-21 15:23:27 -050090 */
Matt Spinlerfcd4a712017-09-19 10:45:07 -050091 static std::experimental::filesystem::path findGPIODevice(
92 const std::experimental::filesystem::path& path);
Matt Spinler110b2842017-08-21 15:23:27 -050093
94 /**
Matt Spinlerb54357f2017-08-21 14:38:54 -050095 * Checks for VOUT faults on the device.
96 *
97 * This device can monitor voltages of its dependent
98 * devices, and VOUT faults are voltage faults
99 * on these devices.
100 *
101 * @return bool - true if an error log was created
102 */
103 bool checkVOUTFaults();
104
105 /**
106 * Checks for PGOOD faults on the device.
107 *
108 * This device can monitor the PGOOD signals of its dependent
109 * devices, and this check will look for faults of
110 * those PGOODs.
111 *
112 * @param[in] polling - If this is running while polling for errors,
113 * as opposing to analyzing a fail condition.
114 *
115 * @return bool - true if an error log was created
116 */
117 bool checkPGOODFaults(bool polling);
118
119 /**
120 * Creates an error log when the device has an error
121 * but it isn't a PGOOD or voltage failure.
122 */
123 void createPowerFaultLog();
124
125 /**
Matt Spinlere7e432b2017-08-21 15:01:40 -0500126 * Reads the status_word register
127 *
128 * @return uint16_t - the register contents
129 */
130 uint16_t readStatusWord();
131
132 /**
133 * Reads the mfr_status register
134 *
135 * @return uint32_t - the register contents
136 */
137 uint32_t readMFRStatus();
138
139 /**
Matt Spinler8bc12832017-09-19 11:17:54 -0500140 * Does any additional fault analysis based on the
141 * value of the extraAnalysisType field in the GPIOConfig
142 * entry.
143 *
144 * Used to get better callouts.
145 *
146 * @param[in] config - the GPIOConfig entry to use
147 *
148 * @return bool - true if a HW error was found, false else
149 */
150 bool doExtraAnalysis(const ucd90160::GPIConfig& config);
151
152 /**
153 * Does additional fault analysis using GPIOs to
154 * specifically identify the failing part.
155 *
156 * Used when there are too many PGOOD inputs for
157 * the UCD90160 to handle, so just a summary bit
158 * is wired into the chip, and then the specific
159 * fault GPIOs are off of a different GPIO device,
160 * like an IO expander.
161 *
162 * @param[in] type - the type of analysis to do
163 *
164 * @return bool - true if a HW error was found, false else
165 */
166 bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
167
168 /**
Matt Spinlere7e432b2017-08-21 15:01:40 -0500169 * Says if we've already logged a Vout fault
170 *
171 * The policy is only 1 of the same error will
172 * be logged for the duration of a class instance.
173 *
174 * @param[in] page - the page to check
175 *
176 * @return bool - if we've already logged a fault against
177 * this page
178 */
179 inline bool isVoutFaultLogged(uint32_t page) const
180 {
181 return std::find(voutErrors.begin(),
182 voutErrors.end(),
183 page) != voutErrors.end();
184 }
185
186 /**
187 * Saves that a Vout fault has been logged
188 *
189 * @param[in] page - the page the error was logged against
190 */
191 inline void setVoutFaultLogged(uint32_t page)
192 {
193 voutErrors.push_back(page);
194 }
195
196 /**
Matt Spinlerd998b732017-08-21 15:35:54 -0500197 * Says if we've already logged a PGOOD fault
198 *
199 * The policy is only 1 of the same errors will
200 * be logged for the duration of a class instance.
201 *
202 * @param[in] input - the input to check
203 *
204 * @return bool - if we've already logged a fault against
205 * this input
206 */
207 inline bool isPGOODFaultLogged(uint32_t input) const
208 {
209 return std::find(pgoodErrors.begin(),
210 pgoodErrors.end(),
211 input) != pgoodErrors.end();
212 }
213
214 /**
Matt Spinler8bc12832017-09-19 11:17:54 -0500215 * Says if we've already logged a specific fault
216 * against a specific part
217 *
218 * @param[in] callout - error type and name tuple
219 *
220 * @return bool - if we've already logged this fault
221 * against this part
222 */
223 inline bool isPartCalledOut(const PartCallout& callout) const
224 {
225 return std::find(callouts.begin(),
226 callouts.end(),
227 callout) != callouts.end();
228 }
229
230 /**
Matt Spinlerd998b732017-08-21 15:35:54 -0500231 * Saves that a PGOOD fault has been logged
232 *
233 * @param[in] input - the input the error was logged against
234 */
235 inline void setPGOODFaultLogged(uint32_t input)
236 {
237 pgoodErrors.push_back(input);
238 }
239
240 /**
Matt Spinler8bc12832017-09-19 11:17:54 -0500241 * Saves that a specific fault on a specific part has been done
242 *
243 * @param[in] callout - error type and name tuple
244 */
245 inline void setPartCallout(const PartCallout& callout)
246 {
247 callouts.push_back(callout);
248 }
249
250 /**
Matt Spinlere7e432b2017-08-21 15:01:40 -0500251 * List of pages that Vout errors have
252 * already been logged against
253 */
254 std::vector<uint32_t> voutErrors;
255
256 /**
Matt Spinlerd998b732017-08-21 15:35:54 -0500257 * List of inputs that PGOOD errors have
258 * already been logged against
259 */
260 std::vector<uint32_t> pgoodErrors;
261
262 /**
Matt Spinler8bc12832017-09-19 11:17:54 -0500263 * List of callouts that already been done
264 */
265 std::vector<PartCallout> callouts;
266
267 /**
Matt Spinlerb54357f2017-08-21 14:38:54 -0500268 * The read/write interface to this hardware
269 */
270 pmbus::PMBus interface;
271
272 /**
Matt Spinlerd998b732017-08-21 15:35:54 -0500273 * A map of GPI pin IDs to the GPIO object
274 * used to access them
275 */
276 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
277
278 /**
Matt Spinlerb54357f2017-08-21 14:38:54 -0500279 * Keeps track of device access errors to avoid repeatedly
280 * logging errors for bad hardware
281 */
282 bool accessError = false;
283
284 /**
Matt Spinler8bc12832017-09-19 11:17:54 -0500285 * Keeps track of GPIO access errors when doing the in depth
286 * PGOOD fault analysis to avoid repeatedly logging errors
287 * for bad hardware
288 */
289 bool gpioAccessError = false;
290
291 /**
Matt Spinler110b2842017-08-21 15:23:27 -0500292 * The path to the GPIO device used to read
293 * the GPI (PGOOD) status
294 */
295 std::experimental::filesystem::path gpioDevice;
296
297 /**
Matt Spinlerb54357f2017-08-21 14:38:54 -0500298 * Map of device instance to the instance specific data
299 */
300 static const ucd90160::DeviceMap deviceMap;
301};
302
303}
304}