blob: b66bb82512d0ab2665554f5f46da7d0dc17c0b4a [file] [log] [blame]
#pragma once

#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"

#include <sdbusplus/bus.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

15namespace witherspoon
16{
17namespace power
18{
19
Matt Spinlerf0f02b92018-10-25 16:12:43 -050020// Error type, callout
21using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
Matt Spinler8bc12832017-09-19 11:17:54 -050022
Matt Spinlerb54357f2017-08-21 14:38:54 -050023/**
24 * @class UCD90160
25 *
26 * This class implements fault analysis for the UCD90160
27 * power sequencer device.
28 *
29 */
30class UCD90160 : public Device
31{
Matt Spinlerf0f02b92018-10-25 16:12:43 -050032 public:
33 UCD90160() = delete;
34 ~UCD90160() = default;
35 UCD90160(const UCD90160&) = delete;
36 UCD90160& operator=(const UCD90160&) = delete;
37 UCD90160(UCD90160&&) = default;
38 UCD90160& operator=(UCD90160&&) = default;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinlerf0f02b92018-10-25 16:12:43 -050040 /**
41 * Constructor
42 *
43 * @param[in] instance - the device instance number
44 * @param[in] bus - D-Bus bus object
45 */
46 UCD90160(size_t instance, sdbusplus::bus::bus& bus);
Matt Spinlerb54357f2017-08-21 14:38:54 -050047
Matt Spinlerf0f02b92018-10-25 16:12:43 -050048 /**
49 * Analyzes the device for errors when the device is
50 * known to be in an error state. A log will be created.
51 */
52 void onFailure() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050053
Matt Spinlerf0f02b92018-10-25 16:12:43 -050054 /**
55 * Checks the device for errors and only creates a log
56 * if one is found.
57 */
58 void analyze() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050059
Matt Spinlerf0f02b92018-10-25 16:12:43 -050060 /**
61 * Clears faults in the device
62 */
63 void clearFaults() override
Patrick Williams2c4fbc42020-06-26 15:33:11 -050064 {}
Matt Spinlerb54357f2017-08-21 14:38:54 -050065
Matt Spinlerf0f02b92018-10-25 16:12:43 -050066 private:
67 /**
68 * Reports an error for a GPU PGOOD failure
69 *
70 * @param[in] callout - the GPU callout string
71 */
72 void gpuPGOODError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050073
Matt Spinlerf0f02b92018-10-25 16:12:43 -050074 /**
75 * Reports an error for a GPU OverTemp failure
76 *
77 * @param[in] callout - the GPU callout string
78 */
79 void gpuOverTempError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050080
Matt Spinlerf0f02b92018-10-25 16:12:43 -050081 /**
Brandon Wyman03c19db2019-05-10 17:46:41 -050082 * Reports an error for a MEM_GOODx failure.
83 *
84 * @param[in] callout - The MEM callout string
85 */
86 void memGoodError(const std::string& callout);
87
88 /**
Matt Spinlerf0f02b92018-10-25 16:12:43 -050089 * Given the device path for a chip, find its gpiochip
90 * path
91 *
92 * @param[in] path - device path, like
93 * /sys/devices/.../i2c-11/11-0064
94 *
95 * @return fs::path - The gpiochip path, like
96 * /dev/gpiochip1
97 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -050098 static std::filesystem::path
99 findGPIODevice(const std::filesystem::path& path);
Matt Spinler7b14db22017-09-19 10:57:54 -0500100
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500101 /**
102 * Checks for VOUT faults on the device.
103 *
104 * This device can monitor voltages of its dependent
105 * devices, and VOUT faults are voltage faults
106 * on these devices.
107 *
108 * @return bool - true if an error log was created
109 */
110 bool checkVOUTFaults();
Matt Spinler7b14db22017-09-19 10:57:54 -0500111
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500112 /**
113 * Checks for PGOOD faults on the device.
114 *
115 * This device can monitor the PGOOD signals of its dependent
116 * devices, and this check will look for faults of
117 * those PGOODs.
118 *
119 * @param[in] polling - If this is running while polling for errors,
120 * as opposing to analyzing a fail condition.
121 *
122 * @return bool - true if an error log was created
123 */
124 bool checkPGOODFaults(bool polling);
Matt Spinler110b2842017-08-21 15:23:27 -0500125
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500126 /**
127 * Creates an error log when the device has an error
128 * but it isn't a PGOOD or voltage failure.
129 */
130 void createPowerFaultLog();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500131
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500132 /**
133 * Reads the status_word register
134 *
135 * @return uint16_t - the register contents
136 */
137 uint16_t readStatusWord();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500138
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500139 /**
140 * Reads the mfr_status register
141 *
142 * @return uint32_t - the register contents
143 */
144 uint32_t readMFRStatus();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500145
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500146 /**
147 * Does any additional fault analysis based on the
148 * value of the extraAnalysisType field in the GPIOConfig
149 * entry.
150 *
151 * Used to get better callouts.
152 *
153 * @param[in] config - the GPIOConfig entry to use
154 *
155 * @return bool - true if a HW error was found, false else
156 */
157 bool doExtraAnalysis(const ucd90160::GPIConfig& config);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500158
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500159 /**
160 * Does additional fault analysis using GPIOs to
161 * specifically identify the failing part.
162 *
163 * Used when there are too many PGOOD inputs for
164 * the UCD90160 to handle, so just a summary bit
165 * is wired into the chip, and then the specific
166 * fault GPIOs are off of a different GPIO device,
167 * like an IO expander.
168 *
169 * @param[in] type - the type of analysis to do
170 *
171 * @return bool - true if a HW error was found, false else
172 */
173 bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500174
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500175 /**
176 * Says if we've already logged a Vout fault
177 *
178 * The policy is only 1 of the same error will
179 * be logged for the duration of a class instance.
180 *
181 * @param[in] page - the page to check
182 *
183 * @return bool - if we've already logged a fault against
184 * this page
185 */
186 inline bool isVoutFaultLogged(uint32_t page) const
187 {
188 return std::find(voutErrors.begin(), voutErrors.end(), page) !=
189 voutErrors.end();
190 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500191
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500192 /**
193 * Saves that a Vout fault has been logged
194 *
195 * @param[in] page - the page the error was logged against
196 */
197 inline void setVoutFaultLogged(uint32_t page)
198 {
199 voutErrors.push_back(page);
200 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500201
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500202 /**
203 * Says if we've already logged a PGOOD fault
204 *
205 * The policy is only 1 of the same errors will
206 * be logged for the duration of a class instance.
207 *
208 * @param[in] input - the input to check
209 *
210 * @return bool - if we've already logged a fault against
211 * this input
212 */
213 inline bool isPGOODFaultLogged(uint32_t input) const
214 {
215 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
216 pgoodErrors.end();
217 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500218
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500219 /**
220 * Says if we've already logged a specific fault
221 * against a specific part
222 *
223 * @param[in] callout - error type and name tuple
224 *
225 * @return bool - if we've already logged this fault
226 * against this part
227 */
228 inline bool isPartCalledOut(const PartCallout& callout) const
229 {
230 return std::find(callouts.begin(), callouts.end(), callout) !=
231 callouts.end();
232 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500233
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500234 /**
235 * Saves that a PGOOD fault has been logged
236 *
237 * @param[in] input - the input the error was logged against
238 */
239 inline void setPGOODFaultLogged(uint32_t input)
240 {
241 pgoodErrors.push_back(input);
242 }
Matt Spinlerd998b732017-08-21 15:35:54 -0500243
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500244 /**
245 * Saves that a specific fault on a specific part has been done
246 *
247 * @param[in] callout - error type and name tuple
248 */
249 inline void setPartCallout(const PartCallout& callout)
250 {
251 callouts.push_back(callout);
252 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500253
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500254 /**
255 * List of pages that Vout errors have
256 * already been logged against
257 */
258 std::vector<uint32_t> voutErrors;
Matt Spinlerd998b732017-08-21 15:35:54 -0500259
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500260 /**
261 * List of inputs that PGOOD errors have
262 * already been logged against
263 */
264 std::vector<uint32_t> pgoodErrors;
Matt Spinler8bc12832017-09-19 11:17:54 -0500265
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500266 /**
267 * List of callouts that already been done
268 */
269 std::vector<PartCallout> callouts;
Matt Spinlere7e432b2017-08-21 15:01:40 -0500270
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500271 /**
272 * The read/write interface to this hardware
273 */
274 pmbus::PMBus interface;
Matt Spinlerd998b732017-08-21 15:35:54 -0500275
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500276 /**
277 * A map of GPI pin IDs to the GPIO object
278 * used to access them
279 */
280 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
Matt Spinler8bc12832017-09-19 11:17:54 -0500281
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500282 /**
283 * Keeps track of device access errors to avoid repeatedly
284 * logging errors for bad hardware
285 */
286 bool accessError = false;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500287
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500288 /**
289 * Keeps track of GPIO access errors when doing the in depth
290 * PGOOD fault analysis to avoid repeatedly logging errors
291 * for bad hardware
292 */
293 bool gpioAccessError = false;
Matt Spinlerd998b732017-08-21 15:35:54 -0500294
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500295 /**
296 * The path to the GPIO device used to read
297 * the GPI (PGOOD) status
298 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -0500299 std::filesystem::path gpioDevice;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500300
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500301 /**
302 * The D-Bus bus object
303 */
304 sdbusplus::bus::bus& bus;
Matt Spinler8bc12832017-09-19 11:17:54 -0500305
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500306 /**
307 * Map of device instance to the instance specific data
308 */
309 static const ucd90160::DeviceMap deviceMap;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500310};
311
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500312} // namespace power
313} // namespace witherspoon