blob: 313270143a83633274aca1c7a201f4ee3d919b36 [file] [log] [blame]
#pragma once

#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"

#include <sdbusplus/bus.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

15namespace witherspoon
16{
17namespace power
18{
19
Matt Spinlerf0f02b92018-10-25 16:12:43 -050020// Error type, callout
21using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
Matt Spinler8bc12832017-09-19 11:17:54 -050022
Matt Spinlerb54357f2017-08-21 14:38:54 -050023/**
24 * @class UCD90160
25 *
26 * This class implements fault analysis for the UCD90160
27 * power sequencer device.
28 *
29 */
30class UCD90160 : public Device
31{
Matt Spinlerf0f02b92018-10-25 16:12:43 -050032 public:
33 UCD90160() = delete;
34 ~UCD90160() = default;
35 UCD90160(const UCD90160&) = delete;
36 UCD90160& operator=(const UCD90160&) = delete;
37 UCD90160(UCD90160&&) = default;
38 UCD90160& operator=(UCD90160&&) = default;
Matt Spinlerb54357f2017-08-21 14:38:54 -050039
Matt Spinlerf0f02b92018-10-25 16:12:43 -050040 /**
41 * Constructor
42 *
43 * @param[in] instance - the device instance number
44 * @param[in] bus - D-Bus bus object
45 */
Patrick Williams1426a102022-07-22 19:26:55 -050046 UCD90160(size_t instance, sdbusplus::bus_t& bus);
Matt Spinlerb54357f2017-08-21 14:38:54 -050047
Matt Spinlerf0f02b92018-10-25 16:12:43 -050048 /**
49 * Analyzes the device for errors when the device is
50 * known to be in an error state. A log will be created.
51 */
52 void onFailure() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050053
Matt Spinlerf0f02b92018-10-25 16:12:43 -050054 /**
55 * Checks the device for errors and only creates a log
56 * if one is found.
57 */
58 void analyze() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050059
Matt Spinlerf0f02b92018-10-25 16:12:43 -050060 /**
61 * Clears faults in the device
62 */
Patrick Williamsb7ed5772023-05-10 07:50:45 -050063 void clearFaults() override {}
Matt Spinlerb54357f2017-08-21 14:38:54 -050064
Matt Spinlerf0f02b92018-10-25 16:12:43 -050065 private:
66 /**
67 * Reports an error for a GPU PGOOD failure
68 *
69 * @param[in] callout - the GPU callout string
70 */
71 void gpuPGOODError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050072
Matt Spinlerf0f02b92018-10-25 16:12:43 -050073 /**
74 * Reports an error for a GPU OverTemp failure
75 *
76 * @param[in] callout - the GPU callout string
77 */
78 void gpuOverTempError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050079
Matt Spinlerf0f02b92018-10-25 16:12:43 -050080 /**
Brandon Wyman03c19db2019-05-10 17:46:41 -050081 * Reports an error for a MEM_GOODx failure.
82 *
83 * @param[in] callout - The MEM callout string
84 */
85 void memGoodError(const std::string& callout);
86
87 /**
Matt Spinlerf0f02b92018-10-25 16:12:43 -050088 * Given the device path for a chip, find its gpiochip
89 * path
90 *
91 * @param[in] path - device path, like
92 * /sys/devices/.../i2c-11/11-0064
93 *
94 * @return fs::path - The gpiochip path, like
95 * /dev/gpiochip1
96 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -050097 static std::filesystem::path
98 findGPIODevice(const std::filesystem::path& path);
Matt Spinler7b14db22017-09-19 10:57:54 -050099
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500100 /**
101 * Checks for VOUT faults on the device.
102 *
103 * This device can monitor voltages of its dependent
104 * devices, and VOUT faults are voltage faults
105 * on these devices.
106 *
107 * @return bool - true if an error log was created
108 */
109 bool checkVOUTFaults();
Matt Spinler7b14db22017-09-19 10:57:54 -0500110
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500111 /**
112 * Checks for PGOOD faults on the device.
113 *
114 * This device can monitor the PGOOD signals of its dependent
115 * devices, and this check will look for faults of
116 * those PGOODs.
117 *
118 * @param[in] polling - If this is running while polling for errors,
119 * as opposing to analyzing a fail condition.
120 *
121 * @return bool - true if an error log was created
122 */
123 bool checkPGOODFaults(bool polling);
Matt Spinler110b2842017-08-21 15:23:27 -0500124
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500125 /**
126 * Creates an error log when the device has an error
127 * but it isn't a PGOOD or voltage failure.
128 */
129 void createPowerFaultLog();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500130
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500131 /**
132 * Reads the status_word register
133 *
134 * @return uint16_t - the register contents
135 */
136 uint16_t readStatusWord();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500137
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500138 /**
139 * Reads the mfr_status register
140 *
141 * @return uint32_t - the register contents
142 */
143 uint32_t readMFRStatus();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500144
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500145 /**
146 * Does any additional fault analysis based on the
147 * value of the extraAnalysisType field in the GPIOConfig
148 * entry.
149 *
150 * Used to get better callouts.
151 *
152 * @param[in] config - the GPIOConfig entry to use
153 *
154 * @return bool - true if a HW error was found, false else
155 */
156 bool doExtraAnalysis(const ucd90160::GPIConfig& config);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500157
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500158 /**
159 * Does additional fault analysis using GPIOs to
160 * specifically identify the failing part.
161 *
162 * Used when there are too many PGOOD inputs for
163 * the UCD90160 to handle, so just a summary bit
164 * is wired into the chip, and then the specific
165 * fault GPIOs are off of a different GPIO device,
166 * like an IO expander.
167 *
168 * @param[in] type - the type of analysis to do
169 *
170 * @return bool - true if a HW error was found, false else
171 */
172 bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500173
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500174 /**
175 * Says if we've already logged a Vout fault
176 *
177 * The policy is only 1 of the same error will
178 * be logged for the duration of a class instance.
179 *
180 * @param[in] page - the page to check
181 *
182 * @return bool - if we've already logged a fault against
183 * this page
184 */
185 inline bool isVoutFaultLogged(uint32_t page) const
186 {
187 return std::find(voutErrors.begin(), voutErrors.end(), page) !=
188 voutErrors.end();
189 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500190
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500191 /**
192 * Saves that a Vout fault has been logged
193 *
194 * @param[in] page - the page the error was logged against
195 */
196 inline void setVoutFaultLogged(uint32_t page)
197 {
198 voutErrors.push_back(page);
199 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500200
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500201 /**
202 * Says if we've already logged a PGOOD fault
203 *
204 * The policy is only 1 of the same errors will
205 * be logged for the duration of a class instance.
206 *
207 * @param[in] input - the input to check
208 *
209 * @return bool - if we've already logged a fault against
210 * this input
211 */
212 inline bool isPGOODFaultLogged(uint32_t input) const
213 {
214 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
215 pgoodErrors.end();
216 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500217
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500218 /**
219 * Says if we've already logged a specific fault
220 * against a specific part
221 *
222 * @param[in] callout - error type and name tuple
223 *
224 * @return bool - if we've already logged this fault
225 * against this part
226 */
227 inline bool isPartCalledOut(const PartCallout& callout) const
228 {
229 return std::find(callouts.begin(), callouts.end(), callout) !=
230 callouts.end();
231 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500232
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500233 /**
234 * Saves that a PGOOD fault has been logged
235 *
236 * @param[in] input - the input the error was logged against
237 */
238 inline void setPGOODFaultLogged(uint32_t input)
239 {
240 pgoodErrors.push_back(input);
241 }
Matt Spinlerd998b732017-08-21 15:35:54 -0500242
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500243 /**
244 * Saves that a specific fault on a specific part has been done
245 *
246 * @param[in] callout - error type and name tuple
247 */
248 inline void setPartCallout(const PartCallout& callout)
249 {
250 callouts.push_back(callout);
251 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500252
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500253 /**
254 * List of pages that Vout errors have
255 * already been logged against
256 */
257 std::vector<uint32_t> voutErrors;
Matt Spinlerd998b732017-08-21 15:35:54 -0500258
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500259 /**
260 * List of inputs that PGOOD errors have
261 * already been logged against
262 */
263 std::vector<uint32_t> pgoodErrors;
Matt Spinler8bc12832017-09-19 11:17:54 -0500264
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500265 /**
266 * List of callouts that already been done
267 */
268 std::vector<PartCallout> callouts;
Matt Spinlere7e432b2017-08-21 15:01:40 -0500269
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500270 /**
271 * The read/write interface to this hardware
272 */
273 pmbus::PMBus interface;
Matt Spinlerd998b732017-08-21 15:35:54 -0500274
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500275 /**
276 * A map of GPI pin IDs to the GPIO object
277 * used to access them
278 */
279 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
Matt Spinler8bc12832017-09-19 11:17:54 -0500280
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500281 /**
282 * Keeps track of device access errors to avoid repeatedly
283 * logging errors for bad hardware
284 */
285 bool accessError = false;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500286
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500287 /**
288 * Keeps track of GPIO access errors when doing the in depth
289 * PGOOD fault analysis to avoid repeatedly logging errors
290 * for bad hardware
291 */
292 bool gpioAccessError = false;
Matt Spinlerd998b732017-08-21 15:35:54 -0500293
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500294 /**
295 * The path to the GPIO device used to read
296 * the GPI (PGOOD) status
297 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -0500298 std::filesystem::path gpioDevice;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500299
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500300 /**
301 * The D-Bus bus object
302 */
Patrick Williams1426a102022-07-22 19:26:55 -0500303 sdbusplus::bus_t& bus;
Matt Spinler8bc12832017-09-19 11:17:54 -0500304
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500305 /**
306 * Map of device instance to the instance specific data
307 */
308 static const ucd90160::DeviceMap deviceMap;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500309};
310
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500311} // namespace power
312} // namespace witherspoon