#pragma once

#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <sdbusplus/bus.hpp>
#include <string>
#include <tuple>
#include <vector>

14namespace witherspoon
15{
16namespace power
17{
18
Matt Spinlerf0f02b92018-10-25 16:12:43 -050019// Error type, callout
20using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
Matt Spinler8bc12832017-09-19 11:17:54 -050021
Matt Spinlerb54357f2017-08-21 14:38:54 -050022/**
23 * @class UCD90160
24 *
25 * This class implements fault analysis for the UCD90160
26 * power sequencer device.
27 *
28 */
29class UCD90160 : public Device
30{
Matt Spinlerf0f02b92018-10-25 16:12:43 -050031 public:
32 UCD90160() = delete;
33 ~UCD90160() = default;
34 UCD90160(const UCD90160&) = delete;
35 UCD90160& operator=(const UCD90160&) = delete;
36 UCD90160(UCD90160&&) = default;
37 UCD90160& operator=(UCD90160&&) = default;
Matt Spinlerb54357f2017-08-21 14:38:54 -050038
Matt Spinlerf0f02b92018-10-25 16:12:43 -050039 /**
40 * Constructor
41 *
42 * @param[in] instance - the device instance number
43 * @param[in] bus - D-Bus bus object
44 */
45 UCD90160(size_t instance, sdbusplus::bus::bus& bus);
Matt Spinlerb54357f2017-08-21 14:38:54 -050046
Matt Spinlerf0f02b92018-10-25 16:12:43 -050047 /**
48 * Analyzes the device for errors when the device is
49 * known to be in an error state. A log will be created.
50 */
51 void onFailure() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050052
Matt Spinlerf0f02b92018-10-25 16:12:43 -050053 /**
54 * Checks the device for errors and only creates a log
55 * if one is found.
56 */
57 void analyze() override;
Matt Spinlerb54357f2017-08-21 14:38:54 -050058
Matt Spinlerf0f02b92018-10-25 16:12:43 -050059 /**
60 * Clears faults in the device
61 */
62 void clearFaults() override
63 {
64 }
Matt Spinlerb54357f2017-08-21 14:38:54 -050065
Matt Spinlerf0f02b92018-10-25 16:12:43 -050066 private:
67 /**
68 * Reports an error for a GPU PGOOD failure
69 *
70 * @param[in] callout - the GPU callout string
71 */
72 void gpuPGOODError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050073
Matt Spinlerf0f02b92018-10-25 16:12:43 -050074 /**
75 * Reports an error for a GPU OverTemp failure
76 *
77 * @param[in] callout - the GPU callout string
78 */
79 void gpuOverTempError(const std::string& callout);
Matt Spinlerb54357f2017-08-21 14:38:54 -050080
Matt Spinlerf0f02b92018-10-25 16:12:43 -050081 /**
Brandon Wyman03c19db2019-05-10 17:46:41 -050082 * Reports an error for a MEM_GOODx failure.
83 *
84 * @param[in] callout - The MEM callout string
85 */
86 void memGoodError(const std::string& callout);
87
88 /**
Matt Spinlerf0f02b92018-10-25 16:12:43 -050089 * Given the device path for a chip, find its gpiochip
90 * path
91 *
92 * @param[in] path - device path, like
93 * /sys/devices/.../i2c-11/11-0064
94 *
95 * @return fs::path - The gpiochip path, like
96 * /dev/gpiochip1
97 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -050098 static std::filesystem::path
99 findGPIODevice(const std::filesystem::path& path);
Matt Spinler7b14db22017-09-19 10:57:54 -0500100
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500101 /**
102 * Checks for VOUT faults on the device.
103 *
104 * This device can monitor voltages of its dependent
105 * devices, and VOUT faults are voltage faults
106 * on these devices.
107 *
108 * @return bool - true if an error log was created
109 */
110 bool checkVOUTFaults();
Matt Spinler7b14db22017-09-19 10:57:54 -0500111
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500112 /**
113 * Checks for PGOOD faults on the device.
114 *
115 * This device can monitor the PGOOD signals of its dependent
116 * devices, and this check will look for faults of
117 * those PGOODs.
118 *
119 * @param[in] polling - If this is running while polling for errors,
120 * as opposing to analyzing a fail condition.
121 *
122 * @return bool - true if an error log was created
123 */
124 bool checkPGOODFaults(bool polling);
Matt Spinler110b2842017-08-21 15:23:27 -0500125
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500126 /**
127 * Creates an error log when the device has an error
128 * but it isn't a PGOOD or voltage failure.
129 */
130 void createPowerFaultLog();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500131
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500132 /**
133 * Reads the status_word register
134 *
135 * @return uint16_t - the register contents
136 */
137 uint16_t readStatusWord();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500138
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500139 /**
140 * Reads the mfr_status register
141 *
142 * @return uint32_t - the register contents
143 */
144 uint32_t readMFRStatus();
Matt Spinlerb54357f2017-08-21 14:38:54 -0500145
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500146 /**
147 * Does any additional fault analysis based on the
148 * value of the extraAnalysisType field in the GPIOConfig
149 * entry.
150 *
151 * Used to get better callouts.
152 *
153 * @param[in] config - the GPIOConfig entry to use
154 *
155 * @return bool - true if a HW error was found, false else
156 */
157 bool doExtraAnalysis(const ucd90160::GPIConfig& config);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500158
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500159 /**
160 * Does additional fault analysis using GPIOs to
161 * specifically identify the failing part.
162 *
163 * Used when there are too many PGOOD inputs for
164 * the UCD90160 to handle, so just a summary bit
165 * is wired into the chip, and then the specific
166 * fault GPIOs are off of a different GPIO device,
167 * like an IO expander.
168 *
169 * @param[in] type - the type of analysis to do
170 *
171 * @return bool - true if a HW error was found, false else
172 */
173 bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
Matt Spinlere7e432b2017-08-21 15:01:40 -0500174
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500175 /**
176 * Says if we've already logged a Vout fault
177 *
178 * The policy is only 1 of the same error will
179 * be logged for the duration of a class instance.
180 *
181 * @param[in] page - the page to check
182 *
183 * @return bool - if we've already logged a fault against
184 * this page
185 */
186 inline bool isVoutFaultLogged(uint32_t page) const
187 {
188 return std::find(voutErrors.begin(), voutErrors.end(), page) !=
189 voutErrors.end();
190 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500191
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500192 /**
193 * Saves that a Vout fault has been logged
194 *
195 * @param[in] page - the page the error was logged against
196 */
197 inline void setVoutFaultLogged(uint32_t page)
198 {
199 voutErrors.push_back(page);
200 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500201
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500202 /**
203 * Says if we've already logged a PGOOD fault
204 *
205 * The policy is only 1 of the same errors will
206 * be logged for the duration of a class instance.
207 *
208 * @param[in] input - the input to check
209 *
210 * @return bool - if we've already logged a fault against
211 * this input
212 */
213 inline bool isPGOODFaultLogged(uint32_t input) const
214 {
215 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
216 pgoodErrors.end();
217 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500218
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500219 /**
220 * Says if we've already logged a specific fault
221 * against a specific part
222 *
223 * @param[in] callout - error type and name tuple
224 *
225 * @return bool - if we've already logged this fault
226 * against this part
227 */
228 inline bool isPartCalledOut(const PartCallout& callout) const
229 {
230 return std::find(callouts.begin(), callouts.end(), callout) !=
231 callouts.end();
232 }
Matt Spinlere7e432b2017-08-21 15:01:40 -0500233
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500234 /**
235 * Saves that a PGOOD fault has been logged
236 *
237 * @param[in] input - the input the error was logged against
238 */
239 inline void setPGOODFaultLogged(uint32_t input)
240 {
241 pgoodErrors.push_back(input);
242 }
Matt Spinlerd998b732017-08-21 15:35:54 -0500243
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500244 /**
245 * Saves that a specific fault on a specific part has been done
246 *
247 * @param[in] callout - error type and name tuple
248 */
249 inline void setPartCallout(const PartCallout& callout)
250 {
251 callouts.push_back(callout);
252 }
Matt Spinler8bc12832017-09-19 11:17:54 -0500253
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500254 /**
255 * List of pages that Vout errors have
256 * already been logged against
257 */
258 std::vector<uint32_t> voutErrors;
Matt Spinlerd998b732017-08-21 15:35:54 -0500259
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500260 /**
261 * List of inputs that PGOOD errors have
262 * already been logged against
263 */
264 std::vector<uint32_t> pgoodErrors;
Matt Spinler8bc12832017-09-19 11:17:54 -0500265
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500266 /**
267 * List of callouts that already been done
268 */
269 std::vector<PartCallout> callouts;
Matt Spinlere7e432b2017-08-21 15:01:40 -0500270
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500271 /**
272 * The read/write interface to this hardware
273 */
274 pmbus::PMBus interface;
Matt Spinlerd998b732017-08-21 15:35:54 -0500275
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500276 /**
277 * A map of GPI pin IDs to the GPIO object
278 * used to access them
279 */
280 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
Matt Spinler8bc12832017-09-19 11:17:54 -0500281
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500282 /**
283 * Keeps track of device access errors to avoid repeatedly
284 * logging errors for bad hardware
285 */
286 bool accessError = false;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500287
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500288 /**
289 * Keeps track of GPIO access errors when doing the in depth
290 * PGOOD fault analysis to avoid repeatedly logging errors
291 * for bad hardware
292 */
293 bool gpioAccessError = false;
Matt Spinlerd998b732017-08-21 15:35:54 -0500294
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500295 /**
296 * The path to the GPIO device used to read
297 * the GPI (PGOOD) status
298 */
Brandon Wyman9c7897c2019-03-28 17:42:34 -0500299 std::filesystem::path gpioDevice;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500300
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500301 /**
302 * The D-Bus bus object
303 */
304 sdbusplus::bus::bus& bus;
Matt Spinler8bc12832017-09-19 11:17:54 -0500305
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500306 /**
307 * Map of device instance to the instance specific data
308 */
309 static const ucd90160::DeviceMap deviceMap;
Matt Spinlerb54357f2017-08-21 14:38:54 -0500310};
311
Matt Spinlerf0f02b92018-10-25 16:12:43 -0500312} // namespace power
313} // namespace witherspoon