blob: 6a841021eb2da530d5548061503e316bcb2ee746 [file] [log] [blame]
/**
 * Copyright © 2021 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#pragma once
17
18#include "action.hpp"
19#include "action_environment.hpp"
20#include "error_history.hpp"
21#include "phase_fault.hpp"
22#include "services.hpp"
23
24#include <memory>
25#include <string>
26#include <utility>
27#include <vector>
28
29namespace phosphor::power::regulators
30{
31
// Forward declarations to avoid circular dependencies.  Only the class names
// are introduced here; the full definitions live in other headers.
class Chassis;
class Device;
class System;
36
37/**
38 * @class PhaseFaultDetection
39 *
40 * Detects and logs redundant phase faults in a voltage regulator.
41 *
42 * A voltage regulator is sometimes called a "phase controller" because it
43 * controls one or more phases that perform the actual voltage regulation.
44 *
45 * A regulator may have redundant phases. If a redundant phase fails, the
46 * regulator will continue to provide the desired output voltage. However, a
47 * phase fault error should be logged warning the user that the regulator has
48 * lost redundancy.
49 *
50 * The technique used to detect a phase fault varies depending on the regulator
51 * hardware. Often a bit is checked in a status register. The status register
52 * could exist in the regulator or in a related I/O expander.
53 *
Shawn McCarney54b3ab92021-09-14 17:28:56 -050054 * Phase fault detection is executed repeatedly based on a timer. A phase fault
55 * must be detected two consecutive times before an error is logged. This
56 * provides "de-glitching" to ignore transient hardware problems.
Shawn McCarney9284c302021-09-02 11:23:04 -050057 *
58 * Phase faults are detected by executing actions.
59 */
60class PhaseFaultDetection
61{
62 public:
63 // Specify which compiler-generated methods we want
64 PhaseFaultDetection() = delete;
65 PhaseFaultDetection(const PhaseFaultDetection&) = delete;
66 PhaseFaultDetection(PhaseFaultDetection&&) = delete;
67 PhaseFaultDetection& operator=(const PhaseFaultDetection&) = delete;
68 PhaseFaultDetection& operator=(PhaseFaultDetection&&) = delete;
69 ~PhaseFaultDetection() = default;
70
71 /**
72 * Constructor.
73 *
74 * @param actions Actions that detect phase faults in the regulator.
75 * @param deviceID Unique ID of the device to use when detecting phase
76 * faults. If not specified, the regulator will be used.
77 */
78 explicit PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions,
79 const std::string& deviceID = "") :
80 actions{std::move(actions)},
81 deviceID{deviceID}
Adriana Kobylak0c9a33d2021-09-13 18:05:09 +000082 {}
Shawn McCarney9284c302021-09-02 11:23:04 -050083
84 /**
85 * Clears all error history.
86 *
87 * All data on previously logged errors will be deleted. If errors occur
88 * again in the future they will be logged again.
89 *
90 * This method is normally called when the system is being powered on.
91 */
92 void clearErrorHistory()
93 {
94 errorHistory.clear();
95 actionErrorCount = 0;
96 nFaultCount = 0;
97 nPlus1FaultCount = 0;
98 }
99
100 /**
101 * Executes the actions that detect phase faults in the regulator.
102 *
103 * If the required number of consecutive phase faults are detected, an error
104 * is logged.
105 *
106 * @param services system services like error logging and the journal
107 * @param system system that contains the chassis
108 * @param chassis chassis that contains the regulator device
109 * @param regulator voltage regulator device
110 */
111 void execute(Services& services, System& system, Chassis& chassis,
112 Device& regulator);
113
114 /**
115 * Returns the actions that detect phase faults in the regulator.
116 *
117 * @return actions
118 */
119 const std::vector<std::unique_ptr<Action>>& getActions() const
120 {
121 return actions;
122 }
123
124 /**
125 * Returns the unique ID of the device to use when detecting phase
126 * faults.
127 *
128 * If the value is "", the regulator will be used.
129 *
130 * @return device ID
131 */
132 const std::string& getDeviceID() const
133 {
134 return deviceID;
135 }
136
137 private:
138 /**
139 * Checks if the specified phase fault type was detected.
140 *
141 * If the fault type was detected, increments the counter tracking
142 * consecutive faults. If the required number of consecutive faults have
143 * been detected, logs a phase fault error.
144 *
145 * The ActionEnvironment contains the set of phase fault types that were
146 * detected (if any).
147 *
148 * @param faultType phase fault type to check
149 * @param services system services like error logging and the journal
150 * @param regulator voltage regulator device
151 * @param environment action execution environment
152 */
153 void checkForPhaseFault(PhaseFaultType faultType, Services& services,
154 Device& regulator, ActionEnvironment& environment);
155
156 /**
157 * Logs an error for the specified phase fault type.
158 *
159 * @param faultType phase fault type that occurred
160 * @param services system services like error logging and the journal
161 * @param regulator voltage regulator device
162 * @param environment action execution environment
163 */
164 void logPhaseFault(PhaseFaultType faultType, Services& services,
165 Device& regulator, ActionEnvironment& environment);
166
167 /**
168 * Actions that detect phase faults in the regulator.
169 */
170 std::vector<std::unique_ptr<Action>> actions{};
171
172 /**
173 * Unique ID of the device to use when detecting phase faults.
174 *
175 * Sometimes a separate device, such as an I/O expander, is accessed to
176 * obtain the phase fault status for a regulator.
177 *
178 * If the value is "", the regulator will be used.
179 */
180 const std::string deviceID{};
181
182 /**
183 * History of which error types have been logged.
184 *
185 * Since phase fault detection runs repeatedly based on a timer, each error
186 * type is only logged once.
187 */
188 ErrorHistory errorHistory{};
189
190 /**
191 * Number of errors that have occurred while executing actions, resulting in
192 * an exception.
193 */
194 unsigned short actionErrorCount{0};
195
196 /**
197 * Number of consecutive N phase faults that have been detected.
198 */
199 unsigned short nFaultCount{0};
200
201 /**
202 * Number of consecutive N+1 phase faults that have been detected.
203 */
204 unsigned short nPlus1FaultCount{0};
205};
206
207} // namespace phosphor::power::regulators