blob: 467d8c7cd1f614c4f7fbbca47e15907334c7f302 [file] [log] [blame]
Shawn McCarney9284c302021-09-02 11:23:04 -05001/**
2 * Copyright © 2021 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#pragma once
17
18#include "action.hpp"
19#include "action_environment.hpp"
20#include "error_history.hpp"
21#include "phase_fault.hpp"
22#include "services.hpp"
23
24#include <memory>
25#include <string>
26#include <utility>
27#include <vector>
28
29namespace phosphor::power::regulators
30{
31
// These classes are only referenced (never defined) in this header, so they
// are forward declared here rather than included; this prevents circular
// header dependencies.
class Chassis;
class Device;
class System;
36
37/**
38 * @class PhaseFaultDetection
39 *
40 * Detects and logs redundant phase faults in a voltage regulator.
41 *
42 * A voltage regulator is sometimes called a "phase controller" because it
43 * controls one or more phases that perform the actual voltage regulation.
44 *
45 * A regulator may have redundant phases. If a redundant phase fails, the
46 * regulator will continue to provide the desired output voltage. However, a
47 * phase fault error should be logged warning the user that the regulator has
48 * lost redundancy.
49 *
50 * The technique used to detect a phase fault varies depending on the regulator
51 * hardware. Often a bit is checked in a status register. The status register
52 * could exist in the regulator or in a related I/O expander.
53 *
54 * Phase fault detection is performed every 15 seconds. A phase fault must be
55 * detected two consecutive times (15 seconds apart) before an error is logged.
56 * This provides "de-glitching" to ignore transient hardware problems.
57 *
58 * Phase faults are detected by executing actions.
59 */
60class PhaseFaultDetection
61{
62 public:
63 // Specify which compiler-generated methods we want
64 PhaseFaultDetection() = delete;
65 PhaseFaultDetection(const PhaseFaultDetection&) = delete;
66 PhaseFaultDetection(PhaseFaultDetection&&) = delete;
67 PhaseFaultDetection& operator=(const PhaseFaultDetection&) = delete;
68 PhaseFaultDetection& operator=(PhaseFaultDetection&&) = delete;
69 ~PhaseFaultDetection() = default;
70
71 /**
72 * Constructor.
73 *
74 * @param actions Actions that detect phase faults in the regulator.
75 * @param deviceID Unique ID of the device to use when detecting phase
76 * faults. If not specified, the regulator will be used.
77 */
78 explicit PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions,
79 const std::string& deviceID = "") :
80 actions{std::move(actions)},
81 deviceID{deviceID}
82 {
83 }
84
85 /**
86 * Clears all error history.
87 *
88 * All data on previously logged errors will be deleted. If errors occur
89 * again in the future they will be logged again.
90 *
91 * This method is normally called when the system is being powered on.
92 */
93 void clearErrorHistory()
94 {
95 errorHistory.clear();
96 actionErrorCount = 0;
97 nFaultCount = 0;
98 nPlus1FaultCount = 0;
99 }
100
101 /**
102 * Executes the actions that detect phase faults in the regulator.
103 *
104 * If the required number of consecutive phase faults are detected, an error
105 * is logged.
106 *
107 * @param services system services like error logging and the journal
108 * @param system system that contains the chassis
109 * @param chassis chassis that contains the regulator device
110 * @param regulator voltage regulator device
111 */
112 void execute(Services& services, System& system, Chassis& chassis,
113 Device& regulator);
114
115 /**
116 * Returns the actions that detect phase faults in the regulator.
117 *
118 * @return actions
119 */
120 const std::vector<std::unique_ptr<Action>>& getActions() const
121 {
122 return actions;
123 }
124
125 /**
126 * Returns the unique ID of the device to use when detecting phase
127 * faults.
128 *
129 * If the value is "", the regulator will be used.
130 *
131 * @return device ID
132 */
133 const std::string& getDeviceID() const
134 {
135 return deviceID;
136 }
137
138 private:
139 /**
140 * Checks if the specified phase fault type was detected.
141 *
142 * If the fault type was detected, increments the counter tracking
143 * consecutive faults. If the required number of consecutive faults have
144 * been detected, logs a phase fault error.
145 *
146 * The ActionEnvironment contains the set of phase fault types that were
147 * detected (if any).
148 *
149 * @param faultType phase fault type to check
150 * @param services system services like error logging and the journal
151 * @param regulator voltage regulator device
152 * @param environment action execution environment
153 */
154 void checkForPhaseFault(PhaseFaultType faultType, Services& services,
155 Device& regulator, ActionEnvironment& environment);
156
157 /**
158 * Logs an error for the specified phase fault type.
159 *
160 * @param faultType phase fault type that occurred
161 * @param services system services like error logging and the journal
162 * @param regulator voltage regulator device
163 * @param environment action execution environment
164 */
165 void logPhaseFault(PhaseFaultType faultType, Services& services,
166 Device& regulator, ActionEnvironment& environment);
167
168 /**
169 * Actions that detect phase faults in the regulator.
170 */
171 std::vector<std::unique_ptr<Action>> actions{};
172
173 /**
174 * Unique ID of the device to use when detecting phase faults.
175 *
176 * Sometimes a separate device, such as an I/O expander, is accessed to
177 * obtain the phase fault status for a regulator.
178 *
179 * If the value is "", the regulator will be used.
180 */
181 const std::string deviceID{};
182
183 /**
184 * History of which error types have been logged.
185 *
186 * Since phase fault detection runs repeatedly based on a timer, each error
187 * type is only logged once.
188 */
189 ErrorHistory errorHistory{};
190
191 /**
192 * Number of errors that have occurred while executing actions, resulting in
193 * an exception.
194 */
195 unsigned short actionErrorCount{0};
196
197 /**
198 * Number of consecutive N phase faults that have been detected.
199 */
200 unsigned short nFaultCount{0};
201
202 /**
203 * Number of consecutive N+1 phase faults that have been detected.
204 */
205 unsigned short nPlus1FaultCount{0};
206};
207
208} // namespace phosphor::power::regulators