/**
 * Copyright © 2020 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16#pragma once
17
#include "fan.hpp"
#include "fan_error.hpp"
#include "power_off_rule.hpp"
#include "power_state.hpp"
#include "tach_sensor.hpp"
#include "trust_manager.hpp"
#include "types.hpp"

#include <nlohmann/json.hpp>
#include <sdbusplus/bus.hpp>
#include <sdeventplus/event.hpp>
#include <sdeventplus/source/signal.hpp>

#include <cstddef>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <vector>
34
35namespace phosphor::fan::monitor
36{
37
38using json = nlohmann::json;
39
Mike Cappsfdcd5db2021-05-20 12:47:10 -040040// Mapping from service name to sensor
41using SensorMapType =
42 std::map<std::string, std::set<std::shared_ptr<TachSensor>>>;
43
Matthew Barthc95c5272020-06-15 19:51:13 -050044class System
45{
46 public:
47 System() = delete;
Matt Spinlerac1efc12020-10-27 10:20:11 -050048 ~System() = default;
Matthew Barthc95c5272020-06-15 19:51:13 -050049 System(const System&) = delete;
50 System(System&&) = delete;
51 System& operator=(const System&) = delete;
52 System& operator=(System&&) = delete;
Matthew Barthc95c5272020-06-15 19:51:13 -050053
54 /**
55 * Constructor
Matthew Barthc95c5272020-06-15 19:51:13 -050056 *
57 * @param[in] mode - mode of fan monitor
58 * @param[in] bus - sdbusplus bus object
59 * @param[in] event - event loop reference
60 */
61 System(Mode mode, sdbusplus::bus::bus& bus,
62 const sdeventplus::Event& event);
63
Matthew Barthd06905c2020-06-12 08:13:06 -050064 /**
65 * @brief Callback function to handle receiving a HUP signal to reload the
66 * JSON configuration.
67 */
68 void sighupHandler(sdeventplus::source::Signal&,
69 const struct signalfd_siginfo*);
70
Matt Spinlerb63aa092020-10-14 09:45:11 -050071 /**
72 * @brief Called from the fan when it changes either
73 * present or functional status to update the
74 * fan health map.
75 *
76 * @param[in] fan - The fan that changed
Matt Spinler4283c5d2021-03-01 15:56:00 -060077 * @param[in] skipRulesCheck - If the rules checks should be done now.
Matt Spinlerb63aa092020-10-14 09:45:11 -050078 */
Matt Spinler4283c5d2021-03-01 15:56:00 -060079 void fanStatusChange(const Fan& fan, bool skipRulesCheck = false);
Matt Spinlerb63aa092020-10-14 09:45:11 -050080
Matt Spinlerf13b42e2020-10-26 15:29:49 -050081 /**
82 * @brief Called when a fan sensor's error timer expires, which
83 * happens when the sensor has been nonfunctional for a
84 * certain amount of time. An event log will be created.
85 *
86 * @param[in] fan - The parent fan of the sensor
87 * @param[in] sensor - The faulted sensor
88 */
89 void sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor);
90
Matt Spinler27f6b682020-10-27 08:43:37 -050091 /**
92 * @brief Called when the timer that starts when a fan is missing
93 * has expired so an event log needs to be created.
94 *
95 * @param[in] fan - The missing fan.
96 */
97 void fanMissingErrorTimerExpired(const Fan& fan);
98
Matt Spinlerac1efc12020-10-27 10:20:11 -050099 /**
100 * @brief Called by the power off actions to log an error when there is
101 * a power off due to fan problems.
102 *
103 * The error it logs is just the last fan error that occurred.
104 */
105 void logShutdownError();
106
Matt Spinler7d135642021-02-04 12:44:17 -0600107 /**
108 * @brief Returns true if power is on
109 */
110 bool isPowerOn() const
111 {
112 return _powerState->isPowerOn();
113 }
114
115 /**
Matthew Barth823bc492021-06-21 14:19:09 -0500116 * @brief Parses and populates the fan monitor trust groups and list of fans
Matt Spinler7d135642021-02-04 12:44:17 -0600117 */
Matthew Barth823bc492021-06-21 14:19:09 -0500118 void start();
Matt Spinler7d135642021-02-04 12:44:17 -0600119
Matthew Barthc95c5272020-06-15 19:51:13 -0500120 private:
121 /* The mode of fan monitor */
122 Mode _mode;
123
124 /* The sdbusplus bus object */
125 sdbusplus::bus::bus& _bus;
126
127 /* The event loop reference */
128 const sdeventplus::Event& _event;
129
130 /* Trust manager of trust groups */
131 std::unique_ptr<phosphor::fan::trust::Manager> _trust;
132
133 /* List of fan objects to monitor */
134 std::vector<std::unique_ptr<Fan>> _fans;
135
136 /**
Matt Spinlerb63aa092020-10-14 09:45:11 -0500137 * @brief The latest health of all the fans
138 */
139 FanHealth _fanHealth;
140
141 /**
Matt Spinlere892e392020-10-14 13:21:31 -0500142 * @brief The object to watch the power state
143 */
144 std::unique_ptr<PowerState> _powerState;
145
146 /**
147 * @brief The power off rules, for shutting down the system
148 * due to fan failures.
149 */
150 std::vector<std::unique_ptr<PowerOffRule>> _powerOffRules;
151
152 /**
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500153 * @brief The number of concurrently nonfunctional fan sensors
154 * there must be for an event log created due to a
155 * nonfunctional fan sensor to have an Error severity as
156 * opposed to an Informational one.
157 */
158 std::optional<size_t> _numNonfuncSensorsBeforeError;
159
160 /**
Matt Spinlerac1efc12020-10-27 10:20:11 -0500161 * @brief The most recently committed fan error.
162 */
163 std::unique_ptr<FanError> _lastError;
164
165 /**
Matt Spinlerc8d3c512021-01-06 14:22:25 -0600166 * @brief The thermal alert D-Bus object
167 */
168 ThermalAlertObject _thermalAlert;
169
170 /**
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400171 * @brief The tach sensors D-Bus match objects
172 */
173 std::vector<std::unique_ptr<sdbusplus::bus::match::match>> _sensorMatch;
174
175 /**
Matt Spinler7d135642021-02-04 12:44:17 -0600176 * @brief If start() has been called
177 */
178 bool _started = false;
179
180 /**
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500181 * @brief Captures tach sensor data as JSON for use in
182 * fan fault and fan missing event logs.
183 *
184 * @return json - The JSON data
185 */
186 json captureSensorData();
187
188 /**
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400189 * @brief Builds a mapping for sensors to be identified
190 * for a given service name.
191 *
192 * @return a map of service_name->[sensor1,sensor2...]
193 */
194 SensorMapType buildNameOwnerChangedMap() const;
195
196 /**
Matthew Barthc95c5272020-06-15 19:51:13 -0500197 * @brief Retrieve the configured trust groups
198 *
199 * @param[in] jsonObj - JSON object to parse from
200 *
201 * @return List of functions applied on trust groups
202 */
203 const std::vector<CreateGroupFunction> getTrustGroups(const json& jsonObj);
204
205 /**
Matthew Barthd06905c2020-06-12 08:13:06 -0500206 * @brief Set the trust manager's list of trust group functions
207 *
208 * @param[in] groupFuncs - list of trust group functions
209 */
210 void setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs);
211
212 /**
Matthew Barthc95c5272020-06-15 19:51:13 -0500213 * @brief Retrieve the configured fan definitions
214 *
215 * @param[in] jsonObj - JSON object to parse from
216 *
217 * @return List of fan definition data on the fans configured
218 */
219 const std::vector<FanDefinition> getFanDefinitions(const json& jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -0500220
221 /**
222 * @brief Set the list of fans to be monitored
223 *
224 * @param[in] fanDefs - list of fan definitions to create fans monitored
225 */
226 void setFans(const std::vector<FanDefinition>& fanDefs);
Matt Spinlerb63aa092020-10-14 09:45:11 -0500227
228 /**
229 * @brief Updates the fan health map entry for the fan passed in
230 *
231 * @param[in] fan - The fan to update the health map with
232 */
233 void updateFanHealth(const Fan& fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500234
235 /**
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400236 * @brief callback when a tach sensor signal goes offline
237 *
238 * @param[in] msg - D-Bus message containing details (inc. service name)
239 *
240 * @param[in] sensorMap - map providing sensor access for each service
241 */
242 void tachSignalOffline(sdbusplus::message::message& msg,
243 const SensorMapType& sensorMap);
244
245 /**
Matt Spinlere892e392020-10-14 13:21:31 -0500246 * @brief The function that runs when the power state changes
247 *
248 * @param[in] powerStateOn - If power is now on or not
249 */
250 void powerStateChanged(bool powerStateOn);
251
252 /**
253 * @brief Reads the fault configuration from the JSON config
254 * file, such as the power off rule configuration.
255 *
256 * @param[in] jsonObj - JSON object to parse from
257 */
258 void setFaultConfig(const json& jsonObj);
Matt Spinlerbb449c12021-06-14 11:45:28 -0600259
260 /**
261 * @brief Log an error and shut down due to an offline fan controller
262 */
263 void handleOfflineFanController();
Matthew Barthc95c5272020-06-15 19:51:13 -0500264};
265
266} // namespace phosphor::fan::monitor