/**
 * Copyright © 2020 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include "fan.hpp"
#include "fan_error.hpp"
#include "power_off_rule.hpp"
#include "power_state.hpp"
#include "tach_sensor.hpp"
#include "trust_manager.hpp"
#include "types.hpp"

#include <nlohmann/json.hpp>
#include <sdbusplus/bus.hpp>
#include <sdeventplus/event.hpp>
#include <sdeventplus/source/signal.hpp>

#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <vector>
34
35namespace phosphor::fan::monitor
36{
37
38using json = nlohmann::json;
39
Mike Cappsfdcd5db2021-05-20 12:47:10 -040040// Mapping from service name to sensor
41using SensorMapType =
42 std::map<std::string, std::set<std::shared_ptr<TachSensor>>>;
43
Matthew Barthc95c5272020-06-15 19:51:13 -050044class System
45{
46 public:
47 System() = delete;
Matt Spinlerac1efc12020-10-27 10:20:11 -050048 ~System() = default;
Matthew Barthc95c5272020-06-15 19:51:13 -050049 System(const System&) = delete;
50 System(System&&) = delete;
51 System& operator=(const System&) = delete;
52 System& operator=(System&&) = delete;
Matthew Barthc95c5272020-06-15 19:51:13 -050053
54 /**
55 * Constructor
Matthew Barthc95c5272020-06-15 19:51:13 -050056 *
57 * @param[in] mode - mode of fan monitor
58 * @param[in] bus - sdbusplus bus object
59 * @param[in] event - event loop reference
60 */
Patrick Williamscb356d42022-07-22 19:26:53 -050061 System(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event);
Matthew Barthc95c5272020-06-15 19:51:13 -050062
Matthew Barthd06905c2020-06-12 08:13:06 -050063 /**
64 * @brief Callback function to handle receiving a HUP signal to reload the
65 * JSON configuration.
66 */
67 void sighupHandler(sdeventplus::source::Signal&,
68 const struct signalfd_siginfo*);
69
Matt Spinlerb63aa092020-10-14 09:45:11 -050070 /**
71 * @brief Called from the fan when it changes either
72 * present or functional status to update the
73 * fan health map.
74 *
75 * @param[in] fan - The fan that changed
Matt Spinler4283c5d2021-03-01 15:56:00 -060076 * @param[in] skipRulesCheck - If the rules checks should be done now.
Matt Spinlerb63aa092020-10-14 09:45:11 -050077 */
Matt Spinler4283c5d2021-03-01 15:56:00 -060078 void fanStatusChange(const Fan& fan, bool skipRulesCheck = false);
Matt Spinlerb63aa092020-10-14 09:45:11 -050079
Matt Spinlerf13b42e2020-10-26 15:29:49 -050080 /**
81 * @brief Called when a fan sensor's error timer expires, which
82 * happens when the sensor has been nonfunctional for a
83 * certain amount of time. An event log will be created.
84 *
85 * @param[in] fan - The parent fan of the sensor
86 * @param[in] sensor - The faulted sensor
87 */
88 void sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor);
89
Matt Spinler27f6b682020-10-27 08:43:37 -050090 /**
91 * @brief Called when the timer that starts when a fan is missing
92 * has expired so an event log needs to be created.
93 *
94 * @param[in] fan - The missing fan.
95 */
96 void fanMissingErrorTimerExpired(const Fan& fan);
97
Matt Spinlerac1efc12020-10-27 10:20:11 -050098 /**
99 * @brief Called by the power off actions to log an error when there is
100 * a power off due to fan problems.
101 *
102 * The error it logs is just the last fan error that occurred.
103 */
104 void logShutdownError();
105
Matt Spinler7d135642021-02-04 12:44:17 -0600106 /**
107 * @brief Returns true if power is on
108 */
109 bool isPowerOn() const
110 {
111 return _powerState->isPowerOn();
112 }
113
114 /**
Mike Cappsb4379a12021-10-11 14:18:06 -0400115 * @brief tests the presence of Inventory and calls load() if present, else
116 * waits for Inventory asynchronously and has a callback to load() when
117 * present
Matt Spinler7d135642021-02-04 12:44:17 -0600118 */
Matthew Barth823bc492021-06-21 14:19:09 -0500119 void start();
Matt Spinler7d135642021-02-04 12:44:17 -0600120
Mike Cappsb4379a12021-10-11 14:18:06 -0400121 /**
122 * @brief Parses and populates the fan monitor trust groups and list of fans
123 */
124 void load();
125
Matthew Barthc95c5272020-06-15 19:51:13 -0500126 private:
Mike Cappsb4379a12021-10-11 14:18:06 -0400127 /**
128 * @brief Callback from D-Bus when Inventory service comes online
129 *
130 * @param[in] msg - Service details.
131 */
Patrick Williamscb356d42022-07-22 19:26:53 -0500132 void inventoryOnlineCb(sdbusplus::message_t& msg);
Mike Cappsb4379a12021-10-11 14:18:06 -0400133
Mike Capps683a96c2022-04-27 16:46:06 -0400134 /**
135 * @brief Create a BMC Dump
136 */
137 void createBmcDump() const;
138
Matthew Barthc95c5272020-06-15 19:51:13 -0500139 /* The mode of fan monitor */
140 Mode _mode;
141
142 /* The sdbusplus bus object */
Patrick Williamscb356d42022-07-22 19:26:53 -0500143 sdbusplus::bus_t& _bus;
Matthew Barthc95c5272020-06-15 19:51:13 -0500144
145 /* The event loop reference */
146 const sdeventplus::Event& _event;
147
148 /* Trust manager of trust groups */
149 std::unique_ptr<phosphor::fan::trust::Manager> _trust;
150
Mike Cappsb4379a12021-10-11 14:18:06 -0400151 /* match object to detect Inventory service */
Patrick Williamscb356d42022-07-22 19:26:53 -0500152 std::unique_ptr<sdbusplus::bus::match_t> _inventoryMatch;
Mike Cappsb4379a12021-10-11 14:18:06 -0400153
Matthew Barthc95c5272020-06-15 19:51:13 -0500154 /* List of fan objects to monitor */
155 std::vector<std::unique_ptr<Fan>> _fans;
156
157 /**
Matt Spinlerb63aa092020-10-14 09:45:11 -0500158 * @brief The latest health of all the fans
159 */
160 FanHealth _fanHealth;
161
162 /**
Matt Spinlere892e392020-10-14 13:21:31 -0500163 * @brief The object to watch the power state
164 */
165 std::unique_ptr<PowerState> _powerState;
166
167 /**
168 * @brief The power off rules, for shutting down the system
169 * due to fan failures.
170 */
171 std::vector<std::unique_ptr<PowerOffRule>> _powerOffRules;
172
173 /**
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500174 * @brief The number of concurrently nonfunctional fan sensors
175 * there must be for an event log created due to a
176 * nonfunctional fan sensor to have an Error severity as
177 * opposed to an Informational one.
178 */
179 std::optional<size_t> _numNonfuncSensorsBeforeError;
180
181 /**
Matt Spinlerac1efc12020-10-27 10:20:11 -0500182 * @brief The most recently committed fan error.
183 */
184 std::unique_ptr<FanError> _lastError;
185
186 /**
Matt Spinlerc8d3c512021-01-06 14:22:25 -0600187 * @brief The thermal alert D-Bus object
188 */
189 ThermalAlertObject _thermalAlert;
190
191 /**
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400192 * @brief The tach sensors D-Bus match objects
193 */
Patrick Williamscb356d42022-07-22 19:26:53 -0500194 std::vector<std::unique_ptr<sdbusplus::bus::match_t>> _sensorMatch;
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400195
196 /**
Mike Cappsb4379a12021-10-11 14:18:06 -0400197 * @brief true if config files have been loaded
Matt Spinler7d135642021-02-04 12:44:17 -0600198 */
Mike Cappsb4379a12021-10-11 14:18:06 -0400199 bool _loaded = false;
Matt Spinler7d135642021-02-04 12:44:17 -0600200
201 /**
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500202 * @brief Captures tach sensor data as JSON for use in
203 * fan fault and fan missing event logs.
204 *
205 * @return json - The JSON data
206 */
207 json captureSensorData();
208
209 /**
Mike Capps25f03272021-09-13 13:38:44 -0400210 * @brief creates a subscription (service->sensor) to take sensors
211 * on/offline when D-Bus starts/stops updating values
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400212 *
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400213 */
Mike Capps25f03272021-09-13 13:38:44 -0400214 void subscribeSensorsToServices();
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400215
216 /**
Matthew Barthc95c5272020-06-15 19:51:13 -0500217 * @brief Retrieve the configured trust groups
218 *
219 * @param[in] jsonObj - JSON object to parse from
220 *
221 * @return List of functions applied on trust groups
222 */
223 const std::vector<CreateGroupFunction> getTrustGroups(const json& jsonObj);
224
225 /**
Matthew Barthd06905c2020-06-12 08:13:06 -0500226 * @brief Set the trust manager's list of trust group functions
227 *
228 * @param[in] groupFuncs - list of trust group functions
229 */
230 void setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs);
231
232 /**
Matthew Barthc95c5272020-06-15 19:51:13 -0500233 * @brief Retrieve the configured fan definitions
234 *
235 * @param[in] jsonObj - JSON object to parse from
236 *
237 * @return List of fan definition data on the fans configured
238 */
239 const std::vector<FanDefinition> getFanDefinitions(const json& jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -0500240
241 /**
242 * @brief Set the list of fans to be monitored
243 *
244 * @param[in] fanDefs - list of fan definitions to create fans monitored
245 */
246 void setFans(const std::vector<FanDefinition>& fanDefs);
Matt Spinlerb63aa092020-10-14 09:45:11 -0500247
248 /**
249 * @brief Updates the fan health map entry for the fan passed in
250 *
251 * @param[in] fan - The fan to update the health map with
252 */
253 void updateFanHealth(const Fan& fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500254
255 /**
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400256 * @brief callback when a tach sensor signal goes offline
257 *
258 * @param[in] msg - D-Bus message containing details (inc. service name)
259 *
260 * @param[in] sensorMap - map providing sensor access for each service
261 */
Patrick Williamscb356d42022-07-22 19:26:53 -0500262 void tachSignalOffline(sdbusplus::message_t& msg,
Mike Cappsfdcd5db2021-05-20 12:47:10 -0400263 const SensorMapType& sensorMap);
264
265 /**
Matt Spinlere892e392020-10-14 13:21:31 -0500266 * @brief The function that runs when the power state changes
267 *
268 * @param[in] powerStateOn - If power is now on or not
269 */
270 void powerStateChanged(bool powerStateOn);
271
272 /**
273 * @brief Reads the fault configuration from the JSON config
274 * file, such as the power off rule configuration.
275 *
276 * @param[in] jsonObj - JSON object to parse from
277 */
278 void setFaultConfig(const json& jsonObj);
Matt Spinlerbb449c12021-06-14 11:45:28 -0600279
280 /**
281 * @brief Log an error and shut down due to an offline fan controller
282 */
283 void handleOfflineFanController();
Matthew Barthc95c5272020-06-15 19:51:13 -0500284};
285
286} // namespace phosphor::fan::monitor