blob: a6af9bff2d075b709a84f56daa2b97b68fb0a6b9 [file] [log] [blame]
Matthew Barthc95c5272020-06-15 19:51:13 -05001/**
2 * Copyright © 2020 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Matthew Barthc95c5272020-06-15 19:51:13 -050016#include "system.hpp"
17
18#include "fan.hpp"
19#include "fan_defs.hpp"
20#include "tach_sensor.hpp"
21#include "trust_manager.hpp"
22#include "types.hpp"
23#ifdef MONITOR_USE_JSON
24#include "json_parser.hpp"
25#endif
26
Matt Spinlerc8d3c512021-01-06 14:22:25 -060027#include "config.h"
28
Matthew Barthc95c5272020-06-15 19:51:13 -050029#include <nlohmann/json.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050030#include <phosphor-logging/log.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050031#include <sdbusplus/bus.hpp>
32#include <sdeventplus/event.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050033#include <sdeventplus/source/signal.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050034
35namespace phosphor::fan::monitor
36{
37
38using json = nlohmann::json;
Matt Spinlerf13b42e2020-10-26 15:29:49 -050039using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
40
Matthew Barthd06905c2020-06-12 08:13:06 -050041using namespace phosphor::logging;
Matthew Barthc95c5272020-06-15 19:51:13 -050042
43System::System(Mode mode, sdbusplus::bus::bus& bus,
44 const sdeventplus::Event& event) :
45 _mode(mode),
Matt Spinlerc8d3c512021-01-06 14:22:25 -060046 _bus(bus), _event(event),
47 _powerState(std::make_unique<PGoodState>(
Matt Spinlere892e392020-10-14 13:21:31 -050048 bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
Matt Spinlerc8d3c512021-01-06 14:22:25 -060049 std::placeholders::_1))),
50 _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
Matt Spinler7d135642021-02-04 12:44:17 -060051{}
Matt Spinlere892e392020-10-14 13:21:31 -050052
Matt Spinler7d135642021-02-04 12:44:17 -060053void System::start(
54#ifdef MONITOR_USE_JSON
55 const std::string& confFile
56#endif
57)
58{
59 _started = true;
Matthew Barthc95c5272020-06-15 19:51:13 -050060 json jsonObj = json::object();
61#ifdef MONITOR_USE_JSON
Matt Spinler7d135642021-02-04 12:44:17 -060062 jsonObj = fan::JsonConfig::load(confFile);
Matthew Barthc95c5272020-06-15 19:51:13 -050063#endif
64 // Retrieve and set trust groups within the trust manager
Matthew Barthd06905c2020-06-12 08:13:06 -050065 setTrustMgr(getTrustGroups(jsonObj));
Matthew Barthc95c5272020-06-15 19:51:13 -050066 // Retrieve fan definitions and create fan objects to be monitored
Matthew Barthd06905c2020-06-12 08:13:06 -050067 setFans(getFanDefinitions(jsonObj));
Matt Spinlere892e392020-10-14 13:21:31 -050068 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050069 log<level::INFO>("Configuration loaded");
Matt Spinlere892e392020-10-14 13:21:31 -050070
Matt Spinlere892e392020-10-14 13:21:31 -050071 if (_powerState->isPowerOn())
72 {
73 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
74 [this](auto& rule) {
Matt Spinlere892e392020-10-14 13:21:31 -050075 rule->check(PowerRuleState::runtime, _fanHealth);
76 });
77 }
Matthew Barthd06905c2020-06-12 08:13:06 -050078}
79
80void System::sighupHandler(sdeventplus::source::Signal&,
81 const struct signalfd_siginfo*)
82{
83 try
Matthew Barthc95c5272020-06-15 19:51:13 -050084 {
Matthew Barthd06905c2020-06-12 08:13:06 -050085 json jsonObj = json::object();
86#ifdef MONITOR_USE_JSON
87 jsonObj = getJsonObj(_bus);
88#endif
89 auto trustGrps = getTrustGroups(jsonObj);
90 auto fanDefs = getFanDefinitions(jsonObj);
91 // Set configured trust groups
92 setTrustMgr(trustGrps);
93 // Clear/set configured fan definitions
94 _fans.clear();
Matt Spinlerb63aa092020-10-14 09:45:11 -050095 _fanHealth.clear();
Matthew Barthd06905c2020-06-12 08:13:06 -050096 setFans(fanDefs);
Matt Spinlere892e392020-10-14 13:21:31 -050097 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050098 log<level::INFO>("Configuration reloaded successfully");
Matt Spinlere892e392020-10-14 13:21:31 -050099
100 if (_powerState->isPowerOn())
101 {
102 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
103 [this](auto& rule) {
104 rule->check(PowerRuleState::runtime, _fanHealth);
105 });
106 }
Matthew Barthd06905c2020-06-12 08:13:06 -0500107 }
108 catch (std::runtime_error& re)
109 {
110 log<level::ERR>("Error reloading config, no config changes made",
111 entry("LOAD_ERROR=%s", re.what()));
Matthew Barthc95c5272020-06-15 19:51:13 -0500112 }
113}
114
115const std::vector<CreateGroupFunction>
116 System::getTrustGroups(const json& jsonObj)
117{
118#ifdef MONITOR_USE_JSON
119 return getTrustGrps(jsonObj);
120#else
121 return trustGroups;
122#endif
123}
124
Matthew Barthd06905c2020-06-12 08:13:06 -0500125void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
126{
127 _trust = std::make_unique<trust::Manager>(groupFuncs);
128}
129
Matthew Barthc95c5272020-06-15 19:51:13 -0500130const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
131{
132#ifdef MONITOR_USE_JSON
133 return getFanDefs(jsonObj);
134#else
135 return fanDefinitions;
136#endif
137}
138
Matthew Barthd06905c2020-06-12 08:13:06 -0500139void System::setFans(const std::vector<FanDefinition>& fanDefs)
140{
141 for (const auto& fanDef : fanDefs)
142 {
143 // Check if a condition exists on the fan
144 auto condition = std::get<conditionField>(fanDef);
145 if (condition)
146 {
147 // Condition exists, skip adding fan if it fails
148 if (!(*condition)(_bus))
149 {
150 continue;
151 }
152 }
153 _fans.emplace_back(
Matt Spinlerb0412d02020-10-12 16:53:52 -0500154 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
Matt Spinlerb63aa092020-10-14 09:45:11 -0500155
156 updateFanHealth(*(_fans.back()));
Matthew Barthd06905c2020-06-12 08:13:06 -0500157 }
158}
159
Matt Spinlerb63aa092020-10-14 09:45:11 -0500160void System::updateFanHealth(const Fan& fan)
161{
162 std::vector<bool> sensorStatus;
163 for (const auto& sensor : fan.sensors())
164 {
165 sensorStatus.push_back(sensor->functional());
166 }
167
168 _fanHealth[fan.getName()] =
169 std::make_tuple(fan.present(), std::move(sensorStatus));
170}
171
Matt Spinler4283c5d2021-03-01 15:56:00 -0600172void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
Matt Spinlerb63aa092020-10-14 09:45:11 -0500173{
174 updateFanHealth(fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500175
Matt Spinler4283c5d2021-03-01 15:56:00 -0600176 if (_powerState->isPowerOn() && !skipRulesCheck)
Matt Spinlere892e392020-10-14 13:21:31 -0500177 {
178 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
179 [this](auto& rule) {
180 rule->check(PowerRuleState::runtime, _fanHealth);
181 });
182 }
183}
184
185void System::setFaultConfig(const json& jsonObj)
186{
187#ifdef MONITOR_USE_JSON
188 std::shared_ptr<PowerInterfaceBase> powerInterface =
Matt Spinlerba3ee9a2021-01-06 14:45:50 -0600189 std::make_shared<PowerInterface>(_thermalAlert);
Matt Spinlere892e392020-10-14 13:21:31 -0500190
Matt Spinlerac1efc12020-10-27 10:20:11 -0500191 PowerOffAction::PrePowerOffFunc func =
192 std::bind(std::mem_fn(&System::logShutdownError), this);
193
194 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500195
196 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
Matt Spinlere892e392020-10-14 13:21:31 -0500197#endif
198}
199
200void System::powerStateChanged(bool powerStateOn)
201{
Matt Spinler7d135642021-02-04 12:44:17 -0600202 std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) {
203 fan->powerStateChanged(powerStateOn);
204 });
205
Matt Spinlere892e392020-10-14 13:21:31 -0500206 if (powerStateOn)
207 {
Matt Spinler7d135642021-02-04 12:44:17 -0600208 if (!_started)
209 {
210 log<level::ERR>("No conf file found at power on");
Matthew Barthba53d3e2021-02-24 07:48:37 -0600211 throw std::runtime_error("No conf file found at power on");
Matt Spinler7d135642021-02-04 12:44:17 -0600212 }
213
Matt Spinlere892e392020-10-14 13:21:31 -0500214 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
215 [this](auto& rule) {
216 rule->check(PowerRuleState::atPgood, _fanHealth);
217 });
218 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
219 [this](auto& rule) {
220 rule->check(PowerRuleState::runtime, _fanHealth);
221 });
222 }
223 else
224 {
Matt Spinlerc8d3c512021-01-06 14:22:25 -0600225 _thermalAlert.enabled(false);
226
Matt Spinlere892e392020-10-14 13:21:31 -0500227 // Cancel any in-progress power off actions
228 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
229 [this](auto& rule) { rule->cancel(); });
230 }
Matt Spinlerb63aa092020-10-14 09:45:11 -0500231}
232
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500233void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
234{
235 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
236
237 getLogger().log(
238 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
239 sensor.name()),
240 Logger::error);
241
242 // In order to know if the event log should have a severity of error or
243 // informational, count the number of existing nonfunctional sensors and
244 // compare it to _numNonfuncSensorsBeforeError.
245 size_t nonfuncSensors = 0;
246 for (const auto& fan : _fans)
247 {
248 for (const auto& s : fan->sensors())
249 {
250 // Don't count nonfunctional sensors that still have their
251 // error timer running as nonfunctional since they haven't
252 // had event logs created for those errors yet.
253 if (!s->functional() && !s->errorTimerRunning())
254 {
255 nonfuncSensors++;
256 }
257 }
258 }
259
260 Severity severity = Severity::Error;
261 if (nonfuncSensors < _numNonfuncSensorsBeforeError)
262 {
263 severity = Severity::Informational;
264 }
265
266 auto error =
267 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
268 fanPath, sensor.name(), severity);
269
270 auto sensorData = captureSensorData();
271 error->commit(sensorData);
272
Matt Spinlerac1efc12020-10-27 10:20:11 -0500273 // Save the error so it can be committed again on a power off.
274 _lastError = std::move(error);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500275}
276
Matt Spinler27f6b682020-10-27 08:43:37 -0500277void System::fanMissingErrorTimerExpired(const Fan& fan)
278{
279 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
280
281 getLogger().log(
282 fmt::format("Creating event log for missing fan {}", fanPath),
283 Logger::error);
284
285 auto error = std::make_unique<FanError>(
286 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
287
288 auto sensorData = captureSensorData();
289 error->commit(sensorData);
290
Matt Spinlerac1efc12020-10-27 10:20:11 -0500291 // Save the error so it can be committed again on a power off.
292 _lastError = std::move(error);
293}
294
295void System::logShutdownError()
296{
297 if (_lastError)
298 {
299 getLogger().log("Re-committing previous fan error before power off");
300
301 // Still use the latest sensor data
302 auto sensorData = captureSensorData();
Matt Spinlerf435eb12021-05-11 14:44:25 -0500303 _lastError->commit(sensorData, true);
Matt Spinlerac1efc12020-10-27 10:20:11 -0500304 }
Matt Spinler27f6b682020-10-27 08:43:37 -0500305}
306
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500307json System::captureSensorData()
308{
309 json data;
310
311 for (const auto& fan : _fans)
312 {
313 for (const auto& sensor : fan->sensors())
314 {
315 json values;
316 values["present"] = fan->present();
317 values["functional"] = sensor->functional();
318 values["tach"] = sensor->getInput();
319 if (sensor->hasTarget())
320 {
321 values["target"] = sensor->getTarget();
322 }
323
324 data["sensors"][sensor->name()] = values;
325 }
326 }
327
328 return data;
329}
330
Matthew Barthc95c5272020-06-15 19:51:13 -0500331} // namespace phosphor::fan::monitor