blob: 22279da59353ae1981c17f32be401e99f1eec4c5 [file] [log] [blame]
/**
 * Copyright © 2020 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Matthew Barthc95c5272020-06-15 19:51:13 -050016#include "system.hpp"
17
18#include "fan.hpp"
19#include "fan_defs.hpp"
20#include "tach_sensor.hpp"
21#include "trust_manager.hpp"
22#include "types.hpp"
23#ifdef MONITOR_USE_JSON
24#include "json_parser.hpp"
25#endif
26
Matt Spinlerc8d3c512021-01-06 14:22:25 -060027#include "config.h"
28
Matthew Barthc95c5272020-06-15 19:51:13 -050029#include <nlohmann/json.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050030#include <phosphor-logging/log.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050031#include <sdbusplus/bus.hpp>
32#include <sdeventplus/event.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050033#include <sdeventplus/source/signal.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050034
35namespace phosphor::fan::monitor
36{
37
38using json = nlohmann::json;
Matt Spinlerf13b42e2020-10-26 15:29:49 -050039using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
40
Matthew Barthd06905c2020-06-12 08:13:06 -050041using namespace phosphor::logging;
Matthew Barthc95c5272020-06-15 19:51:13 -050042
43System::System(Mode mode, sdbusplus::bus::bus& bus,
44 const sdeventplus::Event& event) :
45 _mode(mode),
Matt Spinlerc8d3c512021-01-06 14:22:25 -060046 _bus(bus), _event(event),
47 _powerState(std::make_unique<PGoodState>(
Matt Spinlere892e392020-10-14 13:21:31 -050048 bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
Matt Spinlerc8d3c512021-01-06 14:22:25 -060049 std::placeholders::_1))),
50 _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
Matt Spinler7d135642021-02-04 12:44:17 -060051{}
Matt Spinlere892e392020-10-14 13:21:31 -050052
Matthew Barth823bc492021-06-21 14:19:09 -050053void System::start()
Matt Spinler7d135642021-02-04 12:44:17 -060054{
55 _started = true;
Matthew Barthc95c5272020-06-15 19:51:13 -050056 json jsonObj = json::object();
57#ifdef MONITOR_USE_JSON
Matthew Barth823bc492021-06-21 14:19:09 -050058 auto confFile =
59 fan::JsonConfig::getConfFile(_bus, confAppName, confFileName);
Matt Spinler7d135642021-02-04 12:44:17 -060060 jsonObj = fan::JsonConfig::load(confFile);
Matthew Barthc95c5272020-06-15 19:51:13 -050061#endif
62 // Retrieve and set trust groups within the trust manager
Matthew Barthd06905c2020-06-12 08:13:06 -050063 setTrustMgr(getTrustGroups(jsonObj));
Matthew Barthc95c5272020-06-15 19:51:13 -050064 // Retrieve fan definitions and create fan objects to be monitored
Matthew Barthd06905c2020-06-12 08:13:06 -050065 setFans(getFanDefinitions(jsonObj));
Matt Spinlere892e392020-10-14 13:21:31 -050066 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050067 log<level::INFO>("Configuration loaded");
Matt Spinlere892e392020-10-14 13:21:31 -050068
Matt Spinlere892e392020-10-14 13:21:31 -050069 if (_powerState->isPowerOn())
70 {
71 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
72 [this](auto& rule) {
Matt Spinlere892e392020-10-14 13:21:31 -050073 rule->check(PowerRuleState::runtime, _fanHealth);
74 });
75 }
Matthew Barthd06905c2020-06-12 08:13:06 -050076}
77
78void System::sighupHandler(sdeventplus::source::Signal&,
79 const struct signalfd_siginfo*)
80{
81 try
Matthew Barthc95c5272020-06-15 19:51:13 -050082 {
Matthew Barthd06905c2020-06-12 08:13:06 -050083 json jsonObj = json::object();
84#ifdef MONITOR_USE_JSON
85 jsonObj = getJsonObj(_bus);
86#endif
87 auto trustGrps = getTrustGroups(jsonObj);
88 auto fanDefs = getFanDefinitions(jsonObj);
89 // Set configured trust groups
90 setTrustMgr(trustGrps);
91 // Clear/set configured fan definitions
92 _fans.clear();
Matt Spinlerb63aa092020-10-14 09:45:11 -050093 _fanHealth.clear();
Matthew Barthd06905c2020-06-12 08:13:06 -050094 setFans(fanDefs);
Matt Spinlere892e392020-10-14 13:21:31 -050095 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050096 log<level::INFO>("Configuration reloaded successfully");
Matt Spinlere892e392020-10-14 13:21:31 -050097
98 if (_powerState->isPowerOn())
99 {
100 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
101 [this](auto& rule) {
102 rule->check(PowerRuleState::runtime, _fanHealth);
103 });
104 }
Matthew Barthd06905c2020-06-12 08:13:06 -0500105 }
106 catch (std::runtime_error& re)
107 {
108 log<level::ERR>("Error reloading config, no config changes made",
109 entry("LOAD_ERROR=%s", re.what()));
Matthew Barthc95c5272020-06-15 19:51:13 -0500110 }
111}
112
113const std::vector<CreateGroupFunction>
114 System::getTrustGroups(const json& jsonObj)
115{
116#ifdef MONITOR_USE_JSON
117 return getTrustGrps(jsonObj);
118#else
119 return trustGroups;
120#endif
121}
122
Matthew Barthd06905c2020-06-12 08:13:06 -0500123void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
124{
125 _trust = std::make_unique<trust::Manager>(groupFuncs);
126}
127
Matthew Barthc95c5272020-06-15 19:51:13 -0500128const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
129{
130#ifdef MONITOR_USE_JSON
131 return getFanDefs(jsonObj);
132#else
133 return fanDefinitions;
134#endif
135}
136
Matthew Barthd06905c2020-06-12 08:13:06 -0500137void System::setFans(const std::vector<FanDefinition>& fanDefs)
138{
139 for (const auto& fanDef : fanDefs)
140 {
141 // Check if a condition exists on the fan
142 auto condition = std::get<conditionField>(fanDef);
143 if (condition)
144 {
145 // Condition exists, skip adding fan if it fails
146 if (!(*condition)(_bus))
147 {
148 continue;
149 }
150 }
151 _fans.emplace_back(
Matt Spinlerb0412d02020-10-12 16:53:52 -0500152 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
Matt Spinlerb63aa092020-10-14 09:45:11 -0500153
154 updateFanHealth(*(_fans.back()));
Matthew Barthd06905c2020-06-12 08:13:06 -0500155 }
156}
157
Matt Spinlerb63aa092020-10-14 09:45:11 -0500158void System::updateFanHealth(const Fan& fan)
159{
160 std::vector<bool> sensorStatus;
161 for (const auto& sensor : fan.sensors())
162 {
163 sensorStatus.push_back(sensor->functional());
164 }
165
166 _fanHealth[fan.getName()] =
167 std::make_tuple(fan.present(), std::move(sensorStatus));
168}
169
Matt Spinler4283c5d2021-03-01 15:56:00 -0600170void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
Matt Spinlerb63aa092020-10-14 09:45:11 -0500171{
172 updateFanHealth(fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500173
Matt Spinler4283c5d2021-03-01 15:56:00 -0600174 if (_powerState->isPowerOn() && !skipRulesCheck)
Matt Spinlere892e392020-10-14 13:21:31 -0500175 {
176 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
177 [this](auto& rule) {
178 rule->check(PowerRuleState::runtime, _fanHealth);
179 });
180 }
181}
182
183void System::setFaultConfig(const json& jsonObj)
184{
185#ifdef MONITOR_USE_JSON
186 std::shared_ptr<PowerInterfaceBase> powerInterface =
Matt Spinlerba3ee9a2021-01-06 14:45:50 -0600187 std::make_shared<PowerInterface>(_thermalAlert);
Matt Spinlere892e392020-10-14 13:21:31 -0500188
Matt Spinlerac1efc12020-10-27 10:20:11 -0500189 PowerOffAction::PrePowerOffFunc func =
190 std::bind(std::mem_fn(&System::logShutdownError), this);
191
192 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500193
194 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
Matt Spinlere892e392020-10-14 13:21:31 -0500195#endif
196}
197
198void System::powerStateChanged(bool powerStateOn)
199{
Matt Spinler7d135642021-02-04 12:44:17 -0600200 std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) {
201 fan->powerStateChanged(powerStateOn);
202 });
203
Matt Spinlere892e392020-10-14 13:21:31 -0500204 if (powerStateOn)
205 {
Matt Spinler7d135642021-02-04 12:44:17 -0600206 if (!_started)
207 {
208 log<level::ERR>("No conf file found at power on");
Matthew Barthba53d3e2021-02-24 07:48:37 -0600209 throw std::runtime_error("No conf file found at power on");
Matt Spinler7d135642021-02-04 12:44:17 -0600210 }
211
Matt Spinlere892e392020-10-14 13:21:31 -0500212 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
213 [this](auto& rule) {
214 rule->check(PowerRuleState::atPgood, _fanHealth);
215 });
216 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
217 [this](auto& rule) {
218 rule->check(PowerRuleState::runtime, _fanHealth);
219 });
220 }
221 else
222 {
Matt Spinlerc8d3c512021-01-06 14:22:25 -0600223 _thermalAlert.enabled(false);
224
Matt Spinlere892e392020-10-14 13:21:31 -0500225 // Cancel any in-progress power off actions
226 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
227 [this](auto& rule) { rule->cancel(); });
228 }
Matt Spinlerb63aa092020-10-14 09:45:11 -0500229}
230
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500231void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
232{
233 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
234
235 getLogger().log(
236 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
237 sensor.name()),
238 Logger::error);
239
240 // In order to know if the event log should have a severity of error or
241 // informational, count the number of existing nonfunctional sensors and
242 // compare it to _numNonfuncSensorsBeforeError.
243 size_t nonfuncSensors = 0;
244 for (const auto& fan : _fans)
245 {
246 for (const auto& s : fan->sensors())
247 {
248 // Don't count nonfunctional sensors that still have their
249 // error timer running as nonfunctional since they haven't
250 // had event logs created for those errors yet.
251 if (!s->functional() && !s->errorTimerRunning())
252 {
253 nonfuncSensors++;
254 }
255 }
256 }
257
258 Severity severity = Severity::Error;
259 if (nonfuncSensors < _numNonfuncSensorsBeforeError)
260 {
261 severity = Severity::Informational;
262 }
263
264 auto error =
265 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
266 fanPath, sensor.name(), severity);
267
268 auto sensorData = captureSensorData();
269 error->commit(sensorData);
270
Matt Spinlerac1efc12020-10-27 10:20:11 -0500271 // Save the error so it can be committed again on a power off.
272 _lastError = std::move(error);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500273}
274
Matt Spinler27f6b682020-10-27 08:43:37 -0500275void System::fanMissingErrorTimerExpired(const Fan& fan)
276{
277 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
278
279 getLogger().log(
280 fmt::format("Creating event log for missing fan {}", fanPath),
281 Logger::error);
282
283 auto error = std::make_unique<FanError>(
284 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
285
286 auto sensorData = captureSensorData();
287 error->commit(sensorData);
288
Matt Spinlerac1efc12020-10-27 10:20:11 -0500289 // Save the error so it can be committed again on a power off.
290 _lastError = std::move(error);
291}
292
293void System::logShutdownError()
294{
295 if (_lastError)
296 {
297 getLogger().log("Re-committing previous fan error before power off");
298
299 // Still use the latest sensor data
300 auto sensorData = captureSensorData();
Matt Spinlerf435eb12021-05-11 14:44:25 -0500301 _lastError->commit(sensorData, true);
Matt Spinlerac1efc12020-10-27 10:20:11 -0500302 }
Matt Spinler27f6b682020-10-27 08:43:37 -0500303}
304
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500305json System::captureSensorData()
306{
307 json data;
308
309 for (const auto& fan : _fans)
310 {
311 for (const auto& sensor : fan->sensors())
312 {
313 json values;
314 values["present"] = fan->present();
315 values["functional"] = sensor->functional();
316 values["tach"] = sensor->getInput();
317 if (sensor->hasTarget())
318 {
319 values["target"] = sensor->getTarget();
320 }
321
322 data["sensors"][sensor->name()] = values;
323 }
324 }
325
326 return data;
327}
328
Matthew Barthc95c5272020-06-15 19:51:13 -0500329} // namespace phosphor::fan::monitor