blob: 01f1a9bdafb82851b2cbad9f436d0fa04c56e356 [file] [log] [blame]
/**
 * Copyright © 2020 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Matthew Barthc95c5272020-06-15 19:51:13 -050016#include "system.hpp"
17
18#include "fan.hpp"
19#include "fan_defs.hpp"
20#include "tach_sensor.hpp"
21#include "trust_manager.hpp"
22#include "types.hpp"
23#ifdef MONITOR_USE_JSON
24#include "json_parser.hpp"
25#endif
26
Matt Spinlerc8d3c512021-01-06 14:22:25 -060027#include "config.h"
28
Matt Spinlerbb449c12021-06-14 11:45:28 -060029#include "hwmon_ffdc.hpp"
30
Matthew Barthc95c5272020-06-15 19:51:13 -050031#include <nlohmann/json.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050032#include <phosphor-logging/log.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050033#include <sdbusplus/bus.hpp>
34#include <sdeventplus/event.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050035#include <sdeventplus/source/signal.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050036
37namespace phosphor::fan::monitor
38{
39
40using json = nlohmann::json;
Matt Spinlerf13b42e2020-10-26 15:29:49 -050041using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
42
Matthew Barthd06905c2020-06-12 08:13:06 -050043using namespace phosphor::logging;
Matthew Barthc95c5272020-06-15 19:51:13 -050044
45System::System(Mode mode, sdbusplus::bus::bus& bus,
46 const sdeventplus::Event& event) :
47 _mode(mode),
Matt Spinlerc8d3c512021-01-06 14:22:25 -060048 _bus(bus), _event(event),
49 _powerState(std::make_unique<PGoodState>(
Matt Spinlere892e392020-10-14 13:21:31 -050050 bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
Matt Spinlerc8d3c512021-01-06 14:22:25 -060051 std::placeholders::_1))),
52 _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
Matt Spinler7d135642021-02-04 12:44:17 -060053{}
Matt Spinlere892e392020-10-14 13:21:31 -050054
Matthew Barth823bc492021-06-21 14:19:09 -050055void System::start()
Matt Spinler7d135642021-02-04 12:44:17 -060056{
57 _started = true;
Matthew Barthc95c5272020-06-15 19:51:13 -050058 json jsonObj = json::object();
59#ifdef MONITOR_USE_JSON
Matthew Barth823bc492021-06-21 14:19:09 -050060 auto confFile =
61 fan::JsonConfig::getConfFile(_bus, confAppName, confFileName);
Matt Spinler7d135642021-02-04 12:44:17 -060062 jsonObj = fan::JsonConfig::load(confFile);
Matthew Barthc95c5272020-06-15 19:51:13 -050063#endif
64 // Retrieve and set trust groups within the trust manager
Matthew Barthd06905c2020-06-12 08:13:06 -050065 setTrustMgr(getTrustGroups(jsonObj));
Matthew Barthc95c5272020-06-15 19:51:13 -050066 // Retrieve fan definitions and create fan objects to be monitored
Matthew Barthd06905c2020-06-12 08:13:06 -050067 setFans(getFanDefinitions(jsonObj));
Matt Spinlere892e392020-10-14 13:21:31 -050068 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050069 log<level::INFO>("Configuration loaded");
Matt Spinlere892e392020-10-14 13:21:31 -050070
Matt Spinlere892e392020-10-14 13:21:31 -050071 if (_powerState->isPowerOn())
72 {
73 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
74 [this](auto& rule) {
Matt Spinlere892e392020-10-14 13:21:31 -050075 rule->check(PowerRuleState::runtime, _fanHealth);
76 });
77 }
Matthew Barthd06905c2020-06-12 08:13:06 -050078}
79
80void System::sighupHandler(sdeventplus::source::Signal&,
81 const struct signalfd_siginfo*)
82{
83 try
Matthew Barthc95c5272020-06-15 19:51:13 -050084 {
Matthew Barthd06905c2020-06-12 08:13:06 -050085 json jsonObj = json::object();
86#ifdef MONITOR_USE_JSON
87 jsonObj = getJsonObj(_bus);
88#endif
89 auto trustGrps = getTrustGroups(jsonObj);
90 auto fanDefs = getFanDefinitions(jsonObj);
91 // Set configured trust groups
92 setTrustMgr(trustGrps);
93 // Clear/set configured fan definitions
94 _fans.clear();
Matt Spinlerb63aa092020-10-14 09:45:11 -050095 _fanHealth.clear();
Matthew Barthd06905c2020-06-12 08:13:06 -050096 setFans(fanDefs);
Matt Spinlere892e392020-10-14 13:21:31 -050097 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050098 log<level::INFO>("Configuration reloaded successfully");
Matt Spinlere892e392020-10-14 13:21:31 -050099
100 if (_powerState->isPowerOn())
101 {
102 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
103 [this](auto& rule) {
104 rule->check(PowerRuleState::runtime, _fanHealth);
105 });
106 }
Matthew Barthd06905c2020-06-12 08:13:06 -0500107 }
108 catch (std::runtime_error& re)
109 {
110 log<level::ERR>("Error reloading config, no config changes made",
111 entry("LOAD_ERROR=%s", re.what()));
Matthew Barthc95c5272020-06-15 19:51:13 -0500112 }
113}
114
115const std::vector<CreateGroupFunction>
116 System::getTrustGroups(const json& jsonObj)
117{
118#ifdef MONITOR_USE_JSON
119 return getTrustGrps(jsonObj);
120#else
121 return trustGroups;
122#endif
123}
124
Matthew Barthd06905c2020-06-12 08:13:06 -0500125void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
126{
127 _trust = std::make_unique<trust::Manager>(groupFuncs);
128}
129
Matthew Barthc95c5272020-06-15 19:51:13 -0500130const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
131{
132#ifdef MONITOR_USE_JSON
133 return getFanDefs(jsonObj);
134#else
135 return fanDefinitions;
136#endif
137}
138
Matthew Barthd06905c2020-06-12 08:13:06 -0500139void System::setFans(const std::vector<FanDefinition>& fanDefs)
140{
141 for (const auto& fanDef : fanDefs)
142 {
143 // Check if a condition exists on the fan
144 auto condition = std::get<conditionField>(fanDef);
145 if (condition)
146 {
147 // Condition exists, skip adding fan if it fails
148 if (!(*condition)(_bus))
149 {
150 continue;
151 }
152 }
153 _fans.emplace_back(
Matt Spinlerb0412d02020-10-12 16:53:52 -0500154 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
Matt Spinlerb63aa092020-10-14 09:45:11 -0500155
156 updateFanHealth(*(_fans.back()));
Matthew Barthd06905c2020-06-12 08:13:06 -0500157 }
158}
159
Matt Spinlerb63aa092020-10-14 09:45:11 -0500160void System::updateFanHealth(const Fan& fan)
161{
162 std::vector<bool> sensorStatus;
163 for (const auto& sensor : fan.sensors())
164 {
165 sensorStatus.push_back(sensor->functional());
166 }
167
168 _fanHealth[fan.getName()] =
169 std::make_tuple(fan.present(), std::move(sensorStatus));
170}
171
Matt Spinler4283c5d2021-03-01 15:56:00 -0600172void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
Matt Spinlerb63aa092020-10-14 09:45:11 -0500173{
174 updateFanHealth(fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500175
Matt Spinler4283c5d2021-03-01 15:56:00 -0600176 if (_powerState->isPowerOn() && !skipRulesCheck)
Matt Spinlere892e392020-10-14 13:21:31 -0500177 {
178 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
179 [this](auto& rule) {
180 rule->check(PowerRuleState::runtime, _fanHealth);
181 });
182 }
183}
184
185void System::setFaultConfig(const json& jsonObj)
186{
187#ifdef MONITOR_USE_JSON
188 std::shared_ptr<PowerInterfaceBase> powerInterface =
Matt Spinlerba3ee9a2021-01-06 14:45:50 -0600189 std::make_shared<PowerInterface>(_thermalAlert);
Matt Spinlere892e392020-10-14 13:21:31 -0500190
Matt Spinlerac1efc12020-10-27 10:20:11 -0500191 PowerOffAction::PrePowerOffFunc func =
192 std::bind(std::mem_fn(&System::logShutdownError), this);
193
194 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500195
196 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
Matt Spinlere892e392020-10-14 13:21:31 -0500197#endif
198}
199
200void System::powerStateChanged(bool powerStateOn)
201{
Matt Spinler7d135642021-02-04 12:44:17 -0600202 std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) {
203 fan->powerStateChanged(powerStateOn);
204 });
205
Matt Spinlere892e392020-10-14 13:21:31 -0500206 if (powerStateOn)
207 {
Matt Spinler7d135642021-02-04 12:44:17 -0600208 if (!_started)
209 {
210 log<level::ERR>("No conf file found at power on");
Matthew Barthba53d3e2021-02-24 07:48:37 -0600211 throw std::runtime_error("No conf file found at power on");
Matt Spinler7d135642021-02-04 12:44:17 -0600212 }
213
Matt Spinlerbb449c12021-06-14 11:45:28 -0600214 // If no fan has its sensors on D-Bus, then there is a problem
215 // with the fan controller. Log an error and shut down.
216 if (std::all_of(_fans.begin(), _fans.end(), [](const auto& fan) {
217 return fan->numSensorsOnDBusAtPowerOn() == 0;
218 }))
219 {
220 handleOfflineFanController();
221 return;
222 }
223
Matt Spinlere892e392020-10-14 13:21:31 -0500224 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
225 [this](auto& rule) {
226 rule->check(PowerRuleState::atPgood, _fanHealth);
227 });
228 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
229 [this](auto& rule) {
230 rule->check(PowerRuleState::runtime, _fanHealth);
231 });
232 }
233 else
234 {
Matt Spinlerc8d3c512021-01-06 14:22:25 -0600235 _thermalAlert.enabled(false);
236
Matt Spinlere892e392020-10-14 13:21:31 -0500237 // Cancel any in-progress power off actions
238 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
239 [this](auto& rule) { rule->cancel(); });
240 }
Matt Spinlerb63aa092020-10-14 09:45:11 -0500241}
242
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500243void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
244{
245 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
246
247 getLogger().log(
248 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
249 sensor.name()),
250 Logger::error);
251
252 // In order to know if the event log should have a severity of error or
253 // informational, count the number of existing nonfunctional sensors and
254 // compare it to _numNonfuncSensorsBeforeError.
255 size_t nonfuncSensors = 0;
256 for (const auto& fan : _fans)
257 {
258 for (const auto& s : fan->sensors())
259 {
260 // Don't count nonfunctional sensors that still have their
261 // error timer running as nonfunctional since they haven't
262 // had event logs created for those errors yet.
263 if (!s->functional() && !s->errorTimerRunning())
264 {
265 nonfuncSensors++;
266 }
267 }
268 }
269
270 Severity severity = Severity::Error;
271 if (nonfuncSensors < _numNonfuncSensorsBeforeError)
272 {
273 severity = Severity::Informational;
274 }
275
276 auto error =
277 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
278 fanPath, sensor.name(), severity);
279
280 auto sensorData = captureSensorData();
281 error->commit(sensorData);
282
Matt Spinlerac1efc12020-10-27 10:20:11 -0500283 // Save the error so it can be committed again on a power off.
284 _lastError = std::move(error);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500285}
286
Matt Spinler27f6b682020-10-27 08:43:37 -0500287void System::fanMissingErrorTimerExpired(const Fan& fan)
288{
289 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
290
291 getLogger().log(
292 fmt::format("Creating event log for missing fan {}", fanPath),
293 Logger::error);
294
295 auto error = std::make_unique<FanError>(
296 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
297
298 auto sensorData = captureSensorData();
299 error->commit(sensorData);
300
Matt Spinlerac1efc12020-10-27 10:20:11 -0500301 // Save the error so it can be committed again on a power off.
302 _lastError = std::move(error);
303}
304
305void System::logShutdownError()
306{
307 if (_lastError)
308 {
309 getLogger().log("Re-committing previous fan error before power off");
310
311 // Still use the latest sensor data
312 auto sensorData = captureSensorData();
Matt Spinlerf435eb12021-05-11 14:44:25 -0500313 _lastError->commit(sensorData, true);
Matt Spinlerac1efc12020-10-27 10:20:11 -0500314 }
Matt Spinler27f6b682020-10-27 08:43:37 -0500315}
316
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500317json System::captureSensorData()
318{
319 json data;
320
321 for (const auto& fan : _fans)
322 {
323 for (const auto& sensor : fan->sensors())
324 {
325 json values;
326 values["present"] = fan->present();
327 values["functional"] = sensor->functional();
328 values["tach"] = sensor->getInput();
329 if (sensor->hasTarget())
330 {
331 values["target"] = sensor->getTarget();
332 }
333
334 data["sensors"][sensor->name()] = values;
335 }
336 }
337
338 return data;
339}
340
Matt Spinlerbb449c12021-06-14 11:45:28 -0600341void System::handleOfflineFanController()
342{
343 getLogger().log("The fan controller appears to be offline. Shutting down.",
344 Logger::error);
345
346 auto ffdc = collectHwmonFFDC();
347
348 FanError error{"xyz.openbmc_project.Fan.Error.FanControllerOffline",
349 Severity::Critical};
350 error.commit(ffdc, true);
351
352 PowerInterface::executeHardPowerOff();
353}
354
Matthew Barthc95c5272020-06-15 19:51:13 -0500355} // namespace phosphor::fan::monitor