blob: 4c30f9f7f77c50f9d11d5737677f3ae633c0de66 [file] [log] [blame]
/**
 * Copyright © 2020 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Matthew Barthc95c5272020-06-15 19:51:13 -050016#include "system.hpp"
17
18#include "fan.hpp"
19#include "fan_defs.hpp"
20#include "tach_sensor.hpp"
21#include "trust_manager.hpp"
22#include "types.hpp"
23#ifdef MONITOR_USE_JSON
24#include "json_parser.hpp"
25#endif
26
Matt Spinlerc8d3c512021-01-06 14:22:25 -060027#include "config.h"
28
Matthew Barthc95c5272020-06-15 19:51:13 -050029#include <nlohmann/json.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050030#include <phosphor-logging/log.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050031#include <sdbusplus/bus.hpp>
32#include <sdeventplus/event.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050033#include <sdeventplus/source/signal.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050034
35namespace phosphor::fan::monitor
36{
37
38using json = nlohmann::json;
Matt Spinlerf13b42e2020-10-26 15:29:49 -050039using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
40
Matthew Barthd06905c2020-06-12 08:13:06 -050041using namespace phosphor::logging;
Matthew Barthc95c5272020-06-15 19:51:13 -050042
43System::System(Mode mode, sdbusplus::bus::bus& bus,
44 const sdeventplus::Event& event) :
45 _mode(mode),
Matt Spinlerc8d3c512021-01-06 14:22:25 -060046 _bus(bus), _event(event),
47 _powerState(std::make_unique<PGoodState>(
Matt Spinlere892e392020-10-14 13:21:31 -050048 bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
Matt Spinlerc8d3c512021-01-06 14:22:25 -060049 std::placeholders::_1))),
50 _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
51{
Matt Spinlere892e392020-10-14 13:21:31 -050052
Matthew Barthc95c5272020-06-15 19:51:13 -050053 json jsonObj = json::object();
54#ifdef MONITOR_USE_JSON
55 jsonObj = getJsonObj(bus);
56#endif
57 // Retrieve and set trust groups within the trust manager
Matthew Barthd06905c2020-06-12 08:13:06 -050058 setTrustMgr(getTrustGroups(jsonObj));
Matthew Barthc95c5272020-06-15 19:51:13 -050059 // Retrieve fan definitions and create fan objects to be monitored
Matthew Barthd06905c2020-06-12 08:13:06 -050060 setFans(getFanDefinitions(jsonObj));
Matt Spinlere892e392020-10-14 13:21:31 -050061 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050062 log<level::INFO>("Configuration loaded");
Matt Spinlere892e392020-10-14 13:21:31 -050063
64 // Since this doesn't run at standby yet, powerStateChanged
65 // will never be called so for now treat start up as the
66 // pgood. When this does run at standby, the 'atPgood'
67 // rules won't need to be checked here.
68 if (_powerState->isPowerOn())
69 {
70 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
71 [this](auto& rule) {
72 rule->check(PowerRuleState::atPgood, _fanHealth);
73 });
74 // Runtime rules still need to be checked since fans may already
75 // be missing that could trigger a runtime rule.
76 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
77 [this](auto& rule) {
78 rule->check(PowerRuleState::runtime, _fanHealth);
79 });
80 }
Matthew Barthd06905c2020-06-12 08:13:06 -050081}
82
83void System::sighupHandler(sdeventplus::source::Signal&,
84 const struct signalfd_siginfo*)
85{
86 try
Matthew Barthc95c5272020-06-15 19:51:13 -050087 {
Matthew Barthd06905c2020-06-12 08:13:06 -050088 json jsonObj = json::object();
89#ifdef MONITOR_USE_JSON
90 jsonObj = getJsonObj(_bus);
91#endif
92 auto trustGrps = getTrustGroups(jsonObj);
93 auto fanDefs = getFanDefinitions(jsonObj);
94 // Set configured trust groups
95 setTrustMgr(trustGrps);
96 // Clear/set configured fan definitions
97 _fans.clear();
Matt Spinlerb63aa092020-10-14 09:45:11 -050098 _fanHealth.clear();
Matthew Barthd06905c2020-06-12 08:13:06 -050099 setFans(fanDefs);
Matt Spinlere892e392020-10-14 13:21:31 -0500100 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -0500101 log<level::INFO>("Configuration reloaded successfully");
Matt Spinlere892e392020-10-14 13:21:31 -0500102
103 if (_powerState->isPowerOn())
104 {
105 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
106 [this](auto& rule) {
107 rule->check(PowerRuleState::runtime, _fanHealth);
108 });
109 }
Matthew Barthd06905c2020-06-12 08:13:06 -0500110 }
111 catch (std::runtime_error& re)
112 {
113 log<level::ERR>("Error reloading config, no config changes made",
114 entry("LOAD_ERROR=%s", re.what()));
Matthew Barthc95c5272020-06-15 19:51:13 -0500115 }
116}
117
118const std::vector<CreateGroupFunction>
119 System::getTrustGroups(const json& jsonObj)
120{
121#ifdef MONITOR_USE_JSON
122 return getTrustGrps(jsonObj);
123#else
124 return trustGroups;
125#endif
126}
127
Matthew Barthd06905c2020-06-12 08:13:06 -0500128void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
129{
130 _trust = std::make_unique<trust::Manager>(groupFuncs);
131}
132
Matthew Barthc95c5272020-06-15 19:51:13 -0500133const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
134{
135#ifdef MONITOR_USE_JSON
136 return getFanDefs(jsonObj);
137#else
138 return fanDefinitions;
139#endif
140}
141
Matthew Barthd06905c2020-06-12 08:13:06 -0500142void System::setFans(const std::vector<FanDefinition>& fanDefs)
143{
144 for (const auto& fanDef : fanDefs)
145 {
146 // Check if a condition exists on the fan
147 auto condition = std::get<conditionField>(fanDef);
148 if (condition)
149 {
150 // Condition exists, skip adding fan if it fails
151 if (!(*condition)(_bus))
152 {
153 continue;
154 }
155 }
156 _fans.emplace_back(
Matt Spinlerb0412d02020-10-12 16:53:52 -0500157 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
Matt Spinlerb63aa092020-10-14 09:45:11 -0500158
159 updateFanHealth(*(_fans.back()));
Matthew Barthd06905c2020-06-12 08:13:06 -0500160 }
161}
162
Matt Spinlerb63aa092020-10-14 09:45:11 -0500163void System::updateFanHealth(const Fan& fan)
164{
165 std::vector<bool> sensorStatus;
166 for (const auto& sensor : fan.sensors())
167 {
168 sensorStatus.push_back(sensor->functional());
169 }
170
171 _fanHealth[fan.getName()] =
172 std::make_tuple(fan.present(), std::move(sensorStatus));
173}
174
175void System::fanStatusChange(const Fan& fan)
176{
177 updateFanHealth(fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500178
179 if (_powerState->isPowerOn())
180 {
181 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
182 [this](auto& rule) {
183 rule->check(PowerRuleState::runtime, _fanHealth);
184 });
185 }
186}
187
188void System::setFaultConfig(const json& jsonObj)
189{
190#ifdef MONITOR_USE_JSON
191 std::shared_ptr<PowerInterfaceBase> powerInterface =
Matt Spinlerba3ee9a2021-01-06 14:45:50 -0600192 std::make_shared<PowerInterface>(_thermalAlert);
Matt Spinlere892e392020-10-14 13:21:31 -0500193
Matt Spinlerac1efc12020-10-27 10:20:11 -0500194 PowerOffAction::PrePowerOffFunc func =
195 std::bind(std::mem_fn(&System::logShutdownError), this);
196
197 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500198
199 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
Matt Spinlere892e392020-10-14 13:21:31 -0500200#endif
201}
202
203void System::powerStateChanged(bool powerStateOn)
204{
205 if (powerStateOn)
206 {
207 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
208 [this](auto& rule) {
209 rule->check(PowerRuleState::atPgood, _fanHealth);
210 });
211 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
212 [this](auto& rule) {
213 rule->check(PowerRuleState::runtime, _fanHealth);
214 });
215 }
216 else
217 {
Matt Spinlerc8d3c512021-01-06 14:22:25 -0600218 _thermalAlert.enabled(false);
219
Matt Spinlere892e392020-10-14 13:21:31 -0500220 // Cancel any in-progress power off actions
221 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
222 [this](auto& rule) { rule->cancel(); });
223 }
Matt Spinlerb63aa092020-10-14 09:45:11 -0500224}
225
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500226void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
227{
228 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
229
230 getLogger().log(
231 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
232 sensor.name()),
233 Logger::error);
234
235 // In order to know if the event log should have a severity of error or
236 // informational, count the number of existing nonfunctional sensors and
237 // compare it to _numNonfuncSensorsBeforeError.
238 size_t nonfuncSensors = 0;
239 for (const auto& fan : _fans)
240 {
241 for (const auto& s : fan->sensors())
242 {
243 // Don't count nonfunctional sensors that still have their
244 // error timer running as nonfunctional since they haven't
245 // had event logs created for those errors yet.
246 if (!s->functional() && !s->errorTimerRunning())
247 {
248 nonfuncSensors++;
249 }
250 }
251 }
252
253 Severity severity = Severity::Error;
254 if (nonfuncSensors < _numNonfuncSensorsBeforeError)
255 {
256 severity = Severity::Informational;
257 }
258
259 auto error =
260 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
261 fanPath, sensor.name(), severity);
262
263 auto sensorData = captureSensorData();
264 error->commit(sensorData);
265
Matt Spinlerac1efc12020-10-27 10:20:11 -0500266 // Save the error so it can be committed again on a power off.
267 _lastError = std::move(error);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500268}
269
Matt Spinler27f6b682020-10-27 08:43:37 -0500270void System::fanMissingErrorTimerExpired(const Fan& fan)
271{
272 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
273
274 getLogger().log(
275 fmt::format("Creating event log for missing fan {}", fanPath),
276 Logger::error);
277
278 auto error = std::make_unique<FanError>(
279 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
280
281 auto sensorData = captureSensorData();
282 error->commit(sensorData);
283
Matt Spinlerac1efc12020-10-27 10:20:11 -0500284 // Save the error so it can be committed again on a power off.
285 _lastError = std::move(error);
286}
287
288void System::logShutdownError()
289{
290 if (_lastError)
291 {
292 getLogger().log("Re-committing previous fan error before power off");
293
294 // Still use the latest sensor data
295 auto sensorData = captureSensorData();
296 _lastError->commit(sensorData);
297 }
Matt Spinler27f6b682020-10-27 08:43:37 -0500298}
299
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500300json System::captureSensorData()
301{
302 json data;
303
304 for (const auto& fan : _fans)
305 {
306 for (const auto& sensor : fan->sensors())
307 {
308 json values;
309 values["present"] = fan->present();
310 values["functional"] = sensor->functional();
311 values["tach"] = sensor->getInput();
312 if (sensor->hasTarget())
313 {
314 values["target"] = sensor->getTarget();
315 }
316
317 data["sensors"][sensor->name()] = values;
318 }
319 }
320
321 return data;
322}
323
Matthew Barthc95c5272020-06-15 19:51:13 -0500324} // namespace phosphor::fan::monitor