blob: 229eca86234600eaa598ef0acd4c3a527b05d8ec [file] [log] [blame]
Matthew Barthc95c5272020-06-15 19:51:13 -05001/**
2 * Copyright © 2020 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Matthew Barthc95c5272020-06-15 19:51:13 -050016#include "system.hpp"
17
18#include "fan.hpp"
19#include "fan_defs.hpp"
20#include "tach_sensor.hpp"
21#include "trust_manager.hpp"
22#include "types.hpp"
23#ifdef MONITOR_USE_JSON
24#include "json_parser.hpp"
25#endif
26
27#include <nlohmann/json.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050028#include <phosphor-logging/log.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050029#include <sdbusplus/bus.hpp>
30#include <sdeventplus/event.hpp>
Matthew Barthd06905c2020-06-12 08:13:06 -050031#include <sdeventplus/source/signal.hpp>
Matthew Barthc95c5272020-06-15 19:51:13 -050032
33namespace phosphor::fan::monitor
34{
35
36using json = nlohmann::json;
Matt Spinlerf13b42e2020-10-26 15:29:49 -050037using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
38
Matthew Barthd06905c2020-06-12 08:13:06 -050039using namespace phosphor::logging;
Matthew Barthc95c5272020-06-15 19:51:13 -050040
41System::System(Mode mode, sdbusplus::bus::bus& bus,
42 const sdeventplus::Event& event) :
43 _mode(mode),
44 _bus(bus), _event(event)
45{
Matt Spinlere892e392020-10-14 13:21:31 -050046 _powerState = std::make_unique<PGoodState>(
47 bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
48 std::placeholders::_1));
49
Matthew Barthc95c5272020-06-15 19:51:13 -050050 json jsonObj = json::object();
51#ifdef MONITOR_USE_JSON
52 jsonObj = getJsonObj(bus);
53#endif
54 // Retrieve and set trust groups within the trust manager
Matthew Barthd06905c2020-06-12 08:13:06 -050055 setTrustMgr(getTrustGroups(jsonObj));
Matthew Barthc95c5272020-06-15 19:51:13 -050056 // Retrieve fan definitions and create fan objects to be monitored
Matthew Barthd06905c2020-06-12 08:13:06 -050057 setFans(getFanDefinitions(jsonObj));
Matt Spinlere892e392020-10-14 13:21:31 -050058 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050059 log<level::INFO>("Configuration loaded");
Matt Spinlere892e392020-10-14 13:21:31 -050060
61 // Since this doesn't run at standby yet, powerStateChanged
62 // will never be called so for now treat start up as the
63 // pgood. When this does run at standby, the 'atPgood'
64 // rules won't need to be checked here.
65 if (_powerState->isPowerOn())
66 {
67 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
68 [this](auto& rule) {
69 rule->check(PowerRuleState::atPgood, _fanHealth);
70 });
71 // Runtime rules still need to be checked since fans may already
72 // be missing that could trigger a runtime rule.
73 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
74 [this](auto& rule) {
75 rule->check(PowerRuleState::runtime, _fanHealth);
76 });
77 }
Matthew Barthd06905c2020-06-12 08:13:06 -050078}
79
80void System::sighupHandler(sdeventplus::source::Signal&,
81 const struct signalfd_siginfo*)
82{
83 try
Matthew Barthc95c5272020-06-15 19:51:13 -050084 {
Matthew Barthd06905c2020-06-12 08:13:06 -050085 json jsonObj = json::object();
86#ifdef MONITOR_USE_JSON
87 jsonObj = getJsonObj(_bus);
88#endif
89 auto trustGrps = getTrustGroups(jsonObj);
90 auto fanDefs = getFanDefinitions(jsonObj);
91 // Set configured trust groups
92 setTrustMgr(trustGrps);
93 // Clear/set configured fan definitions
94 _fans.clear();
Matt Spinlerb63aa092020-10-14 09:45:11 -050095 _fanHealth.clear();
Matthew Barthd06905c2020-06-12 08:13:06 -050096 setFans(fanDefs);
Matt Spinlere892e392020-10-14 13:21:31 -050097 setFaultConfig(jsonObj);
Matthew Barthd06905c2020-06-12 08:13:06 -050098 log<level::INFO>("Configuration reloaded successfully");
Matt Spinlere892e392020-10-14 13:21:31 -050099
100 if (_powerState->isPowerOn())
101 {
102 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
103 [this](auto& rule) {
104 rule->check(PowerRuleState::runtime, _fanHealth);
105 });
106 }
Matthew Barthd06905c2020-06-12 08:13:06 -0500107 }
108 catch (std::runtime_error& re)
109 {
110 log<level::ERR>("Error reloading config, no config changes made",
111 entry("LOAD_ERROR=%s", re.what()));
Matthew Barthc95c5272020-06-15 19:51:13 -0500112 }
113}
114
115const std::vector<CreateGroupFunction>
116 System::getTrustGroups(const json& jsonObj)
117{
118#ifdef MONITOR_USE_JSON
119 return getTrustGrps(jsonObj);
120#else
121 return trustGroups;
122#endif
123}
124
Matthew Barthd06905c2020-06-12 08:13:06 -0500125void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
126{
127 _trust = std::make_unique<trust::Manager>(groupFuncs);
128}
129
Matthew Barthc95c5272020-06-15 19:51:13 -0500130const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
131{
132#ifdef MONITOR_USE_JSON
133 return getFanDefs(jsonObj);
134#else
135 return fanDefinitions;
136#endif
137}
138
Matthew Barthd06905c2020-06-12 08:13:06 -0500139void System::setFans(const std::vector<FanDefinition>& fanDefs)
140{
141 for (const auto& fanDef : fanDefs)
142 {
143 // Check if a condition exists on the fan
144 auto condition = std::get<conditionField>(fanDef);
145 if (condition)
146 {
147 // Condition exists, skip adding fan if it fails
148 if (!(*condition)(_bus))
149 {
150 continue;
151 }
152 }
153 _fans.emplace_back(
Matt Spinlerb0412d02020-10-12 16:53:52 -0500154 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
Matt Spinlerb63aa092020-10-14 09:45:11 -0500155
156 updateFanHealth(*(_fans.back()));
Matthew Barthd06905c2020-06-12 08:13:06 -0500157 }
158}
159
Matt Spinlerb63aa092020-10-14 09:45:11 -0500160void System::updateFanHealth(const Fan& fan)
161{
162 std::vector<bool> sensorStatus;
163 for (const auto& sensor : fan.sensors())
164 {
165 sensorStatus.push_back(sensor->functional());
166 }
167
168 _fanHealth[fan.getName()] =
169 std::make_tuple(fan.present(), std::move(sensorStatus));
170}
171
172void System::fanStatusChange(const Fan& fan)
173{
174 updateFanHealth(fan);
Matt Spinlere892e392020-10-14 13:21:31 -0500175
176 if (_powerState->isPowerOn())
177 {
178 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
179 [this](auto& rule) {
180 rule->check(PowerRuleState::runtime, _fanHealth);
181 });
182 }
183}
184
185void System::setFaultConfig(const json& jsonObj)
186{
187#ifdef MONITOR_USE_JSON
188 std::shared_ptr<PowerInterfaceBase> powerInterface =
189 std::make_shared<PowerInterface>();
190
Matt Spinlerac1efc12020-10-27 10:20:11 -0500191 PowerOffAction::PrePowerOffFunc func =
192 std::bind(std::mem_fn(&System::logShutdownError), this);
193
194 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500195
196 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
Matt Spinlere892e392020-10-14 13:21:31 -0500197#endif
198}
199
200void System::powerStateChanged(bool powerStateOn)
201{
202 if (powerStateOn)
203 {
204 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
205 [this](auto& rule) {
206 rule->check(PowerRuleState::atPgood, _fanHealth);
207 });
208 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
209 [this](auto& rule) {
210 rule->check(PowerRuleState::runtime, _fanHealth);
211 });
212 }
213 else
214 {
215 // Cancel any in-progress power off actions
216 std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
217 [this](auto& rule) { rule->cancel(); });
218 }
Matt Spinlerb63aa092020-10-14 09:45:11 -0500219}
220
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500221void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
222{
223 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
224
225 getLogger().log(
226 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
227 sensor.name()),
228 Logger::error);
229
230 // In order to know if the event log should have a severity of error or
231 // informational, count the number of existing nonfunctional sensors and
232 // compare it to _numNonfuncSensorsBeforeError.
233 size_t nonfuncSensors = 0;
234 for (const auto& fan : _fans)
235 {
236 for (const auto& s : fan->sensors())
237 {
238 // Don't count nonfunctional sensors that still have their
239 // error timer running as nonfunctional since they haven't
240 // had event logs created for those errors yet.
241 if (!s->functional() && !s->errorTimerRunning())
242 {
243 nonfuncSensors++;
244 }
245 }
246 }
247
248 Severity severity = Severity::Error;
249 if (nonfuncSensors < _numNonfuncSensorsBeforeError)
250 {
251 severity = Severity::Informational;
252 }
253
254 auto error =
255 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
256 fanPath, sensor.name(), severity);
257
258 auto sensorData = captureSensorData();
259 error->commit(sensorData);
260
Matt Spinlerac1efc12020-10-27 10:20:11 -0500261 // Save the error so it can be committed again on a power off.
262 _lastError = std::move(error);
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500263}
264
Matt Spinler27f6b682020-10-27 08:43:37 -0500265void System::fanMissingErrorTimerExpired(const Fan& fan)
266{
267 std::string fanPath{util::INVENTORY_PATH + fan.getName()};
268
269 getLogger().log(
270 fmt::format("Creating event log for missing fan {}", fanPath),
271 Logger::error);
272
273 auto error = std::make_unique<FanError>(
274 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
275
276 auto sensorData = captureSensorData();
277 error->commit(sensorData);
278
Matt Spinlerac1efc12020-10-27 10:20:11 -0500279 // Save the error so it can be committed again on a power off.
280 _lastError = std::move(error);
281}
282
283void System::logShutdownError()
284{
285 if (_lastError)
286 {
287 getLogger().log("Re-committing previous fan error before power off");
288
289 // Still use the latest sensor data
290 auto sensorData = captureSensorData();
291 _lastError->commit(sensorData);
292 }
Matt Spinler27f6b682020-10-27 08:43:37 -0500293}
294
Matt Spinlerf13b42e2020-10-26 15:29:49 -0500295json System::captureSensorData()
296{
297 json data;
298
299 for (const auto& fan : _fans)
300 {
301 for (const auto& sensor : fan->sensors())
302 {
303 json values;
304 values["present"] = fan->present();
305 values["functional"] = sensor->functional();
306 values["tach"] = sensor->getInput();
307 if (sensor->hasTarget())
308 {
309 values["target"] = sensor->getTarget();
310 }
311
312 data["sensors"][sensor->name()] = values;
313 }
314 }
315
316 return data;
317}
318
Matthew Barthc95c5272020-06-15 19:51:13 -0500319} // namespace phosphor::fan::monitor