blob: 14773234d2b24a1bb72ae175fb07f7c045ef7b5b [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
Sui Chen036f1612021-07-22 01:31:49 -07005#include <unistd.h>
6
7#include <boost/asio/deadline_timer.hpp>
8#include <sdbusplus/asio/connection.hpp>
9#include <sdbusplus/asio/object_server.hpp>
10#include <sdbusplus/asio/sd_event.hpp>
11#include <sdbusplus/bus/match.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -070012#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -070013#include <sdeventplus/event.hpp>
14
15#include <fstream>
16#include <iostream>
Sui Chen036f1612021-07-22 01:31:49 -070017#include <memory>
Vijay Khemka15537762020-07-22 11:44:56 -070018#include <numeric>
19#include <sstream>
20
21extern "C"
22{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080023#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070024#include <sys/sysinfo.h>
25}
Vijay Khemkae2795302020-07-15 17:28:45 -070026
Patrick Williams957e03c2021-09-02 16:38:42 -050027PHOSPHOR_LOG2_USING;
28
Vijay Khemkae2795302020-07-15 17:28:45 -070029static constexpr bool DEBUG = false;
Vijay Khemka415dcd22020-09-21 15:58:21 -070030static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070031
Sui Chen036f1612021-07-22 01:31:49 -070032// Limit sensor recreation interval to 10s
33bool needUpdate;
34static constexpr int TIMER_INTERVAL = 10;
35std::shared_ptr<boost::asio::deadline_timer> sensorRecreateTimer;
36std::shared_ptr<phosphor::health::HealthMon> healthMon;
37
Vijay Khemkae2795302020-07-15 17:28:45 -070038namespace phosphor
39{
40namespace health
41{
42
Sui Chen517524a2021-12-19 20:52:46 -080043// Example values for iface:
44// BMC_CONFIGURATION
45// BMC_INVENTORY_ITEM
Patrick Williams9ca00452022-11-26 09:41:58 -060046std::vector<std::string> findPathsWithType(sdbusplus::bus_t& bus,
Sui Chen517524a2021-12-19 20:52:46 -080047 const std::string& iface)
48{
49 PHOSPHOR_LOG2_USING;
50 std::vector<std::string> ret;
51
52 // Find all BMCs (DBus objects implementing the
53 // Inventory.Item.Bmc interface that may be created by
54 // configuring the Inventory Manager)
Patrick Williams9ca00452022-11-26 09:41:58 -060055 sdbusplus::message_t msg = bus.new_method_call(
Sui Chen517524a2021-12-19 20:52:46 -080056 "xyz.openbmc_project.ObjectMapper",
57 "/xyz/openbmc_project/object_mapper",
58 "xyz.openbmc_project.ObjectMapper", "GetSubTreePaths");
59
60 // "/": No limit for paths for all the paths that may be touched
61 // in this daemon
62
63 // 0: Limit the depth to 0 to match both objects created by
64 // EntityManager and by InventoryManager
65
66 // {iface}: The endpoint of the Association Definition must have
67 // the Inventory.Item.Bmc interface
68 msg.append("/", 0, std::vector<std::string>{iface});
69
70 try
71 {
72 bus.call(msg, 0).read(ret);
73
74 if (!ret.empty())
75 {
76 debug("{IFACE} found", "IFACE", iface);
77 }
78 else
79 {
80 debug("{IFACE} not found", "IFACE", iface);
81 }
82 }
83 catch (std::exception& e)
84 {
85 error("Exception occurred while calling {PATH}: {ERROR}", "PATH",
86 InventoryPath, "ERROR", e);
87 }
88 return ret;
89}
90
/** Positions of the time counters that follow the "cpu" label on the first
 *  line of /proc/stat, in the order readCPUUtilization() parses them.
 *  NUM_CPU_STATES_TIME is the number of counters, not a counter itself.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
105
/** Selects which CPU time counters readCPUUtilization() treats as "active". */
enum CPUUtilizationType
{
    USER = 0,   // user-mode counter only
    KERNEL = 1, // system (kernel-mode) counter only
    TOTAL = 2   // every non-idle counter
};
112
113double readCPUUtilization(enum CPUUtilizationType type)
Vijay Khemka15537762020-07-22 11:44:56 -0700114{
Patrick Williams957e03c2021-09-02 16:38:42 -0500115 auto proc_stat = "/proc/stat";
116 std::ifstream fileStat(proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700117 if (!fileStat.is_open())
118 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500119 error("cpu file not available: {PATH}", "PATH", proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700120 return -1;
121 }
122
123 std::string firstLine, labelName;
124 std::size_t timeData[NUM_CPU_STATES_TIME];
125
126 std::getline(fileStat, firstLine);
127 std::stringstream ss(firstLine);
128 ss >> labelName;
129
130 if (DEBUG)
131 std::cout << "CPU stats first Line is " << firstLine << "\n";
132
133 if (labelName.compare("cpu"))
134 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500135 error("CPU data not available");
Vijay Khemka15537762020-07-22 11:44:56 -0700136 return -1;
137 }
138
139 int i;
140 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
141 {
142 if (!(ss >> timeData[i]))
143 break;
144 }
145
146 if (i != NUM_CPU_STATES_TIME)
147 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500148 error("CPU data not correct");
Vijay Khemka15537762020-07-22 11:44:56 -0700149 return -1;
150 }
151
Sui Chen51bcfcb2021-11-01 15:28:51 -0700152 static std::unordered_map<enum CPUUtilizationType, double> preActiveTime,
153 preIdleTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700154 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
155 activePercValue;
156
157 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
Sui Chen51bcfcb2021-11-01 15:28:51 -0700158 if (type == TOTAL)
159 {
160 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
161 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
162 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
163 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
164 }
165 else if (type == KERNEL)
166 {
167 activeTime = timeData[SYSTEM_IDX];
168 }
169 else if (type == USER)
170 {
171 activeTime = timeData[USER_IDX];
172 }
Vijay Khemka15537762020-07-22 11:44:56 -0700173
Sui Chen51bcfcb2021-11-01 15:28:51 -0700174 idleTimeDiff = idleTime - preIdleTime[type];
175 activeTimeDiff = activeTime - preActiveTime[type];
Vijay Khemka15537762020-07-22 11:44:56 -0700176
177 /* Store current idle and active time for next calculation */
Sui Chen51bcfcb2021-11-01 15:28:51 -0700178 preIdleTime[type] = idleTime;
179 preActiveTime[type] = activeTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700180
181 totalTime = idleTimeDiff + activeTimeDiff;
182
183 activePercValue = activeTimeDiff / totalTime * 100;
184
185 if (DEBUG)
186 std::cout << "CPU Utilization is " << activePercValue << "\n";
187
188 return activePercValue;
189}
190
Sui Chen51bcfcb2021-11-01 15:28:51 -0700191auto readCPUUtilizationTotal([[maybe_unused]] const std::string& path)
192{
193 return readCPUUtilization(CPUUtilizationType::TOTAL);
194}
195
196auto readCPUUtilizationKernel([[maybe_unused]] const std::string& path)
197{
198 return readCPUUtilization(CPUUtilizationType::KERNEL);
199}
200
201auto readCPUUtilizationUser([[maybe_unused]] const std::string& path)
202{
203 return readCPUUtilization(CPUUtilizationType::USER);
204}
205
Sui Chen517524a2021-12-19 20:52:46 -0800206double readMemoryUtilization([[maybe_unused]] const std::string& path)
Vijay Khemka15537762020-07-22 11:44:56 -0700207{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800208 /* Unused var: path */
209 std::ignore = path;
Potin Laib7d7bd52022-08-23 01:47:13 +0000210 std::ifstream meminfo("/proc/meminfo");
211 std::string line;
212 double memTotal = -1;
213 double memAvail = -1;
Vijay Khemka15537762020-07-22 11:44:56 -0700214
Potin Laib7d7bd52022-08-23 01:47:13 +0000215 while (std::getline(meminfo, line))
216 {
217 std::string name;
218 double value;
219 std::istringstream iss(line);
220
221 if (!(iss >> name >> value))
222 {
223 continue;
224 }
225
226 if (name.starts_with("MemTotal"))
227 {
228 memTotal = value;
229 }
230 else if (name.starts_with("MemAvailable"))
231 {
232 memAvail = value;
233 }
234 }
235
236 if (memTotal <= 0 || memAvail <= 0)
237 {
238 return std::numeric_limits<double>::quiet_NaN();
239 }
Vijay Khemka15537762020-07-22 11:44:56 -0700240
241 if (DEBUG)
242 {
Potin Laib7d7bd52022-08-23 01:47:13 +0000243 std::cout << "MemTotal: " << memTotal << " MemAvailable: " << memAvail
244 << std::endl;
Vijay Khemka15537762020-07-22 11:44:56 -0700245 }
246
Potin Laib7d7bd52022-08-23 01:47:13 +0000247 return (memTotal - memAvail) / memTotal * 100;
Vijay Khemka15537762020-07-22 11:44:56 -0700248}
249
Sui Chen517524a2021-12-19 20:52:46 -0800250double readStorageUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800251{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800252 struct statvfs buffer
253 {};
254 int ret = statvfs(path.c_str(), &buffer);
255 double total = 0;
256 double available = 0;
257 double used = 0;
258 double usedPercentage = 0;
259
260 if (ret != 0)
261 {
262 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800263 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
264 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800265 return 0;
266 }
267
268 total = buffer.f_blocks * (buffer.f_frsize / 1024);
269 available = buffer.f_bfree * (buffer.f_frsize / 1024);
270 used = total - available;
271 usedPercentage = (used / total) * 100;
272
273 if (DEBUG)
274 {
275 std::cout << "Total:" << total << "\n";
276 std::cout << "Available:" << available << "\n";
277 std::cout << "Used:" << used << "\n";
278 std::cout << "Storage utilization is:" << usedPercentage << "\n";
279 }
280
281 return usedPercentage;
282}
283
Sui Chen517524a2021-12-19 20:52:46 -0800284double readInodeUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800285{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800286 struct statvfs buffer
287 {};
288 int ret = statvfs(path.c_str(), &buffer);
289 double totalInodes = 0;
290 double availableInodes = 0;
291 double used = 0;
292 double usedPercentage = 0;
293
294 if (ret != 0)
295 {
296 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800297 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
298 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800299 return 0;
300 }
301
302 totalInodes = buffer.f_files;
303 availableInodes = buffer.f_ffree;
304 used = totalInodes - availableInodes;
305 usedPercentage = (used / totalInodes) * 100;
306
307 if (DEBUG)
308 {
309 std::cout << "Total Inodes:" << totalInodes << "\n";
310 std::cout << "Available Inodes:" << availableInodes << "\n";
311 std::cout << "Used:" << used << "\n";
312 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
313 }
314
315 return usedPercentage;
316}
317
318constexpr auto storage = "Storage";
319constexpr auto inode = "Inode";
Sui Chen51bcfcb2021-11-01 15:28:51 -0700320
321/** Map of read function for each health sensors supported
322 *
323 * The following health sensors are read in the ManagerDiagnosticData
324 * Redfish resource:
325 * - CPU_Kernel populates ProcessorStatistics.KernelPercent
326 * - CPU_User populates ProcessorStatistics.UserPercent
327 */
Sui Chen517524a2021-12-19 20:52:46 -0800328const std::map<std::string, std::function<double(const std::string& path)>>
Sui Chen51bcfcb2021-11-01 15:28:51 -0700329 readSensors = {{"CPU", readCPUUtilizationTotal},
330 {"CPU_Kernel", readCPUUtilizationKernel},
331 {"CPU_User", readCPUUtilizationUser},
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800332 {"Memory", readMemoryUtilization},
333 {storage, readStorageUtilization},
334 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700335
336void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700337{
338 CriticalInterface::criticalHigh(criticalHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800339 CriticalInterface::criticalLow(std::numeric_limits<double>::quiet_NaN());
340
Vijay Khemkae2795302020-07-15 17:28:45 -0700341 WarningInterface::warningHigh(warningHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800342 WarningInterface::warningLow(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkae2795302020-07-15 17:28:45 -0700343}
344
Vijay Khemka15537762020-07-22 11:44:56 -0700345void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700346{
347 ValueIface::value(value);
348}
349
Sui Chen517524a2021-12-19 20:52:46 -0800350void HealthSensor::initHealthSensor(
351 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemka15537762020-07-22 11:44:56 -0700352{
Sui Chen517524a2021-12-19 20:52:46 -0800353 info("{SENSOR} Health Sensor initialized", "SENSOR", sensorConfig.name);
354
355 /* Look for sensor read functions and Read Sensor values */
356 auto it = readSensors.find(sensorConfig.name);
357
358 if (sensorConfig.name.rfind(storage, 0) == 0)
359 {
360 it = readSensors.find(storage);
361 }
362 else if (sensorConfig.name.rfind(inode, 0) == 0)
363 {
364 it = readSensors.find(inode);
365 }
366 else if (it == readSensors.end())
367 {
368 error("Sensor read function not available");
369 return;
370 }
371
372 double value = it->second(sensorConfig.path);
373
374 if (value < 0)
375 {
376 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
377 sensorConfig.name);
378 return;
379 }
380
Vijay Khemka08797702020-09-21 14:53:57 -0700381 /* Initialize unit value (Percent) for utilization sensor */
382 ValueIface::unit(ValueIface::Unit::Percent);
383
Konstantin Aladyshev9d29b372021-12-21 15:45:02 +0300384 ValueIface::maxValue(100);
385 ValueIface::minValue(0);
Potin Laic82e6162022-08-02 10:22:56 +0000386 ValueIface::value(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkab38fd582020-07-23 13:21:23 -0700387
Sui Chen670cc132021-04-13 09:27:22 -0700388 // Associate the sensor to chassis
Sui Chen517524a2021-12-19 20:52:46 -0800389 // This connects the DBus object to a Chassis.
390
Sui Chen670cc132021-04-13 09:27:22 -0700391 std::vector<AssociationTuple> associationTuples;
Sui Chen517524a2021-12-19 20:52:46 -0800392 for (const auto& chassisId : bmcInventoryPaths)
Sui Chen670cc132021-04-13 09:27:22 -0700393 {
Sui Chen517524a2021-12-19 20:52:46 -0800394 // This utilization sensor "is monitoring" the BMC with path chassisId.
395 // The chassisId is "monitored_by" this utilization sensor.
396 associationTuples.push_back({"monitors", "monitored_by", chassisId});
Sui Chen670cc132021-04-13 09:27:22 -0700397 }
398 AssociationDefinitionInterface::associations(associationTuples);
399
Vijay Khemkab38fd582020-07-23 13:21:23 -0700400 /* Start the timer for reading sensor data at regular interval */
401 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
402}
403
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700404void HealthSensor::checkSensorThreshold(const double value)
405{
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300406 if (std::isfinite(sensorConfig.criticalHigh) &&
407 (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700408 {
409 if (!CriticalInterface::criticalAlarmHigh())
410 {
411 CriticalInterface::criticalAlarmHigh(true);
412 if (sensorConfig.criticalLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800413 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500414 error(
415 "ASSERT: sensor {SENSOR} is above the upper threshold critical high",
416 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800417 startUnit(sensorConfig.criticalTgt);
418 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700419 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300420 return;
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700421 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300422
423 if (CriticalInterface::criticalAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700424 {
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300425 CriticalInterface::criticalAlarmHigh(false);
426 if (sensorConfig.criticalLog)
427 info(
428 "DEASSERT: sensor {SENSOR} is under the upper threshold critical high",
429 "SENSOR", sensorConfig.name);
430 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700431
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300432 if (std::isfinite(sensorConfig.warningHigh) &&
433 (value > sensorConfig.warningHigh))
434 {
435 if (!WarningInterface::warningAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700436 {
437 WarningInterface::warningAlarmHigh(true);
438 if (sensorConfig.warningLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800439 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500440 error(
441 "ASSERT: sensor {SENSOR} is above the upper threshold warning high",
442 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800443 startUnit(sensorConfig.warningTgt);
444 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700445 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300446 return;
447 }
448
449 if (WarningInterface::warningAlarmHigh())
450 {
451 WarningInterface::warningAlarmHigh(false);
452 if (sensorConfig.warningLog)
453 info(
454 "DEASSERT: sensor {SENSOR} is under the upper threshold warning high",
455 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700456 }
457}
458
Vijay Khemkab38fd582020-07-23 13:21:23 -0700459void HealthSensor::readHealthSensor()
460{
461 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800462 double value;
463
464 if (sensorConfig.name.rfind(storage, 0) == 0)
465 {
466 value = readSensors.find(storage)->second(sensorConfig.path);
467 }
468 else if (sensorConfig.name.rfind(inode, 0) == 0)
469 {
470 value = readSensors.find(inode)->second(sensorConfig.path);
471 }
472 else
473 {
474 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
475 }
476
Vijay Khemkab38fd582020-07-23 13:21:23 -0700477 if (value < 0)
478 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500479 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
480 sensorConfig.name);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700481 return;
482 }
483
484 /* Remove first item from the queue */
Potin Laic82e6162022-08-02 10:22:56 +0000485 if (valQueue.size() >= sensorConfig.windowSize)
486 {
487 valQueue.pop_front();
488 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700489 /* Add new item at the back */
490 valQueue.push_back(value);
Potin Laic82e6162022-08-02 10:22:56 +0000491 /* Wait until the queue is filled with enough reference*/
492 if (valQueue.size() < sensorConfig.windowSize)
493 {
494 return;
495 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700496
497 /* Calculate average values for the given window size */
498 double avgValue = 0;
499 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
500 avgValue = avgValue / sensorConfig.windowSize;
501
502 /* Set this new value to dbus */
503 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700504
505 /* Check the sensor threshold and log required message */
506 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700507}
508
Potin Lai156ecf32022-07-11 17:09:10 +0800509void HealthSensor::startUnit(const std::string& sysdUnit)
510{
511 if (sysdUnit.empty())
512 {
513 return;
514 }
515
Patrick Williamsbbfe7182022-07-22 19:26:56 -0500516 sdbusplus::message_t msg = bus.new_method_call(
Potin Lai156ecf32022-07-11 17:09:10 +0800517 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
518 "org.freedesktop.systemd1.Manager", "StartUnit");
519 msg.append(sysdUnit, "replace");
520 bus.call_noreply(msg);
521}
522
Sui Chen036f1612021-07-22 01:31:49 -0700523void HealthMon::recreateSensors()
524{
525 PHOSPHOR_LOG2_USING;
526 healthSensors.clear();
Sui Chen036f1612021-07-22 01:31:49 -0700527
Sui Chen517524a2021-12-19 20:52:46 -0800528 // Find BMC inventory paths and create health sensors
529 std::vector<std::string> bmcInventoryPaths =
530 findPathsWithType(bus, BMC_INVENTORY_ITEM);
531 createHealthSensors(bmcInventoryPaths);
Sui Chen036f1612021-07-22 01:31:49 -0700532}
533
Vijay Khemka15537762020-07-22 11:44:56 -0700534void printConfig(HealthConfig& cfg)
535{
536 std::cout << "Name: " << cfg.name << "\n";
537 std::cout << "Freq: " << (int)cfg.freq << "\n";
538 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
539 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
540 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
541 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
542 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
543 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
544 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800545 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700546}
547
Vijay Khemkae2795302020-07-15 17:28:45 -0700548/* Create dbus utilization sensor object for each configured sensors */
Sui Chen517524a2021-12-19 20:52:46 -0800549void HealthMon::createHealthSensors(
550 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemkae2795302020-07-15 17:28:45 -0700551{
552 for (auto& cfg : sensorConfigs)
553 {
554 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
Sui Chen517524a2021-12-19 20:52:46 -0800555 auto healthSensor = std::make_shared<HealthSensor>(
556 bus, objPath.c_str(), cfg, bmcInventoryPaths);
Vijay Khemkae2795302020-07-15 17:28:45 -0700557 healthSensors.emplace(cfg.name, healthSensor);
558
Patrick Williams957e03c2021-09-02 16:38:42 -0500559 info("{SENSOR} Health Sensor created", "SENSOR", cfg.name);
Vijay Khemkae2795302020-07-15 17:28:45 -0700560
561 /* Set configured values of crtical and warning high to dbus */
562 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
563 }
564}
565
566/** @brief Parsing Health config JSON file */
567Json HealthMon::parseConfigFile(std::string configFile)
568{
569 std::ifstream jsonFile(configFile);
570 if (!jsonFile.is_open())
571 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500572 error("config JSON file not found: {PATH}", "PATH", configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700573 }
574
575 auto data = Json::parse(jsonFile, nullptr, false);
576 if (data.is_discarded())
577 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500578 error("config readings JSON parser failure: {PATH}", "PATH",
579 configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700580 }
581
582 return data;
583}
584
Vijay Khemkae2795302020-07-15 17:28:45 -0700585void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
586{
587
588 static const Json empty{};
589
Vijay Khemka15537762020-07-22 11:44:56 -0700590 /* Default frerquency of sensor polling is 1 second */
591 cfg.freq = data.value("Frequency", 1);
592
593 /* Default window size sensor queue is 1 */
594 cfg.windowSize = data.value("Window_size", 1);
595
Vijay Khemkae2795302020-07-15 17:28:45 -0700596 auto threshold = data.value("Threshold", empty);
597 if (!threshold.empty())
598 {
599 auto criticalData = threshold.value("Critical", empty);
600 if (!criticalData.empty())
601 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700602 cfg.criticalHigh =
603 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700604 cfg.criticalLog = criticalData.value("Log", true);
605 cfg.criticalTgt = criticalData.value("Target", "");
606 }
607 auto warningData = threshold.value("Warning", empty);
608 if (!warningData.empty())
609 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700610 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
611 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700612 cfg.warningTgt = warningData.value("Target", "");
613 }
614 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800615 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700616}
617
Vijay Khemka15537762020-07-22 11:44:56 -0700618std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700619{
620
621 std::vector<HealthConfig> cfgs;
Vijay Khemkae2795302020-07-15 17:28:45 -0700622 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
623
624 // print values
625 if (DEBUG)
626 std::cout << "Config json data:\n" << data << "\n\n";
627
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800628 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700629 for (auto& j : data.items())
630 {
631 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800632 /* key need match default value in map readSensors or match the key
633 * start with "Storage" or "Inode" */
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800634 bool isStorageOrInode =
635 (key.rfind(storage, 0) == 0 || key.rfind(inode, 0) == 0);
636 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700637 {
638 HealthConfig cfg = HealthConfig();
639 cfg.name = j.key();
640 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800641 if (isStorageOrInode)
642 {
643 struct statvfs buffer
644 {};
645 int ret = statvfs(cfg.path.c_str(), &buffer);
646 if (ret != 0)
647 {
648 auto e = errno;
649 std::cerr << "Error from statvfs: " << strerror(e)
650 << ", name: " << cfg.name
651 << ", path: " << cfg.path
652 << ", please check your settings in config file."
653 << std::endl;
654 continue;
655 }
656 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700657 cfgs.push_back(cfg);
658 if (DEBUG)
659 printConfig(cfg);
660 }
661 else
662 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500663 error("{SENSOR} Health Sensor not supported", "SENSOR", key);
Vijay Khemkae2795302020-07-15 17:28:45 -0700664 }
665 }
666 return cfgs;
667}
668
Sui Chen517524a2021-12-19 20:52:46 -0800669// Two caveats here.
670// 1. The BMC Inventory will only show up by the nearest ObjectMapper polling
671// interval.
672// 2. InterfacesAdded events will are not emitted like they are with E-M.
673void HealthMon::createBmcInventoryIfNotCreated()
674{
675 if (bmcInventory == nullptr)
676 {
677 info("createBmcInventory");
678 bmcInventory = std::make_shared<phosphor::health::BmcInventory>(
679 bus, "/xyz/openbmc_project/inventory/bmc");
680 }
681}
682
683bool HealthMon::bmcInventoryCreated()
684{
685 return bmcInventory != nullptr;
686}
687
Vijay Khemkae2795302020-07-15 17:28:45 -0700688} // namespace health
689} // namespace phosphor
690
Sui Chen517524a2021-12-19 20:52:46 -0800691void sensorRecreateTimerCallback(
Patrick Williams9ca00452022-11-26 09:41:58 -0600692 std::shared_ptr<boost::asio::deadline_timer> timer, sdbusplus::bus_t& bus)
Sui Chen517524a2021-12-19 20:52:46 -0800693{
694 timer->expires_from_now(boost::posix_time::seconds(TIMER_INTERVAL));
695 timer->async_wait([timer, &bus](const boost::system::error_code& ec) {
696 if (ec == boost::asio::error::operation_aborted)
697 {
698 info("sensorRecreateTimer aborted");
699 return;
700 }
701
702 // When Entity-manager is already running
703 if (!needUpdate)
704 {
705 if ((!healthMon->bmcInventoryCreated()) &&
706 (!phosphor::health::findPathsWithType(bus, BMC_CONFIGURATION)
707 .empty()))
708 {
709 healthMon->createBmcInventoryIfNotCreated();
710 needUpdate = true;
711 }
712 }
713 else
714 {
715
716 // If this daemon maintains its own DBus object, we must make sure
717 // the object is registered to ObjectMapper
718 if (phosphor::health::findPathsWithType(bus, BMC_INVENTORY_ITEM)
719 .empty())
720 {
721 info(
722 "BMC inventory item not registered to Object Mapper yet, waiting for next iteration");
723 }
724 else
725 {
726 info(
727 "BMC inventory item registered to Object Mapper, creating sensors now");
728 healthMon->recreateSensors();
729 needUpdate = false;
730 }
731 }
732 sensorRecreateTimerCallback(timer, bus);
733 });
734}
735
Vijay Khemkae2795302020-07-15 17:28:45 -0700736/**
737 * @brief Main
738 */
739int main()
740{
Sui Chen036f1612021-07-22 01:31:49 -0700741 // The io_context is needed for the timer
742 boost::asio::io_context io;
743
744 // DBus connection
745 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
746
747 conn->request_name(HEALTH_BUS_NAME);
748
Vijay Khemkae2795302020-07-15 17:28:45 -0700749 // Get a default event loop
750 auto event = sdeventplus::Event::get_default();
751
Vijay Khemkae2795302020-07-15 17:28:45 -0700752 // Create an health monitor object
Sui Chen036f1612021-07-22 01:31:49 -0700753 healthMon = std::make_shared<phosphor::health::HealthMon>(*conn);
Vijay Khemkae2795302020-07-15 17:28:45 -0700754
Yong Lif8d79732021-03-12 09:12:19 +0800755 // Add object manager through object_server
756 sdbusplus::asio::object_server objectServer(conn);
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700757
Sui Chen036f1612021-07-22 01:31:49 -0700758 sdbusplus::asio::sd_event_wrapper sdEvents(io);
759
760 sensorRecreateTimer = std::make_shared<boost::asio::deadline_timer>(io);
761
762 // If the SystemInventory does not exist: wait for the InterfaceAdded signal
Sui Chen517524a2021-12-19 20:52:46 -0800763 auto interfacesAddedSignalHandler = std::make_unique<
764 sdbusplus::bus::match_t>(
765 static_cast<sdbusplus::bus_t&>(*conn),
766 sdbusplus::bus::match::rules::interfacesAdded(),
Patrick Williams9ca00452022-11-26 09:41:58 -0600767 [conn](sdbusplus::message_t& msg) {
Sui Chen517524a2021-12-19 20:52:46 -0800768 using Association =
769 std::tuple<std::string, std::string, std::string>;
770 using InterfacesAdded = std::vector<std::pair<
771 std::string,
772 std::vector<std::pair<
773 std::string, std::variant<std::vector<Association>>>>>>;
774
775 sdbusplus::message::object_path o;
776 InterfacesAdded interfacesAdded;
777
778 try
779 {
Sui Chen036f1612021-07-22 01:31:49 -0700780 msg.read(o);
Sui Chen517524a2021-12-19 20:52:46 -0800781 msg.read(interfacesAdded);
782 }
783 catch (const std::exception& e)
784 {
785 error(
786 "Exception occurred while processing interfacesAdded: {EXCEPTION}",
787 "EXCEPTION", e.what());
788 return;
789 }
790
791 // Ignore any signal coming from health-monitor itself.
792 if (msg.get_sender() == conn->get_unique_name())
793 {
794 return;
795 }
796
797 // Check if the BMC Inventory is in the interfaces created.
798 bool hasBmcConfiguration = false;
799 for (const auto& x : interfacesAdded)
800 {
801 if (x.first == BMC_CONFIGURATION)
Sui Chen036f1612021-07-22 01:31:49 -0700802 {
Sui Chen517524a2021-12-19 20:52:46 -0800803 hasBmcConfiguration = true;
Sui Chen036f1612021-07-22 01:31:49 -0700804 }
Sui Chen517524a2021-12-19 20:52:46 -0800805 }
806
807 if (hasBmcConfiguration)
808 {
809 info(
810 "BMC configuration detected, will create a corresponding Inventory item");
811 healthMon->createBmcInventoryIfNotCreated();
812 needUpdate = true;
813 }
814 });
Sui Chen036f1612021-07-22 01:31:49 -0700815
816 // Start the timer
Sui Chen517524a2021-12-19 20:52:46 -0800817 io.post(
818 [conn]() { sensorRecreateTimerCallback(sensorRecreateTimer, *conn); });
Sui Chen036f1612021-07-22 01:31:49 -0700819 io.run();
Vijay Khemkae2795302020-07-15 17:28:45 -0700820
821 return 0;
822}