blob: 2b4e0c24fb990d03f47d431e4e45b3cc25c2673a [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
Sui Chen036f1612021-07-22 01:31:49 -07005#include <unistd.h>
6
Jian Zhang2a940012023-06-26 00:18:32 +08007#include <boost/asio/steady_timer.hpp>
Sui Chen036f1612021-07-22 01:31:49 -07008#include <sdbusplus/asio/connection.hpp>
9#include <sdbusplus/asio/object_server.hpp>
10#include <sdbusplus/asio/sd_event.hpp>
11#include <sdbusplus/bus/match.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -070012#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -070013#include <sdeventplus/event.hpp>
14
15#include <fstream>
16#include <iostream>
Sui Chen036f1612021-07-22 01:31:49 -070017#include <memory>
Vijay Khemka15537762020-07-22 11:44:56 -070018#include <numeric>
19#include <sstream>
20
21extern "C"
22{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080023#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070024#include <sys/sysinfo.h>
25}
Vijay Khemkae2795302020-07-15 17:28:45 -070026
Patrick Williams957e03c2021-09-02 16:38:42 -050027PHOSPHOR_LOG2_USING;
28
Vijay Khemkae2795302020-07-15 17:28:45 -070029static constexpr bool DEBUG = false;
Vijay Khemka415dcd22020-09-21 15:58:21 -070030static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070031
Sui Chen036f1612021-07-22 01:31:49 -070032// Limit sensor recreation interval to 10s
33bool needUpdate;
34static constexpr int TIMER_INTERVAL = 10;
Jian Zhang2a940012023-06-26 00:18:32 +080035std::shared_ptr<boost::asio::steady_timer> sensorRecreateTimer;
Sui Chen036f1612021-07-22 01:31:49 -070036std::shared_ptr<phosphor::health::HealthMon> healthMon;
37
Vijay Khemkae2795302020-07-15 17:28:45 -070038namespace phosphor
39{
40namespace health
41{
42
Sui Chen517524a2021-12-19 20:52:46 -080043// Example values for iface:
44// BMC_CONFIGURATION
45// BMC_INVENTORY_ITEM
Patrick Williams9ca00452022-11-26 09:41:58 -060046std::vector<std::string> findPathsWithType(sdbusplus::bus_t& bus,
Sui Chen517524a2021-12-19 20:52:46 -080047 const std::string& iface)
48{
49 PHOSPHOR_LOG2_USING;
50 std::vector<std::string> ret;
51
52 // Find all BMCs (DBus objects implementing the
53 // Inventory.Item.Bmc interface that may be created by
54 // configuring the Inventory Manager)
Patrick Williams9ca00452022-11-26 09:41:58 -060055 sdbusplus::message_t msg = bus.new_method_call(
Sui Chen517524a2021-12-19 20:52:46 -080056 "xyz.openbmc_project.ObjectMapper",
57 "/xyz/openbmc_project/object_mapper",
58 "xyz.openbmc_project.ObjectMapper", "GetSubTreePaths");
59
60 // "/": No limit for paths for all the paths that may be touched
61 // in this daemon
62
63 // 0: Limit the depth to 0 to match both objects created by
64 // EntityManager and by InventoryManager
65
66 // {iface}: The endpoint of the Association Definition must have
67 // the Inventory.Item.Bmc interface
68 msg.append("/", 0, std::vector<std::string>{iface});
69
70 try
71 {
72 bus.call(msg, 0).read(ret);
73
74 if (!ret.empty())
75 {
76 debug("{IFACE} found", "IFACE", iface);
77 }
78 else
79 {
80 debug("{IFACE} not found", "IFACE", iface);
81 }
82 }
83 catch (std::exception& e)
84 {
85 error("Exception occurred while calling {PATH}: {ERROR}", "PATH",
86 InventoryPath, "ERROR", e);
87 }
88 return ret;
89}
90
/**
 * Column indices of the per-state time counters on the aggregate "cpu" line
 * of /proc/stat, in the order they appear after the label.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};

// # cat /proc/stat|grep 'cpu '
// cpu  5750423 14827 1572788 9259794 1317 0 28879 0 0 0
static_assert(NUM_CPU_STATES_TIME == 10);
109
/** Selects which share of CPU time readCPUUtilization() reports. */
enum CPUUtilizationType
{
    USER = 0,   // user-mode time only
    KERNEL = 1, // system (kernel-mode) time only
    TOTAL = 2   // every non-idle, non-iowait state
};
116
117double readCPUUtilization(enum CPUUtilizationType type)
Vijay Khemka15537762020-07-22 11:44:56 -0700118{
Patrick Williams957e03c2021-09-02 16:38:42 -0500119 auto proc_stat = "/proc/stat";
120 std::ifstream fileStat(proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700121 if (!fileStat.is_open())
122 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500123 error("cpu file not available: {PATH}", "PATH", proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700124 return -1;
125 }
126
127 std::string firstLine, labelName;
128 std::size_t timeData[NUM_CPU_STATES_TIME];
129
130 std::getline(fileStat, firstLine);
131 std::stringstream ss(firstLine);
132 ss >> labelName;
133
134 if (DEBUG)
Patrick Williams4f64e522022-04-13 09:04:51 -0500135 debug("CPU stats first Line is: {LINE}", "LINE", firstLine);
Vijay Khemka15537762020-07-22 11:44:56 -0700136
137 if (labelName.compare("cpu"))
138 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500139 error("CPU data not available");
Vijay Khemka15537762020-07-22 11:44:56 -0700140 return -1;
141 }
142
143 int i;
144 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
145 {
146 if (!(ss >> timeData[i]))
147 break;
148 }
149
150 if (i != NUM_CPU_STATES_TIME)
151 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500152 error("CPU data not correct");
Vijay Khemka15537762020-07-22 11:44:56 -0700153 return -1;
154 }
155
Sui Chenec6601d2023-01-09 14:55:54 -0800156 static std::unordered_map<enum CPUUtilizationType, uint64_t> preActiveTime,
157 preTotalTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700158
Sui Chenec6601d2023-01-09 14:55:54 -0800159 // These are actually Jiffies. On the BMC, 1 jiffy usually corresponds to
160 // 0.01 second.
161 uint64_t activeTime = 0, activeTimeDiff = 0, totalTime = 0,
162 totalTimeDiff = 0;
163 double activePercValue = 0;
164
Sui Chen51bcfcb2021-11-01 15:28:51 -0700165 if (type == TOTAL)
166 {
167 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
168 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
169 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
170 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
171 }
172 else if (type == KERNEL)
173 {
174 activeTime = timeData[SYSTEM_IDX];
175 }
176 else if (type == USER)
177 {
178 activeTime = timeData[USER_IDX];
179 }
Vijay Khemka15537762020-07-22 11:44:56 -0700180
Sui Chenec6601d2023-01-09 14:55:54 -0800181 totalTime = std::accumulate(std::begin(timeData), std::end(timeData), 0);
182
Sui Chen51bcfcb2021-11-01 15:28:51 -0700183 activeTimeDiff = activeTime - preActiveTime[type];
Sui Chenec6601d2023-01-09 14:55:54 -0800184 totalTimeDiff = totalTime - preTotalTime[type];
Vijay Khemka15537762020-07-22 11:44:56 -0700185
186 /* Store current idle and active time for next calculation */
Sui Chen51bcfcb2021-11-01 15:28:51 -0700187 preActiveTime[type] = activeTime;
Sui Chenec6601d2023-01-09 14:55:54 -0800188 preTotalTime[type] = totalTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700189
Sui Chenec6601d2023-01-09 14:55:54 -0800190 activePercValue = (100.0 * activeTimeDiff) / totalTimeDiff;
Vijay Khemka15537762020-07-22 11:44:56 -0700191
192 if (DEBUG)
Patrick Williams4f64e522022-04-13 09:04:51 -0500193 debug("CPU Utilization is {VALUE}", "VALUE", activePercValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700194
195 return activePercValue;
196}
197
Sui Chen51bcfcb2021-11-01 15:28:51 -0700198auto readCPUUtilizationTotal([[maybe_unused]] const std::string& path)
199{
200 return readCPUUtilization(CPUUtilizationType::TOTAL);
201}
202
203auto readCPUUtilizationKernel([[maybe_unused]] const std::string& path)
204{
205 return readCPUUtilization(CPUUtilizationType::KERNEL);
206}
207
208auto readCPUUtilizationUser([[maybe_unused]] const std::string& path)
209{
210 return readCPUUtilization(CPUUtilizationType::USER);
211}
212
Sui Chen517524a2021-12-19 20:52:46 -0800213double readMemoryUtilization([[maybe_unused]] const std::string& path)
Vijay Khemka15537762020-07-22 11:44:56 -0700214{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800215 /* Unused var: path */
216 std::ignore = path;
Potin Laib7d7bd52022-08-23 01:47:13 +0000217 std::ifstream meminfo("/proc/meminfo");
218 std::string line;
219 double memTotal = -1;
220 double memAvail = -1;
Vijay Khemka15537762020-07-22 11:44:56 -0700221
Potin Laib7d7bd52022-08-23 01:47:13 +0000222 while (std::getline(meminfo, line))
223 {
224 std::string name;
225 double value;
226 std::istringstream iss(line);
227
228 if (!(iss >> name >> value))
229 {
230 continue;
231 }
232
233 if (name.starts_with("MemTotal"))
234 {
235 memTotal = value;
236 }
237 else if (name.starts_with("MemAvailable"))
238 {
239 memAvail = value;
240 }
241 }
242
243 if (memTotal <= 0 || memAvail <= 0)
244 {
245 return std::numeric_limits<double>::quiet_NaN();
246 }
Vijay Khemka15537762020-07-22 11:44:56 -0700247
248 if (DEBUG)
249 {
Patrick Williams4f64e522022-04-13 09:04:51 -0500250 debug("MemTotal: {VALUE}", "VALUE", memTotal);
251 debug("MemAvailable: {VALUE}", "VALUE", memAvail);
Vijay Khemka15537762020-07-22 11:44:56 -0700252 }
253
Potin Laib7d7bd52022-08-23 01:47:13 +0000254 return (memTotal - memAvail) / memTotal * 100;
Vijay Khemka15537762020-07-22 11:44:56 -0700255}
256
Sui Chen517524a2021-12-19 20:52:46 -0800257double readStorageUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800258{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800259 struct statvfs buffer
260 {};
261 int ret = statvfs(path.c_str(), &buffer);
262 double total = 0;
263 double available = 0;
264 double used = 0;
265 double usedPercentage = 0;
266
267 if (ret != 0)
268 {
269 auto e = errno;
Patrick Williams4f64e522022-04-13 09:04:51 -0500270 error("Error from statvfs: {ERROR}; {PATH}", "ERROR", strerror(e),
271 "PATH", path);
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800272 return 0;
273 }
274
275 total = buffer.f_blocks * (buffer.f_frsize / 1024);
276 available = buffer.f_bfree * (buffer.f_frsize / 1024);
277 used = total - available;
278 usedPercentage = (used / total) * 100;
279
280 if (DEBUG)
281 {
Patrick Williams4f64e522022-04-13 09:04:51 -0500282 debug("Storage Total: {VALUE}", "VALUE", total);
283 debug("Available: {VALUE}", "VALUE", available);
284 debug("Used: {VALUE}", "VALUE", used);
285 debug("Storage Utilization: {VALUE}", "VALUE", usedPercentage);
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800286 }
287
288 return usedPercentage;
289}
290
Sui Chen517524a2021-12-19 20:52:46 -0800291double readInodeUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800292{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800293 struct statvfs buffer
294 {};
295 int ret = statvfs(path.c_str(), &buffer);
296 double totalInodes = 0;
297 double availableInodes = 0;
298 double used = 0;
299 double usedPercentage = 0;
300
301 if (ret != 0)
302 {
303 auto e = errno;
Patrick Williams4f64e522022-04-13 09:04:51 -0500304 error("Error from statvfs on {PATH}: {ERROR}", "PATH", path, "ERROR",
305 strerror(e));
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800306 return 0;
307 }
308
309 totalInodes = buffer.f_files;
310 availableInodes = buffer.f_ffree;
311 used = totalInodes - availableInodes;
312 usedPercentage = (used / totalInodes) * 100;
313
314 if (DEBUG)
315 {
Patrick Williams4f64e522022-04-13 09:04:51 -0500316 debug("Total Inodes: {VALUE}", "VALUE", totalInodes);
317 debug("Available Inodes: {VALUE}", "VALUE", availableInodes);
318 debug("Used: {VALUE}", "VALUE", used);
319 debug("Inodes utilization is: {VALUE}", "VALUE", usedPercentage);
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800320 }
321
322 return usedPercentage;
323}
324
325constexpr auto storage = "Storage";
326constexpr auto inode = "Inode";
Sui Chen51bcfcb2021-11-01 15:28:51 -0700327
328/** Map of read function for each health sensors supported
329 *
330 * The following health sensors are read in the ManagerDiagnosticData
331 * Redfish resource:
332 * - CPU_Kernel populates ProcessorStatistics.KernelPercent
333 * - CPU_User populates ProcessorStatistics.UserPercent
334 */
Sui Chen517524a2021-12-19 20:52:46 -0800335const std::map<std::string, std::function<double(const std::string& path)>>
Sui Chen51bcfcb2021-11-01 15:28:51 -0700336 readSensors = {{"CPU", readCPUUtilizationTotal},
337 {"CPU_Kernel", readCPUUtilizationKernel},
338 {"CPU_User", readCPUUtilizationUser},
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800339 {"Memory", readMemoryUtilization},
340 {storage, readStorageUtilization},
341 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700342
343void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700344{
345 CriticalInterface::criticalHigh(criticalHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800346 CriticalInterface::criticalLow(std::numeric_limits<double>::quiet_NaN());
347
Vijay Khemkae2795302020-07-15 17:28:45 -0700348 WarningInterface::warningHigh(warningHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800349 WarningInterface::warningLow(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkae2795302020-07-15 17:28:45 -0700350}
351
Vijay Khemka15537762020-07-22 11:44:56 -0700352void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700353{
354 ValueIface::value(value);
355}
356
Sui Chen517524a2021-12-19 20:52:46 -0800357void HealthSensor::initHealthSensor(
358 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemka15537762020-07-22 11:44:56 -0700359{
Sui Chen517524a2021-12-19 20:52:46 -0800360 info("{SENSOR} Health Sensor initialized", "SENSOR", sensorConfig.name);
361
362 /* Look for sensor read functions and Read Sensor values */
363 auto it = readSensors.find(sensorConfig.name);
364
365 if (sensorConfig.name.rfind(storage, 0) == 0)
366 {
367 it = readSensors.find(storage);
368 }
369 else if (sensorConfig.name.rfind(inode, 0) == 0)
370 {
371 it = readSensors.find(inode);
372 }
373 else if (it == readSensors.end())
374 {
375 error("Sensor read function not available");
376 return;
377 }
378
379 double value = it->second(sensorConfig.path);
380
381 if (value < 0)
382 {
383 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
384 sensorConfig.name);
385 return;
386 }
387
Vijay Khemka08797702020-09-21 14:53:57 -0700388 /* Initialize unit value (Percent) for utilization sensor */
389 ValueIface::unit(ValueIface::Unit::Percent);
390
Konstantin Aladyshev9d29b372021-12-21 15:45:02 +0300391 ValueIface::maxValue(100);
392 ValueIface::minValue(0);
Potin Laic82e6162022-08-02 10:22:56 +0000393 ValueIface::value(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkab38fd582020-07-23 13:21:23 -0700394
Sui Chen670cc132021-04-13 09:27:22 -0700395 // Associate the sensor to chassis
Sui Chen517524a2021-12-19 20:52:46 -0800396 // This connects the DBus object to a Chassis.
397
Sui Chen670cc132021-04-13 09:27:22 -0700398 std::vector<AssociationTuple> associationTuples;
Sui Chen517524a2021-12-19 20:52:46 -0800399 for (const auto& chassisId : bmcInventoryPaths)
Sui Chen670cc132021-04-13 09:27:22 -0700400 {
Sui Chen517524a2021-12-19 20:52:46 -0800401 // This utilization sensor "is monitoring" the BMC with path chassisId.
402 // The chassisId is "monitored_by" this utilization sensor.
403 associationTuples.push_back({"monitors", "monitored_by", chassisId});
Sui Chen670cc132021-04-13 09:27:22 -0700404 }
405 AssociationDefinitionInterface::associations(associationTuples);
406
Vijay Khemkab38fd582020-07-23 13:21:23 -0700407 /* Start the timer for reading sensor data at regular interval */
408 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
409}
410
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700411void HealthSensor::checkSensorThreshold(const double value)
412{
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300413 if (std::isfinite(sensorConfig.criticalHigh) &&
414 (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700415 {
416 if (!CriticalInterface::criticalAlarmHigh())
417 {
418 CriticalInterface::criticalAlarmHigh(true);
419 if (sensorConfig.criticalLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800420 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500421 error(
422 "ASSERT: sensor {SENSOR} is above the upper threshold critical high",
423 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800424 startUnit(sensorConfig.criticalTgt);
425 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700426 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300427 return;
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700428 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300429
430 if (CriticalInterface::criticalAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700431 {
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300432 CriticalInterface::criticalAlarmHigh(false);
433 if (sensorConfig.criticalLog)
434 info(
435 "DEASSERT: sensor {SENSOR} is under the upper threshold critical high",
436 "SENSOR", sensorConfig.name);
437 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700438
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300439 if (std::isfinite(sensorConfig.warningHigh) &&
440 (value > sensorConfig.warningHigh))
441 {
442 if (!WarningInterface::warningAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700443 {
444 WarningInterface::warningAlarmHigh(true);
445 if (sensorConfig.warningLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800446 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500447 error(
448 "ASSERT: sensor {SENSOR} is above the upper threshold warning high",
449 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800450 startUnit(sensorConfig.warningTgt);
451 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700452 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300453 return;
454 }
455
456 if (WarningInterface::warningAlarmHigh())
457 {
458 WarningInterface::warningAlarmHigh(false);
459 if (sensorConfig.warningLog)
460 info(
461 "DEASSERT: sensor {SENSOR} is under the upper threshold warning high",
462 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700463 }
464}
465
Vijay Khemkab38fd582020-07-23 13:21:23 -0700466void HealthSensor::readHealthSensor()
467{
468 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800469 double value;
470
471 if (sensorConfig.name.rfind(storage, 0) == 0)
472 {
473 value = readSensors.find(storage)->second(sensorConfig.path);
474 }
475 else if (sensorConfig.name.rfind(inode, 0) == 0)
476 {
477 value = readSensors.find(inode)->second(sensorConfig.path);
478 }
479 else
480 {
481 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
482 }
483
Vijay Khemkab38fd582020-07-23 13:21:23 -0700484 if (value < 0)
485 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500486 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
487 sensorConfig.name);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700488 return;
489 }
490
491 /* Remove first item from the queue */
Potin Laic82e6162022-08-02 10:22:56 +0000492 if (valQueue.size() >= sensorConfig.windowSize)
493 {
494 valQueue.pop_front();
495 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700496 /* Add new item at the back */
497 valQueue.push_back(value);
Potin Laic82e6162022-08-02 10:22:56 +0000498 /* Wait until the queue is filled with enough reference*/
499 if (valQueue.size() < sensorConfig.windowSize)
500 {
501 return;
502 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700503
504 /* Calculate average values for the given window size */
505 double avgValue = 0;
506 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
507 avgValue = avgValue / sensorConfig.windowSize;
508
509 /* Set this new value to dbus */
510 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700511
512 /* Check the sensor threshold and log required message */
513 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700514}
515
Potin Lai156ecf32022-07-11 17:09:10 +0800516void HealthSensor::startUnit(const std::string& sysdUnit)
517{
518 if (sysdUnit.empty())
519 {
520 return;
521 }
522
Patrick Williamsbbfe7182022-07-22 19:26:56 -0500523 sdbusplus::message_t msg = bus.new_method_call(
Potin Lai156ecf32022-07-11 17:09:10 +0800524 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
525 "org.freedesktop.systemd1.Manager", "StartUnit");
526 msg.append(sysdUnit, "replace");
527 bus.call_noreply(msg);
528}
529
Sui Chen036f1612021-07-22 01:31:49 -0700530void HealthMon::recreateSensors()
531{
532 PHOSPHOR_LOG2_USING;
533 healthSensors.clear();
Sui Chen036f1612021-07-22 01:31:49 -0700534
Sui Chen517524a2021-12-19 20:52:46 -0800535 // Find BMC inventory paths and create health sensors
536 std::vector<std::string> bmcInventoryPaths =
537 findPathsWithType(bus, BMC_INVENTORY_ITEM);
538 createHealthSensors(bmcInventoryPaths);
Sui Chen036f1612021-07-22 01:31:49 -0700539}
540
Vijay Khemka15537762020-07-22 11:44:56 -0700541void printConfig(HealthConfig& cfg)
542{
Patrick Williams4f64e522022-04-13 09:04:51 -0500543 debug("Name: {VALUE}", "VALUE", cfg.name);
544 debug("Freq: {VALUE}", "VALUE", cfg.freq);
545 debug("Window Size: {VALUE}", "VALUE", cfg.windowSize);
546 debug("Critical value: {VALUE}", "VALUE", cfg.criticalHigh);
547 debug("warning value: {VALUE}", "VALUE", cfg.warningHigh);
548 debug("Critical log: {VALUE}", "VALUE", cfg.criticalLog);
549 debug("Warning log: {VALUE}", "VALUE", cfg.warningLog);
550 debug("Critical Target: {VALUE}", "VALUE", cfg.criticalTgt);
551 debug("Warning Target: {VALUE}", "VALUE", cfg.warningTgt);
552 debug("Path: {VALUE}", "VALUE", cfg.path);
Vijay Khemka15537762020-07-22 11:44:56 -0700553}
554
Vijay Khemkae2795302020-07-15 17:28:45 -0700555/* Create dbus utilization sensor object for each configured sensors */
Sui Chen517524a2021-12-19 20:52:46 -0800556void HealthMon::createHealthSensors(
557 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemkae2795302020-07-15 17:28:45 -0700558{
559 for (auto& cfg : sensorConfigs)
560 {
561 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
Sui Chen517524a2021-12-19 20:52:46 -0800562 auto healthSensor = std::make_shared<HealthSensor>(
563 bus, objPath.c_str(), cfg, bmcInventoryPaths);
Vijay Khemkae2795302020-07-15 17:28:45 -0700564 healthSensors.emplace(cfg.name, healthSensor);
565
Patrick Williams957e03c2021-09-02 16:38:42 -0500566 info("{SENSOR} Health Sensor created", "SENSOR", cfg.name);
Vijay Khemkae2795302020-07-15 17:28:45 -0700567
568 /* Set configured values of crtical and warning high to dbus */
569 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
570 }
571}
572
573/** @brief Parsing Health config JSON file */
574Json HealthMon::parseConfigFile(std::string configFile)
575{
576 std::ifstream jsonFile(configFile);
577 if (!jsonFile.is_open())
578 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500579 error("config JSON file not found: {PATH}", "PATH", configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700580 }
581
582 auto data = Json::parse(jsonFile, nullptr, false);
583 if (data.is_discarded())
584 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500585 error("config readings JSON parser failure: {PATH}", "PATH",
586 configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700587 }
588
589 return data;
590}
591
Vijay Khemkae2795302020-07-15 17:28:45 -0700592void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
593{
Vijay Khemkae2795302020-07-15 17:28:45 -0700594 static const Json empty{};
595
Vijay Khemka15537762020-07-22 11:44:56 -0700596 /* Default frerquency of sensor polling is 1 second */
597 cfg.freq = data.value("Frequency", 1);
598
599 /* Default window size sensor queue is 1 */
600 cfg.windowSize = data.value("Window_size", 1);
601
Vijay Khemkae2795302020-07-15 17:28:45 -0700602 auto threshold = data.value("Threshold", empty);
603 if (!threshold.empty())
604 {
605 auto criticalData = threshold.value("Critical", empty);
606 if (!criticalData.empty())
607 {
Patrick Williams658d70a2023-05-10 07:51:03 -0500608 cfg.criticalHigh = criticalData.value("Value",
609 defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700610 cfg.criticalLog = criticalData.value("Log", true);
611 cfg.criticalTgt = criticalData.value("Target", "");
612 }
613 auto warningData = threshold.value("Warning", empty);
614 if (!warningData.empty())
615 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700616 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
617 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700618 cfg.warningTgt = warningData.value("Target", "");
619 }
620 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800621 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700622}
623
Vijay Khemka15537762020-07-22 11:44:56 -0700624std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700625{
Vijay Khemkae2795302020-07-15 17:28:45 -0700626 std::vector<HealthConfig> cfgs;
Vijay Khemkae2795302020-07-15 17:28:45 -0700627 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
628
629 // print values
630 if (DEBUG)
Patrick Williams4f64e522022-04-13 09:04:51 -0500631 debug("Config json data: {VALUE}", "VALUE", data.dump(2));
Vijay Khemkae2795302020-07-15 17:28:45 -0700632
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800633 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700634 for (auto& j : data.items())
635 {
636 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800637 /* key need match default value in map readSensors or match the key
638 * start with "Storage" or "Inode" */
Patrick Williams658d70a2023-05-10 07:51:03 -0500639 bool isStorageOrInode = (key.rfind(storage, 0) == 0 ||
640 key.rfind(inode, 0) == 0);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800641 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700642 {
643 HealthConfig cfg = HealthConfig();
644 cfg.name = j.key();
645 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800646 if (isStorageOrInode)
647 {
648 struct statvfs buffer
649 {};
650 int ret = statvfs(cfg.path.c_str(), &buffer);
651 if (ret != 0)
652 {
653 auto e = errno;
Patrick Williams4f64e522022-04-13 09:04:51 -0500654 error("Error from statvfs: {ERROR}; ({NAME}, {PATH})",
655 "ERROR", strerror(e), "NAME", cfg.name, "PATH",
656 cfg.path);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800657 continue;
658 }
659 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700660 cfgs.push_back(cfg);
661 if (DEBUG)
662 printConfig(cfg);
663 }
664 else
665 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500666 error("{SENSOR} Health Sensor not supported", "SENSOR", key);
Vijay Khemkae2795302020-07-15 17:28:45 -0700667 }
668 }
669 return cfgs;
670}
671
Sui Chen517524a2021-12-19 20:52:46 -0800672// Two caveats here.
673// 1. The BMC Inventory will only show up by the nearest ObjectMapper polling
674// interval.
675// 2. InterfacesAdded events will are not emitted like they are with E-M.
676void HealthMon::createBmcInventoryIfNotCreated()
677{
678 if (bmcInventory == nullptr)
679 {
680 info("createBmcInventory");
681 bmcInventory = std::make_shared<phosphor::health::BmcInventory>(
682 bus, "/xyz/openbmc_project/inventory/bmc");
683 }
684}
685
686bool HealthMon::bmcInventoryCreated()
687{
688 return bmcInventory != nullptr;
689}
690
Vijay Khemkae2795302020-07-15 17:28:45 -0700691} // namespace health
692} // namespace phosphor
693
Sui Chen517524a2021-12-19 20:52:46 -0800694void sensorRecreateTimerCallback(
Jian Zhang2a940012023-06-26 00:18:32 +0800695 std::shared_ptr<boost::asio::steady_timer> timer, sdbusplus::bus_t& bus)
Sui Chen517524a2021-12-19 20:52:46 -0800696{
Jian Zhang2a940012023-06-26 00:18:32 +0800697 timer->expires_after(std::chrono::seconds(TIMER_INTERVAL));
Sui Chen517524a2021-12-19 20:52:46 -0800698 timer->async_wait([timer, &bus](const boost::system::error_code& ec) {
699 if (ec == boost::asio::error::operation_aborted)
700 {
701 info("sensorRecreateTimer aborted");
702 return;
703 }
704
705 // When Entity-manager is already running
706 if (!needUpdate)
707 {
708 if ((!healthMon->bmcInventoryCreated()) &&
709 (!phosphor::health::findPathsWithType(bus, BMC_CONFIGURATION)
710 .empty()))
711 {
712 healthMon->createBmcInventoryIfNotCreated();
713 needUpdate = true;
714 }
715 }
716 else
717 {
Sui Chen517524a2021-12-19 20:52:46 -0800718 // If this daemon maintains its own DBus object, we must make sure
719 // the object is registered to ObjectMapper
720 if (phosphor::health::findPathsWithType(bus, BMC_INVENTORY_ITEM)
721 .empty())
722 {
723 info(
724 "BMC inventory item not registered to Object Mapper yet, waiting for next iteration");
725 }
726 else
727 {
728 info(
729 "BMC inventory item registered to Object Mapper, creating sensors now");
730 healthMon->recreateSensors();
731 needUpdate = false;
732 }
733 }
734 sensorRecreateTimerCallback(timer, bus);
735 });
736}
737
Vijay Khemkae2795302020-07-15 17:28:45 -0700738/**
739 * @brief Main
740 */
741int main()
742{
Sui Chen036f1612021-07-22 01:31:49 -0700743 // The io_context is needed for the timer
744 boost::asio::io_context io;
745
746 // DBus connection
747 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
748
749 conn->request_name(HEALTH_BUS_NAME);
750
Vijay Khemkae2795302020-07-15 17:28:45 -0700751 // Get a default event loop
752 auto event = sdeventplus::Event::get_default();
753
Vijay Khemkae2795302020-07-15 17:28:45 -0700754 // Create an health monitor object
Sui Chen036f1612021-07-22 01:31:49 -0700755 healthMon = std::make_shared<phosphor::health::HealthMon>(*conn);
Vijay Khemkae2795302020-07-15 17:28:45 -0700756
Yong Lif8d79732021-03-12 09:12:19 +0800757 // Add object manager through object_server
758 sdbusplus::asio::object_server objectServer(conn);
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700759
Sui Chen036f1612021-07-22 01:31:49 -0700760 sdbusplus::asio::sd_event_wrapper sdEvents(io);
761
Jian Zhang2a940012023-06-26 00:18:32 +0800762 sensorRecreateTimer = std::make_shared<boost::asio::steady_timer>(io);
Sui Chen036f1612021-07-22 01:31:49 -0700763
764 // If the SystemInventory does not exist: wait for the InterfaceAdded signal
Patrick Williams658d70a2023-05-10 07:51:03 -0500765 auto interfacesAddedSignalHandler =
766 std::make_unique<sdbusplus::bus::match_t>(
767 static_cast<sdbusplus::bus_t&>(*conn),
768 sdbusplus::bus::match::rules::interfacesAdded(),
769 [conn](sdbusplus::message_t& msg) {
770 using Association = std::tuple<std::string, std::string, std::string>;
771 using InterfacesAdded = std::vector<std::pair<
772 std::string,
773 std::vector<std::pair<std::string,
774 std::variant<std::vector<Association>>>>>>;
Sui Chen517524a2021-12-19 20:52:46 -0800775
Patrick Williams658d70a2023-05-10 07:51:03 -0500776 sdbusplus::message::object_path o;
777 InterfacesAdded interfacesAdded;
Sui Chen517524a2021-12-19 20:52:46 -0800778
Patrick Williams658d70a2023-05-10 07:51:03 -0500779 try
780 {
781 msg.read(o);
782 msg.read(interfacesAdded);
783 }
784 catch (const std::exception& e)
785 {
786 error(
Patrick Williams4f64e522022-04-13 09:04:51 -0500787 "Exception occurred while processing interfacesAdded: {ERROR}",
788 "ERROR", e);
Patrick Williams658d70a2023-05-10 07:51:03 -0500789 return;
790 }
Sui Chen517524a2021-12-19 20:52:46 -0800791
Patrick Williams658d70a2023-05-10 07:51:03 -0500792 // Ignore any signal coming from health-monitor itself.
793 if (msg.get_sender() == conn->get_unique_name())
794 {
795 return;
796 }
Sui Chen517524a2021-12-19 20:52:46 -0800797
Patrick Williams658d70a2023-05-10 07:51:03 -0500798 // Check if the BMC Inventory is in the interfaces created.
799 bool hasBmcConfiguration = false;
800 for (const auto& x : interfacesAdded)
801 {
802 if (x.first == BMC_CONFIGURATION)
Sui Chen517524a2021-12-19 20:52:46 -0800803 {
Patrick Williams658d70a2023-05-10 07:51:03 -0500804 hasBmcConfiguration = true;
Sui Chen517524a2021-12-19 20:52:46 -0800805 }
Patrick Williams658d70a2023-05-10 07:51:03 -0500806 }
Sui Chen517524a2021-12-19 20:52:46 -0800807
Patrick Williams658d70a2023-05-10 07:51:03 -0500808 if (hasBmcConfiguration)
809 {
810 info(
811 "BMC configuration detected, will create a corresponding Inventory item");
812 healthMon->createBmcInventoryIfNotCreated();
813 needUpdate = true;
814 }
Patrick Williams54cbac22023-10-20 11:19:26 -0500815 });
Sui Chen036f1612021-07-22 01:31:49 -0700816
817 // Start the timer
Ed Tanousa19c6fb2023-03-06 13:53:27 -0800818 boost::asio::post(io, [conn]() {
819 sensorRecreateTimerCallback(sensorRecreateTimer, *conn);
820 });
Sui Chen036f1612021-07-22 01:31:49 -0700821 io.run();
Vijay Khemkae2795302020-07-15 17:28:45 -0700822
823 return 0;
824}