blob: 6892cade54fb9885270f8eb708dc99a702459582 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
Sui Chen036f1612021-07-22 01:31:49 -07005#include <unistd.h>
6
7#include <boost/asio/deadline_timer.hpp>
8#include <sdbusplus/asio/connection.hpp>
9#include <sdbusplus/asio/object_server.hpp>
10#include <sdbusplus/asio/sd_event.hpp>
11#include <sdbusplus/bus/match.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -070012#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -070013#include <sdeventplus/event.hpp>
14
15#include <fstream>
16#include <iostream>
Sui Chen036f1612021-07-22 01:31:49 -070017#include <memory>
Vijay Khemka15537762020-07-22 11:44:56 -070018#include <numeric>
19#include <sstream>
20
21extern "C"
22{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080023#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070024#include <sys/sysinfo.h>
25}
Vijay Khemkae2795302020-07-15 17:28:45 -070026
Patrick Williams957e03c2021-09-02 16:38:42 -050027PHOSPHOR_LOG2_USING;
28
Vijay Khemkae2795302020-07-15 17:28:45 -070029static constexpr bool DEBUG = false;
Vijay Khemka415dcd22020-09-21 15:58:21 -070030static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070031
Sui Chen036f1612021-07-22 01:31:49 -070032// Limit sensor recreation interval to 10s
33bool needUpdate;
34static constexpr int TIMER_INTERVAL = 10;
35std::shared_ptr<boost::asio::deadline_timer> sensorRecreateTimer;
36std::shared_ptr<phosphor::health::HealthMon> healthMon;
37
Vijay Khemkae2795302020-07-15 17:28:45 -070038namespace phosphor
39{
40namespace health
41{
42
Sui Chen517524a2021-12-19 20:52:46 -080043// Example values for iface:
44// BMC_CONFIGURATION
45// BMC_INVENTORY_ITEM
Patrick Williams9ca00452022-11-26 09:41:58 -060046std::vector<std::string> findPathsWithType(sdbusplus::bus_t& bus,
Sui Chen517524a2021-12-19 20:52:46 -080047 const std::string& iface)
48{
49 PHOSPHOR_LOG2_USING;
50 std::vector<std::string> ret;
51
52 // Find all BMCs (DBus objects implementing the
53 // Inventory.Item.Bmc interface that may be created by
54 // configuring the Inventory Manager)
Patrick Williams9ca00452022-11-26 09:41:58 -060055 sdbusplus::message_t msg = bus.new_method_call(
Sui Chen517524a2021-12-19 20:52:46 -080056 "xyz.openbmc_project.ObjectMapper",
57 "/xyz/openbmc_project/object_mapper",
58 "xyz.openbmc_project.ObjectMapper", "GetSubTreePaths");
59
60 // "/": No limit for paths for all the paths that may be touched
61 // in this daemon
62
63 // 0: Limit the depth to 0 to match both objects created by
64 // EntityManager and by InventoryManager
65
66 // {iface}: The endpoint of the Association Definition must have
67 // the Inventory.Item.Bmc interface
68 msg.append("/", 0, std::vector<std::string>{iface});
69
70 try
71 {
72 bus.call(msg, 0).read(ret);
73
74 if (!ret.empty())
75 {
76 debug("{IFACE} found", "IFACE", iface);
77 }
78 else
79 {
80 debug("{IFACE} not found", "IFACE", iface);
81 }
82 }
83 catch (std::exception& e)
84 {
85 error("Exception occurred while calling {PATH}: {ERROR}", "PATH",
86 InventoryPath, "ERROR", e);
87 }
88 return ret;
89}
90
/** Positions of the counters that follow the "cpu" label on the first line
 *  of /proc/stat, in the order they are read by readCPUUtilization().
 *
 *  # cat /proc/stat|grep 'cpu '
 *  cpu 5750423 14827 1572788 9259794 1317 0 28879 0 0 0
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    // Number of counters above; used as the array size when parsing.
    NUM_CPU_STATES_TIME = 10
};

// The parser expects exactly ten counters on the "cpu" line.
static_assert(NUM_CPU_STATES_TIME == 10);
109
/** Selects which counters readCPUUtilization() treats as "active" time. */
enum CPUUtilizationType
{
    USER = 0,   // only the user counter
    KERNEL = 1, // only the system counter
    TOTAL = 2   // every counter except idle and iowait
};
116
117double readCPUUtilization(enum CPUUtilizationType type)
Vijay Khemka15537762020-07-22 11:44:56 -0700118{
Patrick Williams957e03c2021-09-02 16:38:42 -0500119 auto proc_stat = "/proc/stat";
120 std::ifstream fileStat(proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700121 if (!fileStat.is_open())
122 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500123 error("cpu file not available: {PATH}", "PATH", proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700124 return -1;
125 }
126
127 std::string firstLine, labelName;
128 std::size_t timeData[NUM_CPU_STATES_TIME];
129
130 std::getline(fileStat, firstLine);
131 std::stringstream ss(firstLine);
132 ss >> labelName;
133
134 if (DEBUG)
135 std::cout << "CPU stats first Line is " << firstLine << "\n";
136
137 if (labelName.compare("cpu"))
138 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500139 error("CPU data not available");
Vijay Khemka15537762020-07-22 11:44:56 -0700140 return -1;
141 }
142
143 int i;
144 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
145 {
146 if (!(ss >> timeData[i]))
147 break;
148 }
149
150 if (i != NUM_CPU_STATES_TIME)
151 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500152 error("CPU data not correct");
Vijay Khemka15537762020-07-22 11:44:56 -0700153 return -1;
154 }
155
Sui Chenec6601d2023-01-09 14:55:54 -0800156 static std::unordered_map<enum CPUUtilizationType, uint64_t> preActiveTime,
157 preTotalTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700158
Sui Chenec6601d2023-01-09 14:55:54 -0800159 // These are actually Jiffies. On the BMC, 1 jiffy usually corresponds to
160 // 0.01 second.
161 uint64_t activeTime = 0, activeTimeDiff = 0, totalTime = 0,
162 totalTimeDiff = 0;
163 double activePercValue = 0;
164
Sui Chen51bcfcb2021-11-01 15:28:51 -0700165 if (type == TOTAL)
166 {
167 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
168 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
169 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
170 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
171 }
172 else if (type == KERNEL)
173 {
174 activeTime = timeData[SYSTEM_IDX];
175 }
176 else if (type == USER)
177 {
178 activeTime = timeData[USER_IDX];
179 }
Vijay Khemka15537762020-07-22 11:44:56 -0700180
Sui Chenec6601d2023-01-09 14:55:54 -0800181 totalTime = std::accumulate(std::begin(timeData), std::end(timeData), 0);
182
Sui Chen51bcfcb2021-11-01 15:28:51 -0700183 activeTimeDiff = activeTime - preActiveTime[type];
Sui Chenec6601d2023-01-09 14:55:54 -0800184 totalTimeDiff = totalTime - preTotalTime[type];
Vijay Khemka15537762020-07-22 11:44:56 -0700185
186 /* Store current idle and active time for next calculation */
Sui Chen51bcfcb2021-11-01 15:28:51 -0700187 preActiveTime[type] = activeTime;
Sui Chenec6601d2023-01-09 14:55:54 -0800188 preTotalTime[type] = totalTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700189
Sui Chenec6601d2023-01-09 14:55:54 -0800190 activePercValue = (100.0 * activeTimeDiff) / totalTimeDiff;
Vijay Khemka15537762020-07-22 11:44:56 -0700191
192 if (DEBUG)
193 std::cout << "CPU Utilization is " << activePercValue << "\n";
194
195 return activePercValue;
196}
197
Sui Chen51bcfcb2021-11-01 15:28:51 -0700198auto readCPUUtilizationTotal([[maybe_unused]] const std::string& path)
199{
200 return readCPUUtilization(CPUUtilizationType::TOTAL);
201}
202
203auto readCPUUtilizationKernel([[maybe_unused]] const std::string& path)
204{
205 return readCPUUtilization(CPUUtilizationType::KERNEL);
206}
207
208auto readCPUUtilizationUser([[maybe_unused]] const std::string& path)
209{
210 return readCPUUtilization(CPUUtilizationType::USER);
211}
212
Sui Chen517524a2021-12-19 20:52:46 -0800213double readMemoryUtilization([[maybe_unused]] const std::string& path)
Vijay Khemka15537762020-07-22 11:44:56 -0700214{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800215 /* Unused var: path */
216 std::ignore = path;
Potin Laib7d7bd52022-08-23 01:47:13 +0000217 std::ifstream meminfo("/proc/meminfo");
218 std::string line;
219 double memTotal = -1;
220 double memAvail = -1;
Vijay Khemka15537762020-07-22 11:44:56 -0700221
Potin Laib7d7bd52022-08-23 01:47:13 +0000222 while (std::getline(meminfo, line))
223 {
224 std::string name;
225 double value;
226 std::istringstream iss(line);
227
228 if (!(iss >> name >> value))
229 {
230 continue;
231 }
232
233 if (name.starts_with("MemTotal"))
234 {
235 memTotal = value;
236 }
237 else if (name.starts_with("MemAvailable"))
238 {
239 memAvail = value;
240 }
241 }
242
243 if (memTotal <= 0 || memAvail <= 0)
244 {
245 return std::numeric_limits<double>::quiet_NaN();
246 }
Vijay Khemka15537762020-07-22 11:44:56 -0700247
248 if (DEBUG)
249 {
Potin Laib7d7bd52022-08-23 01:47:13 +0000250 std::cout << "MemTotal: " << memTotal << " MemAvailable: " << memAvail
251 << std::endl;
Vijay Khemka15537762020-07-22 11:44:56 -0700252 }
253
Potin Laib7d7bd52022-08-23 01:47:13 +0000254 return (memTotal - memAvail) / memTotal * 100;
Vijay Khemka15537762020-07-22 11:44:56 -0700255}
256
Sui Chen517524a2021-12-19 20:52:46 -0800257double readStorageUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800258{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800259 struct statvfs buffer
260 {};
261 int ret = statvfs(path.c_str(), &buffer);
262 double total = 0;
263 double available = 0;
264 double used = 0;
265 double usedPercentage = 0;
266
267 if (ret != 0)
268 {
269 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800270 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
271 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800272 return 0;
273 }
274
275 total = buffer.f_blocks * (buffer.f_frsize / 1024);
276 available = buffer.f_bfree * (buffer.f_frsize / 1024);
277 used = total - available;
278 usedPercentage = (used / total) * 100;
279
280 if (DEBUG)
281 {
282 std::cout << "Total:" << total << "\n";
283 std::cout << "Available:" << available << "\n";
284 std::cout << "Used:" << used << "\n";
285 std::cout << "Storage utilization is:" << usedPercentage << "\n";
286 }
287
288 return usedPercentage;
289}
290
Sui Chen517524a2021-12-19 20:52:46 -0800291double readInodeUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800292{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800293 struct statvfs buffer
294 {};
295 int ret = statvfs(path.c_str(), &buffer);
296 double totalInodes = 0;
297 double availableInodes = 0;
298 double used = 0;
299 double usedPercentage = 0;
300
301 if (ret != 0)
302 {
303 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800304 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
305 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800306 return 0;
307 }
308
309 totalInodes = buffer.f_files;
310 availableInodes = buffer.f_ffree;
311 used = totalInodes - availableInodes;
312 usedPercentage = (used / totalInodes) * 100;
313
314 if (DEBUG)
315 {
316 std::cout << "Total Inodes:" << totalInodes << "\n";
317 std::cout << "Available Inodes:" << availableInodes << "\n";
318 std::cout << "Used:" << used << "\n";
319 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
320 }
321
322 return usedPercentage;
323}
324
325constexpr auto storage = "Storage";
326constexpr auto inode = "Inode";
Sui Chen51bcfcb2021-11-01 15:28:51 -0700327
328/** Map of read function for each health sensors supported
329 *
330 * The following health sensors are read in the ManagerDiagnosticData
331 * Redfish resource:
332 * - CPU_Kernel populates ProcessorStatistics.KernelPercent
333 * - CPU_User populates ProcessorStatistics.UserPercent
334 */
Sui Chen517524a2021-12-19 20:52:46 -0800335const std::map<std::string, std::function<double(const std::string& path)>>
Sui Chen51bcfcb2021-11-01 15:28:51 -0700336 readSensors = {{"CPU", readCPUUtilizationTotal},
337 {"CPU_Kernel", readCPUUtilizationKernel},
338 {"CPU_User", readCPUUtilizationUser},
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800339 {"Memory", readMemoryUtilization},
340 {storage, readStorageUtilization},
341 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700342
343void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700344{
345 CriticalInterface::criticalHigh(criticalHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800346 CriticalInterface::criticalLow(std::numeric_limits<double>::quiet_NaN());
347
Vijay Khemkae2795302020-07-15 17:28:45 -0700348 WarningInterface::warningHigh(warningHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800349 WarningInterface::warningLow(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkae2795302020-07-15 17:28:45 -0700350}
351
Vijay Khemka15537762020-07-22 11:44:56 -0700352void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700353{
354 ValueIface::value(value);
355}
356
Sui Chen517524a2021-12-19 20:52:46 -0800357void HealthSensor::initHealthSensor(
358 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemka15537762020-07-22 11:44:56 -0700359{
Sui Chen517524a2021-12-19 20:52:46 -0800360 info("{SENSOR} Health Sensor initialized", "SENSOR", sensorConfig.name);
361
362 /* Look for sensor read functions and Read Sensor values */
363 auto it = readSensors.find(sensorConfig.name);
364
365 if (sensorConfig.name.rfind(storage, 0) == 0)
366 {
367 it = readSensors.find(storage);
368 }
369 else if (sensorConfig.name.rfind(inode, 0) == 0)
370 {
371 it = readSensors.find(inode);
372 }
373 else if (it == readSensors.end())
374 {
375 error("Sensor read function not available");
376 return;
377 }
378
379 double value = it->second(sensorConfig.path);
380
381 if (value < 0)
382 {
383 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
384 sensorConfig.name);
385 return;
386 }
387
Vijay Khemka08797702020-09-21 14:53:57 -0700388 /* Initialize unit value (Percent) for utilization sensor */
389 ValueIface::unit(ValueIface::Unit::Percent);
390
Konstantin Aladyshev9d29b372021-12-21 15:45:02 +0300391 ValueIface::maxValue(100);
392 ValueIface::minValue(0);
Potin Laic82e6162022-08-02 10:22:56 +0000393 ValueIface::value(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkab38fd582020-07-23 13:21:23 -0700394
Sui Chen670cc132021-04-13 09:27:22 -0700395 // Associate the sensor to chassis
Sui Chen517524a2021-12-19 20:52:46 -0800396 // This connects the DBus object to a Chassis.
397
Sui Chen670cc132021-04-13 09:27:22 -0700398 std::vector<AssociationTuple> associationTuples;
Sui Chen517524a2021-12-19 20:52:46 -0800399 for (const auto& chassisId : bmcInventoryPaths)
Sui Chen670cc132021-04-13 09:27:22 -0700400 {
Sui Chen517524a2021-12-19 20:52:46 -0800401 // This utilization sensor "is monitoring" the BMC with path chassisId.
402 // The chassisId is "monitored_by" this utilization sensor.
403 associationTuples.push_back({"monitors", "monitored_by", chassisId});
Sui Chen670cc132021-04-13 09:27:22 -0700404 }
405 AssociationDefinitionInterface::associations(associationTuples);
406
Vijay Khemkab38fd582020-07-23 13:21:23 -0700407 /* Start the timer for reading sensor data at regular interval */
408 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
409}
410
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700411void HealthSensor::checkSensorThreshold(const double value)
412{
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300413 if (std::isfinite(sensorConfig.criticalHigh) &&
414 (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700415 {
416 if (!CriticalInterface::criticalAlarmHigh())
417 {
418 CriticalInterface::criticalAlarmHigh(true);
419 if (sensorConfig.criticalLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800420 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500421 error(
422 "ASSERT: sensor {SENSOR} is above the upper threshold critical high",
423 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800424 startUnit(sensorConfig.criticalTgt);
425 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700426 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300427 return;
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700428 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300429
430 if (CriticalInterface::criticalAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700431 {
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300432 CriticalInterface::criticalAlarmHigh(false);
433 if (sensorConfig.criticalLog)
434 info(
435 "DEASSERT: sensor {SENSOR} is under the upper threshold critical high",
436 "SENSOR", sensorConfig.name);
437 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700438
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300439 if (std::isfinite(sensorConfig.warningHigh) &&
440 (value > sensorConfig.warningHigh))
441 {
442 if (!WarningInterface::warningAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700443 {
444 WarningInterface::warningAlarmHigh(true);
445 if (sensorConfig.warningLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800446 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500447 error(
448 "ASSERT: sensor {SENSOR} is above the upper threshold warning high",
449 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800450 startUnit(sensorConfig.warningTgt);
451 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700452 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300453 return;
454 }
455
456 if (WarningInterface::warningAlarmHigh())
457 {
458 WarningInterface::warningAlarmHigh(false);
459 if (sensorConfig.warningLog)
460 info(
461 "DEASSERT: sensor {SENSOR} is under the upper threshold warning high",
462 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700463 }
464}
465
Vijay Khemkab38fd582020-07-23 13:21:23 -0700466void HealthSensor::readHealthSensor()
467{
468 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800469 double value;
470
471 if (sensorConfig.name.rfind(storage, 0) == 0)
472 {
473 value = readSensors.find(storage)->second(sensorConfig.path);
474 }
475 else if (sensorConfig.name.rfind(inode, 0) == 0)
476 {
477 value = readSensors.find(inode)->second(sensorConfig.path);
478 }
479 else
480 {
481 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
482 }
483
Vijay Khemkab38fd582020-07-23 13:21:23 -0700484 if (value < 0)
485 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500486 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
487 sensorConfig.name);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700488 return;
489 }
490
491 /* Remove first item from the queue */
Potin Laic82e6162022-08-02 10:22:56 +0000492 if (valQueue.size() >= sensorConfig.windowSize)
493 {
494 valQueue.pop_front();
495 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700496 /* Add new item at the back */
497 valQueue.push_back(value);
Potin Laic82e6162022-08-02 10:22:56 +0000498 /* Wait until the queue is filled with enough reference*/
499 if (valQueue.size() < sensorConfig.windowSize)
500 {
501 return;
502 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700503
504 /* Calculate average values for the given window size */
505 double avgValue = 0;
506 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
507 avgValue = avgValue / sensorConfig.windowSize;
508
509 /* Set this new value to dbus */
510 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700511
512 /* Check the sensor threshold and log required message */
513 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700514}
515
Potin Lai156ecf32022-07-11 17:09:10 +0800516void HealthSensor::startUnit(const std::string& sysdUnit)
517{
518 if (sysdUnit.empty())
519 {
520 return;
521 }
522
Patrick Williamsbbfe7182022-07-22 19:26:56 -0500523 sdbusplus::message_t msg = bus.new_method_call(
Potin Lai156ecf32022-07-11 17:09:10 +0800524 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
525 "org.freedesktop.systemd1.Manager", "StartUnit");
526 msg.append(sysdUnit, "replace");
527 bus.call_noreply(msg);
528}
529
Sui Chen036f1612021-07-22 01:31:49 -0700530void HealthMon::recreateSensors()
531{
532 PHOSPHOR_LOG2_USING;
533 healthSensors.clear();
Sui Chen036f1612021-07-22 01:31:49 -0700534
Sui Chen517524a2021-12-19 20:52:46 -0800535 // Find BMC inventory paths and create health sensors
536 std::vector<std::string> bmcInventoryPaths =
537 findPathsWithType(bus, BMC_INVENTORY_ITEM);
538 createHealthSensors(bmcInventoryPaths);
Sui Chen036f1612021-07-22 01:31:49 -0700539}
540
Vijay Khemka15537762020-07-22 11:44:56 -0700541void printConfig(HealthConfig& cfg)
542{
543 std::cout << "Name: " << cfg.name << "\n";
544 std::cout << "Freq: " << (int)cfg.freq << "\n";
545 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
546 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
547 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
548 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
549 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
550 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
551 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800552 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700553}
554
Vijay Khemkae2795302020-07-15 17:28:45 -0700555/* Create dbus utilization sensor object for each configured sensors */
Sui Chen517524a2021-12-19 20:52:46 -0800556void HealthMon::createHealthSensors(
557 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemkae2795302020-07-15 17:28:45 -0700558{
559 for (auto& cfg : sensorConfigs)
560 {
561 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
Sui Chen517524a2021-12-19 20:52:46 -0800562 auto healthSensor = std::make_shared<HealthSensor>(
563 bus, objPath.c_str(), cfg, bmcInventoryPaths);
Vijay Khemkae2795302020-07-15 17:28:45 -0700564 healthSensors.emplace(cfg.name, healthSensor);
565
Patrick Williams957e03c2021-09-02 16:38:42 -0500566 info("{SENSOR} Health Sensor created", "SENSOR", cfg.name);
Vijay Khemkae2795302020-07-15 17:28:45 -0700567
568 /* Set configured values of crtical and warning high to dbus */
569 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
570 }
571}
572
573/** @brief Parsing Health config JSON file */
574Json HealthMon::parseConfigFile(std::string configFile)
575{
576 std::ifstream jsonFile(configFile);
577 if (!jsonFile.is_open())
578 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500579 error("config JSON file not found: {PATH}", "PATH", configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700580 }
581
582 auto data = Json::parse(jsonFile, nullptr, false);
583 if (data.is_discarded())
584 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500585 error("config readings JSON parser failure: {PATH}", "PATH",
586 configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700587 }
588
589 return data;
590}
591
Vijay Khemkae2795302020-07-15 17:28:45 -0700592void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
593{
Vijay Khemkae2795302020-07-15 17:28:45 -0700594 static const Json empty{};
595
Vijay Khemka15537762020-07-22 11:44:56 -0700596 /* Default frerquency of sensor polling is 1 second */
597 cfg.freq = data.value("Frequency", 1);
598
599 /* Default window size sensor queue is 1 */
600 cfg.windowSize = data.value("Window_size", 1);
601
Vijay Khemkae2795302020-07-15 17:28:45 -0700602 auto threshold = data.value("Threshold", empty);
603 if (!threshold.empty())
604 {
605 auto criticalData = threshold.value("Critical", empty);
606 if (!criticalData.empty())
607 {
Patrick Williams658d70a2023-05-10 07:51:03 -0500608 cfg.criticalHigh = criticalData.value("Value",
609 defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700610 cfg.criticalLog = criticalData.value("Log", true);
611 cfg.criticalTgt = criticalData.value("Target", "");
612 }
613 auto warningData = threshold.value("Warning", empty);
614 if (!warningData.empty())
615 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700616 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
617 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700618 cfg.warningTgt = warningData.value("Target", "");
619 }
620 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800621 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700622}
623
Vijay Khemka15537762020-07-22 11:44:56 -0700624std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700625{
Vijay Khemkae2795302020-07-15 17:28:45 -0700626 std::vector<HealthConfig> cfgs;
Vijay Khemkae2795302020-07-15 17:28:45 -0700627 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
628
629 // print values
630 if (DEBUG)
631 std::cout << "Config json data:\n" << data << "\n\n";
632
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800633 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700634 for (auto& j : data.items())
635 {
636 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800637 /* key need match default value in map readSensors or match the key
638 * start with "Storage" or "Inode" */
Patrick Williams658d70a2023-05-10 07:51:03 -0500639 bool isStorageOrInode = (key.rfind(storage, 0) == 0 ||
640 key.rfind(inode, 0) == 0);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800641 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700642 {
643 HealthConfig cfg = HealthConfig();
644 cfg.name = j.key();
645 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800646 if (isStorageOrInode)
647 {
648 struct statvfs buffer
649 {};
650 int ret = statvfs(cfg.path.c_str(), &buffer);
651 if (ret != 0)
652 {
653 auto e = errno;
654 std::cerr << "Error from statvfs: " << strerror(e)
655 << ", name: " << cfg.name
656 << ", path: " << cfg.path
657 << ", please check your settings in config file."
658 << std::endl;
659 continue;
660 }
661 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700662 cfgs.push_back(cfg);
663 if (DEBUG)
664 printConfig(cfg);
665 }
666 else
667 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500668 error("{SENSOR} Health Sensor not supported", "SENSOR", key);
Vijay Khemkae2795302020-07-15 17:28:45 -0700669 }
670 }
671 return cfgs;
672}
673
Sui Chen517524a2021-12-19 20:52:46 -0800674// Two caveats here.
675// 1. The BMC Inventory will only show up by the nearest ObjectMapper polling
676// interval.
677// 2. InterfacesAdded events will are not emitted like they are with E-M.
678void HealthMon::createBmcInventoryIfNotCreated()
679{
680 if (bmcInventory == nullptr)
681 {
682 info("createBmcInventory");
683 bmcInventory = std::make_shared<phosphor::health::BmcInventory>(
684 bus, "/xyz/openbmc_project/inventory/bmc");
685 }
686}
687
688bool HealthMon::bmcInventoryCreated()
689{
690 return bmcInventory != nullptr;
691}
692
Vijay Khemkae2795302020-07-15 17:28:45 -0700693} // namespace health
694} // namespace phosphor
695
Sui Chen517524a2021-12-19 20:52:46 -0800696void sensorRecreateTimerCallback(
Patrick Williams9ca00452022-11-26 09:41:58 -0600697 std::shared_ptr<boost::asio::deadline_timer> timer, sdbusplus::bus_t& bus)
Sui Chen517524a2021-12-19 20:52:46 -0800698{
699 timer->expires_from_now(boost::posix_time::seconds(TIMER_INTERVAL));
700 timer->async_wait([timer, &bus](const boost::system::error_code& ec) {
701 if (ec == boost::asio::error::operation_aborted)
702 {
703 info("sensorRecreateTimer aborted");
704 return;
705 }
706
707 // When Entity-manager is already running
708 if (!needUpdate)
709 {
710 if ((!healthMon->bmcInventoryCreated()) &&
711 (!phosphor::health::findPathsWithType(bus, BMC_CONFIGURATION)
712 .empty()))
713 {
714 healthMon->createBmcInventoryIfNotCreated();
715 needUpdate = true;
716 }
717 }
718 else
719 {
Sui Chen517524a2021-12-19 20:52:46 -0800720 // If this daemon maintains its own DBus object, we must make sure
721 // the object is registered to ObjectMapper
722 if (phosphor::health::findPathsWithType(bus, BMC_INVENTORY_ITEM)
723 .empty())
724 {
725 info(
726 "BMC inventory item not registered to Object Mapper yet, waiting for next iteration");
727 }
728 else
729 {
730 info(
731 "BMC inventory item registered to Object Mapper, creating sensors now");
732 healthMon->recreateSensors();
733 needUpdate = false;
734 }
735 }
736 sensorRecreateTimerCallback(timer, bus);
737 });
738}
739
Vijay Khemkae2795302020-07-15 17:28:45 -0700740/**
741 * @brief Main
742 */
743int main()
744{
Sui Chen036f1612021-07-22 01:31:49 -0700745 // The io_context is needed for the timer
746 boost::asio::io_context io;
747
748 // DBus connection
749 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
750
751 conn->request_name(HEALTH_BUS_NAME);
752
Vijay Khemkae2795302020-07-15 17:28:45 -0700753 // Get a default event loop
754 auto event = sdeventplus::Event::get_default();
755
Vijay Khemkae2795302020-07-15 17:28:45 -0700756 // Create an health monitor object
Sui Chen036f1612021-07-22 01:31:49 -0700757 healthMon = std::make_shared<phosphor::health::HealthMon>(*conn);
Vijay Khemkae2795302020-07-15 17:28:45 -0700758
Yong Lif8d79732021-03-12 09:12:19 +0800759 // Add object manager through object_server
760 sdbusplus::asio::object_server objectServer(conn);
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700761
Sui Chen036f1612021-07-22 01:31:49 -0700762 sdbusplus::asio::sd_event_wrapper sdEvents(io);
763
764 sensorRecreateTimer = std::make_shared<boost::asio::deadline_timer>(io);
765
766 // If the SystemInventory does not exist: wait for the InterfaceAdded signal
Patrick Williams658d70a2023-05-10 07:51:03 -0500767 auto interfacesAddedSignalHandler =
768 std::make_unique<sdbusplus::bus::match_t>(
769 static_cast<sdbusplus::bus_t&>(*conn),
770 sdbusplus::bus::match::rules::interfacesAdded(),
771 [conn](sdbusplus::message_t& msg) {
772 using Association = std::tuple<std::string, std::string, std::string>;
773 using InterfacesAdded = std::vector<std::pair<
774 std::string,
775 std::vector<std::pair<std::string,
776 std::variant<std::vector<Association>>>>>>;
Sui Chen517524a2021-12-19 20:52:46 -0800777
Patrick Williams658d70a2023-05-10 07:51:03 -0500778 sdbusplus::message::object_path o;
779 InterfacesAdded interfacesAdded;
Sui Chen517524a2021-12-19 20:52:46 -0800780
Patrick Williams658d70a2023-05-10 07:51:03 -0500781 try
782 {
783 msg.read(o);
784 msg.read(interfacesAdded);
785 }
786 catch (const std::exception& e)
787 {
788 error(
789 "Exception occurred while processing interfacesAdded: {EXCEPTION}",
790 "EXCEPTION", e.what());
791 return;
792 }
Sui Chen517524a2021-12-19 20:52:46 -0800793
Patrick Williams658d70a2023-05-10 07:51:03 -0500794 // Ignore any signal coming from health-monitor itself.
795 if (msg.get_sender() == conn->get_unique_name())
796 {
797 return;
798 }
Sui Chen517524a2021-12-19 20:52:46 -0800799
Patrick Williams658d70a2023-05-10 07:51:03 -0500800 // Check if the BMC Inventory is in the interfaces created.
801 bool hasBmcConfiguration = false;
802 for (const auto& x : interfacesAdded)
803 {
804 if (x.first == BMC_CONFIGURATION)
Sui Chen517524a2021-12-19 20:52:46 -0800805 {
Patrick Williams658d70a2023-05-10 07:51:03 -0500806 hasBmcConfiguration = true;
Sui Chen517524a2021-12-19 20:52:46 -0800807 }
Patrick Williams658d70a2023-05-10 07:51:03 -0500808 }
Sui Chen517524a2021-12-19 20:52:46 -0800809
Patrick Williams658d70a2023-05-10 07:51:03 -0500810 if (hasBmcConfiguration)
811 {
812 info(
813 "BMC configuration detected, will create a corresponding Inventory item");
814 healthMon->createBmcInventoryIfNotCreated();
815 needUpdate = true;
816 }
817 });
Sui Chen036f1612021-07-22 01:31:49 -0700818
819 // Start the timer
Ed Tanousa19c6fb2023-03-06 13:53:27 -0800820 boost::asio::post(io, [conn]() {
821 sensorRecreateTimerCallback(sensorRecreateTimer, *conn);
822 });
Sui Chen036f1612021-07-22 01:31:49 -0700823 io.run();
Vijay Khemkae2795302020-07-15 17:28:45 -0700824
825 return 0;
826}