blob: f8b2c5436ad33f1f0ffbe1cb154b2cb09724f970 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
Sui Chen036f1612021-07-22 01:31:49 -07005#include <unistd.h>
6
7#include <boost/asio/deadline_timer.hpp>
8#include <sdbusplus/asio/connection.hpp>
9#include <sdbusplus/asio/object_server.hpp>
10#include <sdbusplus/asio/sd_event.hpp>
11#include <sdbusplus/bus/match.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -070012#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -070013#include <sdeventplus/event.hpp>
14
15#include <fstream>
16#include <iostream>
Sui Chen036f1612021-07-22 01:31:49 -070017#include <memory>
Vijay Khemka15537762020-07-22 11:44:56 -070018#include <numeric>
19#include <sstream>
20
21extern "C"
22{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080023#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070024#include <sys/sysinfo.h>
25}
Vijay Khemkae2795302020-07-15 17:28:45 -070026
Patrick Williams957e03c2021-09-02 16:38:42 -050027PHOSPHOR_LOG2_USING;
28
Vijay Khemkae2795302020-07-15 17:28:45 -070029static constexpr bool DEBUG = false;
Vijay Khemka415dcd22020-09-21 15:58:21 -070030static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070031
Sui Chen036f1612021-07-22 01:31:49 -070032// Limit sensor recreation interval to 10s
33bool needUpdate;
34static constexpr int TIMER_INTERVAL = 10;
35std::shared_ptr<boost::asio::deadline_timer> sensorRecreateTimer;
36std::shared_ptr<phosphor::health::HealthMon> healthMon;
37
Vijay Khemkae2795302020-07-15 17:28:45 -070038namespace phosphor
39{
40namespace health
41{
42
Sui Chen517524a2021-12-19 20:52:46 -080043// Example values for iface:
44// BMC_CONFIGURATION
45// BMC_INVENTORY_ITEM
Patrick Williams9ca00452022-11-26 09:41:58 -060046std::vector<std::string> findPathsWithType(sdbusplus::bus_t& bus,
Sui Chen517524a2021-12-19 20:52:46 -080047 const std::string& iface)
48{
49 PHOSPHOR_LOG2_USING;
50 std::vector<std::string> ret;
51
52 // Find all BMCs (DBus objects implementing the
53 // Inventory.Item.Bmc interface that may be created by
54 // configuring the Inventory Manager)
Patrick Williams9ca00452022-11-26 09:41:58 -060055 sdbusplus::message_t msg = bus.new_method_call(
Sui Chen517524a2021-12-19 20:52:46 -080056 "xyz.openbmc_project.ObjectMapper",
57 "/xyz/openbmc_project/object_mapper",
58 "xyz.openbmc_project.ObjectMapper", "GetSubTreePaths");
59
60 // "/": No limit for paths for all the paths that may be touched
61 // in this daemon
62
63 // 0: Limit the depth to 0 to match both objects created by
64 // EntityManager and by InventoryManager
65
66 // {iface}: The endpoint of the Association Definition must have
67 // the Inventory.Item.Bmc interface
68 msg.append("/", 0, std::vector<std::string>{iface});
69
70 try
71 {
72 bus.call(msg, 0).read(ret);
73
74 if (!ret.empty())
75 {
76 debug("{IFACE} found", "IFACE", iface);
77 }
78 else
79 {
80 debug("{IFACE} not found", "IFACE", iface);
81 }
82 }
83 catch (std::exception& e)
84 {
85 error("Exception occurred while calling {PATH}: {ERROR}", "PATH",
86 InventoryPath, "ERROR", e);
87 }
88 return ret;
89}
90
/** Index of each time counter on the aggregate "cpu" line of /proc/stat,
 *  in the order the counters appear on that line.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};

// # cat /proc/stat|grep 'cpu '
// cpu  5750423 14827 1572788 9259794 1317 0 28879 0 0 0
static_assert(NUM_CPU_STATES_TIME == 10,
              "the cpu line is expected to carry ten counters");
109
/** Selects which share of CPU time readCPUUtilization() reports. */
enum CPUUtilizationType
{
    USER = 0,
    KERNEL = 1,
    TOTAL = 2
};
116
117double readCPUUtilization(enum CPUUtilizationType type)
Vijay Khemka15537762020-07-22 11:44:56 -0700118{
Patrick Williams957e03c2021-09-02 16:38:42 -0500119 auto proc_stat = "/proc/stat";
120 std::ifstream fileStat(proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700121 if (!fileStat.is_open())
122 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500123 error("cpu file not available: {PATH}", "PATH", proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -0700124 return -1;
125 }
126
127 std::string firstLine, labelName;
128 std::size_t timeData[NUM_CPU_STATES_TIME];
129
130 std::getline(fileStat, firstLine);
131 std::stringstream ss(firstLine);
132 ss >> labelName;
133
134 if (DEBUG)
135 std::cout << "CPU stats first Line is " << firstLine << "\n";
136
137 if (labelName.compare("cpu"))
138 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500139 error("CPU data not available");
Vijay Khemka15537762020-07-22 11:44:56 -0700140 return -1;
141 }
142
143 int i;
144 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
145 {
146 if (!(ss >> timeData[i]))
147 break;
148 }
149
150 if (i != NUM_CPU_STATES_TIME)
151 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500152 error("CPU data not correct");
Vijay Khemka15537762020-07-22 11:44:56 -0700153 return -1;
154 }
155
Sui Chenec6601d2023-01-09 14:55:54 -0800156 static std::unordered_map<enum CPUUtilizationType, uint64_t> preActiveTime,
157 preTotalTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700158
Sui Chenec6601d2023-01-09 14:55:54 -0800159 // These are actually Jiffies. On the BMC, 1 jiffy usually corresponds to
160 // 0.01 second.
161 uint64_t activeTime = 0, activeTimeDiff = 0, totalTime = 0,
162 totalTimeDiff = 0;
163 double activePercValue = 0;
164
Sui Chen51bcfcb2021-11-01 15:28:51 -0700165 if (type == TOTAL)
166 {
167 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
168 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
169 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
170 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
171 }
172 else if (type == KERNEL)
173 {
174 activeTime = timeData[SYSTEM_IDX];
175 }
176 else if (type == USER)
177 {
178 activeTime = timeData[USER_IDX];
179 }
Vijay Khemka15537762020-07-22 11:44:56 -0700180
Sui Chenec6601d2023-01-09 14:55:54 -0800181 totalTime = std::accumulate(std::begin(timeData), std::end(timeData), 0);
182
Sui Chen51bcfcb2021-11-01 15:28:51 -0700183 activeTimeDiff = activeTime - preActiveTime[type];
Sui Chenec6601d2023-01-09 14:55:54 -0800184 totalTimeDiff = totalTime - preTotalTime[type];
Vijay Khemka15537762020-07-22 11:44:56 -0700185
186 /* Store current idle and active time for next calculation */
Sui Chen51bcfcb2021-11-01 15:28:51 -0700187 preActiveTime[type] = activeTime;
Sui Chenec6601d2023-01-09 14:55:54 -0800188 preTotalTime[type] = totalTime;
Vijay Khemka15537762020-07-22 11:44:56 -0700189
Sui Chenec6601d2023-01-09 14:55:54 -0800190 activePercValue = (100.0 * activeTimeDiff) / totalTimeDiff;
Vijay Khemka15537762020-07-22 11:44:56 -0700191
192 if (DEBUG)
193 std::cout << "CPU Utilization is " << activePercValue << "\n";
194
195 return activePercValue;
196}
197
Sui Chen51bcfcb2021-11-01 15:28:51 -0700198auto readCPUUtilizationTotal([[maybe_unused]] const std::string& path)
199{
200 return readCPUUtilization(CPUUtilizationType::TOTAL);
201}
202
203auto readCPUUtilizationKernel([[maybe_unused]] const std::string& path)
204{
205 return readCPUUtilization(CPUUtilizationType::KERNEL);
206}
207
208auto readCPUUtilizationUser([[maybe_unused]] const std::string& path)
209{
210 return readCPUUtilization(CPUUtilizationType::USER);
211}
212
Sui Chen517524a2021-12-19 20:52:46 -0800213double readMemoryUtilization([[maybe_unused]] const std::string& path)
Vijay Khemka15537762020-07-22 11:44:56 -0700214{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800215 /* Unused var: path */
216 std::ignore = path;
Potin Laib7d7bd52022-08-23 01:47:13 +0000217 std::ifstream meminfo("/proc/meminfo");
218 std::string line;
219 double memTotal = -1;
220 double memAvail = -1;
Vijay Khemka15537762020-07-22 11:44:56 -0700221
Potin Laib7d7bd52022-08-23 01:47:13 +0000222 while (std::getline(meminfo, line))
223 {
224 std::string name;
225 double value;
226 std::istringstream iss(line);
227
228 if (!(iss >> name >> value))
229 {
230 continue;
231 }
232
233 if (name.starts_with("MemTotal"))
234 {
235 memTotal = value;
236 }
237 else if (name.starts_with("MemAvailable"))
238 {
239 memAvail = value;
240 }
241 }
242
243 if (memTotal <= 0 || memAvail <= 0)
244 {
245 return std::numeric_limits<double>::quiet_NaN();
246 }
Vijay Khemka15537762020-07-22 11:44:56 -0700247
248 if (DEBUG)
249 {
Potin Laib7d7bd52022-08-23 01:47:13 +0000250 std::cout << "MemTotal: " << memTotal << " MemAvailable: " << memAvail
251 << std::endl;
Vijay Khemka15537762020-07-22 11:44:56 -0700252 }
253
Potin Laib7d7bd52022-08-23 01:47:13 +0000254 return (memTotal - memAvail) / memTotal * 100;
Vijay Khemka15537762020-07-22 11:44:56 -0700255}
256
Sui Chen517524a2021-12-19 20:52:46 -0800257double readStorageUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800258{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800259 struct statvfs buffer
260 {};
261 int ret = statvfs(path.c_str(), &buffer);
262 double total = 0;
263 double available = 0;
264 double used = 0;
265 double usedPercentage = 0;
266
267 if (ret != 0)
268 {
269 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800270 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
271 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800272 return 0;
273 }
274
275 total = buffer.f_blocks * (buffer.f_frsize / 1024);
276 available = buffer.f_bfree * (buffer.f_frsize / 1024);
277 used = total - available;
278 usedPercentage = (used / total) * 100;
279
280 if (DEBUG)
281 {
282 std::cout << "Total:" << total << "\n";
283 std::cout << "Available:" << available << "\n";
284 std::cout << "Used:" << used << "\n";
285 std::cout << "Storage utilization is:" << usedPercentage << "\n";
286 }
287
288 return usedPercentage;
289}
290
Sui Chen517524a2021-12-19 20:52:46 -0800291double readInodeUtilization([[maybe_unused]] const std::string& path)
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800292{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800293 struct statvfs buffer
294 {};
295 int ret = statvfs(path.c_str(), &buffer);
296 double totalInodes = 0;
297 double availableInodes = 0;
298 double used = 0;
299 double usedPercentage = 0;
300
301 if (ret != 0)
302 {
303 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800304 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
305 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800306 return 0;
307 }
308
309 totalInodes = buffer.f_files;
310 availableInodes = buffer.f_ffree;
311 used = totalInodes - availableInodes;
312 usedPercentage = (used / totalInodes) * 100;
313
314 if (DEBUG)
315 {
316 std::cout << "Total Inodes:" << totalInodes << "\n";
317 std::cout << "Available Inodes:" << availableInodes << "\n";
318 std::cout << "Used:" << used << "\n";
319 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
320 }
321
322 return usedPercentage;
323}
324
325constexpr auto storage = "Storage";
326constexpr auto inode = "Inode";
Sui Chen51bcfcb2021-11-01 15:28:51 -0700327
328/** Map of read function for each health sensors supported
329 *
330 * The following health sensors are read in the ManagerDiagnosticData
331 * Redfish resource:
332 * - CPU_Kernel populates ProcessorStatistics.KernelPercent
333 * - CPU_User populates ProcessorStatistics.UserPercent
334 */
Sui Chen517524a2021-12-19 20:52:46 -0800335const std::map<std::string, std::function<double(const std::string& path)>>
Sui Chen51bcfcb2021-11-01 15:28:51 -0700336 readSensors = {{"CPU", readCPUUtilizationTotal},
337 {"CPU_Kernel", readCPUUtilizationKernel},
338 {"CPU_User", readCPUUtilizationUser},
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800339 {"Memory", readMemoryUtilization},
340 {storage, readStorageUtilization},
341 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700342
343void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700344{
345 CriticalInterface::criticalHigh(criticalHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800346 CriticalInterface::criticalLow(std::numeric_limits<double>::quiet_NaN());
347
Vijay Khemkae2795302020-07-15 17:28:45 -0700348 WarningInterface::warningHigh(warningHigh);
Yong Lif8d79732021-03-12 09:12:19 +0800349 WarningInterface::warningLow(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkae2795302020-07-15 17:28:45 -0700350}
351
Vijay Khemka15537762020-07-22 11:44:56 -0700352void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700353{
354 ValueIface::value(value);
355}
356
Sui Chen517524a2021-12-19 20:52:46 -0800357void HealthSensor::initHealthSensor(
358 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemka15537762020-07-22 11:44:56 -0700359{
Sui Chen517524a2021-12-19 20:52:46 -0800360 info("{SENSOR} Health Sensor initialized", "SENSOR", sensorConfig.name);
361
362 /* Look for sensor read functions and Read Sensor values */
363 auto it = readSensors.find(sensorConfig.name);
364
365 if (sensorConfig.name.rfind(storage, 0) == 0)
366 {
367 it = readSensors.find(storage);
368 }
369 else if (sensorConfig.name.rfind(inode, 0) == 0)
370 {
371 it = readSensors.find(inode);
372 }
373 else if (it == readSensors.end())
374 {
375 error("Sensor read function not available");
376 return;
377 }
378
379 double value = it->second(sensorConfig.path);
380
381 if (value < 0)
382 {
383 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
384 sensorConfig.name);
385 return;
386 }
387
Vijay Khemka08797702020-09-21 14:53:57 -0700388 /* Initialize unit value (Percent) for utilization sensor */
389 ValueIface::unit(ValueIface::Unit::Percent);
390
Konstantin Aladyshev9d29b372021-12-21 15:45:02 +0300391 ValueIface::maxValue(100);
392 ValueIface::minValue(0);
Potin Laic82e6162022-08-02 10:22:56 +0000393 ValueIface::value(std::numeric_limits<double>::quiet_NaN());
Vijay Khemkab38fd582020-07-23 13:21:23 -0700394
Sui Chen670cc132021-04-13 09:27:22 -0700395 // Associate the sensor to chassis
Sui Chen517524a2021-12-19 20:52:46 -0800396 // This connects the DBus object to a Chassis.
397
Sui Chen670cc132021-04-13 09:27:22 -0700398 std::vector<AssociationTuple> associationTuples;
Sui Chen517524a2021-12-19 20:52:46 -0800399 for (const auto& chassisId : bmcInventoryPaths)
Sui Chen670cc132021-04-13 09:27:22 -0700400 {
Sui Chen517524a2021-12-19 20:52:46 -0800401 // This utilization sensor "is monitoring" the BMC with path chassisId.
402 // The chassisId is "monitored_by" this utilization sensor.
403 associationTuples.push_back({"monitors", "monitored_by", chassisId});
Sui Chen670cc132021-04-13 09:27:22 -0700404 }
405 AssociationDefinitionInterface::associations(associationTuples);
406
Vijay Khemkab38fd582020-07-23 13:21:23 -0700407 /* Start the timer for reading sensor data at regular interval */
408 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
409}
410
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700411void HealthSensor::checkSensorThreshold(const double value)
412{
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300413 if (std::isfinite(sensorConfig.criticalHigh) &&
414 (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700415 {
416 if (!CriticalInterface::criticalAlarmHigh())
417 {
418 CriticalInterface::criticalAlarmHigh(true);
419 if (sensorConfig.criticalLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800420 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500421 error(
422 "ASSERT: sensor {SENSOR} is above the upper threshold critical high",
423 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800424 startUnit(sensorConfig.criticalTgt);
425 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700426 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300427 return;
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700428 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300429
430 if (CriticalInterface::criticalAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700431 {
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300432 CriticalInterface::criticalAlarmHigh(false);
433 if (sensorConfig.criticalLog)
434 info(
435 "DEASSERT: sensor {SENSOR} is under the upper threshold critical high",
436 "SENSOR", sensorConfig.name);
437 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700438
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300439 if (std::isfinite(sensorConfig.warningHigh) &&
440 (value > sensorConfig.warningHigh))
441 {
442 if (!WarningInterface::warningAlarmHigh())
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700443 {
444 WarningInterface::warningAlarmHigh(true);
445 if (sensorConfig.warningLog)
Potin Lai156ecf32022-07-11 17:09:10 +0800446 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500447 error(
448 "ASSERT: sensor {SENSOR} is above the upper threshold warning high",
449 "SENSOR", sensorConfig.name);
Potin Lai156ecf32022-07-11 17:09:10 +0800450 startUnit(sensorConfig.warningTgt);
451 }
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700452 }
Konstantin Aladysheva6cd7042021-12-21 15:36:01 +0300453 return;
454 }
455
456 if (WarningInterface::warningAlarmHigh())
457 {
458 WarningInterface::warningAlarmHigh(false);
459 if (sensorConfig.warningLog)
460 info(
461 "DEASSERT: sensor {SENSOR} is under the upper threshold warning high",
462 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700463 }
464}
465
Vijay Khemkab38fd582020-07-23 13:21:23 -0700466void HealthSensor::readHealthSensor()
467{
468 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800469 double value;
470
471 if (sensorConfig.name.rfind(storage, 0) == 0)
472 {
473 value = readSensors.find(storage)->second(sensorConfig.path);
474 }
475 else if (sensorConfig.name.rfind(inode, 0) == 0)
476 {
477 value = readSensors.find(inode)->second(sensorConfig.path);
478 }
479 else
480 {
481 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
482 }
483
Vijay Khemkab38fd582020-07-23 13:21:23 -0700484 if (value < 0)
485 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500486 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
487 sensorConfig.name);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700488 return;
489 }
490
491 /* Remove first item from the queue */
Potin Laic82e6162022-08-02 10:22:56 +0000492 if (valQueue.size() >= sensorConfig.windowSize)
493 {
494 valQueue.pop_front();
495 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700496 /* Add new item at the back */
497 valQueue.push_back(value);
Potin Laic82e6162022-08-02 10:22:56 +0000498 /* Wait until the queue is filled with enough reference*/
499 if (valQueue.size() < sensorConfig.windowSize)
500 {
501 return;
502 }
Vijay Khemkab38fd582020-07-23 13:21:23 -0700503
504 /* Calculate average values for the given window size */
505 double avgValue = 0;
506 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
507 avgValue = avgValue / sensorConfig.windowSize;
508
509 /* Set this new value to dbus */
510 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700511
512 /* Check the sensor threshold and log required message */
513 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700514}
515
Potin Lai156ecf32022-07-11 17:09:10 +0800516void HealthSensor::startUnit(const std::string& sysdUnit)
517{
518 if (sysdUnit.empty())
519 {
520 return;
521 }
522
Patrick Williamsbbfe7182022-07-22 19:26:56 -0500523 sdbusplus::message_t msg = bus.new_method_call(
Potin Lai156ecf32022-07-11 17:09:10 +0800524 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
525 "org.freedesktop.systemd1.Manager", "StartUnit");
526 msg.append(sysdUnit, "replace");
527 bus.call_noreply(msg);
528}
529
Sui Chen036f1612021-07-22 01:31:49 -0700530void HealthMon::recreateSensors()
531{
532 PHOSPHOR_LOG2_USING;
533 healthSensors.clear();
Sui Chen036f1612021-07-22 01:31:49 -0700534
Sui Chen517524a2021-12-19 20:52:46 -0800535 // Find BMC inventory paths and create health sensors
536 std::vector<std::string> bmcInventoryPaths =
537 findPathsWithType(bus, BMC_INVENTORY_ITEM);
538 createHealthSensors(bmcInventoryPaths);
Sui Chen036f1612021-07-22 01:31:49 -0700539}
540
Vijay Khemka15537762020-07-22 11:44:56 -0700541void printConfig(HealthConfig& cfg)
542{
543 std::cout << "Name: " << cfg.name << "\n";
544 std::cout << "Freq: " << (int)cfg.freq << "\n";
545 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
546 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
547 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
548 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
549 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
550 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
551 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800552 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700553}
554
Vijay Khemkae2795302020-07-15 17:28:45 -0700555/* Create dbus utilization sensor object for each configured sensors */
Sui Chen517524a2021-12-19 20:52:46 -0800556void HealthMon::createHealthSensors(
557 const std::vector<std::string>& bmcInventoryPaths)
Vijay Khemkae2795302020-07-15 17:28:45 -0700558{
559 for (auto& cfg : sensorConfigs)
560 {
561 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
Sui Chen517524a2021-12-19 20:52:46 -0800562 auto healthSensor = std::make_shared<HealthSensor>(
563 bus, objPath.c_str(), cfg, bmcInventoryPaths);
Vijay Khemkae2795302020-07-15 17:28:45 -0700564 healthSensors.emplace(cfg.name, healthSensor);
565
Patrick Williams957e03c2021-09-02 16:38:42 -0500566 info("{SENSOR} Health Sensor created", "SENSOR", cfg.name);
Vijay Khemkae2795302020-07-15 17:28:45 -0700567
568 /* Set configured values of crtical and warning high to dbus */
569 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
570 }
571}
572
573/** @brief Parsing Health config JSON file */
574Json HealthMon::parseConfigFile(std::string configFile)
575{
576 std::ifstream jsonFile(configFile);
577 if (!jsonFile.is_open())
578 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500579 error("config JSON file not found: {PATH}", "PATH", configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700580 }
581
582 auto data = Json::parse(jsonFile, nullptr, false);
583 if (data.is_discarded())
584 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500585 error("config readings JSON parser failure: {PATH}", "PATH",
586 configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700587 }
588
589 return data;
590}
591
Vijay Khemkae2795302020-07-15 17:28:45 -0700592void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
593{
594
595 static const Json empty{};
596
Vijay Khemka15537762020-07-22 11:44:56 -0700597 /* Default frerquency of sensor polling is 1 second */
598 cfg.freq = data.value("Frequency", 1);
599
600 /* Default window size sensor queue is 1 */
601 cfg.windowSize = data.value("Window_size", 1);
602
Vijay Khemkae2795302020-07-15 17:28:45 -0700603 auto threshold = data.value("Threshold", empty);
604 if (!threshold.empty())
605 {
606 auto criticalData = threshold.value("Critical", empty);
607 if (!criticalData.empty())
608 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700609 cfg.criticalHigh =
610 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700611 cfg.criticalLog = criticalData.value("Log", true);
612 cfg.criticalTgt = criticalData.value("Target", "");
613 }
614 auto warningData = threshold.value("Warning", empty);
615 if (!warningData.empty())
616 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700617 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
618 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700619 cfg.warningTgt = warningData.value("Target", "");
620 }
621 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800622 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700623}
624
Vijay Khemka15537762020-07-22 11:44:56 -0700625std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700626{
627
628 std::vector<HealthConfig> cfgs;
Vijay Khemkae2795302020-07-15 17:28:45 -0700629 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
630
631 // print values
632 if (DEBUG)
633 std::cout << "Config json data:\n" << data << "\n\n";
634
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800635 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700636 for (auto& j : data.items())
637 {
638 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800639 /* key need match default value in map readSensors or match the key
640 * start with "Storage" or "Inode" */
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800641 bool isStorageOrInode =
642 (key.rfind(storage, 0) == 0 || key.rfind(inode, 0) == 0);
643 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700644 {
645 HealthConfig cfg = HealthConfig();
646 cfg.name = j.key();
647 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800648 if (isStorageOrInode)
649 {
650 struct statvfs buffer
651 {};
652 int ret = statvfs(cfg.path.c_str(), &buffer);
653 if (ret != 0)
654 {
655 auto e = errno;
656 std::cerr << "Error from statvfs: " << strerror(e)
657 << ", name: " << cfg.name
658 << ", path: " << cfg.path
659 << ", please check your settings in config file."
660 << std::endl;
661 continue;
662 }
663 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700664 cfgs.push_back(cfg);
665 if (DEBUG)
666 printConfig(cfg);
667 }
668 else
669 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500670 error("{SENSOR} Health Sensor not supported", "SENSOR", key);
Vijay Khemkae2795302020-07-15 17:28:45 -0700671 }
672 }
673 return cfgs;
674}
675
Sui Chen517524a2021-12-19 20:52:46 -0800676// Two caveats here.
677// 1. The BMC Inventory will only show up by the nearest ObjectMapper polling
678// interval.
679// 2. InterfacesAdded events will are not emitted like they are with E-M.
680void HealthMon::createBmcInventoryIfNotCreated()
681{
682 if (bmcInventory == nullptr)
683 {
684 info("createBmcInventory");
685 bmcInventory = std::make_shared<phosphor::health::BmcInventory>(
686 bus, "/xyz/openbmc_project/inventory/bmc");
687 }
688}
689
690bool HealthMon::bmcInventoryCreated()
691{
692 return bmcInventory != nullptr;
693}
694
Vijay Khemkae2795302020-07-15 17:28:45 -0700695} // namespace health
696} // namespace phosphor
697
Sui Chen517524a2021-12-19 20:52:46 -0800698void sensorRecreateTimerCallback(
Patrick Williams9ca00452022-11-26 09:41:58 -0600699 std::shared_ptr<boost::asio::deadline_timer> timer, sdbusplus::bus_t& bus)
Sui Chen517524a2021-12-19 20:52:46 -0800700{
701 timer->expires_from_now(boost::posix_time::seconds(TIMER_INTERVAL));
702 timer->async_wait([timer, &bus](const boost::system::error_code& ec) {
703 if (ec == boost::asio::error::operation_aborted)
704 {
705 info("sensorRecreateTimer aborted");
706 return;
707 }
708
709 // When Entity-manager is already running
710 if (!needUpdate)
711 {
712 if ((!healthMon->bmcInventoryCreated()) &&
713 (!phosphor::health::findPathsWithType(bus, BMC_CONFIGURATION)
714 .empty()))
715 {
716 healthMon->createBmcInventoryIfNotCreated();
717 needUpdate = true;
718 }
719 }
720 else
721 {
722
723 // If this daemon maintains its own DBus object, we must make sure
724 // the object is registered to ObjectMapper
725 if (phosphor::health::findPathsWithType(bus, BMC_INVENTORY_ITEM)
726 .empty())
727 {
728 info(
729 "BMC inventory item not registered to Object Mapper yet, waiting for next iteration");
730 }
731 else
732 {
733 info(
734 "BMC inventory item registered to Object Mapper, creating sensors now");
735 healthMon->recreateSensors();
736 needUpdate = false;
737 }
738 }
739 sensorRecreateTimerCallback(timer, bus);
740 });
741}
742
Vijay Khemkae2795302020-07-15 17:28:45 -0700743/**
744 * @brief Main
745 */
746int main()
747{
Sui Chen036f1612021-07-22 01:31:49 -0700748 // The io_context is needed for the timer
749 boost::asio::io_context io;
750
751 // DBus connection
752 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
753
754 conn->request_name(HEALTH_BUS_NAME);
755
Vijay Khemkae2795302020-07-15 17:28:45 -0700756 // Get a default event loop
757 auto event = sdeventplus::Event::get_default();
758
Vijay Khemkae2795302020-07-15 17:28:45 -0700759 // Create an health monitor object
Sui Chen036f1612021-07-22 01:31:49 -0700760 healthMon = std::make_shared<phosphor::health::HealthMon>(*conn);
Vijay Khemkae2795302020-07-15 17:28:45 -0700761
Yong Lif8d79732021-03-12 09:12:19 +0800762 // Add object manager through object_server
763 sdbusplus::asio::object_server objectServer(conn);
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700764
Sui Chen036f1612021-07-22 01:31:49 -0700765 sdbusplus::asio::sd_event_wrapper sdEvents(io);
766
767 sensorRecreateTimer = std::make_shared<boost::asio::deadline_timer>(io);
768
769 // If the SystemInventory does not exist: wait for the InterfaceAdded signal
Sui Chen517524a2021-12-19 20:52:46 -0800770 auto interfacesAddedSignalHandler = std::make_unique<
771 sdbusplus::bus::match_t>(
772 static_cast<sdbusplus::bus_t&>(*conn),
773 sdbusplus::bus::match::rules::interfacesAdded(),
Patrick Williams9ca00452022-11-26 09:41:58 -0600774 [conn](sdbusplus::message_t& msg) {
Sui Chen517524a2021-12-19 20:52:46 -0800775 using Association =
776 std::tuple<std::string, std::string, std::string>;
777 using InterfacesAdded = std::vector<std::pair<
778 std::string,
779 std::vector<std::pair<
780 std::string, std::variant<std::vector<Association>>>>>>;
781
782 sdbusplus::message::object_path o;
783 InterfacesAdded interfacesAdded;
784
785 try
786 {
Sui Chen036f1612021-07-22 01:31:49 -0700787 msg.read(o);
Sui Chen517524a2021-12-19 20:52:46 -0800788 msg.read(interfacesAdded);
789 }
790 catch (const std::exception& e)
791 {
792 error(
793 "Exception occurred while processing interfacesAdded: {EXCEPTION}",
794 "EXCEPTION", e.what());
795 return;
796 }
797
798 // Ignore any signal coming from health-monitor itself.
799 if (msg.get_sender() == conn->get_unique_name())
800 {
801 return;
802 }
803
804 // Check if the BMC Inventory is in the interfaces created.
805 bool hasBmcConfiguration = false;
806 for (const auto& x : interfacesAdded)
807 {
808 if (x.first == BMC_CONFIGURATION)
Sui Chen036f1612021-07-22 01:31:49 -0700809 {
Sui Chen517524a2021-12-19 20:52:46 -0800810 hasBmcConfiguration = true;
Sui Chen036f1612021-07-22 01:31:49 -0700811 }
Sui Chen517524a2021-12-19 20:52:46 -0800812 }
813
814 if (hasBmcConfiguration)
815 {
816 info(
817 "BMC configuration detected, will create a corresponding Inventory item");
818 healthMon->createBmcInventoryIfNotCreated();
819 needUpdate = true;
820 }
821 });
Sui Chen036f1612021-07-22 01:31:49 -0700822
823 // Start the timer
Ed Tanousa19c6fb2023-03-06 13:53:27 -0800824 boost::asio::post(io, [conn]() {
825 sensorRecreateTimerCallback(sensorRecreateTimer, *conn);
826 });
Sui Chen036f1612021-07-22 01:31:49 -0700827 io.run();
Vijay Khemkae2795302020-07-15 17:28:45 -0700828
829 return 0;
830}