blob: fb9e180d198b2e40f5110df1f0d30cb03cf425e9 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
Vijay Khemka1d0d0122020-09-29 12:17:43 -07005#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -07006#include <sdeventplus/event.hpp>
7
8#include <fstream>
9#include <iostream>
Vijay Khemka15537762020-07-22 11:44:56 -070010#include <numeric>
11#include <sstream>
12
13extern "C"
14{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080015#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070016#include <sys/sysinfo.h>
17}
Vijay Khemkae2795302020-07-15 17:28:45 -070018
/** Compile-time switch; when true the code in this file prints diagnostics
 *  to stdout. */
static constexpr bool DEBUG = false;
/** Fallback used when a threshold entry in the config has no "Value" key. */
static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070021
22namespace phosphor
23{
24namespace health
25{
26
27using namespace phosphor::logging;
28
/** Positions of the time counters that follow the "cpu" label on the first
 *  line of /proc/stat; NUM_CPU_STATES_TIME is the number of counters read. */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
43
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080044double readCPUUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -070045{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080046 /* Unused var: path */
47 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -070048 std::ifstream fileStat("/proc/stat");
49 if (!fileStat.is_open())
50 {
51 log<level::ERR>("cpu file not available",
52 entry("FILENAME = /proc/stat"));
53 return -1;
54 }
55
56 std::string firstLine, labelName;
57 std::size_t timeData[NUM_CPU_STATES_TIME];
58
59 std::getline(fileStat, firstLine);
60 std::stringstream ss(firstLine);
61 ss >> labelName;
62
63 if (DEBUG)
64 std::cout << "CPU stats first Line is " << firstLine << "\n";
65
66 if (labelName.compare("cpu"))
67 {
68 log<level::ERR>("CPU data not available");
69 return -1;
70 }
71
72 int i;
73 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
74 {
75 if (!(ss >> timeData[i]))
76 break;
77 }
78
79 if (i != NUM_CPU_STATES_TIME)
80 {
81 log<level::ERR>("CPU data not correct");
82 return -1;
83 }
84
85 static double preActiveTime = 0, preIdleTime = 0;
86 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
87 activePercValue;
88
89 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
90 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
91 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
92 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
93 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
94
95 idleTimeDiff = idleTime - preIdleTime;
96 activeTimeDiff = activeTime - preActiveTime;
97
98 /* Store current idle and active time for next calculation */
99 preIdleTime = idleTime;
100 preActiveTime = activeTime;
101
102 totalTime = idleTimeDiff + activeTimeDiff;
103
104 activePercValue = activeTimeDiff / totalTime * 100;
105
106 if (DEBUG)
107 std::cout << "CPU Utilization is " << activePercValue << "\n";
108
109 return activePercValue;
110}
111
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800112double readMemoryUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -0700113{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800114 /* Unused var: path */
115 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -0700116 struct sysinfo s_info;
117
118 sysinfo(&s_info);
119 double usedRam = s_info.totalram - s_info.freeram;
120 double memUsePerc = usedRam / s_info.totalram * 100;
121
122 if (DEBUG)
123 {
124 std::cout << "Memory Utilization is " << memUsePerc << "\n";
125
126 std::cout << "TotalRam: " << s_info.totalram
127 << " FreeRam: " << s_info.freeram << "\n";
128 std::cout << "UseRam: " << usedRam << "\n";
129 }
130
131 return memUsePerc;
132}
133
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800134double readStorageUtilization(std::string path)
135{
136
137 struct statvfs buffer
138 {};
139 int ret = statvfs(path.c_str(), &buffer);
140 double total = 0;
141 double available = 0;
142 double used = 0;
143 double usedPercentage = 0;
144
145 if (ret != 0)
146 {
147 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800148 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
149 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800150 return 0;
151 }
152
153 total = buffer.f_blocks * (buffer.f_frsize / 1024);
154 available = buffer.f_bfree * (buffer.f_frsize / 1024);
155 used = total - available;
156 usedPercentage = (used / total) * 100;
157
158 if (DEBUG)
159 {
160 std::cout << "Total:" << total << "\n";
161 std::cout << "Available:" << available << "\n";
162 std::cout << "Used:" << used << "\n";
163 std::cout << "Storage utilization is:" << usedPercentage << "\n";
164 }
165
166 return usedPercentage;
167}
168
169double readInodeUtilization(std::string path)
170{
171
172 struct statvfs buffer
173 {};
174 int ret = statvfs(path.c_str(), &buffer);
175 double totalInodes = 0;
176 double availableInodes = 0;
177 double used = 0;
178 double usedPercentage = 0;
179
180 if (ret != 0)
181 {
182 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800183 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
184 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800185 return 0;
186 }
187
188 totalInodes = buffer.f_files;
189 availableInodes = buffer.f_ffree;
190 used = totalInodes - availableInodes;
191 usedPercentage = (used / totalInodes) * 100;
192
193 if (DEBUG)
194 {
195 std::cout << "Total Inodes:" << totalInodes << "\n";
196 std::cout << "Available Inodes:" << availableInodes << "\n";
197 std::cout << "Used:" << used << "\n";
198 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
199 }
200
201 return usedPercentage;
202}
203
204constexpr auto storage = "Storage";
205constexpr auto inode = "Inode";
Vijay Khemka15537762020-07-22 11:44:56 -0700206/** Map of read function for each health sensors supported */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800207const std::map<std::string, std::function<double(std::string path)>>
208 readSensors = {{"CPU", readCPUUtilization},
209 {"Memory", readMemoryUtilization},
210 {storage, readStorageUtilization},
211 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700212
213void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700214{
215 CriticalInterface::criticalHigh(criticalHigh);
216 WarningInterface::warningHigh(warningHigh);
217}
218
Vijay Khemka15537762020-07-22 11:44:56 -0700219void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700220{
221 ValueIface::value(value);
222}
223
Sui Chen670cc132021-04-13 09:27:22 -0700224void HealthSensor::initHealthSensor(const std::vector<std::string>& chassisIds)
Vijay Khemka15537762020-07-22 11:44:56 -0700225{
226 std::string logMsg = sensorConfig.name + " Health Sensor initialized";
227 log<level::INFO>(logMsg.c_str());
228
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800229 /* Look for sensor read functions and Read Sensor values */
230 double value;
231 std::map<std::string,
232 std::function<double(std::string path)>>::const_iterator it;
233 it = readSensors.find(sensorConfig.name);
234
235 if (sensorConfig.name.rfind(storage, 0) == 0)
236 {
237 it = readSensors.find(storage);
238 }
239 else if (sensorConfig.name.rfind(inode, 0) == 0)
240 {
241 it = readSensors.find(inode);
242 }
243 else if (it == readSensors.end())
Vijay Khemka15537762020-07-22 11:44:56 -0700244 {
245 log<level::ERR>("Sensor read function not available");
246 return;
247 }
248
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800249 value = it->second(sensorConfig.path);
Vijay Khemka15537762020-07-22 11:44:56 -0700250
251 if (value < 0)
252 {
253 log<level::ERR>("Reading Sensor Utilization failed",
254 entry("NAME = %s", sensorConfig.name.c_str()));
255 return;
256 }
257
Vijay Khemka08797702020-09-21 14:53:57 -0700258 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700259 for (int i = 0; i < sensorConfig.windowSize; i++)
260 {
261 valQueue.push_back(value);
262 }
Vijay Khemka08797702020-09-21 14:53:57 -0700263
264 /* Initialize unit value (Percent) for utilization sensor */
265 ValueIface::unit(ValueIface::Unit::Percent);
266
Vijay Khemka15537762020-07-22 11:44:56 -0700267 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700268
Sui Chen670cc132021-04-13 09:27:22 -0700269 // Associate the sensor to chassis
270 std::vector<AssociationTuple> associationTuples;
271 for (const auto& chassisId : chassisIds)
272 {
273 associationTuples.push_back({"bmc", "all_sensors", chassisId});
274 }
275 AssociationDefinitionInterface::associations(associationTuples);
276
Vijay Khemkab38fd582020-07-23 13:21:23 -0700277 /* Start the timer for reading sensor data at regular interval */
278 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
279}
280
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700281void HealthSensor::checkSensorThreshold(const double value)
282{
Vijay Khemka415dcd22020-09-21 15:58:21 -0700283 if (sensorConfig.criticalHigh && (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700284 {
285 if (!CriticalInterface::criticalAlarmHigh())
286 {
287 CriticalInterface::criticalAlarmHigh(true);
288 if (sensorConfig.criticalLog)
289 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
290 "critical high threshold",
291 entry("NAME = %s", sensorConfig.name.c_str()));
292 }
293 }
294 else
295 {
296 if (CriticalInterface::criticalAlarmHigh())
297 {
298 CriticalInterface::criticalAlarmHigh(false);
299 if (sensorConfig.criticalLog)
300 log<level::INFO>("DEASSERT: Utilization Sensor is under "
301 "critical high threshold",
302 entry("NAME = %s", sensorConfig.name.c_str()));
303 }
304
Vijay Khemka415dcd22020-09-21 15:58:21 -0700305 /* if warning high value is not set then return */
306 if (!sensorConfig.warningHigh)
307 return;
308
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700309 if ((value > sensorConfig.warningHigh) &&
310 (!WarningInterface::warningAlarmHigh()))
311 {
312 WarningInterface::warningAlarmHigh(true);
313 if (sensorConfig.warningLog)
314 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
315 "warning high threshold",
316 entry("NAME = %s", sensorConfig.name.c_str()));
317 }
318 else if ((value <= sensorConfig.warningHigh) &&
319 (WarningInterface::warningAlarmHigh()))
320 {
321 WarningInterface::warningAlarmHigh(false);
322 if (sensorConfig.warningLog)
323 log<level::INFO>("DEASSERT: Utilization Sensor is under "
324 "warning high threshold",
325 entry("NAME = %s", sensorConfig.name.c_str()));
326 }
327 }
328}
329
Vijay Khemkab38fd582020-07-23 13:21:23 -0700330void HealthSensor::readHealthSensor()
331{
332 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800333 double value;
334
335 if (sensorConfig.name.rfind(storage, 0) == 0)
336 {
337 value = readSensors.find(storage)->second(sensorConfig.path);
338 }
339 else if (sensorConfig.name.rfind(inode, 0) == 0)
340 {
341 value = readSensors.find(inode)->second(sensorConfig.path);
342 }
343 else
344 {
345 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
346 }
347
Vijay Khemkab38fd582020-07-23 13:21:23 -0700348 if (value < 0)
349 {
350 log<level::ERR>("Reading Sensor Utilization failed",
351 entry("NAME = %s", sensorConfig.name.c_str()));
352 return;
353 }
354
355 /* Remove first item from the queue */
356 valQueue.pop_front();
357 /* Add new item at the back */
358 valQueue.push_back(value);
359
360 /* Calculate average values for the given window size */
361 double avgValue = 0;
362 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
363 avgValue = avgValue / sensorConfig.windowSize;
364
365 /* Set this new value to dbus */
366 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700367
368 /* Check the sensor threshold and log required message */
369 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700370}
371
372void printConfig(HealthConfig& cfg)
373{
374 std::cout << "Name: " << cfg.name << "\n";
375 std::cout << "Freq: " << (int)cfg.freq << "\n";
376 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
377 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
378 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
379 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
380 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
381 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
382 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800383 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700384}
385
Vijay Khemkae2795302020-07-15 17:28:45 -0700386/* Create dbus utilization sensor object for each configured sensors */
Sui Chen670cc132021-04-13 09:27:22 -0700387void HealthMon::createHealthSensors(const std::vector<std::string>& chassisIds)
Vijay Khemkae2795302020-07-15 17:28:45 -0700388{
389 for (auto& cfg : sensorConfigs)
390 {
391 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
Sui Chen670cc132021-04-13 09:27:22 -0700392 auto healthSensor = std::make_shared<HealthSensor>(bus, objPath.c_str(),
393 cfg, chassisIds);
Vijay Khemkae2795302020-07-15 17:28:45 -0700394 healthSensors.emplace(cfg.name, healthSensor);
395
396 std::string logMsg = cfg.name + " Health Sensor created";
397 log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str()));
398
399 /* Set configured values of crtical and warning high to dbus */
400 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
401 }
402}
403
404/** @brief Parsing Health config JSON file */
405Json HealthMon::parseConfigFile(std::string configFile)
406{
407 std::ifstream jsonFile(configFile);
408 if (!jsonFile.is_open())
409 {
410 log<level::ERR>("config JSON file not found",
411 entry("FILENAME = %s", configFile.c_str()));
412 }
413
414 auto data = Json::parse(jsonFile, nullptr, false);
415 if (data.is_discarded())
416 {
417 log<level::ERR>("config readings JSON parser failure",
418 entry("FILENAME = %s", configFile.c_str()));
419 }
420
421 return data;
422}
423
Vijay Khemkae2795302020-07-15 17:28:45 -0700424void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
425{
426
427 static const Json empty{};
428
Vijay Khemka15537762020-07-22 11:44:56 -0700429 /* Default frerquency of sensor polling is 1 second */
430 cfg.freq = data.value("Frequency", 1);
431
432 /* Default window size sensor queue is 1 */
433 cfg.windowSize = data.value("Window_size", 1);
434
Vijay Khemkae2795302020-07-15 17:28:45 -0700435 auto threshold = data.value("Threshold", empty);
436 if (!threshold.empty())
437 {
438 auto criticalData = threshold.value("Critical", empty);
439 if (!criticalData.empty())
440 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700441 cfg.criticalHigh =
442 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700443 cfg.criticalLog = criticalData.value("Log", true);
444 cfg.criticalTgt = criticalData.value("Target", "");
445 }
446 auto warningData = threshold.value("Warning", empty);
447 if (!warningData.empty())
448 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700449 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
450 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700451 cfg.warningTgt = warningData.value("Target", "");
452 }
453 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800454 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700455}
456
Vijay Khemka15537762020-07-22 11:44:56 -0700457std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700458{
459
460 std::vector<HealthConfig> cfgs;
461 HealthConfig cfg;
462 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
463
464 // print values
465 if (DEBUG)
466 std::cout << "Config json data:\n" << data << "\n\n";
467
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800468 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700469 for (auto& j : data.items())
470 {
471 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800472 /* key need match default value in map readSensors or match the key
473 * start with "Storage" or "Inode" */
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800474 bool isStorageOrInode =
475 (key.rfind(storage, 0) == 0 || key.rfind(inode, 0) == 0);
476 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700477 {
478 HealthConfig cfg = HealthConfig();
479 cfg.name = j.key();
480 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800481 if (isStorageOrInode)
482 {
483 struct statvfs buffer
484 {};
485 int ret = statvfs(cfg.path.c_str(), &buffer);
486 if (ret != 0)
487 {
488 auto e = errno;
489 std::cerr << "Error from statvfs: " << strerror(e)
490 << ", name: " << cfg.name
491 << ", path: " << cfg.path
492 << ", please check your settings in config file."
493 << std::endl;
494 continue;
495 }
496 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700497 cfgs.push_back(cfg);
498 if (DEBUG)
499 printConfig(cfg);
500 }
501 else
502 {
503 std::string logMsg = key + " Health Sensor not supported";
504 log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str()));
505 }
506 }
507 return cfgs;
508}
509
510} // namespace health
511} // namespace phosphor
512
513/**
514 * @brief Main
515 */
516int main()
517{
Vijay Khemkae2795302020-07-15 17:28:45 -0700518 // Get a default event loop
519 auto event = sdeventplus::Event::get_default();
520
521 // Get a handle to system dbus
522 auto bus = sdbusplus::bus::new_default();
523
524 // Create an health monitor object
525 phosphor::health::HealthMon healthMon(bus);
526
527 // Request service bus name
528 bus.request_name(HEALTH_BUS_NAME);
529
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700530 // Add object manager to sensor node
531 sdbusplus::server::manager::manager objManager(bus, SENSOR_OBJPATH);
532
Vijay Khemkae2795302020-07-15 17:28:45 -0700533 // Attach the bus to sd_event to service user requests
534 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
535 event.loop();
536
537 return 0;
538}