blob: 8861403c257efe97920336394cf5cdd29aa6ff81 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
5#include <phosphor-logging/log.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -07006#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -07007#include <sdeventplus/event.hpp>
8
9#include <fstream>
10#include <iostream>
Vijay Khemka15537762020-07-22 11:44:56 -070011#include <numeric>
12#include <sstream>
13
14extern "C"
15{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080016#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070017#include <sys/sysinfo.h>
18}
Vijay Khemkae2795302020-07-15 17:28:45 -070019
/** When true, the read/config helpers print diagnostic details to stdout. */
static constexpr bool DEBUG{false};
/** Threshold used when a configured threshold entry has no "Value" key. */
static constexpr uint8_t defaultHighThreshold{100};
Vijay Khemkae2795302020-07-15 17:28:45 -070022
23namespace phosphor
24{
25namespace health
26{
27
28using namespace phosphor::logging;
29
/**
 * Positions of the counters that follow the "cpu" label on the first line
 * of /proc/stat, in the order readCPUUtilization() extracts them.
 * NUM_CPU_STATES_TIME is the number of counters expected, not a counter.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
44
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080045double readCPUUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -070046{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080047 /* Unused var: path */
48 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -070049 std::ifstream fileStat("/proc/stat");
50 if (!fileStat.is_open())
51 {
52 log<level::ERR>("cpu file not available",
53 entry("FILENAME = /proc/stat"));
54 return -1;
55 }
56
57 std::string firstLine, labelName;
58 std::size_t timeData[NUM_CPU_STATES_TIME];
59
60 std::getline(fileStat, firstLine);
61 std::stringstream ss(firstLine);
62 ss >> labelName;
63
64 if (DEBUG)
65 std::cout << "CPU stats first Line is " << firstLine << "\n";
66
67 if (labelName.compare("cpu"))
68 {
69 log<level::ERR>("CPU data not available");
70 return -1;
71 }
72
73 int i;
74 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
75 {
76 if (!(ss >> timeData[i]))
77 break;
78 }
79
80 if (i != NUM_CPU_STATES_TIME)
81 {
82 log<level::ERR>("CPU data not correct");
83 return -1;
84 }
85
86 static double preActiveTime = 0, preIdleTime = 0;
87 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
88 activePercValue;
89
90 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
91 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
92 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
93 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
94 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
95
96 idleTimeDiff = idleTime - preIdleTime;
97 activeTimeDiff = activeTime - preActiveTime;
98
99 /* Store current idle and active time for next calculation */
100 preIdleTime = idleTime;
101 preActiveTime = activeTime;
102
103 totalTime = idleTimeDiff + activeTimeDiff;
104
105 activePercValue = activeTimeDiff / totalTime * 100;
106
107 if (DEBUG)
108 std::cout << "CPU Utilization is " << activePercValue << "\n";
109
110 return activePercValue;
111}
112
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800113double readMemoryUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -0700114{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800115 /* Unused var: path */
116 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -0700117 struct sysinfo s_info;
118
119 sysinfo(&s_info);
120 double usedRam = s_info.totalram - s_info.freeram;
121 double memUsePerc = usedRam / s_info.totalram * 100;
122
123 if (DEBUG)
124 {
125 std::cout << "Memory Utilization is " << memUsePerc << "\n";
126
127 std::cout << "TotalRam: " << s_info.totalram
128 << " FreeRam: " << s_info.freeram << "\n";
129 std::cout << "UseRam: " << usedRam << "\n";
130 }
131
132 return memUsePerc;
133}
134
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800135double readStorageUtilization(std::string path)
136{
137
138 struct statvfs buffer
139 {};
140 int ret = statvfs(path.c_str(), &buffer);
141 double total = 0;
142 double available = 0;
143 double used = 0;
144 double usedPercentage = 0;
145
146 if (ret != 0)
147 {
148 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800149 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
150 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800151 return 0;
152 }
153
154 total = buffer.f_blocks * (buffer.f_frsize / 1024);
155 available = buffer.f_bfree * (buffer.f_frsize / 1024);
156 used = total - available;
157 usedPercentage = (used / total) * 100;
158
159 if (DEBUG)
160 {
161 std::cout << "Total:" << total << "\n";
162 std::cout << "Available:" << available << "\n";
163 std::cout << "Used:" << used << "\n";
164 std::cout << "Storage utilization is:" << usedPercentage << "\n";
165 }
166
167 return usedPercentage;
168}
169
170double readInodeUtilization(std::string path)
171{
172
173 struct statvfs buffer
174 {};
175 int ret = statvfs(path.c_str(), &buffer);
176 double totalInodes = 0;
177 double availableInodes = 0;
178 double used = 0;
179 double usedPercentage = 0;
180
181 if (ret != 0)
182 {
183 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800184 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
185 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800186 return 0;
187 }
188
189 totalInodes = buffer.f_files;
190 availableInodes = buffer.f_ffree;
191 used = totalInodes - availableInodes;
192 usedPercentage = (used / totalInodes) * 100;
193
194 if (DEBUG)
195 {
196 std::cout << "Total Inodes:" << totalInodes << "\n";
197 std::cout << "Available Inodes:" << availableInodes << "\n";
198 std::cout << "Used:" << used << "\n";
199 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
200 }
201
202 return usedPercentage;
203}
204
205constexpr auto storage = "Storage";
206constexpr auto inode = "Inode";
Vijay Khemka15537762020-07-22 11:44:56 -0700207/** Map of read function for each health sensors supported */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800208const std::map<std::string, std::function<double(std::string path)>>
209 readSensors = {{"CPU", readCPUUtilization},
210 {"Memory", readMemoryUtilization},
211 {storage, readStorageUtilization},
212 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700213
214void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700215{
216 CriticalInterface::criticalHigh(criticalHigh);
217 WarningInterface::warningHigh(warningHigh);
218}
219
Vijay Khemka15537762020-07-22 11:44:56 -0700220void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700221{
222 ValueIface::value(value);
223}
224
Vijay Khemka15537762020-07-22 11:44:56 -0700225void HealthSensor::initHealthSensor()
226{
227 std::string logMsg = sensorConfig.name + " Health Sensor initialized";
228 log<level::INFO>(logMsg.c_str());
229
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800230 /* Look for sensor read functions and Read Sensor values */
231 double value;
232 std::map<std::string,
233 std::function<double(std::string path)>>::const_iterator it;
234 it = readSensors.find(sensorConfig.name);
235
236 if (sensorConfig.name.rfind(storage, 0) == 0)
237 {
238 it = readSensors.find(storage);
239 }
240 else if (sensorConfig.name.rfind(inode, 0) == 0)
241 {
242 it = readSensors.find(inode);
243 }
244 else if (it == readSensors.end())
Vijay Khemka15537762020-07-22 11:44:56 -0700245 {
246 log<level::ERR>("Sensor read function not available");
247 return;
248 }
249
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800250 value = it->second(sensorConfig.path);
Vijay Khemka15537762020-07-22 11:44:56 -0700251
252 if (value < 0)
253 {
254 log<level::ERR>("Reading Sensor Utilization failed",
255 entry("NAME = %s", sensorConfig.name.c_str()));
256 return;
257 }
258
Vijay Khemka08797702020-09-21 14:53:57 -0700259 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700260 for (int i = 0; i < sensorConfig.windowSize; i++)
261 {
262 valQueue.push_back(value);
263 }
Vijay Khemka08797702020-09-21 14:53:57 -0700264
265 /* Initialize unit value (Percent) for utilization sensor */
266 ValueIface::unit(ValueIface::Unit::Percent);
267
Vijay Khemka15537762020-07-22 11:44:56 -0700268 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700269
270 /* Start the timer for reading sensor data at regular interval */
271 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
272}
273
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700274void HealthSensor::checkSensorThreshold(const double value)
275{
Vijay Khemka415dcd22020-09-21 15:58:21 -0700276 if (sensorConfig.criticalHigh && (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700277 {
278 if (!CriticalInterface::criticalAlarmHigh())
279 {
280 CriticalInterface::criticalAlarmHigh(true);
281 if (sensorConfig.criticalLog)
282 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
283 "critical high threshold",
284 entry("NAME = %s", sensorConfig.name.c_str()));
285 }
286 }
287 else
288 {
289 if (CriticalInterface::criticalAlarmHigh())
290 {
291 CriticalInterface::criticalAlarmHigh(false);
292 if (sensorConfig.criticalLog)
293 log<level::INFO>("DEASSERT: Utilization Sensor is under "
294 "critical high threshold",
295 entry("NAME = %s", sensorConfig.name.c_str()));
296 }
297
Vijay Khemka415dcd22020-09-21 15:58:21 -0700298 /* if warning high value is not set then return */
299 if (!sensorConfig.warningHigh)
300 return;
301
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700302 if ((value > sensorConfig.warningHigh) &&
303 (!WarningInterface::warningAlarmHigh()))
304 {
305 WarningInterface::warningAlarmHigh(true);
306 if (sensorConfig.warningLog)
307 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
308 "warning high threshold",
309 entry("NAME = %s", sensorConfig.name.c_str()));
310 }
311 else if ((value <= sensorConfig.warningHigh) &&
312 (WarningInterface::warningAlarmHigh()))
313 {
314 WarningInterface::warningAlarmHigh(false);
315 if (sensorConfig.warningLog)
316 log<level::INFO>("DEASSERT: Utilization Sensor is under "
317 "warning high threshold",
318 entry("NAME = %s", sensorConfig.name.c_str()));
319 }
320 }
321}
322
Vijay Khemkab38fd582020-07-23 13:21:23 -0700323void HealthSensor::readHealthSensor()
324{
325 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800326 double value;
327
328 if (sensorConfig.name.rfind(storage, 0) == 0)
329 {
330 value = readSensors.find(storage)->second(sensorConfig.path);
331 }
332 else if (sensorConfig.name.rfind(inode, 0) == 0)
333 {
334 value = readSensors.find(inode)->second(sensorConfig.path);
335 }
336 else
337 {
338 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
339 }
340
Vijay Khemkab38fd582020-07-23 13:21:23 -0700341 if (value < 0)
342 {
343 log<level::ERR>("Reading Sensor Utilization failed",
344 entry("NAME = %s", sensorConfig.name.c_str()));
345 return;
346 }
347
348 /* Remove first item from the queue */
349 valQueue.pop_front();
350 /* Add new item at the back */
351 valQueue.push_back(value);
352
353 /* Calculate average values for the given window size */
354 double avgValue = 0;
355 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
356 avgValue = avgValue / sensorConfig.windowSize;
357
358 /* Set this new value to dbus */
359 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700360
361 /* Check the sensor threshold and log required message */
362 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700363}
364
365void printConfig(HealthConfig& cfg)
366{
367 std::cout << "Name: " << cfg.name << "\n";
368 std::cout << "Freq: " << (int)cfg.freq << "\n";
369 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
370 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
371 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
372 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
373 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
374 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
375 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800376 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700377}
378
Vijay Khemkae2795302020-07-15 17:28:45 -0700379/* Create dbus utilization sensor object for each configured sensors */
380void HealthMon::createHealthSensors()
381{
382 for (auto& cfg : sensorConfigs)
383 {
384 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
385 auto healthSensor =
Vijay Khemka15537762020-07-22 11:44:56 -0700386 std::make_shared<HealthSensor>(bus, objPath.c_str(), cfg);
Vijay Khemkae2795302020-07-15 17:28:45 -0700387 healthSensors.emplace(cfg.name, healthSensor);
388
389 std::string logMsg = cfg.name + " Health Sensor created";
390 log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str()));
391
392 /* Set configured values of crtical and warning high to dbus */
393 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
394 }
395}
396
397/** @brief Parsing Health config JSON file */
398Json HealthMon::parseConfigFile(std::string configFile)
399{
400 std::ifstream jsonFile(configFile);
401 if (!jsonFile.is_open())
402 {
403 log<level::ERR>("config JSON file not found",
404 entry("FILENAME = %s", configFile.c_str()));
405 }
406
407 auto data = Json::parse(jsonFile, nullptr, false);
408 if (data.is_discarded())
409 {
410 log<level::ERR>("config readings JSON parser failure",
411 entry("FILENAME = %s", configFile.c_str()));
412 }
413
414 return data;
415}
416
Vijay Khemkae2795302020-07-15 17:28:45 -0700417void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
418{
419
420 static const Json empty{};
421
Vijay Khemka15537762020-07-22 11:44:56 -0700422 /* Default frerquency of sensor polling is 1 second */
423 cfg.freq = data.value("Frequency", 1);
424
425 /* Default window size sensor queue is 1 */
426 cfg.windowSize = data.value("Window_size", 1);
427
Vijay Khemkae2795302020-07-15 17:28:45 -0700428 auto threshold = data.value("Threshold", empty);
429 if (!threshold.empty())
430 {
431 auto criticalData = threshold.value("Critical", empty);
432 if (!criticalData.empty())
433 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700434 cfg.criticalHigh =
435 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700436 cfg.criticalLog = criticalData.value("Log", true);
437 cfg.criticalTgt = criticalData.value("Target", "");
438 }
439 auto warningData = threshold.value("Warning", empty);
440 if (!warningData.empty())
441 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700442 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
443 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700444 cfg.warningTgt = warningData.value("Target", "");
445 }
446 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800447 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700448}
449
Vijay Khemka15537762020-07-22 11:44:56 -0700450std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700451{
452
453 std::vector<HealthConfig> cfgs;
454 HealthConfig cfg;
455 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
456
457 // print values
458 if (DEBUG)
459 std::cout << "Config json data:\n" << data << "\n\n";
460
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800461 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700462 for (auto& j : data.items())
463 {
464 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800465 /* key need match default value in map readSensors or match the key
466 * start with "Storage" or "Inode" */
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800467 bool isStorageOrInode =
468 (key.rfind(storage, 0) == 0 || key.rfind(inode, 0) == 0);
469 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700470 {
471 HealthConfig cfg = HealthConfig();
472 cfg.name = j.key();
473 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800474 if (isStorageOrInode)
475 {
476 struct statvfs buffer
477 {};
478 int ret = statvfs(cfg.path.c_str(), &buffer);
479 if (ret != 0)
480 {
481 auto e = errno;
482 std::cerr << "Error from statvfs: " << strerror(e)
483 << ", name: " << cfg.name
484 << ", path: " << cfg.path
485 << ", please check your settings in config file."
486 << std::endl;
487 continue;
488 }
489 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700490 cfgs.push_back(cfg);
491 if (DEBUG)
492 printConfig(cfg);
493 }
494 else
495 {
496 std::string logMsg = key + " Health Sensor not supported";
497 log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str()));
498 }
499 }
500 return cfgs;
501}
502
503} // namespace health
504} // namespace phosphor
505
506/**
507 * @brief Main
508 */
509int main()
510{
511
512 // Get a default event loop
513 auto event = sdeventplus::Event::get_default();
514
515 // Get a handle to system dbus
516 auto bus = sdbusplus::bus::new_default();
517
518 // Create an health monitor object
519 phosphor::health::HealthMon healthMon(bus);
520
521 // Request service bus name
522 bus.request_name(HEALTH_BUS_NAME);
523
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700524 // Add object manager to sensor node
525 sdbusplus::server::manager::manager objManager(bus, SENSOR_OBJPATH);
526
Vijay Khemkae2795302020-07-15 17:28:45 -0700527 // Attach the bus to sd_event to service user requests
528 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
529 event.loop();
530
531 return 0;
532}