blob: dac0751acbc08bba51cc3aef4a1042373ac1f754 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
5#include <phosphor-logging/log.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -07006#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -07007#include <sdeventplus/event.hpp>
8
9#include <fstream>
10#include <iostream>
Vijay Khemka15537762020-07-22 11:44:56 -070011#include <numeric>
12#include <sstream>
13
14extern "C"
15{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080016#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070017#include <sys/sysinfo.h>
18}
Vijay Khemkae2795302020-07-15 17:28:45 -070019
/** Compile-time switch guarding the diagnostic output written to stdout */
constexpr bool DEBUG{false};
/** Fallback threshold used when a threshold entry carries no "Value" key */
constexpr uint8_t defaultHighThreshold{100};
Vijay Khemkae2795302020-07-15 17:28:45 -070022
23namespace phosphor
24{
25namespace health
26{
27
28using namespace phosphor::logging;
29
/**
 * Positions of the numeric fields that follow the "cpu" label on the first
 * line of /proc/stat, in the order they are read into the time array.
 * NUM_CPU_STATES_TIME is the number of fields and sizes that array.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
44
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080045double readCPUUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -070046{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080047 /* Unused var: path */
48 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -070049 std::ifstream fileStat("/proc/stat");
50 if (!fileStat.is_open())
51 {
52 log<level::ERR>("cpu file not available",
53 entry("FILENAME = /proc/stat"));
54 return -1;
55 }
56
57 std::string firstLine, labelName;
58 std::size_t timeData[NUM_CPU_STATES_TIME];
59
60 std::getline(fileStat, firstLine);
61 std::stringstream ss(firstLine);
62 ss >> labelName;
63
64 if (DEBUG)
65 std::cout << "CPU stats first Line is " << firstLine << "\n";
66
67 if (labelName.compare("cpu"))
68 {
69 log<level::ERR>("CPU data not available");
70 return -1;
71 }
72
73 int i;
74 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
75 {
76 if (!(ss >> timeData[i]))
77 break;
78 }
79
80 if (i != NUM_CPU_STATES_TIME)
81 {
82 log<level::ERR>("CPU data not correct");
83 return -1;
84 }
85
86 static double preActiveTime = 0, preIdleTime = 0;
87 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
88 activePercValue;
89
90 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
91 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
92 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
93 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
94 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
95
96 idleTimeDiff = idleTime - preIdleTime;
97 activeTimeDiff = activeTime - preActiveTime;
98
99 /* Store current idle and active time for next calculation */
100 preIdleTime = idleTime;
101 preActiveTime = activeTime;
102
103 totalTime = idleTimeDiff + activeTimeDiff;
104
105 activePercValue = activeTimeDiff / totalTime * 100;
106
107 if (DEBUG)
108 std::cout << "CPU Utilization is " << activePercValue << "\n";
109
110 return activePercValue;
111}
112
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800113double readMemoryUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -0700114{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800115 /* Unused var: path */
116 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -0700117 struct sysinfo s_info;
118
119 sysinfo(&s_info);
120 double usedRam = s_info.totalram - s_info.freeram;
121 double memUsePerc = usedRam / s_info.totalram * 100;
122
123 if (DEBUG)
124 {
125 std::cout << "Memory Utilization is " << memUsePerc << "\n";
126
127 std::cout << "TotalRam: " << s_info.totalram
128 << " FreeRam: " << s_info.freeram << "\n";
129 std::cout << "UseRam: " << usedRam << "\n";
130 }
131
132 return memUsePerc;
133}
134
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800135double readStorageUtilization(std::string path)
136{
137
138 struct statvfs buffer
139 {};
140 int ret = statvfs(path.c_str(), &buffer);
141 double total = 0;
142 double available = 0;
143 double used = 0;
144 double usedPercentage = 0;
145
146 if (ret != 0)
147 {
148 auto e = errno;
149 std::cerr << "Error from statvfs" << e << std::endl;
150 return 0;
151 }
152
153 total = buffer.f_blocks * (buffer.f_frsize / 1024);
154 available = buffer.f_bfree * (buffer.f_frsize / 1024);
155 used = total - available;
156 usedPercentage = (used / total) * 100;
157
158 if (DEBUG)
159 {
160 std::cout << "Total:" << total << "\n";
161 std::cout << "Available:" << available << "\n";
162 std::cout << "Used:" << used << "\n";
163 std::cout << "Storage utilization is:" << usedPercentage << "\n";
164 }
165
166 return usedPercentage;
167}
168
169double readInodeUtilization(std::string path)
170{
171
172 struct statvfs buffer
173 {};
174 int ret = statvfs(path.c_str(), &buffer);
175 double totalInodes = 0;
176 double availableInodes = 0;
177 double used = 0;
178 double usedPercentage = 0;
179
180 if (ret != 0)
181 {
182 auto e = errno;
183 std::cerr << "Error from statvfs" << e << std::endl;
184 return 0;
185 }
186
187 totalInodes = buffer.f_files;
188 availableInodes = buffer.f_ffree;
189 used = totalInodes - availableInodes;
190 usedPercentage = (used / totalInodes) * 100;
191
192 if (DEBUG)
193 {
194 std::cout << "Total Inodes:" << totalInodes << "\n";
195 std::cout << "Available Inodes:" << availableInodes << "\n";
196 std::cout << "Used:" << used << "\n";
197 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
198 }
199
200 return usedPercentage;
201}
202
203constexpr auto storage = "Storage";
204constexpr auto inode = "Inode";
Vijay Khemka15537762020-07-22 11:44:56 -0700205/** Map of read function for each health sensors supported */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800206const std::map<std::string, std::function<double(std::string path)>>
207 readSensors = {{"CPU", readCPUUtilization},
208 {"Memory", readMemoryUtilization},
209 {storage, readStorageUtilization},
210 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700211
212void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700213{
214 CriticalInterface::criticalHigh(criticalHigh);
215 WarningInterface::warningHigh(warningHigh);
216}
217
Vijay Khemka15537762020-07-22 11:44:56 -0700218void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700219{
220 ValueIface::value(value);
221}
222
Vijay Khemka15537762020-07-22 11:44:56 -0700223void HealthSensor::initHealthSensor()
224{
225 std::string logMsg = sensorConfig.name + " Health Sensor initialized";
226 log<level::INFO>(logMsg.c_str());
227
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800228 /* Look for sensor read functions and Read Sensor values */
229 double value;
230 std::map<std::string,
231 std::function<double(std::string path)>>::const_iterator it;
232 it = readSensors.find(sensorConfig.name);
233
234 if (sensorConfig.name.rfind(storage, 0) == 0)
235 {
236 it = readSensors.find(storage);
237 }
238 else if (sensorConfig.name.rfind(inode, 0) == 0)
239 {
240 it = readSensors.find(inode);
241 }
242 else if (it == readSensors.end())
Vijay Khemka15537762020-07-22 11:44:56 -0700243 {
244 log<level::ERR>("Sensor read function not available");
245 return;
246 }
247
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800248 value = it->second(sensorConfig.path);
Vijay Khemka15537762020-07-22 11:44:56 -0700249
250 if (value < 0)
251 {
252 log<level::ERR>("Reading Sensor Utilization failed",
253 entry("NAME = %s", sensorConfig.name.c_str()));
254 return;
255 }
256
Vijay Khemka08797702020-09-21 14:53:57 -0700257 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700258 for (int i = 0; i < sensorConfig.windowSize; i++)
259 {
260 valQueue.push_back(value);
261 }
Vijay Khemka08797702020-09-21 14:53:57 -0700262
263 /* Initialize unit value (Percent) for utilization sensor */
264 ValueIface::unit(ValueIface::Unit::Percent);
265
Vijay Khemka15537762020-07-22 11:44:56 -0700266 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700267
268 /* Start the timer for reading sensor data at regular interval */
269 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
270}
271
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700272void HealthSensor::checkSensorThreshold(const double value)
273{
Vijay Khemka415dcd22020-09-21 15:58:21 -0700274 if (sensorConfig.criticalHigh && (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700275 {
276 if (!CriticalInterface::criticalAlarmHigh())
277 {
278 CriticalInterface::criticalAlarmHigh(true);
279 if (sensorConfig.criticalLog)
280 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
281 "critical high threshold",
282 entry("NAME = %s", sensorConfig.name.c_str()));
283 }
284 }
285 else
286 {
287 if (CriticalInterface::criticalAlarmHigh())
288 {
289 CriticalInterface::criticalAlarmHigh(false);
290 if (sensorConfig.criticalLog)
291 log<level::INFO>("DEASSERT: Utilization Sensor is under "
292 "critical high threshold",
293 entry("NAME = %s", sensorConfig.name.c_str()));
294 }
295
Vijay Khemka415dcd22020-09-21 15:58:21 -0700296 /* if warning high value is not set then return */
297 if (!sensorConfig.warningHigh)
298 return;
299
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700300 if ((value > sensorConfig.warningHigh) &&
301 (!WarningInterface::warningAlarmHigh()))
302 {
303 WarningInterface::warningAlarmHigh(true);
304 if (sensorConfig.warningLog)
305 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
306 "warning high threshold",
307 entry("NAME = %s", sensorConfig.name.c_str()));
308 }
309 else if ((value <= sensorConfig.warningHigh) &&
310 (WarningInterface::warningAlarmHigh()))
311 {
312 WarningInterface::warningAlarmHigh(false);
313 if (sensorConfig.warningLog)
314 log<level::INFO>("DEASSERT: Utilization Sensor is under "
315 "warning high threshold",
316 entry("NAME = %s", sensorConfig.name.c_str()));
317 }
318 }
319}
320
Vijay Khemkab38fd582020-07-23 13:21:23 -0700321void HealthSensor::readHealthSensor()
322{
323 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800324 double value;
325
326 if (sensorConfig.name.rfind(storage, 0) == 0)
327 {
328 value = readSensors.find(storage)->second(sensorConfig.path);
329 }
330 else if (sensorConfig.name.rfind(inode, 0) == 0)
331 {
332 value = readSensors.find(inode)->second(sensorConfig.path);
333 }
334 else
335 {
336 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
337 }
338
Vijay Khemkab38fd582020-07-23 13:21:23 -0700339 if (value < 0)
340 {
341 log<level::ERR>("Reading Sensor Utilization failed",
342 entry("NAME = %s", sensorConfig.name.c_str()));
343 return;
344 }
345
346 /* Remove first item from the queue */
347 valQueue.pop_front();
348 /* Add new item at the back */
349 valQueue.push_back(value);
350
351 /* Calculate average values for the given window size */
352 double avgValue = 0;
353 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
354 avgValue = avgValue / sensorConfig.windowSize;
355
356 /* Set this new value to dbus */
357 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700358
359 /* Check the sensor threshold and log required message */
360 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700361}
362
363void printConfig(HealthConfig& cfg)
364{
365 std::cout << "Name: " << cfg.name << "\n";
366 std::cout << "Freq: " << (int)cfg.freq << "\n";
367 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
368 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
369 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
370 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
371 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
372 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
373 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800374 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700375}
376
Vijay Khemkae2795302020-07-15 17:28:45 -0700377/* Create dbus utilization sensor object for each configured sensors */
378void HealthMon::createHealthSensors()
379{
380 for (auto& cfg : sensorConfigs)
381 {
382 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
383 auto healthSensor =
Vijay Khemka15537762020-07-22 11:44:56 -0700384 std::make_shared<HealthSensor>(bus, objPath.c_str(), cfg);
Vijay Khemkae2795302020-07-15 17:28:45 -0700385 healthSensors.emplace(cfg.name, healthSensor);
386
387 std::string logMsg = cfg.name + " Health Sensor created";
388 log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str()));
389
390 /* Set configured values of crtical and warning high to dbus */
391 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
392 }
393}
394
395/** @brief Parsing Health config JSON file */
396Json HealthMon::parseConfigFile(std::string configFile)
397{
398 std::ifstream jsonFile(configFile);
399 if (!jsonFile.is_open())
400 {
401 log<level::ERR>("config JSON file not found",
402 entry("FILENAME = %s", configFile.c_str()));
403 }
404
405 auto data = Json::parse(jsonFile, nullptr, false);
406 if (data.is_discarded())
407 {
408 log<level::ERR>("config readings JSON parser failure",
409 entry("FILENAME = %s", configFile.c_str()));
410 }
411
412 return data;
413}
414
Vijay Khemkae2795302020-07-15 17:28:45 -0700415void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
416{
417
418 static const Json empty{};
419
Vijay Khemka15537762020-07-22 11:44:56 -0700420 /* Default frerquency of sensor polling is 1 second */
421 cfg.freq = data.value("Frequency", 1);
422
423 /* Default window size sensor queue is 1 */
424 cfg.windowSize = data.value("Window_size", 1);
425
Vijay Khemkae2795302020-07-15 17:28:45 -0700426 auto threshold = data.value("Threshold", empty);
427 if (!threshold.empty())
428 {
429 auto criticalData = threshold.value("Critical", empty);
430 if (!criticalData.empty())
431 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700432 cfg.criticalHigh =
433 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700434 cfg.criticalLog = criticalData.value("Log", true);
435 cfg.criticalTgt = criticalData.value("Target", "");
436 }
437 auto warningData = threshold.value("Warning", empty);
438 if (!warningData.empty())
439 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700440 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
441 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700442 cfg.warningTgt = warningData.value("Target", "");
443 }
444 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800445 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700446}
447
Vijay Khemka15537762020-07-22 11:44:56 -0700448std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700449{
450
451 std::vector<HealthConfig> cfgs;
452 HealthConfig cfg;
453 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
454
455 // print values
456 if (DEBUG)
457 std::cout << "Config json data:\n" << data << "\n\n";
458
459 /* Get CPU config data */
460 for (auto& j : data.items())
461 {
462 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800463 /* key need match default value in map readSensors or match the key
464 * start with "Storage" or "Inode" */
465 if (readSensors.find(key) != readSensors.end() ||
466 (key.rfind(storage, 0) == 0) || (key.rfind(inode, 0) == 0))
Vijay Khemkae2795302020-07-15 17:28:45 -0700467 {
468 HealthConfig cfg = HealthConfig();
469 cfg.name = j.key();
470 getConfigData(j.value(), cfg);
471 cfgs.push_back(cfg);
472 if (DEBUG)
473 printConfig(cfg);
474 }
475 else
476 {
477 std::string logMsg = key + " Health Sensor not supported";
478 log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str()));
479 }
480 }
481 return cfgs;
482}
483
484} // namespace health
485} // namespace phosphor
486
487/**
488 * @brief Main
489 */
490int main()
491{
492
493 // Get a default event loop
494 auto event = sdeventplus::Event::get_default();
495
496 // Get a handle to system dbus
497 auto bus = sdbusplus::bus::new_default();
498
499 // Create an health monitor object
500 phosphor::health::HealthMon healthMon(bus);
501
502 // Request service bus name
503 bus.request_name(HEALTH_BUS_NAME);
504
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700505 // Add object manager to sensor node
506 sdbusplus::server::manager::manager objManager(bus, SENSOR_OBJPATH);
507
Vijay Khemkae2795302020-07-15 17:28:45 -0700508 // Attach the bus to sd_event to service user requests
509 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
510 event.loop();
511
512 return 0;
513}