blob: c3fccd4c34d064602f8d5638c53e2a2be09f3c07 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
Vijay Khemka1d0d0122020-09-29 12:17:43 -07005#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -07006#include <sdeventplus/event.hpp>
7
8#include <fstream>
9#include <iostream>
Vijay Khemka15537762020-07-22 11:44:56 -070010#include <numeric>
11#include <sstream>
12
13extern "C"
14{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +080015#include <sys/statvfs.h>
Vijay Khemka15537762020-07-22 11:44:56 -070016#include <sys/sysinfo.h>
17}
Vijay Khemkae2795302020-07-15 17:28:45 -070018
Patrick Williams957e03c2021-09-02 16:38:42 -050019PHOSPHOR_LOG2_USING;
20
Vijay Khemkae2795302020-07-15 17:28:45 -070021static constexpr bool DEBUG = false;
Vijay Khemka415dcd22020-09-21 15:58:21 -070022static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070023
24namespace phosphor
25{
26namespace health
27{
28
/**
 * Positions of the time counters that follow the "cpu" label on the first
 * line of /proc/stat, in the order they are read by readCPUUtilization().
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    /* Number of counters above; used as the array size, not as an index */
    NUM_CPU_STATES_TIME = 10
};
43
Patrick Williams957e03c2021-09-02 16:38:42 -050044double readCPUUtilization([[maybe_unused]] std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -070045{
Patrick Williams957e03c2021-09-02 16:38:42 -050046 auto proc_stat = "/proc/stat";
47 std::ifstream fileStat(proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -070048 if (!fileStat.is_open())
49 {
Patrick Williams957e03c2021-09-02 16:38:42 -050050 error("cpu file not available: {PATH}", "PATH", proc_stat);
Vijay Khemka15537762020-07-22 11:44:56 -070051 return -1;
52 }
53
54 std::string firstLine, labelName;
55 std::size_t timeData[NUM_CPU_STATES_TIME];
56
57 std::getline(fileStat, firstLine);
58 std::stringstream ss(firstLine);
59 ss >> labelName;
60
61 if (DEBUG)
62 std::cout << "CPU stats first Line is " << firstLine << "\n";
63
64 if (labelName.compare("cpu"))
65 {
Patrick Williams957e03c2021-09-02 16:38:42 -050066 error("CPU data not available");
Vijay Khemka15537762020-07-22 11:44:56 -070067 return -1;
68 }
69
70 int i;
71 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
72 {
73 if (!(ss >> timeData[i]))
74 break;
75 }
76
77 if (i != NUM_CPU_STATES_TIME)
78 {
Patrick Williams957e03c2021-09-02 16:38:42 -050079 error("CPU data not correct");
Vijay Khemka15537762020-07-22 11:44:56 -070080 return -1;
81 }
82
83 static double preActiveTime = 0, preIdleTime = 0;
84 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
85 activePercValue;
86
87 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
88 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
89 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
90 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
91 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
92
93 idleTimeDiff = idleTime - preIdleTime;
94 activeTimeDiff = activeTime - preActiveTime;
95
96 /* Store current idle and active time for next calculation */
97 preIdleTime = idleTime;
98 preActiveTime = activeTime;
99
100 totalTime = idleTimeDiff + activeTimeDiff;
101
102 activePercValue = activeTimeDiff / totalTime * 100;
103
104 if (DEBUG)
105 std::cout << "CPU Utilization is " << activePercValue << "\n";
106
107 return activePercValue;
108}
109
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800110double readMemoryUtilization(std::string path)
Vijay Khemka15537762020-07-22 11:44:56 -0700111{
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800112 /* Unused var: path */
113 std::ignore = path;
Vijay Khemka15537762020-07-22 11:44:56 -0700114 struct sysinfo s_info;
115
116 sysinfo(&s_info);
117 double usedRam = s_info.totalram - s_info.freeram;
118 double memUsePerc = usedRam / s_info.totalram * 100;
119
120 if (DEBUG)
121 {
122 std::cout << "Memory Utilization is " << memUsePerc << "\n";
123
124 std::cout << "TotalRam: " << s_info.totalram
125 << " FreeRam: " << s_info.freeram << "\n";
126 std::cout << "UseRam: " << usedRam << "\n";
127 }
128
129 return memUsePerc;
130}
131
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800132double readStorageUtilization(std::string path)
133{
134
135 struct statvfs buffer
136 {};
137 int ret = statvfs(path.c_str(), &buffer);
138 double total = 0;
139 double available = 0;
140 double used = 0;
141 double usedPercentage = 0;
142
143 if (ret != 0)
144 {
145 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800146 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
147 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800148 return 0;
149 }
150
151 total = buffer.f_blocks * (buffer.f_frsize / 1024);
152 available = buffer.f_bfree * (buffer.f_frsize / 1024);
153 used = total - available;
154 usedPercentage = (used / total) * 100;
155
156 if (DEBUG)
157 {
158 std::cout << "Total:" << total << "\n";
159 std::cout << "Available:" << available << "\n";
160 std::cout << "Used:" << used << "\n";
161 std::cout << "Storage utilization is:" << usedPercentage << "\n";
162 }
163
164 return usedPercentage;
165}
166
167double readInodeUtilization(std::string path)
168{
169
170 struct statvfs buffer
171 {};
172 int ret = statvfs(path.c_str(), &buffer);
173 double totalInodes = 0;
174 double availableInodes = 0;
175 double used = 0;
176 double usedPercentage = 0;
177
178 if (ret != 0)
179 {
180 auto e = errno;
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800181 std::cerr << "Error from statvfs: " << strerror(e) << ",path: " << path
182 << std::endl;
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800183 return 0;
184 }
185
186 totalInodes = buffer.f_files;
187 availableInodes = buffer.f_ffree;
188 used = totalInodes - availableInodes;
189 usedPercentage = (used / totalInodes) * 100;
190
191 if (DEBUG)
192 {
193 std::cout << "Total Inodes:" << totalInodes << "\n";
194 std::cout << "Available Inodes:" << availableInodes << "\n";
195 std::cout << "Used:" << used << "\n";
196 std::cout << "Inodes utilization is:" << usedPercentage << "\n";
197 }
198
199 return usedPercentage;
200}
201
202constexpr auto storage = "Storage";
203constexpr auto inode = "Inode";
Vijay Khemka15537762020-07-22 11:44:56 -0700204/** Map of read function for each health sensors supported */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800205const std::map<std::string, std::function<double(std::string path)>>
206 readSensors = {{"CPU", readCPUUtilization},
207 {"Memory", readMemoryUtilization},
208 {storage, readStorageUtilization},
209 {inode, readInodeUtilization}};
Vijay Khemka15537762020-07-22 11:44:56 -0700210
211void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700212{
213 CriticalInterface::criticalHigh(criticalHigh);
214 WarningInterface::warningHigh(warningHigh);
215}
216
Vijay Khemka15537762020-07-22 11:44:56 -0700217void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700218{
219 ValueIface::value(value);
220}
221
Sui Chen670cc132021-04-13 09:27:22 -0700222void HealthSensor::initHealthSensor(const std::vector<std::string>& chassisIds)
Vijay Khemka15537762020-07-22 11:44:56 -0700223{
Patrick Williams957e03c2021-09-02 16:38:42 -0500224 info("{SENSOR} Health Sensor initialized", "SENSOR", sensorConfig.name);
Vijay Khemka15537762020-07-22 11:44:56 -0700225
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800226 /* Look for sensor read functions and Read Sensor values */
227 double value;
228 std::map<std::string,
229 std::function<double(std::string path)>>::const_iterator it;
230 it = readSensors.find(sensorConfig.name);
231
232 if (sensorConfig.name.rfind(storage, 0) == 0)
233 {
234 it = readSensors.find(storage);
235 }
236 else if (sensorConfig.name.rfind(inode, 0) == 0)
237 {
238 it = readSensors.find(inode);
239 }
240 else if (it == readSensors.end())
Vijay Khemka15537762020-07-22 11:44:56 -0700241 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500242 error("Sensor read function not available");
Vijay Khemka15537762020-07-22 11:44:56 -0700243 return;
244 }
245
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800246 value = it->second(sensorConfig.path);
Vijay Khemka15537762020-07-22 11:44:56 -0700247
248 if (value < 0)
249 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500250 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
251 sensorConfig.name);
Vijay Khemka15537762020-07-22 11:44:56 -0700252 return;
253 }
254
Vijay Khemka08797702020-09-21 14:53:57 -0700255 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700256 for (int i = 0; i < sensorConfig.windowSize; i++)
257 {
258 valQueue.push_back(value);
259 }
Vijay Khemka08797702020-09-21 14:53:57 -0700260
261 /* Initialize unit value (Percent) for utilization sensor */
262 ValueIface::unit(ValueIface::Unit::Percent);
263
Vijay Khemka15537762020-07-22 11:44:56 -0700264 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700265
Sui Chen670cc132021-04-13 09:27:22 -0700266 // Associate the sensor to chassis
267 std::vector<AssociationTuple> associationTuples;
268 for (const auto& chassisId : chassisIds)
269 {
270 associationTuples.push_back({"bmc", "all_sensors", chassisId});
271 }
272 AssociationDefinitionInterface::associations(associationTuples);
273
Vijay Khemkab38fd582020-07-23 13:21:23 -0700274 /* Start the timer for reading sensor data at regular interval */
275 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
276}
277
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700278void HealthSensor::checkSensorThreshold(const double value)
279{
Vijay Khemka415dcd22020-09-21 15:58:21 -0700280 if (sensorConfig.criticalHigh && (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700281 {
282 if (!CriticalInterface::criticalAlarmHigh())
283 {
284 CriticalInterface::criticalAlarmHigh(true);
285 if (sensorConfig.criticalLog)
Patrick Williams957e03c2021-09-02 16:38:42 -0500286 error(
287 "ASSERT: sensor {SENSOR} is above the upper threshold critical high",
288 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700289 }
290 }
291 else
292 {
293 if (CriticalInterface::criticalAlarmHigh())
294 {
295 CriticalInterface::criticalAlarmHigh(false);
296 if (sensorConfig.criticalLog)
Patrick Williams957e03c2021-09-02 16:38:42 -0500297 info(
298 "DEASSERT: sensor {SENSOR} is under the upper threshold critical high",
299 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700300 }
301
Vijay Khemka415dcd22020-09-21 15:58:21 -0700302 /* if warning high value is not set then return */
303 if (!sensorConfig.warningHigh)
304 return;
305
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700306 if ((value > sensorConfig.warningHigh) &&
307 (!WarningInterface::warningAlarmHigh()))
308 {
309 WarningInterface::warningAlarmHigh(true);
310 if (sensorConfig.warningLog)
Patrick Williams957e03c2021-09-02 16:38:42 -0500311 error(
312 "ASSERT: sensor {SENSOR} is above the upper threshold warning high",
313 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700314 }
315 else if ((value <= sensorConfig.warningHigh) &&
316 (WarningInterface::warningAlarmHigh()))
317 {
318 WarningInterface::warningAlarmHigh(false);
319 if (sensorConfig.warningLog)
Patrick Williams957e03c2021-09-02 16:38:42 -0500320 info(
321 "DEASSERT: sensor {SENSOR} is under the upper threshold warning high",
322 "SENSOR", sensorConfig.name);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700323 }
324 }
325}
326
Vijay Khemkab38fd582020-07-23 13:21:23 -0700327void HealthSensor::readHealthSensor()
328{
329 /* Read current sensor value */
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800330 double value;
331
332 if (sensorConfig.name.rfind(storage, 0) == 0)
333 {
334 value = readSensors.find(storage)->second(sensorConfig.path);
335 }
336 else if (sensorConfig.name.rfind(inode, 0) == 0)
337 {
338 value = readSensors.find(inode)->second(sensorConfig.path);
339 }
340 else
341 {
342 value = readSensors.find(sensorConfig.name)->second(sensorConfig.path);
343 }
344
Vijay Khemkab38fd582020-07-23 13:21:23 -0700345 if (value < 0)
346 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500347 error("Reading Sensor Utilization failed: {SENSOR}", "SENSOR",
348 sensorConfig.name);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700349 return;
350 }
351
352 /* Remove first item from the queue */
353 valQueue.pop_front();
354 /* Add new item at the back */
355 valQueue.push_back(value);
356
357 /* Calculate average values for the given window size */
358 double avgValue = 0;
359 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
360 avgValue = avgValue / sensorConfig.windowSize;
361
362 /* Set this new value to dbus */
363 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700364
365 /* Check the sensor threshold and log required message */
366 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700367}
368
369void printConfig(HealthConfig& cfg)
370{
371 std::cout << "Name: " << cfg.name << "\n";
372 std::cout << "Freq: " << (int)cfg.freq << "\n";
373 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
374 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
375 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
376 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
377 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
378 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
379 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800380 std::cout << "Path : " << cfg.path << "\n\n";
Vijay Khemka15537762020-07-22 11:44:56 -0700381}
382
Vijay Khemkae2795302020-07-15 17:28:45 -0700383/* Create dbus utilization sensor object for each configured sensors */
Sui Chen670cc132021-04-13 09:27:22 -0700384void HealthMon::createHealthSensors(const std::vector<std::string>& chassisIds)
Vijay Khemkae2795302020-07-15 17:28:45 -0700385{
386 for (auto& cfg : sensorConfigs)
387 {
388 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
Sui Chen670cc132021-04-13 09:27:22 -0700389 auto healthSensor = std::make_shared<HealthSensor>(bus, objPath.c_str(),
390 cfg, chassisIds);
Vijay Khemkae2795302020-07-15 17:28:45 -0700391 healthSensors.emplace(cfg.name, healthSensor);
392
Patrick Williams957e03c2021-09-02 16:38:42 -0500393 info("{SENSOR} Health Sensor created", "SENSOR", cfg.name);
Vijay Khemkae2795302020-07-15 17:28:45 -0700394
395 /* Set configured values of crtical and warning high to dbus */
396 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
397 }
398}
399
400/** @brief Parsing Health config JSON file */
401Json HealthMon::parseConfigFile(std::string configFile)
402{
403 std::ifstream jsonFile(configFile);
404 if (!jsonFile.is_open())
405 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500406 error("config JSON file not found: {PATH}", "PATH", configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700407 }
408
409 auto data = Json::parse(jsonFile, nullptr, false);
410 if (data.is_discarded())
411 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500412 error("config readings JSON parser failure: {PATH}", "PATH",
413 configFile);
Vijay Khemkae2795302020-07-15 17:28:45 -0700414 }
415
416 return data;
417}
418
Vijay Khemkae2795302020-07-15 17:28:45 -0700419void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
420{
421
422 static const Json empty{};
423
Vijay Khemka15537762020-07-22 11:44:56 -0700424 /* Default frerquency of sensor polling is 1 second */
425 cfg.freq = data.value("Frequency", 1);
426
427 /* Default window size sensor queue is 1 */
428 cfg.windowSize = data.value("Window_size", 1);
429
Vijay Khemkae2795302020-07-15 17:28:45 -0700430 auto threshold = data.value("Threshold", empty);
431 if (!threshold.empty())
432 {
433 auto criticalData = threshold.value("Critical", empty);
434 if (!criticalData.empty())
435 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700436 cfg.criticalHigh =
437 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700438 cfg.criticalLog = criticalData.value("Log", true);
439 cfg.criticalTgt = criticalData.value("Target", "");
440 }
441 auto warningData = threshold.value("Warning", empty);
442 if (!warningData.empty())
443 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700444 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
445 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700446 cfg.warningTgt = warningData.value("Target", "");
447 }
448 }
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800449 cfg.path = data.value("Path", "");
Vijay Khemkae2795302020-07-15 17:28:45 -0700450}
451
Vijay Khemka15537762020-07-22 11:44:56 -0700452std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700453{
454
455 std::vector<HealthConfig> cfgs;
456 HealthConfig cfg;
457 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
458
459 // print values
460 if (DEBUG)
461 std::cout << "Config json data:\n" << data << "\n\n";
462
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800463 /* Get data items from config json data*/
Vijay Khemkae2795302020-07-15 17:28:45 -0700464 for (auto& j : data.items())
465 {
466 auto key = j.key();
Bruceleequantatwaf9acbd2020-10-12 15:21:42 +0800467 /* key need match default value in map readSensors or match the key
468 * start with "Storage" or "Inode" */
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800469 bool isStorageOrInode =
470 (key.rfind(storage, 0) == 0 || key.rfind(inode, 0) == 0);
471 if (readSensors.find(key) != readSensors.end() || isStorageOrInode)
Vijay Khemkae2795302020-07-15 17:28:45 -0700472 {
473 HealthConfig cfg = HealthConfig();
474 cfg.name = j.key();
475 getConfigData(j.value(), cfg);
Bruceleequantatw2b231e82020-11-23 13:23:45 +0800476 if (isStorageOrInode)
477 {
478 struct statvfs buffer
479 {};
480 int ret = statvfs(cfg.path.c_str(), &buffer);
481 if (ret != 0)
482 {
483 auto e = errno;
484 std::cerr << "Error from statvfs: " << strerror(e)
485 << ", name: " << cfg.name
486 << ", path: " << cfg.path
487 << ", please check your settings in config file."
488 << std::endl;
489 continue;
490 }
491 }
Vijay Khemkae2795302020-07-15 17:28:45 -0700492 cfgs.push_back(cfg);
493 if (DEBUG)
494 printConfig(cfg);
495 }
496 else
497 {
Patrick Williams957e03c2021-09-02 16:38:42 -0500498 error("{SENSOR} Health Sensor not supported", "SENSOR", key);
Vijay Khemkae2795302020-07-15 17:28:45 -0700499 }
500 }
501 return cfgs;
502}
503
504} // namespace health
505} // namespace phosphor
506
507/**
508 * @brief Main
509 */
510int main()
511{
Vijay Khemkae2795302020-07-15 17:28:45 -0700512 // Get a default event loop
513 auto event = sdeventplus::Event::get_default();
514
515 // Get a handle to system dbus
516 auto bus = sdbusplus::bus::new_default();
517
518 // Create an health monitor object
519 phosphor::health::HealthMon healthMon(bus);
520
521 // Request service bus name
522 bus.request_name(HEALTH_BUS_NAME);
523
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700524 // Add object manager to sensor node
525 sdbusplus::server::manager::manager objManager(bus, SENSOR_OBJPATH);
526
Vijay Khemkae2795302020-07-15 17:28:45 -0700527 // Attach the bus to sd_event to service user requests
528 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
529 event.loop();
530
531 return 0;
532}