blob: 4981cd6238a499ef0a23a6440e7165d9b8d7c163 [file] [log] [blame]
#include "config.h"

#include "healthMonitor.hpp"

#include <phosphor-logging/log.hpp>
#include <sdeventplus/event.hpp>

#include <array>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <numeric>
#include <sstream>

extern "C"
{
#include <sys/sysinfo.h>
}

/** Compile-time switch: when true, the functions in this file print
 *  diagnostic output to stdout. */
static constexpr bool DEBUG = false;
/** Fallback used by getConfigData() when a "Critical"/"Warning" threshold
 *  section is present but carries no "Value" key. */
static constexpr uint8_t defaultHighThreshold = 100;
Vijay Khemkae2795302020-07-15 17:28:45 -070020
21namespace phosphor
22{
23namespace health
24{
25
26using namespace phosphor::logging;
27
/**
 * @brief Positions of the time counters that follow the "cpu" label on the
 *        first line of /proc/stat, in the order readCPUUtilization() reads
 *        them.
 *
 * NUM_CPU_STATES_TIME is not a counter; it is the number of counters and is
 * used to size the array that holds them.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX,
    SYSTEM_IDX,
    IDLE_IDX,
    IOWAIT_IDX,
    IRQ_IDX,
    SOFTIRQ_IDX,
    STEAL_IDX,
    GUEST_USER_IDX,
    GUEST_NICE_IDX,
    NUM_CPU_STATES_TIME
};
42
43double readCPUUtilization()
44{
45 std::ifstream fileStat("/proc/stat");
46 if (!fileStat.is_open())
47 {
48 log<level::ERR>("cpu file not available",
49 entry("FILENAME = /proc/stat"));
50 return -1;
51 }
52
53 std::string firstLine, labelName;
54 std::size_t timeData[NUM_CPU_STATES_TIME];
55
56 std::getline(fileStat, firstLine);
57 std::stringstream ss(firstLine);
58 ss >> labelName;
59
60 if (DEBUG)
61 std::cout << "CPU stats first Line is " << firstLine << "\n";
62
63 if (labelName.compare("cpu"))
64 {
65 log<level::ERR>("CPU data not available");
66 return -1;
67 }
68
69 int i;
70 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
71 {
72 if (!(ss >> timeData[i]))
73 break;
74 }
75
76 if (i != NUM_CPU_STATES_TIME)
77 {
78 log<level::ERR>("CPU data not correct");
79 return -1;
80 }
81
82 static double preActiveTime = 0, preIdleTime = 0;
83 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
84 activePercValue;
85
86 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
87 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
88 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
89 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
90 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
91
92 idleTimeDiff = idleTime - preIdleTime;
93 activeTimeDiff = activeTime - preActiveTime;
94
95 /* Store current idle and active time for next calculation */
96 preIdleTime = idleTime;
97 preActiveTime = activeTime;
98
99 totalTime = idleTimeDiff + activeTimeDiff;
100
101 activePercValue = activeTimeDiff / totalTime * 100;
102
103 if (DEBUG)
104 std::cout << "CPU Utilization is " << activePercValue << "\n";
105
106 return activePercValue;
107}
108
109double readMemoryUtilization()
110{
111 struct sysinfo s_info;
112
113 sysinfo(&s_info);
114 double usedRam = s_info.totalram - s_info.freeram;
115 double memUsePerc = usedRam / s_info.totalram * 100;
116
117 if (DEBUG)
118 {
119 std::cout << "Memory Utilization is " << memUsePerc << "\n";
120
121 std::cout << "TotalRam: " << s_info.totalram
122 << " FreeRam: " << s_info.freeram << "\n";
123 std::cout << "UseRam: " << usedRam << "\n";
124 }
125
126 return memUsePerc;
127}
128
129/** Map of read function for each health sensors supported */
130std::map<std::string, std::function<double()>> readSensors = {
131 {"CPU", readCPUUtilization}, {"Memory", readMemoryUtilization}};
132
133void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700134{
135 CriticalInterface::criticalHigh(criticalHigh);
136 WarningInterface::warningHigh(warningHigh);
137}
138
Vijay Khemka15537762020-07-22 11:44:56 -0700139void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700140{
141 ValueIface::value(value);
142}
143
Vijay Khemka15537762020-07-22 11:44:56 -0700144void HealthSensor::initHealthSensor()
145{
146 std::string logMsg = sensorConfig.name + " Health Sensor initialized";
147 log<level::INFO>(logMsg.c_str());
148
149 /* Look for sensor read functions */
150 if (readSensors.find(sensorConfig.name) == readSensors.end())
151 {
152 log<level::ERR>("Sensor read function not available");
153 return;
154 }
155
156 /* Read Sensor values */
157 auto value = readSensors[sensorConfig.name]();
158
159 if (value < 0)
160 {
161 log<level::ERR>("Reading Sensor Utilization failed",
162 entry("NAME = %s", sensorConfig.name.c_str()));
163 return;
164 }
165
Vijay Khemka08797702020-09-21 14:53:57 -0700166 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700167 for (int i = 0; i < sensorConfig.windowSize; i++)
168 {
169 valQueue.push_back(value);
170 }
Vijay Khemka08797702020-09-21 14:53:57 -0700171
172 /* Initialize unit value (Percent) for utilization sensor */
173 ValueIface::unit(ValueIface::Unit::Percent);
174
Vijay Khemka15537762020-07-22 11:44:56 -0700175 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700176
177 /* Start the timer for reading sensor data at regular interval */
178 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
179}
180
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700181void HealthSensor::checkSensorThreshold(const double value)
182{
Vijay Khemka415dcd22020-09-21 15:58:21 -0700183 if (sensorConfig.criticalHigh && (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700184 {
185 if (!CriticalInterface::criticalAlarmHigh())
186 {
187 CriticalInterface::criticalAlarmHigh(true);
188 if (sensorConfig.criticalLog)
189 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
190 "critical high threshold",
191 entry("NAME = %s", sensorConfig.name.c_str()));
192 }
193 }
194 else
195 {
196 if (CriticalInterface::criticalAlarmHigh())
197 {
198 CriticalInterface::criticalAlarmHigh(false);
199 if (sensorConfig.criticalLog)
200 log<level::INFO>("DEASSERT: Utilization Sensor is under "
201 "critical high threshold",
202 entry("NAME = %s", sensorConfig.name.c_str()));
203 }
204
Vijay Khemka415dcd22020-09-21 15:58:21 -0700205 /* if warning high value is not set then return */
206 if (!sensorConfig.warningHigh)
207 return;
208
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700209 if ((value > sensorConfig.warningHigh) &&
210 (!WarningInterface::warningAlarmHigh()))
211 {
212 WarningInterface::warningAlarmHigh(true);
213 if (sensorConfig.warningLog)
214 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
215 "warning high threshold",
216 entry("NAME = %s", sensorConfig.name.c_str()));
217 }
218 else if ((value <= sensorConfig.warningHigh) &&
219 (WarningInterface::warningAlarmHigh()))
220 {
221 WarningInterface::warningAlarmHigh(false);
222 if (sensorConfig.warningLog)
223 log<level::INFO>("DEASSERT: Utilization Sensor is under "
224 "warning high threshold",
225 entry("NAME = %s", sensorConfig.name.c_str()));
226 }
227 }
228}
229
Vijay Khemkab38fd582020-07-23 13:21:23 -0700230void HealthSensor::readHealthSensor()
231{
232 /* Read current sensor value */
233 double value = readSensors[sensorConfig.name]();
234 if (value < 0)
235 {
236 log<level::ERR>("Reading Sensor Utilization failed",
237 entry("NAME = %s", sensorConfig.name.c_str()));
238 return;
239 }
240
241 /* Remove first item from the queue */
242 valQueue.pop_front();
243 /* Add new item at the back */
244 valQueue.push_back(value);
245
246 /* Calculate average values for the given window size */
247 double avgValue = 0;
248 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
249 avgValue = avgValue / sensorConfig.windowSize;
250
251 /* Set this new value to dbus */
252 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700253
254 /* Check the sensor threshold and log required message */
255 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700256}
257
258void printConfig(HealthConfig& cfg)
259{
260 std::cout << "Name: " << cfg.name << "\n";
261 std::cout << "Freq: " << (int)cfg.freq << "\n";
262 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
263 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
264 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
265 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
266 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
267 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
268 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
269}
270
Vijay Khemkae2795302020-07-15 17:28:45 -0700271/* Create dbus utilization sensor object for each configured sensors */
272void HealthMon::createHealthSensors()
273{
274 for (auto& cfg : sensorConfigs)
275 {
276 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
277 auto healthSensor =
Vijay Khemka15537762020-07-22 11:44:56 -0700278 std::make_shared<HealthSensor>(bus, objPath.c_str(), cfg);
Vijay Khemkae2795302020-07-15 17:28:45 -0700279 healthSensors.emplace(cfg.name, healthSensor);
280
281 std::string logMsg = cfg.name + " Health Sensor created";
282 log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str()));
283
284 /* Set configured values of crtical and warning high to dbus */
285 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
286 }
287}
288
289/** @brief Parsing Health config JSON file */
290Json HealthMon::parseConfigFile(std::string configFile)
291{
292 std::ifstream jsonFile(configFile);
293 if (!jsonFile.is_open())
294 {
295 log<level::ERR>("config JSON file not found",
296 entry("FILENAME = %s", configFile.c_str()));
297 }
298
299 auto data = Json::parse(jsonFile, nullptr, false);
300 if (data.is_discarded())
301 {
302 log<level::ERR>("config readings JSON parser failure",
303 entry("FILENAME = %s", configFile.c_str()));
304 }
305
306 return data;
307}
308
Vijay Khemkae2795302020-07-15 17:28:45 -0700309void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
310{
311
312 static const Json empty{};
313
Vijay Khemka15537762020-07-22 11:44:56 -0700314 /* Default frerquency of sensor polling is 1 second */
315 cfg.freq = data.value("Frequency", 1);
316
317 /* Default window size sensor queue is 1 */
318 cfg.windowSize = data.value("Window_size", 1);
319
Vijay Khemkae2795302020-07-15 17:28:45 -0700320 auto threshold = data.value("Threshold", empty);
321 if (!threshold.empty())
322 {
323 auto criticalData = threshold.value("Critical", empty);
324 if (!criticalData.empty())
325 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700326 cfg.criticalHigh =
327 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700328 cfg.criticalLog = criticalData.value("Log", true);
329 cfg.criticalTgt = criticalData.value("Target", "");
330 }
331 auto warningData = threshold.value("Warning", empty);
332 if (!warningData.empty())
333 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700334 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
335 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700336 cfg.warningTgt = warningData.value("Target", "");
337 }
338 }
339}
340
Vijay Khemka15537762020-07-22 11:44:56 -0700341std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700342{
343
344 std::vector<HealthConfig> cfgs;
345 HealthConfig cfg;
346 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
347
348 // print values
349 if (DEBUG)
350 std::cout << "Config json data:\n" << data << "\n\n";
351
352 /* Get CPU config data */
353 for (auto& j : data.items())
354 {
355 auto key = j.key();
Vijay Khemka15537762020-07-22 11:44:56 -0700356 if (readSensors.find(key) != readSensors.end())
Vijay Khemkae2795302020-07-15 17:28:45 -0700357 {
358 HealthConfig cfg = HealthConfig();
359 cfg.name = j.key();
360 getConfigData(j.value(), cfg);
361 cfgs.push_back(cfg);
362 if (DEBUG)
363 printConfig(cfg);
364 }
365 else
366 {
367 std::string logMsg = key + " Health Sensor not supported";
368 log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str()));
369 }
370 }
371 return cfgs;
372}
373
374} // namespace health
375} // namespace phosphor
376
377/**
378 * @brief Main
379 */
380int main()
381{
382
383 // Get a default event loop
384 auto event = sdeventplus::Event::get_default();
385
386 // Get a handle to system dbus
387 auto bus = sdbusplus::bus::new_default();
388
389 // Create an health monitor object
390 phosphor::health::HealthMon healthMon(bus);
391
392 // Request service bus name
393 bus.request_name(HEALTH_BUS_NAME);
394
395 // Attach the bus to sd_event to service user requests
396 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
397 event.loop();
398
399 return 0;
400}