blob: a04ff375fbb7bca3e5719e607ddead6b70916725 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
5#include <phosphor-logging/log.hpp>
6#include <sdeventplus/event.hpp>
7
8#include <fstream>
9#include <iostream>
Vijay Khemka15537762020-07-22 11:44:56 -070010#include <numeric>
11#include <sstream>
12
13extern "C"
14{
15#include <sys/sysinfo.h>
16}
Vijay Khemkae2795302020-07-15 17:28:45 -070017
/* Compile-time switch: when true, the sensor readers and the config loader
 * in this file print diagnostic output to stdout. */
static constexpr bool DEBUG{false};
19
20namespace phosphor
21{
22namespace health
23{
24
25using namespace phosphor::logging;
26
/* Positions of the counters that follow the "cpu" label on the first line of
 * /proc/stat, in the order readCPUUtilization() extracts them.
 * Kept as a plain enum because the values are used directly as array
 * indices. NUM_CPU_STATES_TIME is the number of counters expected. */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
41
42double readCPUUtilization()
43{
44 std::ifstream fileStat("/proc/stat");
45 if (!fileStat.is_open())
46 {
47 log<level::ERR>("cpu file not available",
48 entry("FILENAME = /proc/stat"));
49 return -1;
50 }
51
52 std::string firstLine, labelName;
53 std::size_t timeData[NUM_CPU_STATES_TIME];
54
55 std::getline(fileStat, firstLine);
56 std::stringstream ss(firstLine);
57 ss >> labelName;
58
59 if (DEBUG)
60 std::cout << "CPU stats first Line is " << firstLine << "\n";
61
62 if (labelName.compare("cpu"))
63 {
64 log<level::ERR>("CPU data not available");
65 return -1;
66 }
67
68 int i;
69 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
70 {
71 if (!(ss >> timeData[i]))
72 break;
73 }
74
75 if (i != NUM_CPU_STATES_TIME)
76 {
77 log<level::ERR>("CPU data not correct");
78 return -1;
79 }
80
81 static double preActiveTime = 0, preIdleTime = 0;
82 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
83 activePercValue;
84
85 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
86 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
87 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
88 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
89 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
90
91 idleTimeDiff = idleTime - preIdleTime;
92 activeTimeDiff = activeTime - preActiveTime;
93
94 /* Store current idle and active time for next calculation */
95 preIdleTime = idleTime;
96 preActiveTime = activeTime;
97
98 totalTime = idleTimeDiff + activeTimeDiff;
99
100 activePercValue = activeTimeDiff / totalTime * 100;
101
102 if (DEBUG)
103 std::cout << "CPU Utilization is " << activePercValue << "\n";
104
105 return activePercValue;
106}
107
108double readMemoryUtilization()
109{
110 struct sysinfo s_info;
111
112 sysinfo(&s_info);
113 double usedRam = s_info.totalram - s_info.freeram;
114 double memUsePerc = usedRam / s_info.totalram * 100;
115
116 if (DEBUG)
117 {
118 std::cout << "Memory Utilization is " << memUsePerc << "\n";
119
120 std::cout << "TotalRam: " << s_info.totalram
121 << " FreeRam: " << s_info.freeram << "\n";
122 std::cout << "UseRam: " << usedRam << "\n";
123 }
124
125 return memUsePerc;
126}
127
128/** Map of read function for each health sensors supported */
129std::map<std::string, std::function<double()>> readSensors = {
130 {"CPU", readCPUUtilization}, {"Memory", readMemoryUtilization}};
131
132void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700133{
134 CriticalInterface::criticalHigh(criticalHigh);
135 WarningInterface::warningHigh(warningHigh);
136}
137
Vijay Khemka15537762020-07-22 11:44:56 -0700138void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700139{
140 ValueIface::value(value);
141}
142
Vijay Khemka15537762020-07-22 11:44:56 -0700143void HealthSensor::initHealthSensor()
144{
145 std::string logMsg = sensorConfig.name + " Health Sensor initialized";
146 log<level::INFO>(logMsg.c_str());
147
148 /* Look for sensor read functions */
149 if (readSensors.find(sensorConfig.name) == readSensors.end())
150 {
151 log<level::ERR>("Sensor read function not available");
152 return;
153 }
154
155 /* Read Sensor values */
156 auto value = readSensors[sensorConfig.name]();
157
158 if (value < 0)
159 {
160 log<level::ERR>("Reading Sensor Utilization failed",
161 entry("NAME = %s", sensorConfig.name.c_str()));
162 return;
163 }
164
Vijay Khemka08797702020-09-21 14:53:57 -0700165 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700166 for (int i = 0; i < sensorConfig.windowSize; i++)
167 {
168 valQueue.push_back(value);
169 }
Vijay Khemka08797702020-09-21 14:53:57 -0700170
171 /* Initialize unit value (Percent) for utilization sensor */
172 ValueIface::unit(ValueIface::Unit::Percent);
173
Vijay Khemka15537762020-07-22 11:44:56 -0700174 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700175
176 /* Start the timer for reading sensor data at regular interval */
177 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
178}
179
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700180void HealthSensor::checkSensorThreshold(const double value)
181{
182 if (value > sensorConfig.criticalHigh)
183 {
184 if (!CriticalInterface::criticalAlarmHigh())
185 {
186 CriticalInterface::criticalAlarmHigh(true);
187 if (sensorConfig.criticalLog)
188 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
189 "critical high threshold",
190 entry("NAME = %s", sensorConfig.name.c_str()));
191 }
192 }
193 else
194 {
195 if (CriticalInterface::criticalAlarmHigh())
196 {
197 CriticalInterface::criticalAlarmHigh(false);
198 if (sensorConfig.criticalLog)
199 log<level::INFO>("DEASSERT: Utilization Sensor is under "
200 "critical high threshold",
201 entry("NAME = %s", sensorConfig.name.c_str()));
202 }
203
204 if ((value > sensorConfig.warningHigh) &&
205 (!WarningInterface::warningAlarmHigh()))
206 {
207 WarningInterface::warningAlarmHigh(true);
208 if (sensorConfig.warningLog)
209 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
210 "warning high threshold",
211 entry("NAME = %s", sensorConfig.name.c_str()));
212 }
213 else if ((value <= sensorConfig.warningHigh) &&
214 (WarningInterface::warningAlarmHigh()))
215 {
216 WarningInterface::warningAlarmHigh(false);
217 if (sensorConfig.warningLog)
218 log<level::INFO>("DEASSERT: Utilization Sensor is under "
219 "warning high threshold",
220 entry("NAME = %s", sensorConfig.name.c_str()));
221 }
222 }
223}
224
Vijay Khemkab38fd582020-07-23 13:21:23 -0700225void HealthSensor::readHealthSensor()
226{
227 /* Read current sensor value */
228 double value = readSensors[sensorConfig.name]();
229 if (value < 0)
230 {
231 log<level::ERR>("Reading Sensor Utilization failed",
232 entry("NAME = %s", sensorConfig.name.c_str()));
233 return;
234 }
235
236 /* Remove first item from the queue */
237 valQueue.pop_front();
238 /* Add new item at the back */
239 valQueue.push_back(value);
240
241 /* Calculate average values for the given window size */
242 double avgValue = 0;
243 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
244 avgValue = avgValue / sensorConfig.windowSize;
245
246 /* Set this new value to dbus */
247 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700248
249 /* Check the sensor threshold and log required message */
250 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700251}
252
253void printConfig(HealthConfig& cfg)
254{
255 std::cout << "Name: " << cfg.name << "\n";
256 std::cout << "Freq: " << (int)cfg.freq << "\n";
257 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
258 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
259 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
260 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
261 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
262 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
263 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
264}
265
Vijay Khemkae2795302020-07-15 17:28:45 -0700266/* Create dbus utilization sensor object for each configured sensors */
267void HealthMon::createHealthSensors()
268{
269 for (auto& cfg : sensorConfigs)
270 {
271 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
272 auto healthSensor =
Vijay Khemka15537762020-07-22 11:44:56 -0700273 std::make_shared<HealthSensor>(bus, objPath.c_str(), cfg);
Vijay Khemkae2795302020-07-15 17:28:45 -0700274 healthSensors.emplace(cfg.name, healthSensor);
275
276 std::string logMsg = cfg.name + " Health Sensor created";
277 log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str()));
278
279 /* Set configured values of crtical and warning high to dbus */
280 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
281 }
282}
283
284/** @brief Parsing Health config JSON file */
285Json HealthMon::parseConfigFile(std::string configFile)
286{
287 std::ifstream jsonFile(configFile);
288 if (!jsonFile.is_open())
289 {
290 log<level::ERR>("config JSON file not found",
291 entry("FILENAME = %s", configFile.c_str()));
292 }
293
294 auto data = Json::parse(jsonFile, nullptr, false);
295 if (data.is_discarded())
296 {
297 log<level::ERR>("config readings JSON parser failure",
298 entry("FILENAME = %s", configFile.c_str()));
299 }
300
301 return data;
302}
303
Vijay Khemkae2795302020-07-15 17:28:45 -0700304void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
305{
306
307 static const Json empty{};
308
Vijay Khemka15537762020-07-22 11:44:56 -0700309 /* Default frerquency of sensor polling is 1 second */
310 cfg.freq = data.value("Frequency", 1);
311
312 /* Default window size sensor queue is 1 */
313 cfg.windowSize = data.value("Window_size", 1);
314
Vijay Khemkae2795302020-07-15 17:28:45 -0700315 auto threshold = data.value("Threshold", empty);
316 if (!threshold.empty())
317 {
318 auto criticalData = threshold.value("Critical", empty);
319 if (!criticalData.empty())
320 {
321 cfg.criticalHigh = criticalData.value("Value", 0);
322 cfg.criticalLog = criticalData.value("Log", true);
323 cfg.criticalTgt = criticalData.value("Target", "");
324 }
325 auto warningData = threshold.value("Warning", empty);
326 if (!warningData.empty())
327 {
328 cfg.warningHigh = warningData.value("Value", 0);
329 cfg.warningLog = warningData.value("Log", true);
330 cfg.warningTgt = warningData.value("Target", "");
331 }
332 }
333}
334
Vijay Khemka15537762020-07-22 11:44:56 -0700335std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700336{
337
338 std::vector<HealthConfig> cfgs;
339 HealthConfig cfg;
340 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
341
342 // print values
343 if (DEBUG)
344 std::cout << "Config json data:\n" << data << "\n\n";
345
346 /* Get CPU config data */
347 for (auto& j : data.items())
348 {
349 auto key = j.key();
Vijay Khemka15537762020-07-22 11:44:56 -0700350 if (readSensors.find(key) != readSensors.end())
Vijay Khemkae2795302020-07-15 17:28:45 -0700351 {
352 HealthConfig cfg = HealthConfig();
353 cfg.name = j.key();
354 getConfigData(j.value(), cfg);
355 cfgs.push_back(cfg);
356 if (DEBUG)
357 printConfig(cfg);
358 }
359 else
360 {
361 std::string logMsg = key + " Health Sensor not supported";
362 log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str()));
363 }
364 }
365 return cfgs;
366}
367
368} // namespace health
369} // namespace phosphor
370
371/**
372 * @brief Main
373 */
374int main()
375{
376
377 // Get a default event loop
378 auto event = sdeventplus::Event::get_default();
379
380 // Get a handle to system dbus
381 auto bus = sdbusplus::bus::new_default();
382
383 // Create an health monitor object
384 phosphor::health::HealthMon healthMon(bus);
385
386 // Request service bus name
387 bus.request_name(HEALTH_BUS_NAME);
388
389 // Attach the bus to sd_event to service user requests
390 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
391 event.loop();
392
393 return 0;
394}