blob: 361fe74c8513b9e18ad541f36ca2f807695531a8 [file] [log] [blame]
Vijay Khemkae2795302020-07-15 17:28:45 -07001#include "config.h"
2
3#include "healthMonitor.hpp"
4
5#include <phosphor-logging/log.hpp>
Vijay Khemka1d0d0122020-09-29 12:17:43 -07006#include <sdbusplus/server/manager.hpp>
Vijay Khemkae2795302020-07-15 17:28:45 -07007#include <sdeventplus/event.hpp>
8
9#include <fstream>
10#include <iostream>
Vijay Khemka15537762020-07-22 11:44:56 -070011#include <numeric>
12#include <sstream>
13
14extern "C"
15{
16#include <sys/sysinfo.h>
17}
Vijay Khemkae2795302020-07-15 17:28:45 -070018
/** Compile-time switch for the diagnostic prints written to std::cout. */
static constexpr bool DEBUG{false};
/** Threshold used when a "Critical"/"Warning" entry carries no "Value". */
static constexpr uint8_t defaultHighThreshold{100};
Vijay Khemkae2795302020-07-15 17:28:45 -070021
22namespace phosphor
23{
24namespace health
25{
26
27using namespace phosphor::logging;
28
/**
 * Positions of the numeric fields that follow the "cpu" label on the first
 * line of /proc/stat, in the order readCPUUtilization() parses them.
 * NUM_CPU_STATES_TIME is the number of fields, not a field itself.
 */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
43
44double readCPUUtilization()
45{
46 std::ifstream fileStat("/proc/stat");
47 if (!fileStat.is_open())
48 {
49 log<level::ERR>("cpu file not available",
50 entry("FILENAME = /proc/stat"));
51 return -1;
52 }
53
54 std::string firstLine, labelName;
55 std::size_t timeData[NUM_CPU_STATES_TIME];
56
57 std::getline(fileStat, firstLine);
58 std::stringstream ss(firstLine);
59 ss >> labelName;
60
61 if (DEBUG)
62 std::cout << "CPU stats first Line is " << firstLine << "\n";
63
64 if (labelName.compare("cpu"))
65 {
66 log<level::ERR>("CPU data not available");
67 return -1;
68 }
69
70 int i;
71 for (i = 0; i < NUM_CPU_STATES_TIME; i++)
72 {
73 if (!(ss >> timeData[i]))
74 break;
75 }
76
77 if (i != NUM_CPU_STATES_TIME)
78 {
79 log<level::ERR>("CPU data not correct");
80 return -1;
81 }
82
83 static double preActiveTime = 0, preIdleTime = 0;
84 double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime,
85 activePercValue;
86
87 idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX];
88 activeTime = timeData[USER_IDX] + timeData[NICE_IDX] +
89 timeData[SYSTEM_IDX] + timeData[IRQ_IDX] +
90 timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] +
91 timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX];
92
93 idleTimeDiff = idleTime - preIdleTime;
94 activeTimeDiff = activeTime - preActiveTime;
95
96 /* Store current idle and active time for next calculation */
97 preIdleTime = idleTime;
98 preActiveTime = activeTime;
99
100 totalTime = idleTimeDiff + activeTimeDiff;
101
102 activePercValue = activeTimeDiff / totalTime * 100;
103
104 if (DEBUG)
105 std::cout << "CPU Utilization is " << activePercValue << "\n";
106
107 return activePercValue;
108}
109
110double readMemoryUtilization()
111{
112 struct sysinfo s_info;
113
114 sysinfo(&s_info);
115 double usedRam = s_info.totalram - s_info.freeram;
116 double memUsePerc = usedRam / s_info.totalram * 100;
117
118 if (DEBUG)
119 {
120 std::cout << "Memory Utilization is " << memUsePerc << "\n";
121
122 std::cout << "TotalRam: " << s_info.totalram
123 << " FreeRam: " << s_info.freeram << "\n";
124 std::cout << "UseRam: " << usedRam << "\n";
125 }
126
127 return memUsePerc;
128}
129
130/** Map of read function for each health sensors supported */
131std::map<std::string, std::function<double()>> readSensors = {
132 {"CPU", readCPUUtilization}, {"Memory", readMemoryUtilization}};
133
134void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh)
Vijay Khemkae2795302020-07-15 17:28:45 -0700135{
136 CriticalInterface::criticalHigh(criticalHigh);
137 WarningInterface::warningHigh(warningHigh);
138}
139
Vijay Khemka15537762020-07-22 11:44:56 -0700140void HealthSensor::setSensorValueToDbus(const double value)
Vijay Khemkae2795302020-07-15 17:28:45 -0700141{
142 ValueIface::value(value);
143}
144
Vijay Khemka15537762020-07-22 11:44:56 -0700145void HealthSensor::initHealthSensor()
146{
147 std::string logMsg = sensorConfig.name + " Health Sensor initialized";
148 log<level::INFO>(logMsg.c_str());
149
150 /* Look for sensor read functions */
151 if (readSensors.find(sensorConfig.name) == readSensors.end())
152 {
153 log<level::ERR>("Sensor read function not available");
154 return;
155 }
156
157 /* Read Sensor values */
158 auto value = readSensors[sensorConfig.name]();
159
160 if (value < 0)
161 {
162 log<level::ERR>("Reading Sensor Utilization failed",
163 entry("NAME = %s", sensorConfig.name.c_str()));
164 return;
165 }
166
Vijay Khemka08797702020-09-21 14:53:57 -0700167 /* Initialize value queue with initial sensor reading */
Vijay Khemka15537762020-07-22 11:44:56 -0700168 for (int i = 0; i < sensorConfig.windowSize; i++)
169 {
170 valQueue.push_back(value);
171 }
Vijay Khemka08797702020-09-21 14:53:57 -0700172
173 /* Initialize unit value (Percent) for utilization sensor */
174 ValueIface::unit(ValueIface::Unit::Percent);
175
Vijay Khemka15537762020-07-22 11:44:56 -0700176 setSensorValueToDbus(value);
Vijay Khemkab38fd582020-07-23 13:21:23 -0700177
178 /* Start the timer for reading sensor data at regular interval */
179 readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000));
180}
181
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700182void HealthSensor::checkSensorThreshold(const double value)
183{
Vijay Khemka415dcd22020-09-21 15:58:21 -0700184 if (sensorConfig.criticalHigh && (value > sensorConfig.criticalHigh))
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700185 {
186 if (!CriticalInterface::criticalAlarmHigh())
187 {
188 CriticalInterface::criticalAlarmHigh(true);
189 if (sensorConfig.criticalLog)
190 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
191 "critical high threshold",
192 entry("NAME = %s", sensorConfig.name.c_str()));
193 }
194 }
195 else
196 {
197 if (CriticalInterface::criticalAlarmHigh())
198 {
199 CriticalInterface::criticalAlarmHigh(false);
200 if (sensorConfig.criticalLog)
201 log<level::INFO>("DEASSERT: Utilization Sensor is under "
202 "critical high threshold",
203 entry("NAME = %s", sensorConfig.name.c_str()));
204 }
205
Vijay Khemka415dcd22020-09-21 15:58:21 -0700206 /* if warning high value is not set then return */
207 if (!sensorConfig.warningHigh)
208 return;
209
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700210 if ((value > sensorConfig.warningHigh) &&
211 (!WarningInterface::warningAlarmHigh()))
212 {
213 WarningInterface::warningAlarmHigh(true);
214 if (sensorConfig.warningLog)
215 log<level::ERR>("ASSERT: Utilization Sensor has exceeded "
216 "warning high threshold",
217 entry("NAME = %s", sensorConfig.name.c_str()));
218 }
219 else if ((value <= sensorConfig.warningHigh) &&
220 (WarningInterface::warningAlarmHigh()))
221 {
222 WarningInterface::warningAlarmHigh(false);
223 if (sensorConfig.warningLog)
224 log<level::INFO>("DEASSERT: Utilization Sensor is under "
225 "warning high threshold",
226 entry("NAME = %s", sensorConfig.name.c_str()));
227 }
228 }
229}
230
Vijay Khemkab38fd582020-07-23 13:21:23 -0700231void HealthSensor::readHealthSensor()
232{
233 /* Read current sensor value */
234 double value = readSensors[sensorConfig.name]();
235 if (value < 0)
236 {
237 log<level::ERR>("Reading Sensor Utilization failed",
238 entry("NAME = %s", sensorConfig.name.c_str()));
239 return;
240 }
241
242 /* Remove first item from the queue */
243 valQueue.pop_front();
244 /* Add new item at the back */
245 valQueue.push_back(value);
246
247 /* Calculate average values for the given window size */
248 double avgValue = 0;
249 avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue);
250 avgValue = avgValue / sensorConfig.windowSize;
251
252 /* Set this new value to dbus */
253 setSensorValueToDbus(avgValue);
Vijay Khemkab7a7b8a2020-07-29 12:22:01 -0700254
255 /* Check the sensor threshold and log required message */
256 checkSensorThreshold(avgValue);
Vijay Khemka15537762020-07-22 11:44:56 -0700257}
258
259void printConfig(HealthConfig& cfg)
260{
261 std::cout << "Name: " << cfg.name << "\n";
262 std::cout << "Freq: " << (int)cfg.freq << "\n";
263 std::cout << "Window Size: " << (int)cfg.windowSize << "\n";
264 std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n";
265 std::cout << "warning value: " << (int)cfg.warningHigh << "\n";
266 std::cout << "Critical log: " << (int)cfg.criticalLog << "\n";
267 std::cout << "Warning log: " << (int)cfg.warningLog << "\n";
268 std::cout << "Critical Target: " << cfg.criticalTgt << "\n";
269 std::cout << "Warning Target: " << cfg.warningTgt << "\n\n";
270}
271
Vijay Khemkae2795302020-07-15 17:28:45 -0700272/* Create dbus utilization sensor object for each configured sensors */
273void HealthMon::createHealthSensors()
274{
275 for (auto& cfg : sensorConfigs)
276 {
277 std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name;
278 auto healthSensor =
Vijay Khemka15537762020-07-22 11:44:56 -0700279 std::make_shared<HealthSensor>(bus, objPath.c_str(), cfg);
Vijay Khemkae2795302020-07-15 17:28:45 -0700280 healthSensors.emplace(cfg.name, healthSensor);
281
282 std::string logMsg = cfg.name + " Health Sensor created";
283 log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str()));
284
285 /* Set configured values of crtical and warning high to dbus */
286 healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh);
287 }
288}
289
290/** @brief Parsing Health config JSON file */
291Json HealthMon::parseConfigFile(std::string configFile)
292{
293 std::ifstream jsonFile(configFile);
294 if (!jsonFile.is_open())
295 {
296 log<level::ERR>("config JSON file not found",
297 entry("FILENAME = %s", configFile.c_str()));
298 }
299
300 auto data = Json::parse(jsonFile, nullptr, false);
301 if (data.is_discarded())
302 {
303 log<level::ERR>("config readings JSON parser failure",
304 entry("FILENAME = %s", configFile.c_str()));
305 }
306
307 return data;
308}
309
Vijay Khemkae2795302020-07-15 17:28:45 -0700310void HealthMon::getConfigData(Json& data, HealthConfig& cfg)
311{
312
313 static const Json empty{};
314
Vijay Khemka15537762020-07-22 11:44:56 -0700315 /* Default frerquency of sensor polling is 1 second */
316 cfg.freq = data.value("Frequency", 1);
317
318 /* Default window size sensor queue is 1 */
319 cfg.windowSize = data.value("Window_size", 1);
320
Vijay Khemkae2795302020-07-15 17:28:45 -0700321 auto threshold = data.value("Threshold", empty);
322 if (!threshold.empty())
323 {
324 auto criticalData = threshold.value("Critical", empty);
325 if (!criticalData.empty())
326 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700327 cfg.criticalHigh =
328 criticalData.value("Value", defaultHighThreshold);
Vijay Khemkae2795302020-07-15 17:28:45 -0700329 cfg.criticalLog = criticalData.value("Log", true);
330 cfg.criticalTgt = criticalData.value("Target", "");
331 }
332 auto warningData = threshold.value("Warning", empty);
333 if (!warningData.empty())
334 {
Vijay Khemka415dcd22020-09-21 15:58:21 -0700335 cfg.warningHigh = warningData.value("Value", defaultHighThreshold);
336 cfg.warningLog = warningData.value("Log", false);
Vijay Khemkae2795302020-07-15 17:28:45 -0700337 cfg.warningTgt = warningData.value("Target", "");
338 }
339 }
340}
341
Vijay Khemka15537762020-07-22 11:44:56 -0700342std::vector<HealthConfig> HealthMon::getHealthConfig()
Vijay Khemkae2795302020-07-15 17:28:45 -0700343{
344
345 std::vector<HealthConfig> cfgs;
346 HealthConfig cfg;
347 auto data = parseConfigFile(HEALTH_CONFIG_FILE);
348
349 // print values
350 if (DEBUG)
351 std::cout << "Config json data:\n" << data << "\n\n";
352
353 /* Get CPU config data */
354 for (auto& j : data.items())
355 {
356 auto key = j.key();
Vijay Khemka15537762020-07-22 11:44:56 -0700357 if (readSensors.find(key) != readSensors.end())
Vijay Khemkae2795302020-07-15 17:28:45 -0700358 {
359 HealthConfig cfg = HealthConfig();
360 cfg.name = j.key();
361 getConfigData(j.value(), cfg);
362 cfgs.push_back(cfg);
363 if (DEBUG)
364 printConfig(cfg);
365 }
366 else
367 {
368 std::string logMsg = key + " Health Sensor not supported";
369 log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str()));
370 }
371 }
372 return cfgs;
373}
374
375} // namespace health
376} // namespace phosphor
377
378/**
379 * @brief Main
380 */
381int main()
382{
383
384 // Get a default event loop
385 auto event = sdeventplus::Event::get_default();
386
387 // Get a handle to system dbus
388 auto bus = sdbusplus::bus::new_default();
389
390 // Create an health monitor object
391 phosphor::health::HealthMon healthMon(bus);
392
393 // Request service bus name
394 bus.request_name(HEALTH_BUS_NAME);
395
Vijay Khemka1d0d0122020-09-29 12:17:43 -0700396 // Add object manager to sensor node
397 sdbusplus::server::manager::manager objManager(bus, SENSOR_OBJPATH);
398
Vijay Khemkae2795302020-07-15 17:28:45 -0700399 // Attach the bus to sd_event to service user requests
400 bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL);
401 event.loop();
402
403 return 0;
404}