| #include "config.h" |
| |
| #include "healthMonitor.hpp" |
| |
| #include <phosphor-logging/log.hpp> |
| #include <sdeventplus/event.hpp> |
| |
| #include <fstream> |
| #include <iostream> |
| #include <numeric> |
| #include <sstream> |
| |
| extern "C" |
| { |
| #include <sys/sysinfo.h> |
| } |
| |
/** Compile-time switch for the std::cout diagnostic prints that are guarded
 *  by `if (DEBUG)` in this file. */
static constexpr bool DEBUG{false};
| |
| namespace phosphor |
| { |
| namespace health |
| { |
| |
| using namespace phosphor::logging; |
| |
/** Positions of the numeric fields that follow the "cpu" label on the first
 *  line of /proc/stat, in the order they are read into the counter array.
 *  NUM_CPU_STATES_TIME is the number of fields, not a field itself. */
enum CPUStatesTime
{
    USER_IDX = 0,
    NICE_IDX = 1,
    SYSTEM_IDX = 2,
    IDLE_IDX = 3,
    IOWAIT_IDX = 4,
    IRQ_IDX = 5,
    SOFTIRQ_IDX = 6,
    STEAL_IDX = 7,
    GUEST_USER_IDX = 8,
    GUEST_NICE_IDX = 9,
    NUM_CPU_STATES_TIME = 10
};
| |
| double readCPUUtilization() |
| { |
| std::ifstream fileStat("/proc/stat"); |
| if (!fileStat.is_open()) |
| { |
| log<level::ERR>("cpu file not available", |
| entry("FILENAME = /proc/stat")); |
| return -1; |
| } |
| |
| std::string firstLine, labelName; |
| std::size_t timeData[NUM_CPU_STATES_TIME]; |
| |
| std::getline(fileStat, firstLine); |
| std::stringstream ss(firstLine); |
| ss >> labelName; |
| |
| if (DEBUG) |
| std::cout << "CPU stats first Line is " << firstLine << "\n"; |
| |
| if (labelName.compare("cpu")) |
| { |
| log<level::ERR>("CPU data not available"); |
| return -1; |
| } |
| |
| int i; |
| for (i = 0; i < NUM_CPU_STATES_TIME; i++) |
| { |
| if (!(ss >> timeData[i])) |
| break; |
| } |
| |
| if (i != NUM_CPU_STATES_TIME) |
| { |
| log<level::ERR>("CPU data not correct"); |
| return -1; |
| } |
| |
| static double preActiveTime = 0, preIdleTime = 0; |
| double activeTime, activeTimeDiff, idleTime, idleTimeDiff, totalTime, |
| activePercValue; |
| |
| idleTime = timeData[IDLE_IDX] + timeData[IOWAIT_IDX]; |
| activeTime = timeData[USER_IDX] + timeData[NICE_IDX] + |
| timeData[SYSTEM_IDX] + timeData[IRQ_IDX] + |
| timeData[SOFTIRQ_IDX] + timeData[STEAL_IDX] + |
| timeData[GUEST_USER_IDX] + timeData[GUEST_NICE_IDX]; |
| |
| idleTimeDiff = idleTime - preIdleTime; |
| activeTimeDiff = activeTime - preActiveTime; |
| |
| /* Store current idle and active time for next calculation */ |
| preIdleTime = idleTime; |
| preActiveTime = activeTime; |
| |
| totalTime = idleTimeDiff + activeTimeDiff; |
| |
| activePercValue = activeTimeDiff / totalTime * 100; |
| |
| if (DEBUG) |
| std::cout << "CPU Utilization is " << activePercValue << "\n"; |
| |
| return activePercValue; |
| } |
| |
| double readMemoryUtilization() |
| { |
| struct sysinfo s_info; |
| |
| sysinfo(&s_info); |
| double usedRam = s_info.totalram - s_info.freeram; |
| double memUsePerc = usedRam / s_info.totalram * 100; |
| |
| if (DEBUG) |
| { |
| std::cout << "Memory Utilization is " << memUsePerc << "\n"; |
| |
| std::cout << "TotalRam: " << s_info.totalram |
| << " FreeRam: " << s_info.freeram << "\n"; |
| std::cout << "UseRam: " << usedRam << "\n"; |
| } |
| |
| return memUsePerc; |
| } |
| |
| /** Map of read function for each health sensors supported */ |
| std::map<std::string, std::function<double()>> readSensors = { |
| {"CPU", readCPUUtilization}, {"Memory", readMemoryUtilization}}; |
| |
| void HealthSensor::setSensorThreshold(double criticalHigh, double warningHigh) |
| { |
| CriticalInterface::criticalHigh(criticalHigh); |
| WarningInterface::warningHigh(warningHigh); |
| } |
| |
| void HealthSensor::setSensorValueToDbus(const double value) |
| { |
| ValueIface::value(value); |
| } |
| |
| void HealthSensor::initHealthSensor() |
| { |
| std::string logMsg = sensorConfig.name + " Health Sensor initialized"; |
| log<level::INFO>(logMsg.c_str()); |
| |
| /* Look for sensor read functions */ |
| if (readSensors.find(sensorConfig.name) == readSensors.end()) |
| { |
| log<level::ERR>("Sensor read function not available"); |
| return; |
| } |
| |
| /* Read Sensor values */ |
| auto value = readSensors[sensorConfig.name](); |
| |
| if (value < 0) |
| { |
| log<level::ERR>("Reading Sensor Utilization failed", |
| entry("NAME = %s", sensorConfig.name.c_str())); |
| return; |
| } |
| |
| /* Initialize value queue with initial sensor reading */ |
| for (int i = 0; i < sensorConfig.windowSize; i++) |
| { |
| valQueue.push_back(value); |
| } |
| |
| /* Initialize unit value (Percent) for utilization sensor */ |
| ValueIface::unit(ValueIface::Unit::Percent); |
| |
| setSensorValueToDbus(value); |
| |
| /* Start the timer for reading sensor data at regular interval */ |
| readTimer.restart(std::chrono::milliseconds(sensorConfig.freq * 1000)); |
| } |
| |
| void HealthSensor::checkSensorThreshold(const double value) |
| { |
| if (value > sensorConfig.criticalHigh) |
| { |
| if (!CriticalInterface::criticalAlarmHigh()) |
| { |
| CriticalInterface::criticalAlarmHigh(true); |
| if (sensorConfig.criticalLog) |
| log<level::ERR>("ASSERT: Utilization Sensor has exceeded " |
| "critical high threshold", |
| entry("NAME = %s", sensorConfig.name.c_str())); |
| } |
| } |
| else |
| { |
| if (CriticalInterface::criticalAlarmHigh()) |
| { |
| CriticalInterface::criticalAlarmHigh(false); |
| if (sensorConfig.criticalLog) |
| log<level::INFO>("DEASSERT: Utilization Sensor is under " |
| "critical high threshold", |
| entry("NAME = %s", sensorConfig.name.c_str())); |
| } |
| |
| if ((value > sensorConfig.warningHigh) && |
| (!WarningInterface::warningAlarmHigh())) |
| { |
| WarningInterface::warningAlarmHigh(true); |
| if (sensorConfig.warningLog) |
| log<level::ERR>("ASSERT: Utilization Sensor has exceeded " |
| "warning high threshold", |
| entry("NAME = %s", sensorConfig.name.c_str())); |
| } |
| else if ((value <= sensorConfig.warningHigh) && |
| (WarningInterface::warningAlarmHigh())) |
| { |
| WarningInterface::warningAlarmHigh(false); |
| if (sensorConfig.warningLog) |
| log<level::INFO>("DEASSERT: Utilization Sensor is under " |
| "warning high threshold", |
| entry("NAME = %s", sensorConfig.name.c_str())); |
| } |
| } |
| } |
| |
| void HealthSensor::readHealthSensor() |
| { |
| /* Read current sensor value */ |
| double value = readSensors[sensorConfig.name](); |
| if (value < 0) |
| { |
| log<level::ERR>("Reading Sensor Utilization failed", |
| entry("NAME = %s", sensorConfig.name.c_str())); |
| return; |
| } |
| |
| /* Remove first item from the queue */ |
| valQueue.pop_front(); |
| /* Add new item at the back */ |
| valQueue.push_back(value); |
| |
| /* Calculate average values for the given window size */ |
| double avgValue = 0; |
| avgValue = accumulate(valQueue.begin(), valQueue.end(), avgValue); |
| avgValue = avgValue / sensorConfig.windowSize; |
| |
| /* Set this new value to dbus */ |
| setSensorValueToDbus(avgValue); |
| |
| /* Check the sensor threshold and log required message */ |
| checkSensorThreshold(avgValue); |
| } |
| |
| void printConfig(HealthConfig& cfg) |
| { |
| std::cout << "Name: " << cfg.name << "\n"; |
| std::cout << "Freq: " << (int)cfg.freq << "\n"; |
| std::cout << "Window Size: " << (int)cfg.windowSize << "\n"; |
| std::cout << "Critical value: " << (int)cfg.criticalHigh << "\n"; |
| std::cout << "warning value: " << (int)cfg.warningHigh << "\n"; |
| std::cout << "Critical log: " << (int)cfg.criticalLog << "\n"; |
| std::cout << "Warning log: " << (int)cfg.warningLog << "\n"; |
| std::cout << "Critical Target: " << cfg.criticalTgt << "\n"; |
| std::cout << "Warning Target: " << cfg.warningTgt << "\n\n"; |
| } |
| |
| /* Create dbus utilization sensor object for each configured sensors */ |
| void HealthMon::createHealthSensors() |
| { |
| for (auto& cfg : sensorConfigs) |
| { |
| std::string objPath = std::string(HEALTH_SENSOR_PATH) + cfg.name; |
| auto healthSensor = |
| std::make_shared<HealthSensor>(bus, objPath.c_str(), cfg); |
| healthSensors.emplace(cfg.name, healthSensor); |
| |
| std::string logMsg = cfg.name + " Health Sensor created"; |
| log<level::INFO>(logMsg.c_str(), entry("NAME = %s", cfg.name.c_str())); |
| |
| /* Set configured values of crtical and warning high to dbus */ |
| healthSensor->setSensorThreshold(cfg.criticalHigh, cfg.warningHigh); |
| } |
| } |
| |
| /** @brief Parsing Health config JSON file */ |
| Json HealthMon::parseConfigFile(std::string configFile) |
| { |
| std::ifstream jsonFile(configFile); |
| if (!jsonFile.is_open()) |
| { |
| log<level::ERR>("config JSON file not found", |
| entry("FILENAME = %s", configFile.c_str())); |
| } |
| |
| auto data = Json::parse(jsonFile, nullptr, false); |
| if (data.is_discarded()) |
| { |
| log<level::ERR>("config readings JSON parser failure", |
| entry("FILENAME = %s", configFile.c_str())); |
| } |
| |
| return data; |
| } |
| |
| void HealthMon::getConfigData(Json& data, HealthConfig& cfg) |
| { |
| |
| static const Json empty{}; |
| |
| /* Default frerquency of sensor polling is 1 second */ |
| cfg.freq = data.value("Frequency", 1); |
| |
| /* Default window size sensor queue is 1 */ |
| cfg.windowSize = data.value("Window_size", 1); |
| |
| auto threshold = data.value("Threshold", empty); |
| if (!threshold.empty()) |
| { |
| auto criticalData = threshold.value("Critical", empty); |
| if (!criticalData.empty()) |
| { |
| cfg.criticalHigh = criticalData.value("Value", 0); |
| cfg.criticalLog = criticalData.value("Log", true); |
| cfg.criticalTgt = criticalData.value("Target", ""); |
| } |
| auto warningData = threshold.value("Warning", empty); |
| if (!warningData.empty()) |
| { |
| cfg.warningHigh = warningData.value("Value", 0); |
| cfg.warningLog = warningData.value("Log", true); |
| cfg.warningTgt = warningData.value("Target", ""); |
| } |
| } |
| } |
| |
| std::vector<HealthConfig> HealthMon::getHealthConfig() |
| { |
| |
| std::vector<HealthConfig> cfgs; |
| HealthConfig cfg; |
| auto data = parseConfigFile(HEALTH_CONFIG_FILE); |
| |
| // print values |
| if (DEBUG) |
| std::cout << "Config json data:\n" << data << "\n\n"; |
| |
| /* Get CPU config data */ |
| for (auto& j : data.items()) |
| { |
| auto key = j.key(); |
| if (readSensors.find(key) != readSensors.end()) |
| { |
| HealthConfig cfg = HealthConfig(); |
| cfg.name = j.key(); |
| getConfigData(j.value(), cfg); |
| cfgs.push_back(cfg); |
| if (DEBUG) |
| printConfig(cfg); |
| } |
| else |
| { |
| std::string logMsg = key + " Health Sensor not supported"; |
| log<level::ERR>(logMsg.c_str(), entry("NAME = %s", key.c_str())); |
| } |
| } |
| return cfgs; |
| } |
| |
| } // namespace health |
| } // namespace phosphor |
| |
| /** |
| * @brief Main |
| */ |
| int main() |
| { |
| |
| // Get a default event loop |
| auto event = sdeventplus::Event::get_default(); |
| |
| // Get a handle to system dbus |
| auto bus = sdbusplus::bus::new_default(); |
| |
| // Create an health monitor object |
| phosphor::health::HealthMon healthMon(bus); |
| |
| // Request service bus name |
| bus.request_name(HEALTH_BUS_NAME); |
| |
| // Attach the bus to sd_event to service user requests |
| bus.attach_event(event.get(), SD_EVENT_PRIORITY_NORMAL); |
| event.loop(); |
| |
| return 0; |
| } |