blob: 2887766854649a871a4f50e996a3a9a60cfea35b [file] [log] [blame]
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08001#include "health_metric.hpp"
2
3#include <phosphor-logging/lg2.hpp>
4
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -08005#include <cmath>
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08006#include <numeric>
7#include <unordered_map>
8
9PHOSPHOR_LOG2_USING;
10
11namespace phosphor::health::metric
12{
13
14using association_t = std::tuple<std::string, std::string, std::string>;
15
Patrick Williams658efd52024-03-04 12:53:52 -060016auto HealthMetric::getPath(MType type, std::string name, SubType subType)
17 -> std::string
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080018{
19 std::string path;
20 switch (subType)
21 {
22 case SubType::cpuTotal:
23 {
24 return std::string(BmcPath) + "/" + PathIntf::total_cpu;
25 }
26 case SubType::cpuKernel:
27 {
28 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
29 }
30 case SubType::cpuUser:
31 {
32 return std::string(BmcPath) + "/" + PathIntf::user_cpu;
33 }
34 case SubType::memoryAvailable:
35 {
36 return std::string(BmcPath) + "/" + PathIntf::available_memory;
37 }
38 case SubType::memoryBufferedAndCached:
39 {
40 return std::string(BmcPath) + "/" +
41 PathIntf::buffered_and_cached_memory;
42 }
43 case SubType::memoryFree:
44 {
45 return std::string(BmcPath) + "/" + PathIntf::free_memory;
46 }
47 case SubType::memoryShared:
48 {
49 return std::string(BmcPath) + "/" + PathIntf::shared_memory;
50 }
51 case SubType::memoryTotal:
52 {
53 return std::string(BmcPath) + "/" + PathIntf::total_memory;
54 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080055 case SubType::NA:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080056 {
Patrick Williams658efd52024-03-04 12:53:52 -060057 if (type == MType::storage)
Jagpal Singh Gill97582802024-02-27 13:59:11 -080058 {
59 static constexpr auto nameDelimiter = "_";
60 auto storageType = name.substr(
61 name.find_last_of(nameDelimiter) + 1, name.length());
62 std::ranges::for_each(storageType,
63 [](auto& c) { c = std::tolower(c); });
64 return std::string(BmcPath) + "/" + PathIntf::storage + "/" +
65 storageType;
66 }
67 else
68 {
69 error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE",
70 subType, "TYPE", type);
71 return "";
72 }
Jagpal Singh Gilldfe839f2024-02-16 09:54:02 -080073 }
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080074 default:
75 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080076 error("Invalid metric {SUBTYPE}", "SUBTYPE", subType);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080077 return "";
78 }
79 }
80}
81
82void HealthMetric::initProperties()
83{
Jagpal Singh Gill97582802024-02-27 13:59:11 -080084 switch (type)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080085 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080086 case MType::cpu:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080087 {
88 ValueIntf::unit(ValueIntf::Unit::Percent, true);
89 ValueIntf::minValue(0.0, true);
90 ValueIntf::maxValue(100.0, true);
91 break;
92 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080093 case MType::memory:
94 case MType::storage:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080095 {
96 ValueIntf::unit(ValueIntf::Unit::Bytes, true);
97 ValueIntf::minValue(0.0, true);
Jagpal Singh Gill97582802024-02-27 13:59:11 -080098 break;
99 }
100 case MType::inode:
101 case MType::unknown:
102 default:
103 {
104 throw std::invalid_argument("Invalid metric type");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800105 }
106 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800107 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800108
Patrick Williams658efd52024-03-04 12:53:52 -0600109 using bound_map_t = std::map<Bound, double>;
110 std::map<Type, bound_map_t> thresholds;
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800111 for (const auto& [key, value] : config.thresholds)
112 {
Patrick Williams658efd52024-03-04 12:53:52 -0600113 auto type = std::get<Type>(key);
114 auto bound = std::get<Bound>(key);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800115 auto threshold = thresholds.find(type);
116 if (threshold == thresholds.end())
117 {
118 bound_map_t bounds;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800119 bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800120 thresholds.emplace(type, bounds);
121 }
122 else
123 {
124 threshold->second.emplace(bound, value.value);
125 }
126 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800127 ThresholdIntf::value(thresholds, true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800128}
129
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800130bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
131 double value)
132{
133 switch (bound)
134 {
135 case ThresholdIntf::Bound::Lower:
136 {
137 return (value < thresholdValue);
138 }
139 case ThresholdIntf::Bound::Upper:
140 {
141 return (value > thresholdValue);
142 }
143 default:
144 {
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600145 error("Invalid threshold bound {BOUND}", "BOUND", bound);
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800146 return false;
147 }
148 }
149}
150
Patrick Williams658efd52024-03-04 12:53:52 -0600151void HealthMetric::checkThreshold(Type type, Bound bound, MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800152{
153 auto threshold = std::make_tuple(type, bound);
154 auto thresholds = ThresholdIntf::value();
155
156 if (thresholds.contains(type) && thresholds[type].contains(bound))
157 {
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800158 auto tConfig = config.thresholds.at(threshold);
159 auto thresholdValue = tConfig.value / 100 * value.total;
160 thresholds[type][bound] = thresholdValue;
161 ThresholdIntf::value(thresholds);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800162 auto assertions = ThresholdIntf::asserted();
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800163 if (didThresholdViolate(bound, thresholdValue, value.current))
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800164 {
165 if (!assertions.contains(threshold))
166 {
167 assertions.insert(threshold);
168 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800169 ThresholdIntf::assertionChanged(type, bound, true,
170 value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800171 if (tConfig.log)
172 {
173 error(
174 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600175 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800176 startUnit(bus, tConfig.target);
177 }
178 }
179 return;
180 }
181 else if (assertions.contains(threshold))
182 {
183 assertions.erase(threshold);
184 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800185 ThresholdIntf::assertionChanged(type, bound, false, value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800186 if (config.thresholds.find(threshold)->second.log)
187 {
188 info(
189 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600190 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800191 }
192 }
193 }
194}
195
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800196void HealthMetric::checkThresholds(MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800197{
198 if (!ThresholdIntf::value().empty())
199 {
Patrick Williams658efd52024-03-04 12:53:52 -0600200 for (auto type : {Type::HardShutdown, Type::SoftShutdown,
201 Type::PerformanceLoss, Type::Critical, Type::Warning})
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800202 {
Patrick Williams658efd52024-03-04 12:53:52 -0600203 checkThreshold(type, Bound::Lower, value);
204 checkThreshold(type, Bound::Upper, value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800205 }
206 }
207}
208
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800209auto HealthMetric::shouldNotify(MValue value) -> bool
210{
211 if (std::isnan(value.current))
212 {
213 return true;
214 }
215 auto changed = std::abs((value.current - lastNotifiedValue) /
216 lastNotifiedValue * 100.0);
Jagpal Singh Gilla1027622024-03-05 17:57:33 -0800217 if (changed >= config.hysteresis)
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800218 {
219 lastNotifiedValue = value.current;
220 return true;
221 }
222 return false;
223}
224
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800225void HealthMetric::update(MValue value)
226{
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800227 ValueIntf::value(value.current, !shouldNotify(value));
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800228
229 // Maintain window size for threshold calculation
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800230 if (history.size() >= config.windowSize)
231 {
232 history.pop_front();
233 }
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800234 history.push_back(value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800235
236 if (history.size() < config.windowSize)
237 {
238 // Wait for the metric to have enough samples to calculate average
Patrick Williams0f54d7a2024-02-22 12:39:46 -0600239 debug("Not enough samples to calculate average");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800240 return;
241 }
242
243 double average = (std::accumulate(history.begin(), history.end(), 0.0)) /
244 history.size();
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800245 value.current = average;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800246 checkThresholds(value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800247}
248
249void HealthMetric::create(const paths_t& bmcPaths)
250{
251 info("Create Health Metric: {METRIC}", "METRIC", config.name);
252 initProperties();
253
254 std::vector<association_t> associations;
255 static constexpr auto forwardAssociation = "measuring";
256 static constexpr auto reverseAssociation = "measured_by";
257 for (const auto& bmcPath : bmcPaths)
258 {
259 /*
260 * This metric is "measuring" the health for the BMC at bmcPath
261 * The BMC at bmcPath is "measured_by" this metric.
262 */
263 associations.push_back(
264 {forwardAssociation, reverseAssociation, bmcPath});
265 }
266 AssociationIntf::associations(associations);
267}
268
269} // namespace phosphor::health::metric