blob: f08916dedc589b00461027ae21748f95141a1df2 [file] [log] [blame]
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08001#include "health_metric.hpp"
2
3#include <phosphor-logging/lg2.hpp>
4
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -08005#include <cmath>
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08006#include <numeric>
7#include <unordered_map>
8
9PHOSPHOR_LOG2_USING;
10
11namespace phosphor::health::metric
12{
13
/** One association entry, in the order create() fills it:
 *  (forward association name, reverse association name, associated path).
 */
using association_t = std::tuple<std::string, std::string, std::string>;
15
Patrick Williams2d4cbeb2024-12-18 11:21:50 -050016auto HealthMetric::getPath(MType type, std::string name, SubType subType)
17 -> std::string
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080018{
19 std::string path;
20 switch (subType)
21 {
22 case SubType::cpuTotal:
23 {
24 return std::string(BmcPath) + "/" + PathIntf::total_cpu;
25 }
26 case SubType::cpuKernel:
27 {
28 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
29 }
30 case SubType::cpuUser:
31 {
32 return std::string(BmcPath) + "/" + PathIntf::user_cpu;
33 }
34 case SubType::memoryAvailable:
35 {
36 return std::string(BmcPath) + "/" + PathIntf::available_memory;
37 }
38 case SubType::memoryBufferedAndCached:
39 {
40 return std::string(BmcPath) + "/" +
41 PathIntf::buffered_and_cached_memory;
42 }
43 case SubType::memoryFree:
44 {
45 return std::string(BmcPath) + "/" + PathIntf::free_memory;
46 }
47 case SubType::memoryShared:
48 {
49 return std::string(BmcPath) + "/" + PathIntf::shared_memory;
50 }
51 case SubType::memoryTotal:
52 {
53 return std::string(BmcPath) + "/" + PathIntf::total_memory;
54 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080055 case SubType::NA:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080056 {
Patrick Williams658efd52024-03-04 12:53:52 -060057 if (type == MType::storage)
Jagpal Singh Gill97582802024-02-27 13:59:11 -080058 {
59 static constexpr auto nameDelimiter = "_";
60 auto storageType = name.substr(
61 name.find_last_of(nameDelimiter) + 1, name.length());
Patrick Williamsce8b5ae2024-08-16 15:21:18 -040062 std::ranges::for_each(storageType, [](auto& c) {
63 c = std::tolower(c);
64 });
Jagpal Singh Gill97582802024-02-27 13:59:11 -080065 return std::string(BmcPath) + "/" + PathIntf::storage + "/" +
66 storageType;
67 }
68 else
69 {
70 error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE",
71 subType, "TYPE", type);
72 return "";
73 }
Jagpal Singh Gilldfe839f2024-02-16 09:54:02 -080074 }
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080075 default:
76 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080077 error("Invalid metric {SUBTYPE}", "SUBTYPE", subType);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080078 return "";
79 }
80 }
81}
82
83void HealthMetric::initProperties()
84{
Jagpal Singh Gill97582802024-02-27 13:59:11 -080085 switch (type)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080086 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080087 case MType::cpu:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080088 {
89 ValueIntf::unit(ValueIntf::Unit::Percent, true);
90 ValueIntf::minValue(0.0, true);
91 ValueIntf::maxValue(100.0, true);
92 break;
93 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080094 case MType::memory:
95 case MType::storage:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080096 {
97 ValueIntf::unit(ValueIntf::Unit::Bytes, true);
98 ValueIntf::minValue(0.0, true);
Jagpal Singh Gill97582802024-02-27 13:59:11 -080099 break;
100 }
101 case MType::inode:
102 case MType::unknown:
103 default:
104 {
105 throw std::invalid_argument("Invalid metric type");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800106 }
107 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800108 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800109
Patrick Williams658efd52024-03-04 12:53:52 -0600110 using bound_map_t = std::map<Bound, double>;
111 std::map<Type, bound_map_t> thresholds;
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800112 for (const auto& [key, value] : config.thresholds)
113 {
Patrick Williams658efd52024-03-04 12:53:52 -0600114 auto type = std::get<Type>(key);
115 auto bound = std::get<Bound>(key);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800116 auto threshold = thresholds.find(type);
117 if (threshold == thresholds.end())
118 {
119 bound_map_t bounds;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800120 bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800121 thresholds.emplace(type, bounds);
122 }
123 else
124 {
125 threshold->second.emplace(bound, value.value);
126 }
127 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800128 ThresholdIntf::value(thresholds, true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800129}
130
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800131bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
132 double value)
133{
134 switch (bound)
135 {
136 case ThresholdIntf::Bound::Lower:
137 {
138 return (value < thresholdValue);
139 }
140 case ThresholdIntf::Bound::Upper:
141 {
142 return (value > thresholdValue);
143 }
144 default:
145 {
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600146 error("Invalid threshold bound {BOUND}", "BOUND", bound);
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800147 return false;
148 }
149 }
150}
151
Patrick Williams658efd52024-03-04 12:53:52 -0600152void HealthMetric::checkThreshold(Type type, Bound bound, MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800153{
154 auto threshold = std::make_tuple(type, bound);
155 auto thresholds = ThresholdIntf::value();
156
157 if (thresholds.contains(type) && thresholds[type].contains(bound))
158 {
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800159 auto tConfig = config.thresholds.at(threshold);
160 auto thresholdValue = tConfig.value / 100 * value.total;
161 thresholds[type][bound] = thresholdValue;
162 ThresholdIntf::value(thresholds);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800163 auto assertions = ThresholdIntf::asserted();
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800164 if (didThresholdViolate(bound, thresholdValue, value.current))
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800165 {
166 if (!assertions.contains(threshold))
167 {
168 assertions.insert(threshold);
169 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800170 ThresholdIntf::assertionChanged(type, bound, true,
171 value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800172 if (tConfig.log)
173 {
174 error(
175 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600176 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800177 startUnit(bus, tConfig.target);
178 }
179 }
180 return;
181 }
182 else if (assertions.contains(threshold))
183 {
184 assertions.erase(threshold);
185 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800186 ThresholdIntf::assertionChanged(type, bound, false, value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800187 if (config.thresholds.find(threshold)->second.log)
188 {
189 info(
190 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600191 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800192 }
193 }
194 }
195}
196
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800197void HealthMetric::checkThresholds(MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800198{
199 if (!ThresholdIntf::value().empty())
200 {
Patrick Williams658efd52024-03-04 12:53:52 -0600201 for (auto type : {Type::HardShutdown, Type::SoftShutdown,
202 Type::PerformanceLoss, Type::Critical, Type::Warning})
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800203 {
Patrick Williams658efd52024-03-04 12:53:52 -0600204 checkThreshold(type, Bound::Lower, value);
205 checkThreshold(type, Bound::Upper, value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800206 }
207 }
208}
209
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800210auto HealthMetric::shouldNotify(MValue value) -> bool
211{
212 if (std::isnan(value.current))
213 {
214 return true;
215 }
Patrick Williamsce8b5ae2024-08-16 15:21:18 -0400216 auto changed = std::abs(
217 (value.current - lastNotifiedValue) / lastNotifiedValue * 100.0);
Jagpal Singh Gilla1027622024-03-05 17:57:33 -0800218 if (changed >= config.hysteresis)
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800219 {
220 lastNotifiedValue = value.current;
221 return true;
222 }
223 return false;
224}
225
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800226void HealthMetric::update(MValue value)
227{
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800228 ValueIntf::value(value.current, !shouldNotify(value));
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800229
230 // Maintain window size for threshold calculation
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800231 if (history.size() >= config.windowSize)
232 {
233 history.pop_front();
234 }
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800235 history.push_back(value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800236
237 if (history.size() < config.windowSize)
238 {
239 // Wait for the metric to have enough samples to calculate average
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800240 return;
241 }
242
Patrick Williamsce8b5ae2024-08-16 15:21:18 -0400243 double average =
244 (std::accumulate(history.begin(), history.end(), 0.0)) / history.size();
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800245 value.current = average;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800246 checkThresholds(value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800247}
248
249void HealthMetric::create(const paths_t& bmcPaths)
250{
251 info("Create Health Metric: {METRIC}", "METRIC", config.name);
252 initProperties();
253
254 std::vector<association_t> associations;
255 static constexpr auto forwardAssociation = "measuring";
256 static constexpr auto reverseAssociation = "measured_by";
257 for (const auto& bmcPath : bmcPaths)
258 {
259 /*
260 * This metric is "measuring" the health for the BMC at bmcPath
261 * The BMC at bmcPath is "measured_by" this metric.
262 */
263 associations.push_back(
264 {forwardAssociation, reverseAssociation, bmcPath});
265 }
266 AssociationIntf::associations(associations);
267}
268
269} // namespace phosphor::health::metric