blob: 2f23394e6776dd96045b5297163a2acc5c0ccc3f [file] [log] [blame]
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08001#include "health_metric.hpp"
2
3#include <phosphor-logging/lg2.hpp>
4
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -08005#include <cmath>
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08006#include <numeric>
7#include <unordered_map>
8
9PHOSPHOR_LOG2_USING;
10
11namespace phosphor::health::metric
12{
13
/**
 * Association entry made of three strings. create() fills each entry as
 * {forward association name, reverse association name, path}.
 */
using association_t = std::tuple<std::string, std::string, std::string>;

/**
 * Minimum relative change, expressed in percent of the last notified value,
 * that HealthMetric::shouldNotify() requires before it reports a new value
 * as worth notifying.
 */
static constexpr double hysteresis{1.0};
17
Patrick Williams658efd52024-03-04 12:53:52 -060018auto HealthMetric::getPath(MType type, std::string name, SubType subType)
19 -> std::string
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080020{
21 std::string path;
22 switch (subType)
23 {
24 case SubType::cpuTotal:
25 {
26 return std::string(BmcPath) + "/" + PathIntf::total_cpu;
27 }
28 case SubType::cpuKernel:
29 {
30 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
31 }
32 case SubType::cpuUser:
33 {
34 return std::string(BmcPath) + "/" + PathIntf::user_cpu;
35 }
36 case SubType::memoryAvailable:
37 {
38 return std::string(BmcPath) + "/" + PathIntf::available_memory;
39 }
40 case SubType::memoryBufferedAndCached:
41 {
42 return std::string(BmcPath) + "/" +
43 PathIntf::buffered_and_cached_memory;
44 }
45 case SubType::memoryFree:
46 {
47 return std::string(BmcPath) + "/" + PathIntf::free_memory;
48 }
49 case SubType::memoryShared:
50 {
51 return std::string(BmcPath) + "/" + PathIntf::shared_memory;
52 }
53 case SubType::memoryTotal:
54 {
55 return std::string(BmcPath) + "/" + PathIntf::total_memory;
56 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080057 case SubType::NA:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080058 {
Patrick Williams658efd52024-03-04 12:53:52 -060059 if (type == MType::storage)
Jagpal Singh Gill97582802024-02-27 13:59:11 -080060 {
61 static constexpr auto nameDelimiter = "_";
62 auto storageType = name.substr(
63 name.find_last_of(nameDelimiter) + 1, name.length());
64 std::ranges::for_each(storageType,
65 [](auto& c) { c = std::tolower(c); });
66 return std::string(BmcPath) + "/" + PathIntf::storage + "/" +
67 storageType;
68 }
69 else
70 {
71 error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE",
72 subType, "TYPE", type);
73 return "";
74 }
Jagpal Singh Gilldfe839f2024-02-16 09:54:02 -080075 }
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080076 default:
77 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080078 error("Invalid metric {SUBTYPE}", "SUBTYPE", subType);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080079 return "";
80 }
81 }
82}
83
84void HealthMetric::initProperties()
85{
Jagpal Singh Gill97582802024-02-27 13:59:11 -080086 switch (type)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080087 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080088 case MType::cpu:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080089 {
90 ValueIntf::unit(ValueIntf::Unit::Percent, true);
91 ValueIntf::minValue(0.0, true);
92 ValueIntf::maxValue(100.0, true);
93 break;
94 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080095 case MType::memory:
96 case MType::storage:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080097 {
98 ValueIntf::unit(ValueIntf::Unit::Bytes, true);
99 ValueIntf::minValue(0.0, true);
Jagpal Singh Gill97582802024-02-27 13:59:11 -0800100 break;
101 }
102 case MType::inode:
103 case MType::unknown:
104 default:
105 {
106 throw std::invalid_argument("Invalid metric type");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800107 }
108 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800109 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800110
Patrick Williams658efd52024-03-04 12:53:52 -0600111 using bound_map_t = std::map<Bound, double>;
112 std::map<Type, bound_map_t> thresholds;
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800113 for (const auto& [key, value] : config.thresholds)
114 {
Patrick Williams658efd52024-03-04 12:53:52 -0600115 auto type = std::get<Type>(key);
116 auto bound = std::get<Bound>(key);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800117 auto threshold = thresholds.find(type);
118 if (threshold == thresholds.end())
119 {
120 bound_map_t bounds;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800121 bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800122 thresholds.emplace(type, bounds);
123 }
124 else
125 {
126 threshold->second.emplace(bound, value.value);
127 }
128 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800129 ThresholdIntf::value(thresholds, true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800130}
131
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800132bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
133 double value)
134{
135 switch (bound)
136 {
137 case ThresholdIntf::Bound::Lower:
138 {
139 return (value < thresholdValue);
140 }
141 case ThresholdIntf::Bound::Upper:
142 {
143 return (value > thresholdValue);
144 }
145 default:
146 {
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600147 error("Invalid threshold bound {BOUND}", "BOUND", bound);
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800148 return false;
149 }
150 }
151}
152
Patrick Williams658efd52024-03-04 12:53:52 -0600153void HealthMetric::checkThreshold(Type type, Bound bound, MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800154{
155 auto threshold = std::make_tuple(type, bound);
156 auto thresholds = ThresholdIntf::value();
157
158 if (thresholds.contains(type) && thresholds[type].contains(bound))
159 {
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800160 auto tConfig = config.thresholds.at(threshold);
161 auto thresholdValue = tConfig.value / 100 * value.total;
162 thresholds[type][bound] = thresholdValue;
163 ThresholdIntf::value(thresholds);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800164 auto assertions = ThresholdIntf::asserted();
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800165 if (didThresholdViolate(bound, thresholdValue, value.current))
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800166 {
167 if (!assertions.contains(threshold))
168 {
169 assertions.insert(threshold);
170 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800171 ThresholdIntf::assertionChanged(type, bound, true,
172 value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800173 if (tConfig.log)
174 {
175 error(
176 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600177 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800178 startUnit(bus, tConfig.target);
179 }
180 }
181 return;
182 }
183 else if (assertions.contains(threshold))
184 {
185 assertions.erase(threshold);
186 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800187 ThresholdIntf::assertionChanged(type, bound, false, value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800188 if (config.thresholds.find(threshold)->second.log)
189 {
190 info(
191 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600192 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800193 }
194 }
195 }
196}
197
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800198void HealthMetric::checkThresholds(MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800199{
200 if (!ThresholdIntf::value().empty())
201 {
Patrick Williams658efd52024-03-04 12:53:52 -0600202 for (auto type : {Type::HardShutdown, Type::SoftShutdown,
203 Type::PerformanceLoss, Type::Critical, Type::Warning})
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800204 {
Patrick Williams658efd52024-03-04 12:53:52 -0600205 checkThreshold(type, Bound::Lower, value);
206 checkThreshold(type, Bound::Upper, value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800207 }
208 }
209}
210
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800211auto HealthMetric::shouldNotify(MValue value) -> bool
212{
213 if (std::isnan(value.current))
214 {
215 return true;
216 }
217 auto changed = std::abs((value.current - lastNotifiedValue) /
218 lastNotifiedValue * 100.0);
219 if (changed >= hysteresis)
220 {
221 lastNotifiedValue = value.current;
222 return true;
223 }
224 return false;
225}
226
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800227void HealthMetric::update(MValue value)
228{
Jagpal Singh Gillb94b1222024-03-02 17:53:30 -0800229 ValueIntf::value(value.current, !shouldNotify(value));
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800230
231 // Maintain window size for threshold calculation
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800232 if (history.size() >= config.windowSize)
233 {
234 history.pop_front();
235 }
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800236 history.push_back(value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800237
238 if (history.size() < config.windowSize)
239 {
240 // Wait for the metric to have enough samples to calculate average
Patrick Williams0f54d7a2024-02-22 12:39:46 -0600241 debug("Not enough samples to calculate average");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800242 return;
243 }
244
245 double average = (std::accumulate(history.begin(), history.end(), 0.0)) /
246 history.size();
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800247 value.current = average;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800248 checkThresholds(value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800249}
250
251void HealthMetric::create(const paths_t& bmcPaths)
252{
253 info("Create Health Metric: {METRIC}", "METRIC", config.name);
254 initProperties();
255
256 std::vector<association_t> associations;
257 static constexpr auto forwardAssociation = "measuring";
258 static constexpr auto reverseAssociation = "measured_by";
259 for (const auto& bmcPath : bmcPaths)
260 {
261 /*
262 * This metric is "measuring" the health for the BMC at bmcPath
263 * The BMC at bmcPath is "measured_by" this metric.
264 */
265 associations.push_back(
266 {forwardAssociation, reverseAssociation, bmcPath});
267 }
268 AssociationIntf::associations(associations);
269}
270
271} // namespace phosphor::health::metric