blob: 31feb55c497064f9533c4ccb0220723173c172e1 [file] [log] [blame]
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08001#include "health_metric.hpp"
2
3#include <phosphor-logging/lg2.hpp>
4
5#include <numeric>
6#include <unordered_map>
7
8PHOSPHOR_LOG2_USING;
9
10namespace phosphor::health::metric
11{
12
13using association_t = std::tuple<std::string, std::string, std::string>;
14
Patrick Williams658efd52024-03-04 12:53:52 -060015auto HealthMetric::getPath(MType type, std::string name, SubType subType)
16 -> std::string
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080017{
18 std::string path;
19 switch (subType)
20 {
21 case SubType::cpuTotal:
22 {
23 return std::string(BmcPath) + "/" + PathIntf::total_cpu;
24 }
25 case SubType::cpuKernel:
26 {
27 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
28 }
29 case SubType::cpuUser:
30 {
31 return std::string(BmcPath) + "/" + PathIntf::user_cpu;
32 }
33 case SubType::memoryAvailable:
34 {
35 return std::string(BmcPath) + "/" + PathIntf::available_memory;
36 }
37 case SubType::memoryBufferedAndCached:
38 {
39 return std::string(BmcPath) + "/" +
40 PathIntf::buffered_and_cached_memory;
41 }
42 case SubType::memoryFree:
43 {
44 return std::string(BmcPath) + "/" + PathIntf::free_memory;
45 }
46 case SubType::memoryShared:
47 {
48 return std::string(BmcPath) + "/" + PathIntf::shared_memory;
49 }
50 case SubType::memoryTotal:
51 {
52 return std::string(BmcPath) + "/" + PathIntf::total_memory;
53 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080054 case SubType::NA:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080055 {
Patrick Williams658efd52024-03-04 12:53:52 -060056 if (type == MType::storage)
Jagpal Singh Gill97582802024-02-27 13:59:11 -080057 {
58 static constexpr auto nameDelimiter = "_";
59 auto storageType = name.substr(
60 name.find_last_of(nameDelimiter) + 1, name.length());
61 std::ranges::for_each(storageType,
62 [](auto& c) { c = std::tolower(c); });
63 return std::string(BmcPath) + "/" + PathIntf::storage + "/" +
64 storageType;
65 }
66 else
67 {
68 error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE",
69 subType, "TYPE", type);
70 return "";
71 }
Jagpal Singh Gilldfe839f2024-02-16 09:54:02 -080072 }
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080073 default:
74 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080075 error("Invalid metric {SUBTYPE}", "SUBTYPE", subType);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080076 return "";
77 }
78 }
79}
80
81void HealthMetric::initProperties()
82{
Jagpal Singh Gill97582802024-02-27 13:59:11 -080083 switch (type)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080084 {
Jagpal Singh Gill97582802024-02-27 13:59:11 -080085 case MType::cpu:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080086 {
87 ValueIntf::unit(ValueIntf::Unit::Percent, true);
88 ValueIntf::minValue(0.0, true);
89 ValueIntf::maxValue(100.0, true);
90 break;
91 }
Jagpal Singh Gill97582802024-02-27 13:59:11 -080092 case MType::memory:
93 case MType::storage:
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080094 {
95 ValueIntf::unit(ValueIntf::Unit::Bytes, true);
96 ValueIntf::minValue(0.0, true);
Jagpal Singh Gill97582802024-02-27 13:59:11 -080097 break;
98 }
99 case MType::inode:
100 case MType::unknown:
101 default:
102 {
103 throw std::invalid_argument("Invalid metric type");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800104 }
105 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800106 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800107
Patrick Williams658efd52024-03-04 12:53:52 -0600108 using bound_map_t = std::map<Bound, double>;
109 std::map<Type, bound_map_t> thresholds;
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800110 for (const auto& [key, value] : config.thresholds)
111 {
Patrick Williams658efd52024-03-04 12:53:52 -0600112 auto type = std::get<Type>(key);
113 auto bound = std::get<Bound>(key);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800114 auto threshold = thresholds.find(type);
115 if (threshold == thresholds.end())
116 {
117 bound_map_t bounds;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800118 bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800119 thresholds.emplace(type, bounds);
120 }
121 else
122 {
123 threshold->second.emplace(bound, value.value);
124 }
125 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800126 ThresholdIntf::value(thresholds, true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800127}
128
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800129bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
130 double value)
131{
132 switch (bound)
133 {
134 case ThresholdIntf::Bound::Lower:
135 {
136 return (value < thresholdValue);
137 }
138 case ThresholdIntf::Bound::Upper:
139 {
140 return (value > thresholdValue);
141 }
142 default:
143 {
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600144 error("Invalid threshold bound {BOUND}", "BOUND", bound);
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800145 return false;
146 }
147 }
148}
149
Patrick Williams658efd52024-03-04 12:53:52 -0600150void HealthMetric::checkThreshold(Type type, Bound bound, MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800151{
152 auto threshold = std::make_tuple(type, bound);
153 auto thresholds = ThresholdIntf::value();
154
155 if (thresholds.contains(type) && thresholds[type].contains(bound))
156 {
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800157 auto tConfig = config.thresholds.at(threshold);
158 auto thresholdValue = tConfig.value / 100 * value.total;
159 thresholds[type][bound] = thresholdValue;
160 ThresholdIntf::value(thresholds);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800161 auto assertions = ThresholdIntf::asserted();
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800162 if (didThresholdViolate(bound, thresholdValue, value.current))
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800163 {
164 if (!assertions.contains(threshold))
165 {
166 assertions.insert(threshold);
167 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800168 ThresholdIntf::assertionChanged(type, bound, true,
169 value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800170 if (tConfig.log)
171 {
172 error(
173 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600174 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800175 startUnit(bus, tConfig.target);
176 }
177 }
178 return;
179 }
180 else if (assertions.contains(threshold))
181 {
182 assertions.erase(threshold);
183 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800184 ThresholdIntf::assertionChanged(type, bound, false, value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800185 if (config.thresholds.find(threshold)->second.log)
186 {
187 info(
188 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
Patrick Williams67b8ebe2024-02-23 20:40:52 -0600189 "METRIC", config.name, "TYPE", type);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800190 }
191 }
192 }
193}
194
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800195void HealthMetric::checkThresholds(MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800196{
197 if (!ThresholdIntf::value().empty())
198 {
Patrick Williams658efd52024-03-04 12:53:52 -0600199 for (auto type : {Type::HardShutdown, Type::SoftShutdown,
200 Type::PerformanceLoss, Type::Critical, Type::Warning})
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800201 {
Patrick Williams658efd52024-03-04 12:53:52 -0600202 checkThreshold(type, Bound::Lower, value);
203 checkThreshold(type, Bound::Upper, value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800204 }
205 }
206}
207
208void HealthMetric::update(MValue value)
209{
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800210 ValueIntf::value(value.current);
211
212 // Maintain window size for threshold calculation
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800213 if (history.size() >= config.windowSize)
214 {
215 history.pop_front();
216 }
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800217 history.push_back(value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800218
219 if (history.size() < config.windowSize)
220 {
221 // Wait for the metric to have enough samples to calculate average
Patrick Williams0f54d7a2024-02-22 12:39:46 -0600222 debug("Not enough samples to calculate average");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800223 return;
224 }
225
226 double average = (std::accumulate(history.begin(), history.end(), 0.0)) /
227 history.size();
Jagpal Singh Gill8fd4df22024-03-01 15:40:26 -0800228 value.current = average;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800229 checkThresholds(value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800230}
231
232void HealthMetric::create(const paths_t& bmcPaths)
233{
234 info("Create Health Metric: {METRIC}", "METRIC", config.name);
235 initProperties();
236
237 std::vector<association_t> associations;
238 static constexpr auto forwardAssociation = "measuring";
239 static constexpr auto reverseAssociation = "measured_by";
240 for (const auto& bmcPath : bmcPaths)
241 {
242 /*
243 * This metric is "measuring" the health for the BMC at bmcPath
244 * The BMC at bmcPath is "measured_by" this metric.
245 */
246 associations.push_back(
247 {forwardAssociation, reverseAssociation, bmcPath});
248 }
249 AssociationIntf::associations(associations);
250}
251
252} // namespace phosphor::health::metric