blob: 4c391cd7c95df8175e7a43d2dea1c90364ca304c [file] [log] [blame]
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08001#include "health_metric.hpp"
2
3#include <phosphor-logging/lg2.hpp>
4
5#include <numeric>
6#include <unordered_map>
7
8PHOSPHOR_LOG2_USING;
9
10namespace phosphor::health::metric
11{
12
13using association_t = std::tuple<std::string, std::string, std::string>;
14
15auto HealthMetric::getPath(SubType subType) -> std::string
16{
17 std::string path;
18 switch (subType)
19 {
20 case SubType::cpuTotal:
21 {
22 return std::string(BmcPath) + "/" + PathIntf::total_cpu;
23 }
24 case SubType::cpuKernel:
25 {
26 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
27 }
28 case SubType::cpuUser:
29 {
30 return std::string(BmcPath) + "/" + PathIntf::user_cpu;
31 }
32 case SubType::memoryAvailable:
33 {
34 return std::string(BmcPath) + "/" + PathIntf::available_memory;
35 }
36 case SubType::memoryBufferedAndCached:
37 {
38 return std::string(BmcPath) + "/" +
39 PathIntf::buffered_and_cached_memory;
40 }
41 case SubType::memoryFree:
42 {
43 return std::string(BmcPath) + "/" + PathIntf::free_memory;
44 }
45 case SubType::memoryShared:
46 {
47 return std::string(BmcPath) + "/" + PathIntf::shared_memory;
48 }
49 case SubType::memoryTotal:
50 {
51 return std::string(BmcPath) + "/" + PathIntf::total_memory;
52 }
53 case SubType::storageReadWrite:
54 {
55 return std::string(BmcPath) + "/" + PathIntf::read_write_storage;
56 }
Jagpal Singh Gilldfe839f2024-02-16 09:54:02 -080057 case SubType::storageTmp:
58 {
59 return std::string(BmcPath) + "/" + PathIntf::tmp_storage;
60 }
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080061 default:
62 {
63 error("Invalid Memory metric {TYPE}", "TYPE",
64 std::to_underlying(subType));
65 return "";
66 }
67 }
68}
69
70void HealthMetric::initProperties()
71{
72 switch (config.subType)
73 {
74 case SubType::cpuTotal:
75 case SubType::cpuKernel:
76 case SubType::cpuUser:
77 {
78 ValueIntf::unit(ValueIntf::Unit::Percent, true);
79 ValueIntf::minValue(0.0, true);
80 ValueIntf::maxValue(100.0, true);
81 break;
82 }
83 case SubType::memoryAvailable:
84 case SubType::memoryBufferedAndCached:
85 case SubType::memoryFree:
86 case SubType::memoryShared:
87 case SubType::memoryTotal:
88 case SubType::storageReadWrite:
89 default:
90 {
91 ValueIntf::unit(ValueIntf::Unit::Bytes, true);
92 ValueIntf::minValue(0.0, true);
93 }
94 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -080095 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080096
97 using bound_map_t = std::map<ThresholdIntf::Bound, double>;
98 std::map<ThresholdIntf::Type, bound_map_t> thresholds;
99 for (const auto& [key, value] : config.thresholds)
100 {
101 auto type = std::get<ThresholdIntf::Type>(key);
102 auto bound = std::get<ThresholdIntf::Bound>(key);
103 auto threshold = thresholds.find(type);
104 if (threshold == thresholds.end())
105 {
106 bound_map_t bounds;
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800107 bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800108 thresholds.emplace(type, bounds);
109 }
110 else
111 {
112 threshold->second.emplace(bound, value.value);
113 }
114 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800115 ThresholdIntf::value(thresholds, true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800116}
117
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800118bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
119 double value)
120{
121 switch (bound)
122 {
123 case ThresholdIntf::Bound::Lower:
124 {
125 return (value < thresholdValue);
126 }
127 case ThresholdIntf::Bound::Upper:
128 {
129 return (value > thresholdValue);
130 }
131 default:
132 {
133 error("Invalid threshold bound {BOUND}", "BOUND",
134 std::to_underlying(bound));
135 return false;
136 }
137 }
138}
139
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800140void HealthMetric::checkThreshold(ThresholdIntf::Type type,
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800141 ThresholdIntf::Bound bound, MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800142{
143 auto threshold = std::make_tuple(type, bound);
144 auto thresholds = ThresholdIntf::value();
145
146 if (thresholds.contains(type) && thresholds[type].contains(bound))
147 {
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800148 auto tConfig = config.thresholds.at(threshold);
149 auto thresholdValue = tConfig.value / 100 * value.total;
150 thresholds[type][bound] = thresholdValue;
151 ThresholdIntf::value(thresholds);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800152 auto assertions = ThresholdIntf::asserted();
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800153 if (didThresholdViolate(bound, thresholdValue, value.current))
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800154 {
155 if (!assertions.contains(threshold))
156 {
157 assertions.insert(threshold);
158 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800159 ThresholdIntf::assertionChanged(type, bound, true,
160 value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800161 if (tConfig.log)
162 {
163 error(
164 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
165 "METRIC", config.name, "TYPE",
166 sdbusplus::message::convert_to_string(type));
167 startUnit(bus, tConfig.target);
168 }
169 }
170 return;
171 }
172 else if (assertions.contains(threshold))
173 {
174 assertions.erase(threshold);
175 ThresholdIntf::asserted(assertions);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800176 ThresholdIntf::assertionChanged(type, bound, false, value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800177 if (config.thresholds.find(threshold)->second.log)
178 {
179 info(
180 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
181 "METRIC", config.name, "TYPE",
182 sdbusplus::message::convert_to_string(type));
183 }
184 }
185 }
186}
187
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800188void HealthMetric::checkThresholds(MValue value)
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800189{
190 if (!ThresholdIntf::value().empty())
191 {
192 for (auto type :
193 {ThresholdIntf::Type::HardShutdown,
194 ThresholdIntf::Type::SoftShutdown,
195 ThresholdIntf::Type::PerformanceLoss,
196 ThresholdIntf::Type::Critical, ThresholdIntf::Type::Warning})
197 {
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800198 checkThreshold(type, ThresholdIntf::Bound::Lower, value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800199 checkThreshold(type, ThresholdIntf::Bound::Upper, value);
200 }
201 }
202}
203
204void HealthMetric::update(MValue value)
205{
206 // Maintain window size for metric
207 if (history.size() >= config.windowSize)
208 {
209 history.pop_front();
210 }
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800211 history.push_back(value.current);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800212
213 if (history.size() < config.windowSize)
214 {
215 // Wait for the metric to have enough samples to calculate average
Patrick Williams0f54d7a2024-02-22 12:39:46 -0600216 debug("Not enough samples to calculate average");
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800217 return;
218 }
219
220 double average = (std::accumulate(history.begin(), history.end(), 0.0)) /
221 history.size();
222 ValueIntf::value(average);
Jagpal Singh Gill6a3884a2024-02-24 18:08:23 -0800223 checkThresholds(value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800224}
225
226void HealthMetric::create(const paths_t& bmcPaths)
227{
228 info("Create Health Metric: {METRIC}", "METRIC", config.name);
229 initProperties();
230
231 std::vector<association_t> associations;
232 static constexpr auto forwardAssociation = "measuring";
233 static constexpr auto reverseAssociation = "measured_by";
234 for (const auto& bmcPath : bmcPaths)
235 {
236 /*
237 * This metric is "measuring" the health for the BMC at bmcPath
238 * The BMC at bmcPath is "measured_by" this metric.
239 */
240 associations.push_back(
241 {forwardAssociation, reverseAssociation, bmcPath});
242 }
243 AssociationIntf::associations(associations);
244}
245
246} // namespace phosphor::health::metric