blob: abe77afac4ea5094c78ea81adb5203fb0855b023 [file] [log] [blame]
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -08001#include "health_metric.hpp"
2
3#include <phosphor-logging/lg2.hpp>
4
5#include <numeric>
6#include <unordered_map>
7
8PHOSPHOR_LOG2_USING;
9
10namespace phosphor::health::metric
11{
12
// One association entry as filled in by HealthMetric::create():
// {forward association name, reverse association name, associated path}.
using association_t = std::tuple<std::string, std::string, std::string>;
14
15auto HealthMetric::getPath(SubType subType) -> std::string
16{
17 std::string path;
18 switch (subType)
19 {
20 case SubType::cpuTotal:
21 {
22 return std::string(BmcPath) + "/" + PathIntf::total_cpu;
23 }
24 case SubType::cpuKernel:
25 {
26 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
27 }
28 case SubType::cpuUser:
29 {
30 return std::string(BmcPath) + "/" + PathIntf::user_cpu;
31 }
32 case SubType::memoryAvailable:
33 {
34 return std::string(BmcPath) + "/" + PathIntf::available_memory;
35 }
36 case SubType::memoryBufferedAndCached:
37 {
38 return std::string(BmcPath) + "/" +
39 PathIntf::buffered_and_cached_memory;
40 }
41 case SubType::memoryFree:
42 {
43 return std::string(BmcPath) + "/" + PathIntf::free_memory;
44 }
45 case SubType::memoryShared:
46 {
47 return std::string(BmcPath) + "/" + PathIntf::shared_memory;
48 }
49 case SubType::memoryTotal:
50 {
51 return std::string(BmcPath) + "/" + PathIntf::total_memory;
52 }
53 case SubType::storageReadWrite:
54 {
55 return std::string(BmcPath) + "/" + PathIntf::read_write_storage;
56 }
Jagpal Singh Gilldfe839f2024-02-16 09:54:02 -080057 case SubType::storageTmp:
58 {
59 return std::string(BmcPath) + "/" + PathIntf::tmp_storage;
60 }
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080061 default:
62 {
63 error("Invalid Memory metric {TYPE}", "TYPE",
64 std::to_underlying(subType));
65 return "";
66 }
67 }
68}
69
70void HealthMetric::initProperties()
71{
72 switch (config.subType)
73 {
74 case SubType::cpuTotal:
75 case SubType::cpuKernel:
76 case SubType::cpuUser:
77 {
78 ValueIntf::unit(ValueIntf::Unit::Percent, true);
79 ValueIntf::minValue(0.0, true);
80 ValueIntf::maxValue(100.0, true);
81 break;
82 }
83 case SubType::memoryAvailable:
84 case SubType::memoryBufferedAndCached:
85 case SubType::memoryFree:
86 case SubType::memoryShared:
87 case SubType::memoryTotal:
88 case SubType::storageReadWrite:
89 default:
90 {
91 ValueIntf::unit(ValueIntf::Unit::Bytes, true);
92 ValueIntf::minValue(0.0, true);
93 }
94 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -080095 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -080096
97 using bound_map_t = std::map<ThresholdIntf::Bound, double>;
98 std::map<ThresholdIntf::Type, bound_map_t> thresholds;
99 for (const auto& [key, value] : config.thresholds)
100 {
101 auto type = std::get<ThresholdIntf::Type>(key);
102 auto bound = std::get<ThresholdIntf::Bound>(key);
103 auto threshold = thresholds.find(type);
104 if (threshold == thresholds.end())
105 {
106 bound_map_t bounds;
107 bounds.emplace(bound, value.value);
108 thresholds.emplace(type, bounds);
109 }
110 else
111 {
112 threshold->second.emplace(bound, value.value);
113 }
114 }
Jagpal Singh Gillc5b18bc2024-02-09 15:58:12 -0800115 ThresholdIntf::value(thresholds, true);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800116}
117
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800118bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
119 double value)
120{
121 switch (bound)
122 {
123 case ThresholdIntf::Bound::Lower:
124 {
125 return (value < thresholdValue);
126 }
127 case ThresholdIntf::Bound::Upper:
128 {
129 return (value > thresholdValue);
130 }
131 default:
132 {
133 error("Invalid threshold bound {BOUND}", "BOUND",
134 std::to_underlying(bound));
135 return false;
136 }
137 }
138}
139
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800140void HealthMetric::checkThreshold(ThresholdIntf::Type type,
141 ThresholdIntf::Bound bound, double value)
142{
143 auto threshold = std::make_tuple(type, bound);
144 auto thresholds = ThresholdIntf::value();
145
146 if (thresholds.contains(type) && thresholds[type].contains(bound))
147 {
148 auto thresholdValue = thresholds[type][bound];
149 auto assertions = ThresholdIntf::asserted();
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800150 if (didThresholdViolate(bound, thresholdValue, value))
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800151 {
152 if (!assertions.contains(threshold))
153 {
154 assertions.insert(threshold);
155 ThresholdIntf::asserted(assertions);
156 ThresholdIntf::assertionChanged(type, bound, true, value);
157 auto tConfig = config.thresholds.at(threshold);
158 if (tConfig.log)
159 {
160 error(
161 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
162 "METRIC", config.name, "TYPE",
163 sdbusplus::message::convert_to_string(type));
164 startUnit(bus, tConfig.target);
165 }
166 }
167 return;
168 }
169 else if (assertions.contains(threshold))
170 {
171 assertions.erase(threshold);
172 ThresholdIntf::asserted(assertions);
173 ThresholdIntf::assertionChanged(type, bound, false, value);
174 if (config.thresholds.find(threshold)->second.log)
175 {
176 info(
177 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
178 "METRIC", config.name, "TYPE",
179 sdbusplus::message::convert_to_string(type));
180 }
181 }
182 }
183}
184
185void HealthMetric::checkThresholds(double value)
186{
187 if (!ThresholdIntf::value().empty())
188 {
189 for (auto type :
190 {ThresholdIntf::Type::HardShutdown,
191 ThresholdIntf::Type::SoftShutdown,
192 ThresholdIntf::Type::PerformanceLoss,
193 ThresholdIntf::Type::Critical, ThresholdIntf::Type::Warning})
194 {
Jagpal Singh Gill55fb0c92024-02-22 18:07:13 -0800195 checkThreshold(type, ThresholdIntf::Bound::Lower, value);
Jagpal Singh Gill23f091e2023-12-10 15:23:19 -0800196 checkThreshold(type, ThresholdIntf::Bound::Upper, value);
197 }
198 }
199}
200
201void HealthMetric::update(MValue value)
202{
203 // Maintain window size for metric
204 if (history.size() >= config.windowSize)
205 {
206 history.pop_front();
207 }
208 history.push_back(value.user);
209
210 if (history.size() < config.windowSize)
211 {
212 // Wait for the metric to have enough samples to calculate average
213 info("Not enough samples to calculate average");
214 return;
215 }
216
217 double average = (std::accumulate(history.begin(), history.end(), 0.0)) /
218 history.size();
219 ValueIntf::value(average);
220 checkThresholds(value.monitor);
221}
222
223void HealthMetric::create(const paths_t& bmcPaths)
224{
225 info("Create Health Metric: {METRIC}", "METRIC", config.name);
226 initProperties();
227
228 std::vector<association_t> associations;
229 static constexpr auto forwardAssociation = "measuring";
230 static constexpr auto reverseAssociation = "measured_by";
231 for (const auto& bmcPath : bmcPaths)
232 {
233 /*
234 * This metric is "measuring" the health for the BMC at bmcPath
235 * The BMC at bmcPath is "measured_by" this metric.
236 */
237 associations.push_back(
238 {forwardAssociation, reverseAssociation, bmcPath});
239 }
240 AssociationIntf::associations(associations);
241}
242
243} // namespace phosphor::health::metric