blob: ed81339913e2e0439f22a91aaa80ff3e4f1ef755 [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
 */
5
6#include "GpuSensor.hpp"
7
8#include "Thresholds.hpp"
9#include "Utils.hpp"
10#include "sensor.hpp"
11
12#include <bits/basic_string.h>
13
14#include <boost/asio/io_context.hpp>
15#include <boost/container/flat_map.hpp>
16#include <phosphor-logging/lg2.hpp>
17#include <sdbusplus/asio/connection.hpp>
18#include <sdbusplus/asio/object_server.hpp>
19#include <sdbusplus/message.hpp>
20#include <sdbusplus/message/native_types.hpp>
21
22#include <algorithm>
23#include <chrono>
24#include <cstddef>
25#include <cstdint>
26#include <map>
27#include <memory>
28#include <string>
29#include <utility>
30#include <variant>
31#include <vector>
32
33using namespace std::literals;
34
// Reading range of a GPU temperature sensor. Both limits are handed to the
// Sensor base-class constructor by GpuTempSensor.
static constexpr double gpuTempSensorMinReading{-128.0};
static constexpr double gpuTempSensorMaxReading{127.0};
37
38GpuTempSensor::GpuTempSensor(
39 std::shared_ptr<sdbusplus::asio::connection>& conn,
40 boost::asio::io_context& io, const std::string& name,
41 const std::string& sensorConfiguration,
42 sdbusplus::asio::object_server& objectServer,
43 std::vector<thresholds::Threshold>&& thresholdData) :
44 Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
45 "temperature", false, true, gpuTempSensorMaxReading,
46 gpuTempSensorMinReading, conn),
47 waitTimer(io, std::chrono::steady_clock::duration(0)), conn(conn),
48 objectServer(objectServer)
49{
50 std::string dbusPath =
51 sensorPathPrefix + "temperature/"s + escapeName(name);
52
53 sensorInterface = objectServer.add_interface(
54 dbusPath, "xyz.openbmc_project.Sensor.Value");
55
56 for (const auto& threshold : thresholds)
57 {
58 std::string interface = thresholds::getInterface(threshold.level);
59 thresholdInterfaces[static_cast<size_t>(threshold.level)] =
60 objectServer.add_interface(dbusPath, interface);
61 }
62
63 association = objectServer.add_interface(dbusPath, association::interface);
64
65 init();
66}
67
68GpuTempSensor::~GpuTempSensor()
69{
70 waitTimer.cancel();
71 for (const auto& iface : thresholdInterfaces)
72 {
73 objectServer.remove_interface(iface);
74 }
75 objectServer.remove_interface(sensorInterface);
76 objectServer.remove_interface(association);
77}
78
79void GpuTempSensor::checkThresholds()
80{
81 thresholds::checkThresholds(this);
82}
83
84void GpuTempSensor::init()
85{
86 discoverGpus();
87}
88
89void GpuTempSensor::processMctpEndpoints(const boost::system::error_code& ec,
90 const getSubTreeRet& ret)
91{
92 if (ec)
93 {
94 lg2::error("GpuTempSensor::discoverGpus(): Error:{ERROR}", "ERROR",
95 ec.message());
96 return;
97 }
98
99 if (ret.empty())
100 {
101 return;
102 }
103
104 for (const auto& [objPath, services] : ret)
105 {
106 for (const auto& [service, ifaces] : services)
107 {
108 for (const auto& iface : ifaces)
109 {
110 if (iface == "xyz.openbmc_project.MCTP.Endpoint")
111 {
112 conn->async_method_call(
113 [this](const boost::system::error_code& ec,
114 const GpuSensorConfigMap& configs) {
115 this->processEndpointConfigs(ec, configs);
116 },
117 service, objPath, "org.freedesktop.DBus.Properties",
118 "GetAll", iface);
119 }
120 }
121 }
122 }
123}
124
125void GpuTempSensor::processEndpointConfigs(const boost::system::error_code& ec,
126 const GpuSensorConfigMap& configs)
127{
128 if (ec)
129 {
130 lg2::error("GpuTempSensor::discoverGpus(): Error:{ERROR}", "ERROR",
131 ec.message());
132 return;
133 }
134
135 [[maybe_unused]] uint8_t eid{};
136 std::vector<uint8_t> mctpTypes{};
137
138 auto hasEid = configs.find("EID");
139 if (hasEid != configs.end())
140 {
141 const auto* eidPtr = std::get_if<uint8_t>(&hasEid->second);
142 if (eidPtr != nullptr)
143 {
144 eid = *eidPtr;
145 }
146 else
147 {
148 lg2::error(
149 "GpuTempSensor::discoverGpus(): Property EID does not have valid type.");
150 return;
151 }
152 }
153 else
154 {
155 lg2::error(
156 "GpuTempSensor::discoverGpus(): Property EID not found in the configuration.");
157 return;
158 }
159
160 auto hasMctpTypes = configs.find("SupportedMessageTypes");
161 if (hasMctpTypes != configs.end())
162 {
163 const auto* mctpTypePtr =
164 std::get_if<std::vector<uint8_t>>(&hasMctpTypes->second);
165 if (mctpTypePtr != nullptr)
166 {
167 mctpTypes = *mctpTypePtr;
168 }
169 else
170 {
171 lg2::error(
172 "GpuTempSensor::discoverGpus(): Property SupportedMessageTypes does not have valid type.");
173 return;
174 }
175 }
176 else
177 {
178 lg2::error(
179 "GpuTempSensor::discoverGpus(): Property SupportedMessageTypes not found in the configuration.");
180 return;
181 }
182
183 // if the OCP MCTP VDM Message type (0x7E) is found in mctpTypes
184 // process the endpoint further.
185 (void)this;
186}
187
188void GpuTempSensor::discoverGpus()
189{
190 std::string searchPath{"/au/com/codeconstruct/"};
191 std::vector<std::string> ifaceList{{"xyz.openbmc_project.MCTP.Endpoint"}};
192
193 conn->async_method_call(
194 [this](const boost::system::error_code& ec, const getSubTreeRet& ret) {
195 processMctpEndpoints(ec, ret);
196 },
197 "xyz.openbmc_project.ObjectMapper",
198 "/xyz/openbmc_project/object_mapper",
199 "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath, 0,
200 ifaceList);
201}
202
203void processSensorConfigs(
204 boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
205 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
206 sensors,
207 std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
208 const ManagedObjectType& resp)
209{
210 for (const auto& [path, interfaces] : resp)
211 {
212 for (const auto& [intf, cfg] : interfaces)
213 {
214 if (intf != configInterfaceName(sensorType))
215 {
216 continue;
217 }
218
219 std::string name = loadVariant<std::string>(cfg, "Name");
220
221 sensors[name] = std::make_shared<GpuTempSensor>(
222 dbusConnection, io, name, path, objectServer,
223 std::vector<thresholds::Threshold>{});
224
225 lg2::info(
226 "Added GPU Temperature Sensor {NAME} with chassis path: {PATH}.",
227 "NAME", name, "PATH", path);
228 }
229 }
230}
231
232void createSensors(
233 boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
234 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
235 sensors,
236 std::shared_ptr<sdbusplus::asio::connection>& dbusConnection)
237{
238 if (!dbusConnection)
239 {
240 lg2::error("Connection not created");
241 return;
242 }
243 dbusConnection->async_method_call(
244 [&sensors, &dbusConnection, &io,
245 &objectServer](const boost::system::error_code& ec,
246 const ManagedObjectType& resp) {
247 if (ec)
248 {
249 lg2::error("Error contacting entity manager");
250 return;
251 }
252
253 processSensorConfigs(io, objectServer, sensors, dbusConnection,
254 resp);
255 },
256 entityManagerName, "/xyz/openbmc_project/inventory",
257 "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
258}
259
260void interfaceRemoved(
261 sdbusplus::message_t& message,
262 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
263 sensors)
264{
265 if (message.is_method_error())
266 {
267 lg2::error("interfacesRemoved callback method error");
268 return;
269 }
270
271 sdbusplus::message::object_path removedPath;
272 std::vector<std::string> interfaces;
273
274 message.read(removedPath, interfaces);
275
276 // If the xyz.openbmc_project.Confguration.X interface was removed
277 // for one or more sensors, delete those sensor objects.
278 auto sensorIt = sensors.begin();
279 while (sensorIt != sensors.end())
280 {
281 if ((sensorIt->second->configurationPath == removedPath) &&
282 (std::find(interfaces.begin(), interfaces.end(),
283 configInterfaceName(sensorType)) != interfaces.end()))
284 {
285 sensorIt = sensors.erase(sensorIt);
286 }
287 else
288 {
289 sensorIt++;
290 }
291 }
292}