blob: 3594c2997c5ddc444f16d38f677acb78c953fb5c [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
 */
5
6#include "NvidiaGpuSensor.hpp"
7
8#include "Thresholds.hpp"
9#include "Utils.hpp"
10#include "sensor.hpp"
11
12#include <bits/basic_string.h>
13
14#include <boost/asio/io_context.hpp>
15#include <boost/container/flat_map.hpp>
16#include <phosphor-logging/lg2.hpp>
17#include <sdbusplus/asio/connection.hpp>
18#include <sdbusplus/asio/object_server.hpp>
19#include <sdbusplus/message.hpp>
20#include <sdbusplus/message/native_types.hpp>
21
22#include <algorithm>
23#include <chrono>
24#include <cstddef>
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <utility>
29#include <variant>
30#include <vector>
31
32using namespace std::literals;
33
// Upper and lower reading bounds handed to the Sensor base class by the
// GpuTempSensor constructor.
static constexpr double gpuTempSensorMaxReading{127.0};
static constexpr double gpuTempSensorMinReading{-128.0};
36
37GpuTempSensor::GpuTempSensor(
38 std::shared_ptr<sdbusplus::asio::connection>& conn,
39 boost::asio::io_context& io, const std::string& name,
40 const std::string& sensorConfiguration,
41 sdbusplus::asio::object_server& objectServer,
42 std::vector<thresholds::Threshold>&& thresholdData) :
43 Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
44 "temperature", false, true, gpuTempSensorMaxReading,
45 gpuTempSensorMinReading, conn),
46 waitTimer(io, std::chrono::steady_clock::duration(0)), conn(conn),
47 objectServer(objectServer)
48{
49 std::string dbusPath =
50 sensorPathPrefix + "temperature/"s + escapeName(name);
51
52 sensorInterface = objectServer.add_interface(
53 dbusPath, "xyz.openbmc_project.Sensor.Value");
54
55 for (const auto& threshold : thresholds)
56 {
57 std::string interface = thresholds::getInterface(threshold.level);
58 thresholdInterfaces[static_cast<size_t>(threshold.level)] =
59 objectServer.add_interface(dbusPath, interface);
60 }
61
62 association = objectServer.add_interface(dbusPath, association::interface);
63
64 discoverGpus();
65}
66
67GpuTempSensor::~GpuTempSensor()
68{
69 waitTimer.cancel();
70 for (const auto& iface : thresholdInterfaces)
71 {
72 objectServer.remove_interface(iface);
73 }
74 objectServer.remove_interface(association);
75 objectServer.remove_interface(sensorInterface);
76}
77
78void GpuTempSensor::checkThresholds()
79{
80 thresholds::checkThresholds(this);
81}
82
83void GpuTempSensor::queryEndpoints(const boost::system::error_code& ec,
84 const GetSubTreeType& ret)
85{
86 if (ec)
87 {
88 lg2::error("Error querying endoints :{ERROR}", "ERROR", ec.message());
89 return;
90 }
91
92 if (ret.empty())
93 {
94 return;
95 }
96
97 for (const auto& [objPath, services] : ret)
98 {
99 for (const auto& [service, ifaces] : services)
100 {
101 for (const auto& iface : ifaces)
102 {
103 if (iface == "xyz.openbmc_project.MCTP.Endpoint")
104 {
105 conn->async_method_call(
106 [this](const boost::system::error_code& ec,
107 const SensorBaseConfigMap& configs) {
108 this->processEndpoint(ec, configs);
109 },
110 service, objPath, "org.freedesktop.DBus.Properties",
111 "GetAll", iface);
112 }
113 }
114 }
115 }
116}
117
118void GpuTempSensor::processEndpoint(const boost::system::error_code& ec,
119 const SensorBaseConfigMap& endpoint)
120{
121 if (ec)
122 {
123 lg2::error("Error processing MCTP endpoint: {ERROR}", "ERROR",
124 ec.message());
125 return;
126 }
127
128 [[maybe_unused]] uint8_t eid{};
129 std::vector<uint8_t> mctpTypes{};
130
131 auto hasEid = endpoint.find("EID");
132 if (hasEid != endpoint.end())
133 {
134 const auto* eidPtr = std::get_if<uint8_t>(&hasEid->second);
135 if (eidPtr != nullptr)
136 {
137 eid = *eidPtr;
138 }
139 else
140 {
141 lg2::error(
142 "Error processing MCTP endpoint: Property EID does not have valid type.");
143 return;
144 }
145 }
146 else
147 {
148 lg2::error(
149 "Error processing MCTP endpoint: Property EID not found in the configuration.");
150 return;
151 }
152
153 auto hasMctpTypes = endpoint.find("SupportedMessageTypes");
154 if (hasMctpTypes != endpoint.end())
155 {
156 const auto* mctpTypePtr =
157 std::get_if<std::vector<uint8_t>>(&hasMctpTypes->second);
158 if (mctpTypePtr != nullptr)
159 {
160 mctpTypes = *mctpTypePtr;
161 }
162 else
163 {
164 lg2::error(
165 "Error processing MCTP endpoint: Property SupportedMessageTypes does not have valid type.");
166 return;
167 }
168 }
169 else
170 {
171 lg2::error(
172 "Error processing MCTP endpoint: Property SupportedMessageTypes not found in the configuration.");
173 return;
174 }
175
176 // if the OCP MCTP VDM Message type (0x7E) is found in mctpTypes
177 // process the endpoint further.
178 (void)this;
179}
180
181void GpuTempSensor::discoverGpus()
182{
183 std::string searchPath{"/au/com/codeconstruct/"};
184 std::vector<std::string> ifaceList{{"xyz.openbmc_project.MCTP.Endpoint"}};
185
186 conn->async_method_call(
187 [this](const boost::system::error_code& ec, const GetSubTreeType& ret) {
188 queryEndpoints(ec, ret);
189 },
190 "xyz.openbmc_project.ObjectMapper",
191 "/xyz/openbmc_project/object_mapper",
192 "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath, 0,
193 ifaceList);
194}
195
196void processSensorConfigs(
197 boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
198 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
199 sensors,
200 std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
201 const ManagedObjectType& resp)
202{
203 for (const auto& [path, interfaces] : resp)
204 {
205 for (const auto& [intf, cfg] : interfaces)
206 {
207 if (intf != configInterfaceName(sensorType))
208 {
209 continue;
210 }
211
212 std::string name = loadVariant<std::string>(cfg, "Name");
213
214 sensors[name] = std::make_shared<GpuTempSensor>(
215 dbusConnection, io, name, path, objectServer,
216 std::vector<thresholds::Threshold>{});
217
218 lg2::info(
219 "Added GPU Temperature Sensor {NAME} with chassis path: {PATH}.",
220 "NAME", name, "PATH", path);
221 }
222 }
223}
224
225void createSensors(
226 boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
227 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
228 sensors,
229 std::shared_ptr<sdbusplus::asio::connection>& dbusConnection)
230{
231 if (!dbusConnection)
232 {
233 lg2::error("Connection not created");
234 return;
235 }
236 dbusConnection->async_method_call(
237 [&sensors, &dbusConnection, &io,
238 &objectServer](const boost::system::error_code& ec,
239 const ManagedObjectType& resp) {
240 if (ec)
241 {
242 lg2::error("Error contacting entity manager");
243 return;
244 }
245
246 processSensorConfigs(io, objectServer, sensors, dbusConnection,
247 resp);
248 },
249 entityManagerName, "/xyz/openbmc_project/inventory",
250 "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
251}
252
253void interfaceRemoved(
254 sdbusplus::message_t& message,
255 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
256 sensors)
257{
258 if (message.is_method_error())
259 {
260 lg2::error("interfacesRemoved callback method error");
261 return;
262 }
263
264 sdbusplus::message::object_path removedPath;
265 std::vector<std::string> interfaces;
266
267 message.read(removedPath, interfaces);
268
269 // If the xyz.openbmc_project.Confguration.X interface was removed
270 // for one or more sensors, delete those sensor objects.
271 auto sensorIt = sensors.begin();
272 while (sensorIt != sensors.end())
273 {
274 if ((sensorIt->second->configurationPath == removedPath) &&
275 (std::find(interfaces.begin(), interfaces.end(),
276 configInterfaceName(sensorType)) != interfaces.end()))
277 {
278 sensorIt = sensors.erase(sensorIt);
279 }
280 else
281 {
282 sensorIt++;
283 }
284 }
285}