| /* |
| * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & |
| * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 |
| */ |
| |
| #include "NvidiaGpuSensor.hpp" |
| |
| #include "Thresholds.hpp" |
| #include "Utils.hpp" |
| #include "sensor.hpp" |
| |
| #include <bits/basic_string.h> |
| |
| #include <boost/asio/io_context.hpp> |
| #include <boost/container/flat_map.hpp> |
| #include <phosphor-logging/lg2.hpp> |
| #include <sdbusplus/asio/connection.hpp> |
| #include <sdbusplus/asio/object_server.hpp> |
| #include <sdbusplus/message.hpp> |
| #include <sdbusplus/message/native_types.hpp> |
| |
| #include <algorithm> |
| #include <chrono> |
| #include <cstddef> |
| #include <cstdint> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <variant> |
| #include <vector> |
| |
| using namespace std::literals; |
| |
| static constexpr double gpuTempSensorMaxReading = 127; |
| static constexpr double gpuTempSensorMinReading = -128; |
| |
| GpuTempSensor::GpuTempSensor( |
| std::shared_ptr<sdbusplus::asio::connection>& conn, |
| boost::asio::io_context& io, const std::string& name, |
| const std::string& sensorConfiguration, |
| sdbusplus::asio::object_server& objectServer, |
| std::vector<thresholds::Threshold>&& thresholdData) : |
| Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration, |
| "temperature", false, true, gpuTempSensorMaxReading, |
| gpuTempSensorMinReading, conn), |
| waitTimer(io, std::chrono::steady_clock::duration(0)), conn(conn), |
| objectServer(objectServer) |
| { |
| std::string dbusPath = |
| sensorPathPrefix + "temperature/"s + escapeName(name); |
| |
| sensorInterface = objectServer.add_interface( |
| dbusPath, "xyz.openbmc_project.Sensor.Value"); |
| |
| for (const auto& threshold : thresholds) |
| { |
| std::string interface = thresholds::getInterface(threshold.level); |
| thresholdInterfaces[static_cast<size_t>(threshold.level)] = |
| objectServer.add_interface(dbusPath, interface); |
| } |
| |
| association = objectServer.add_interface(dbusPath, association::interface); |
| |
| discoverGpus(); |
| } |
| |
| GpuTempSensor::~GpuTempSensor() |
| { |
| waitTimer.cancel(); |
| for (const auto& iface : thresholdInterfaces) |
| { |
| objectServer.remove_interface(iface); |
| } |
| objectServer.remove_interface(association); |
| objectServer.remove_interface(sensorInterface); |
| } |
| |
| void GpuTempSensor::checkThresholds() |
| { |
| thresholds::checkThresholds(this); |
| } |
| |
| void GpuTempSensor::queryEndpoints(const boost::system::error_code& ec, |
| const GetSubTreeType& ret) |
| { |
| if (ec) |
| { |
| lg2::error("Error querying endoints :{ERROR}", "ERROR", ec.message()); |
| return; |
| } |
| |
| if (ret.empty()) |
| { |
| return; |
| } |
| |
| for (const auto& [objPath, services] : ret) |
| { |
| for (const auto& [service, ifaces] : services) |
| { |
| for (const auto& iface : ifaces) |
| { |
| if (iface == "xyz.openbmc_project.MCTP.Endpoint") |
| { |
| conn->async_method_call( |
| [this](const boost::system::error_code& ec, |
| const SensorBaseConfigMap& configs) { |
| this->processEndpoint(ec, configs); |
| }, |
| service, objPath, "org.freedesktop.DBus.Properties", |
| "GetAll", iface); |
| } |
| } |
| } |
| } |
| } |
| |
| void GpuTempSensor::processEndpoint(const boost::system::error_code& ec, |
| const SensorBaseConfigMap& endpoint) |
| { |
| if (ec) |
| { |
| lg2::error("Error processing MCTP endpoint: {ERROR}", "ERROR", |
| ec.message()); |
| return; |
| } |
| |
| [[maybe_unused]] uint8_t eid{}; |
| std::vector<uint8_t> mctpTypes{}; |
| |
| auto hasEid = endpoint.find("EID"); |
| if (hasEid != endpoint.end()) |
| { |
| const auto* eidPtr = std::get_if<uint8_t>(&hasEid->second); |
| if (eidPtr != nullptr) |
| { |
| eid = *eidPtr; |
| } |
| else |
| { |
| lg2::error( |
| "Error processing MCTP endpoint: Property EID does not have valid type."); |
| return; |
| } |
| } |
| else |
| { |
| lg2::error( |
| "Error processing MCTP endpoint: Property EID not found in the configuration."); |
| return; |
| } |
| |
| auto hasMctpTypes = endpoint.find("SupportedMessageTypes"); |
| if (hasMctpTypes != endpoint.end()) |
| { |
| const auto* mctpTypePtr = |
| std::get_if<std::vector<uint8_t>>(&hasMctpTypes->second); |
| if (mctpTypePtr != nullptr) |
| { |
| mctpTypes = *mctpTypePtr; |
| } |
| else |
| { |
| lg2::error( |
| "Error processing MCTP endpoint: Property SupportedMessageTypes does not have valid type."); |
| return; |
| } |
| } |
| else |
| { |
| lg2::error( |
| "Error processing MCTP endpoint: Property SupportedMessageTypes not found in the configuration."); |
| return; |
| } |
| |
| // if the OCP MCTP VDM Message type (0x7E) is found in mctpTypes |
| // process the endpoint further. |
| (void)this; |
| } |
| |
| void GpuTempSensor::discoverGpus() |
| { |
| std::string searchPath{"/au/com/codeconstruct/"}; |
| std::vector<std::string> ifaceList{{"xyz.openbmc_project.MCTP.Endpoint"}}; |
| |
| conn->async_method_call( |
| [this](const boost::system::error_code& ec, const GetSubTreeType& ret) { |
| queryEndpoints(ec, ret); |
| }, |
| "xyz.openbmc_project.ObjectMapper", |
| "/xyz/openbmc_project/object_mapper", |
| "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath, 0, |
| ifaceList); |
| } |
| |
| void processSensorConfigs( |
| boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer, |
| boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>& |
| sensors, |
| std::shared_ptr<sdbusplus::asio::connection>& dbusConnection, |
| const ManagedObjectType& resp) |
| { |
| for (const auto& [path, interfaces] : resp) |
| { |
| for (const auto& [intf, cfg] : interfaces) |
| { |
| if (intf != configInterfaceName(sensorType)) |
| { |
| continue; |
| } |
| |
| std::string name = loadVariant<std::string>(cfg, "Name"); |
| |
| sensors[name] = std::make_shared<GpuTempSensor>( |
| dbusConnection, io, name, path, objectServer, |
| std::vector<thresholds::Threshold>{}); |
| |
| lg2::info( |
| "Added GPU Temperature Sensor {NAME} with chassis path: {PATH}.", |
| "NAME", name, "PATH", path); |
| } |
| } |
| } |
| |
| void createSensors( |
| boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer, |
| boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>& |
| sensors, |
| std::shared_ptr<sdbusplus::asio::connection>& dbusConnection) |
| { |
| if (!dbusConnection) |
| { |
| lg2::error("Connection not created"); |
| return; |
| } |
| dbusConnection->async_method_call( |
| [&sensors, &dbusConnection, &io, |
| &objectServer](const boost::system::error_code& ec, |
| const ManagedObjectType& resp) { |
| if (ec) |
| { |
| lg2::error("Error contacting entity manager"); |
| return; |
| } |
| |
| processSensorConfigs(io, objectServer, sensors, dbusConnection, |
| resp); |
| }, |
| entityManagerName, "/xyz/openbmc_project/inventory", |
| "org.freedesktop.DBus.ObjectManager", "GetManagedObjects"); |
| } |
| |
| void interfaceRemoved( |
| sdbusplus::message_t& message, |
| boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>& |
| sensors) |
| { |
| if (message.is_method_error()) |
| { |
| lg2::error("interfacesRemoved callback method error"); |
| return; |
| } |
| |
| sdbusplus::message::object_path removedPath; |
| std::vector<std::string> interfaces; |
| |
| message.read(removedPath, interfaces); |
| |
| // If the xyz.openbmc_project.Confguration.X interface was removed |
| // for one or more sensors, delete those sensor objects. |
| auto sensorIt = sensors.begin(); |
| while (sensorIt != sensors.end()) |
| { |
| if ((sensorIt->second->configurationPath == removedPath) && |
| (std::find(interfaces.begin(), interfaces.end(), |
| configInterfaceName(sensorType)) != interfaces.end())) |
| { |
| sensorIt = sensors.erase(sensorIt); |
| } |
| else |
| { |
| sensorIt++; |
| } |
| } |
| } |