blob: 3594c2997c5ddc444f16d38f677acb78c953fb5c [file] [log] [blame]
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
* AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
*/
#include "NvidiaGpuSensor.hpp"
#include "Thresholds.hpp"
#include "Utils.hpp"
#include "sensor.hpp"
#include <bits/basic_string.h>
#include <boost/asio/io_context.hpp>
#include <boost/container/flat_map.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/asio/connection.hpp>
#include <sdbusplus/asio/object_server.hpp>
#include <sdbusplus/message.hpp>
#include <sdbusplus/message/native_types.hpp>
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <variant>
#include <vector>
// Brings the ""s std::string literal suffix into scope; it is used below when
// composing the sensor's D-Bus object path.
using namespace std::literals;
// Reading limits handed to the Sensor base-class constructor for GPU
// temperature sensors.
// NOTE(review): 127 / -128 span the signed 8-bit range, presumably mirroring
// the width of the raw device reading - confirm against the device spec.
static constexpr double gpuTempSensorMaxReading = 127;
static constexpr double gpuTempSensorMinReading = -128;
// Builds a GPU temperature sensor and registers its D-Bus interfaces.
//
// All interfaces are added under the object path
// sensorPathPrefix + "temperature/" + escapeName(name), in this order:
// xyz.openbmc_project.Sensor.Value, one interface per entry of the sensor's
// threshold list, and finally the association interface. Once they exist the
// constructor kicks off discoverGpus().
//
// conn                - D-Bus connection; forwarded to the Sensor base and
//                       kept in the conn member.
// io                  - io_context the wait timer is constructed on.
// name                - sensor name; passed through escapeName() before use.
// sensorConfiguration - configuration string forwarded to the Sensor base.
// objectServer        - object server used to add the interfaces (a
//                       reference to it is kept for the destructor).
// thresholdData       - thresholds, moved into the Sensor base.
GpuTempSensor::GpuTempSensor(
    std::shared_ptr<sdbusplus::asio::connection>& conn,
    boost::asio::io_context& io, const std::string& name,
    const std::string& sensorConfiguration,
    sdbusplus::asio::object_server& objectServer,
    std::vector<thresholds::Threshold>&& thresholdData) :
    Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
           "temperature", false, true, gpuTempSensorMaxReading,
           gpuTempSensorMinReading, conn),
    waitTimer(io, std::chrono::steady_clock::duration(0)), conn(conn),
    objectServer(objectServer)
{
    // Every interface of this sensor lives under one object path.
    std::string objectPath = sensorPathPrefix + "temperature/"s;
    objectPath += escapeName(name);

    sensorInterface = objectServer.add_interface(
        objectPath, "xyz.openbmc_project.Sensor.Value");

    // One interface per threshold, stored in the slot selected by its level.
    for (const auto& entry : thresholds)
    {
        std::string levelIface = thresholds::getInterface(entry.level);
        const auto slot = static_cast<size_t>(entry.level);
        thresholdInterfaces[slot] =
            objectServer.add_interface(objectPath, levelIface);
    }

    association =
        objectServer.add_interface(objectPath, association::interface);

    discoverGpus();
}
// Tears the sensor down: cancels the wait timer, then removes from the object
// server every interface the constructor added (threshold interfaces first,
// then the association interface, then the value interface).
GpuTempSensor::~GpuTempSensor()
{
    waitTimer.cancel();

    std::for_each(std::begin(thresholdInterfaces),
                  std::end(thresholdInterfaces),
                  [this](const auto& thresholdIface) {
                      objectServer.remove_interface(thresholdIface);
                  });

    objectServer.remove_interface(association);
    objectServer.remove_interface(sensorInterface);
}
// Threshold check for this sensor: forwards to the free function
// thresholds::checkThresholds(), passing this object.
void GpuTempSensor::checkThresholds()
{
thresholds::checkThresholds(this);
}
// Completion handler for the ObjectMapper GetSubTree request issued by
// discoverGpus().
//
// For every (object path, service) pair in the reply that lists the
// xyz.openbmc_project.MCTP.Endpoint interface, an asynchronous
// org.freedesktop.DBus.Properties.GetAll call is issued for that interface and
// its reply is routed to processEndpoint().
//
// ec  - status of the GetSubTree call; when set the error is logged and
//       nothing else is done.
// ret - GetSubTree reply, iterated as object path -> (service -> interfaces).
void GpuTempSensor::queryEndpoints(const boost::system::error_code& ec,
                                   const GetSubTreeType& ret)
{
    if (ec)
    {
        lg2::error("Error querying endpoints: {ERROR}", "ERROR", ec.message());
        return;
    }

    // An empty reply needs no special case: the loops below simply do
    // nothing.
    for (const auto& [objPath, services] : ret)
    {
        for (const auto& [service, ifaces] : services)
        {
            for (const auto& iface : ifaces)
            {
                if (iface != "xyz.openbmc_project.MCTP.Endpoint")
                {
                    continue;
                }

                // NOTE(review): the handler holds a raw `this`, so the sensor
                // has to outlive the pending call - confirm the owner
                // guarantees that.
                conn->async_method_call(
                    // Named differently from the outer `ec` to avoid
                    // shadowing it.
                    [this](const boost::system::error_code& getAllEc,
                           const SensorBaseConfigMap& configs) {
                        processEndpoint(getAllEc, configs);
                    },
                    service, objPath, "org.freedesktop.DBus.Properties",
                    "GetAll", iface);
            }
        }
    }
}
// Completion handler for the Properties.GetAll call made on one MCTP endpoint.
//
// Checks that the reply contains an "EID" property holding a uint8_t and a
// "SupportedMessageTypes" property holding a std::vector<uint8_t>. A failed
// call, a missing property or a property of another type is logged and ends
// processing. The extracted values are not consumed yet.
//
// ec       - status of the GetAll call.
// endpoint - property name -> value map returned for the endpoint.
void GpuTempSensor::processEndpoint(const boost::system::error_code& ec,
                                    const SensorBaseConfigMap& endpoint)
{
    if (ec)
    {
        lg2::error("Error processing MCTP endpoint: {ERROR}", "ERROR",
                   ec.message());
        return;
    }

    // --- EID ---------------------------------------------------------------
    const auto eidEntry = endpoint.find("EID");
    if (eidEntry == endpoint.end())
    {
        lg2::error(
            "Error processing MCTP endpoint: Property EID not found in the configuration.");
        return;
    }
    const auto* eidValue = std::get_if<uint8_t>(&eidEntry->second);
    if (eidValue == nullptr)
    {
        lg2::error(
            "Error processing MCTP endpoint: Property EID does not have valid type.");
        return;
    }
    [[maybe_unused]] const uint8_t eid = *eidValue;

    // --- SupportedMessageTypes ---------------------------------------------
    const auto typesEntry = endpoint.find("SupportedMessageTypes");
    if (typesEntry == endpoint.end())
    {
        lg2::error(
            "Error processing MCTP endpoint: Property SupportedMessageTypes not found in the configuration.");
        return;
    }
    const auto* typesValue =
        std::get_if<std::vector<uint8_t>>(&typesEntry->second);
    if (typesValue == nullptr)
    {
        lg2::error(
            "Error processing MCTP endpoint: Property SupportedMessageTypes does not have valid type.");
        return;
    }
    [[maybe_unused]] const std::vector<uint8_t> mctpTypes = *typesValue;

    // If the OCP MCTP VDM message type (0x7E) is found in mctpTypes, the
    // endpoint is to be processed further (not implemented here).
    (void)this;
}
// Asks the ObjectMapper for objects implementing
// xyz.openbmc_project.MCTP.Endpoint.
//
// Sends an asynchronous GetSubTree request rooted at /au/com/codeconstruct/
// with a depth argument of 0; the reply is handed to queryEndpoints().
void GpuTempSensor::discoverGpus()
{
    const std::string searchPath = "/au/com/codeconstruct/";
    const std::vector<std::string> ifaceList = {
        "xyz.openbmc_project.MCTP.Endpoint"};
    constexpr int subtreeDepth = 0;

    auto onSubTree = [this](const boost::system::error_code& ec,
                            const GetSubTreeType& ret) {
        queryEndpoints(ec, ret);
    };

    conn->async_method_call(
        std::move(onSubTree), "xyz.openbmc_project.ObjectMapper",
        "/xyz/openbmc_project/object_mapper",
        "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath,
        subtreeDepth, ifaceList);
}
// Creates a GpuTempSensor for every matching configuration in a managed-object
// reply.
//
// Walks each (object path, interfaces) entry of resp. For each interface whose
// name equals configInterfaceName(sensorType), the "Name" property is read, a
// new sensor is constructed with an empty threshold list and stored in sensors
// under that name (replacing any entry already stored there), and the addition
// is logged.
//
// io, objectServer, dbusConnection - forwarded to the GpuTempSensor
//                                    constructor.
// sensors - name -> sensor map that receives the new objects.
// resp    - object path -> (interface name -> properties) reply to scan.
void processSensorConfigs(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
        sensors,
    std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
    const ManagedObjectType& resp)
{
    for (const auto& [path, interfaces] : resp)
    {
        for (const auto& [intf, cfg] : interfaces)
        {
            if (intf == configInterfaceName(sensorType))
            {
                std::string name = loadVariant<std::string>(cfg, "Name");

                // Build the sensor completely before touching the map.
                auto created = std::make_shared<GpuTempSensor>(
                    dbusConnection, io, name, path, objectServer,
                    std::vector<thresholds::Threshold>{});
                sensors[name] = std::move(created);

                lg2::info(
                    "Added GPU Temperature Sensor {NAME} with chassis path: {PATH}.",
                    "NAME", name, "PATH", path);
            }
        }
    }
}
// Requests the managed objects under /xyz/openbmc_project/inventory from the
// entity manager and, when the reply arrives, passes it to
// processSensorConfigs().
//
// Logs an error and returns immediately when dbusConnection is null. A failed
// request is logged inside the completion handler and creates nothing.
//
// The completion handler keeps references to all four arguments, so they have
// to remain valid until the reply has been handled.
void createSensors(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
        sensors,
    std::shared_ptr<sdbusplus::asio::connection>& dbusConnection)
{
    if (dbusConnection == nullptr)
    {
        lg2::error("Connection not created");
        return;
    }

    auto onManagedObjects =
        [&io, &objectServer, &sensors,
         &dbusConnection](const boost::system::error_code& ec,
                          const ManagedObjectType& resp) {
            if (!ec)
            {
                processSensorConfigs(io, objectServer, sensors,
                                     dbusConnection, resp);
                return;
            }
            lg2::error("Error contacting entity manager");
        };

    dbusConnection->async_method_call(
        std::move(onManagedObjects), entityManagerName,
        "/xyz/openbmc_project/inventory",
        "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
}
// Handler for an interfaces-removed message: drops every sensor whose
// configuration object lost its configuration interface.
//
// The message is read as an object path followed by a list of interface names.
// When that list contains configInterfaceName(sensorType), every entry of
// sensors whose configurationPath equals the removed path is erased from the
// map.
//
// message - message to decode; a method-error message is logged and ignored.
// sensors - name -> sensor map to prune.
void interfaceRemoved(
    sdbusplus::message_t& message,
    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
        sensors)
{
    if (message.is_method_error())
    {
        lg2::error("interfacesRemoved callback method error");
        return;
    }

    sdbusplus::message::object_path removedPath;
    std::vector<std::string> interfaces;
    message.read(removedPath, interfaces);

    // Whether the xyz.openbmc_project.Configuration.X interface is among the
    // removed ones does not depend on the sensor being examined, so decide it
    // once instead of rebuilding the name and rescanning the list per sensor.
    const auto configIface = configInterfaceName(sensorType);
    if (std::find(interfaces.begin(), interfaces.end(), configIface) ==
        interfaces.end())
    {
        return;
    }

    // Delete the sensor objects that were configured at the removed path.
    for (auto sensorIt = sensors.begin(); sensorIt != sensors.end();)
    {
        if (sensorIt->second->configurationPath == removedPath)
        {
            // erase() returns the iterator following the removed element.
            sensorIt = sensors.erase(sensorIt);
        }
        else
        {
            ++sensorIt;
        }
    }
}