blob: adb21eadfb8d47641d11dcc0dbe3320fe24fd216 [file] [log] [blame]
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
* AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#include "NvidiaDeviceDiscovery.hpp"
#include "NvidiaGpuDevice.hpp"
#include "Utils.hpp"
#include <bits/basic_string.h>
#include <MctpRequester.hpp>
#include <NvidiaGpuMctpVdm.hpp>
#include <OcpMctpVdm.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/container/flat_map.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/asio/connection.hpp>
#include <sdbusplus/asio/object_server.hpp>
#include <sdbusplus/message.hpp>
#include <sdbusplus/message/native_types.hpp>
#include <algorithm>
#include <array>
#include <cstdint>
#include <memory>
#include <span>
#include <string>
#include <utility>
#include <variant>
#include <vector>
/**
 * @brief Handle the reply to a Query Device Identification request.
 *
 * Logs an error and returns when the MCTP exchange reported a non-zero result,
 * when the response cannot be decoded, or when the decoded completion code is
 * not SUCCESS. When the decoded device type is DEVICE_GPU, a GpuDevice named
 * "<configs.name>_<instance id>" is created and stored in @p gpuDevices under
 * that name (overwriting an entry that already uses the name). Any other
 * device type is silently ignored.
 *
 * @param io                 Forwarded to the GpuDevice constructor.
 * @param objectServer       Forwarded to the GpuDevice constructor.
 * @param gpuDevices         Map receiving the newly created device.
 * @param conn               Forwarded to the GpuDevice constructor.
 * @param mctpRequester      Forwarded to the GpuDevice constructor.
 * @param configs            Configuration; its name is the device name prefix.
 * @param path               Forwarded to the GpuDevice constructor.
 * @param eid                MCTP endpoint ID the response came from.
 * @param sendRecvMsgResult  Result of the MCTP exchange; 0 means success.
 * @param queryDeviceIdentificationResponse  Raw response bytes to decode.
 */
void processQueryDeviceIdResponse(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, uint8_t eid, int sendRecvMsgResult,
    std::span<uint8_t> queryDeviceIdentificationResponse)
{
    using ocp::accelerator_management::CompletionCode;

    // Transport-level failure: there is no response worth decoding.
    if (sendRecvMsgResult != 0)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : sending message over MCTP failed, rc={RC}",
            "EID", eid, "RC", sendRecvMsgResult);
        return;
    }

    CompletionCode completionCode{};
    uint16_t reason = 0;
    uint8_t deviceTypeId = 0;
    uint8_t instanceId = 0;

    auto decodeRc = gpu::decodeQueryDeviceIdentificationResponse(
        queryDeviceIdentificationResponse, completionCode, reason,
        deviceTypeId, instanceId);

    const bool decodedOk = (decodeRc == 0) &&
                           (completionCode == CompletionCode::SUCCESS);
    if (!decodedOk)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
            "EID", eid, "RC", decodeRc, "CC", completionCode, "RESC", reason);
        return;
    }

    // Only GPU endpoints are turned into devices.
    if (deviceTypeId !=
        static_cast<uint8_t>(gpu::DeviceIdentification::DEVICE_GPU))
    {
        return;
    }

    lg2::info(
        "Found the GPU with EID {EID}, DeviceType {DEVTYPE}, InstanceId {IID}.",
        "EID", eid, "DEVTYPE", deviceTypeId, "IID", instanceId);

    const auto gpuName = configs.name + '_' + std::to_string(instanceId);
    auto device = std::make_shared<GpuDevice>(
        configs, gpuName, path, conn, eid, io, mctpRequester, objectServer);
    gpuDevices[gpuName] = std::move(device);
}
/**
 * @brief Send a Query Device Identification request to an MCTP endpoint.
 *
 * Encodes the request into a heap-allocated buffer and hands it, together
 * with a response buffer, to the MCTP requester. The completion callback
 * forwards the result to processQueryDeviceIdResponse(). If encoding fails an
 * error is logged and nothing is sent.
 *
 * @param io             Passed through to the response handler.
 * @param objectServer   Passed through to the response handler.
 * @param gpuDevices     Passed through to the response handler.
 * @param conn           Passed through to the response handler.
 * @param mctpRequester  Requester used to perform the exchange.
 * @param configs        Copied into the callback for the response handler.
 * @param path           Copied into the callback for the response handler.
 * @param eid            MCTP endpoint ID to query.
 */
void queryDeviceIdentification(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, uint8_t eid)
{
    using RequestBuffer =
        std::array<uint8_t, sizeof(gpu::QueryDeviceIdentificationRequest)>;
    using ResponseBuffer =
        std::array<uint8_t, sizeof(gpu::QueryDeviceIdentificationResponse)>;

    auto requestBuf = std::make_shared<RequestBuffer>();
    auto responseBuf = std::make_shared<ResponseBuffer>();

    // NOTE(review): the leading 0 is presumably the request instance id;
    // confirm against the encoder's declaration.
    auto encodeRc =
        gpu::encodeQueryDeviceIdentificationRequest(0, *requestBuf);
    if (encodeRc != 0)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : encode failed, rc={RC}",
            "EID", eid, "RC", encodeRc);
        return;
    }

    // The callback co-owns both buffers, so they stay alive at least as long
    // as the callback itself. The shared pointers are copied (not moved) into
    // the closure because they are also dereferenced in the call below.
    auto onReply = [&io, &objectServer, &gpuDevices, conn, &mctpRequester,
                    configs, path, eid, requestBuf,
                    responseBuf](int sendRecvMsgResult) {
        processQueryDeviceIdResponse(io, objectServer, gpuDevices, conn,
                                     mctpRequester, configs, path, eid,
                                     sendRecvMsgResult, *responseBuf);
    };

    mctpRequester.sendRecvMsg(eid, *requestBuf, *responseBuf,
                              std::move(onReply));
}
/**
 * @brief Inspect the properties of one MCTP endpoint and query it if relevant.
 *
 * Reads the "EID" (uint8_t) and "SupportedMessageTypes" (vector of uint8_t)
 * properties from @p endpoint. A missing or wrongly typed property is logged
 * as an error and ends processing. If the supported message types contain
 * ocp::accelerator_management::messageType, a device identification query is
 * started for the endpoint; otherwise nothing happens.
 *
 * @param io             Passed through to queryDeviceIdentification().
 * @param objectServer   Passed through to queryDeviceIdentification().
 * @param gpuDevices     Passed through to queryDeviceIdentification().
 * @param conn           Passed through to queryDeviceIdentification().
 * @param mctpRequester  Passed through to queryDeviceIdentification().
 * @param configs        Passed through to queryDeviceIdentification().
 * @param path           Passed through to queryDeviceIdentification().
 * @param ec             Error state of the property fetch; logged if set.
 * @param endpoint       Property name to value map of the endpoint.
 */
void processEndpoint(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, const boost::system::error_code& ec,
    const SensorBaseConfigMap& endpoint)
{
    if (ec)
    {
        lg2::error("Error processing MCTP endpoint: Error:{ERROR}", "ERROR",
                   ec.message());
        return;
    }

    // --- EID: must exist and hold a uint8_t ---
    const auto eidIt = endpoint.find("EID");
    if (eidIt == endpoint.end())
    {
        lg2::error(
            "Error processing MCTP endpoint: Property EID not found in the configuration.");
        return;
    }

    const auto* eidValue = std::get_if<uint8_t>(&eidIt->second);
    if (eidValue == nullptr)
    {
        lg2::error(
            "Error processing MCTP endpoint: Property EID does not have valid type.");
        return;
    }
    const uint8_t eid = *eidValue;

    // --- SupportedMessageTypes: must exist and hold a vector of uint8_t ---
    const auto typesIt = endpoint.find("SupportedMessageTypes");
    if (typesIt == endpoint.end())
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : Property SupportedMessageTypes not found in the configuration.",
            "EID", eid);
        return;
    }

    const auto* supportedTypes =
        std::get_if<std::vector<uint8_t>>(&typesIt->second);
    if (supportedTypes == nullptr)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : Property SupportedMessageTypes does not have valid type.",
            "EID", eid);
        return;
    }

    // Endpoints that do not advertise the OCP VDM message type are skipped
    // without logging.
    const bool advertisesOcpVdm =
        std::find(supportedTypes->begin(), supportedTypes->end(),
                  ocp::accelerator_management::messageType) !=
        supportedTypes->end();
    if (!advertisesOcpVdm)
    {
        return;
    }

    lg2::info("Found OCP MCTP VDM Endpoint with ID {EID}", "EID", eid);
    queryDeviceIdentification(io, objectServer, gpuDevices, conn,
                              mctpRequester, configs, path, eid);
}
/**
 * @brief Fetch the properties of every MCTP endpoint found in a subtree reply.
 *
 * Walks the object path -> service -> interface structure in @p ret and, for
 * each occurrence of the "xyz.openbmc_project.MCTP.Endpoint" interface, issues
 * an asynchronous org.freedesktop.DBus.Properties GetAll call whose reply is
 * handed to processEndpoint(). An error in @p ec is logged and ends
 * processing; an empty @p ret results in no calls.
 *
 * @param io             Passed through to processEndpoint().
 * @param objectServer   Passed through to processEndpoint().
 * @param gpuDevices     Passed through to processEndpoint().
 * @param conn           Connection used for the GetAll calls.
 * @param mctpRequester  Passed through to processEndpoint().
 * @param configs        Copied into each callback for processEndpoint().
 * @param path           Copied into each callback for processEndpoint().
 * @param ec             Error state of the subtree lookup; logged if set.
 * @param ret            Subtree lookup result to walk.
 */
void queryEndpoints(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, const boost::system::error_code& ec,
    const GetSubTreeType& ret)
{
    if (ec)
    {
        lg2::error("Error processing MCTP endpoints: {ERROR}", "ERROR",
                   ec.message());
        return;
    }

    // An empty result simply yields zero iterations below.
    for (const auto& [objPath, services] : ret)
    {
        for (const auto& [service, ifaces] : services)
        {
            for (const auto& iface : ifaces)
            {
                if (iface != "xyz.openbmc_project.MCTP.Endpoint")
                {
                    continue;
                }

                conn->async_method_call(
                    [&io, &objectServer, &gpuDevices, conn, &mctpRequester,
                     configs,
                     path](const boost::system::error_code& propsEc,
                           const SensorBaseConfigMap& endpointProps) {
                        processEndpoint(io, objectServer, gpuDevices, conn,
                                        mctpRequester, configs, path, propsEc,
                                        endpointProps);
                    },
                    service, objPath, "org.freedesktop.DBus.Properties",
                    "GetAll", iface);
            }
        }
    }
}
/**
 * @brief Start discovery of MCTP endpoints for one sensor configuration.
 *
 * Asks the ObjectMapper (GetSubTree) for all objects below
 * "/au/com/codeconstruct/" that implement
 * "xyz.openbmc_project.MCTP.Endpoint". The asynchronous reply is handed to
 * queryEndpoints().
 *
 * @param io             Passed through to queryEndpoints().
 * @param objectServer   Passed through to queryEndpoints().
 * @param gpuDevices     Passed through to queryEndpoints().
 * @param conn           Connection used for the ObjectMapper call.
 * @param mctpRequester  Passed through to queryEndpoints().
 * @param configs        Copied into the callback for queryEndpoints().
 * @param path           Copied into the callback for queryEndpoints().
 */
void discoverDevices(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path)
{
    const std::string searchPath{"/au/com/codeconstruct/"};
    const std::vector<std::string> ifaceList{
        "xyz.openbmc_project.MCTP.Endpoint"};

    auto onSubTree = [&io, &objectServer, &gpuDevices, conn, &mctpRequester,
                      configs, path](const boost::system::error_code& ec,
                                     const GetSubTreeType& subTree) {
        queryEndpoints(io, objectServer, gpuDevices, conn, mctpRequester,
                       configs, path, ec, subTree);
    };

    // NOTE(review): the depth argument 0 presumably means "no depth limit";
    // confirm against the ObjectMapper GetSubTree documentation.
    conn->async_method_call(
        std::move(onSubTree), "xyz.openbmc_project.ObjectMapper",
        "/xyz/openbmc_project/object_mapper",
        "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath, 0,
        ifaceList);
}
/**
 * @brief Start device discovery for every matching configuration object.
 *
 * Scans the managed-objects reply for interfaces whose name equals
 * configInterfaceName(deviceType). For each match, the "Name" and "PollRate"
 * properties are loaded into a SensorConfigs, discovery is started via
 * discoverDevices(), and the detected configuration is logged.
 *
 * @param io              Passed through to discoverDevices().
 * @param objectServer    Passed through to discoverDevices().
 * @param gpuDevices      Passed through to discoverDevices().
 * @param dbusConnection  Passed through to discoverDevices().
 * @param mctpRequester   Passed through to discoverDevices().
 * @param resp            Managed-objects reply to scan.
 */
void processSensorConfigs(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
    mctp::MctpRequester& mctpRequester, const ManagedObjectType& resp)
{
    for (const auto& [objectPath, ifaceMap] : resp)
    {
        for (const auto& [ifaceName, properties] : ifaceMap)
        {
            if (ifaceName == configInterfaceName(deviceType))
            {
                SensorConfigs configs;
                configs.name = loadVariant<std::string>(properties, "Name");
                configs.pollRate =
                    loadVariant<uint64_t>(properties, "PollRate");

                discoverDevices(io, objectServer, gpuDevices, dbusConnection,
                                mctpRequester, configs, objectPath);

                lg2::info(
                    "Detected configuration {NAME} of type {TYPE} at path: {PATH}.",
                    "NAME", configs.name, "TYPE", deviceType, "PATH",
                    objectPath);
            }
        }
    }
}
/**
 * @brief Entry point: fetch configurations and create the GPU devices.
 *
 * Issues an asynchronous GetManagedObjects call on
 * "/xyz/openbmc_project/inventory" of the entity manager service. On success
 * the reply is handed to processSensorConfigs(); on failure an error is
 * logged. A null @p dbusConnection is logged as an error and nothing is
 * requested.
 *
 * @param io              Passed through to processSensorConfigs().
 * @param objectServer    Passed through to processSensorConfigs().
 * @param gpuDevices      Passed through to processSensorConfigs().
 * @param dbusConnection  Connection used for the call; copied into the
 *                        callback.
 * @param mctpRequester   Passed through to processSensorConfigs().
 */
void createSensors(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
    mctp::MctpRequester& mctpRequester)
{
    if (dbusConnection == nullptr)
    {
        lg2::error("Connection not created");
        return;
    }

    auto onManagedObjects =
        [&io, &objectServer, &gpuDevices, &mctpRequester, dbusConnection](
            boost::system::error_code ec, const ManagedObjectType& resp) {
            if (!ec)
            {
                processSensorConfigs(io, objectServer, gpuDevices,
                                     dbusConnection, mctpRequester, resp);
                return;
            }
            lg2::error("Error contacting entity manager");
        };

    dbusConnection->async_method_call(
        std::move(onManagedObjects), entityManagerName,
        "/xyz/openbmc_project/inventory",
        "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
}
/**
 * @brief Drop GPU devices whose configuration interface was removed.
 *
 * Reads the object path and the list of removed interface names from
 * @p message. If that list contains configInterfaceName(deviceType), every
 * entry of @p gpuDevices whose getPath() equals the removed path is erased.
 * A message flagged as a method error is logged and ignored.
 *
 * @param message     InterfacesRemoved signal message to read from.
 * @param gpuDevices  Map of devices to prune.
 */
void interfaceRemoved(
    sdbusplus::message_t& message,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices)
{
    if (message.is_method_error())
    {
        lg2::error("interfacesRemoved callback method error");
        return;
    }

    sdbusplus::message::object_path removedPath;
    std::vector<std::string> interfaces;
    message.read(removedPath, interfaces);

    // Whether the xyz.openbmc_project.Configuration.X interface is among the
    // removed ones does not depend on the individual device, so decide it
    // once instead of rebuilding the name and rescanning the list per device.
    const bool configInterfaceRemoved =
        std::find(interfaces.begin(), interfaces.end(),
                  configInterfaceName(deviceType)) != interfaces.end();
    if (!configInterfaceRemoved)
    {
        return;
    }

    // Delete every device object that was created from the removed path.
    for (auto sensorIt = gpuDevices.begin(); sensorIt != gpuDevices.end();)
    {
        if (sensorIt->second->getPath() == removedPath)
        {
            // erase() hands back the iterator following the removed entry.
            sensorIt = gpuDevices.erase(sensorIt);
        }
        else
        {
            ++sensorIt;
        }
    }
}