nvidia-gpu: add support for ConnectX device
Add support to discover ConnectX devices and to populate PCIe interface
properties using Phosphor DBus Interface
xyz.openbmc_project.Inventory.Item.PCIeDevice.
ConnectX device has an integrated PCIe Switch. The patch uses
xyz.openbmc_project.Inventory.Item.PCIeSwitch PDI to define the PCIe
Switch resource.
Tested: Build an image for nvl32-obmc machine with the following patch
cherry picked.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/85490
The patch cherry-picks the following patches that are currently under
review.
```
1. device tree
https://lore.kernel.org/all/aRbLqH8pLWCQryhu@molberding.nvidia.com/
2. mctpd patches
https://github.com/CodeConstruct/mctp/pull/85
3. u-boot changes
https://lore.kernel.org/openbmc/20251121-msx4-v1-0-fc0118b666c1@nvidia.com/T/#t
4. kernel changes as specified in the openbmc patch (for espi)
5. entity-manager changes
https://gerrit.openbmc.org/c/openbmc/entity-manager/+/85455
6. platform-init changes
https://gerrit.openbmc.org/c/openbmc/platform-init/+/85456
7. spi changes
https://lore.kernel.org/all/20251121-w25q01jv_fixup-v1-1-3d175050db73@nvidia.com/
```
```
root@nvl32-bmc:~# busctl tree xyz.openbmc_project.GpuSensor
`- /xyz
`- /xyz/openbmc_project
|- /xyz/openbmc_project/inventory
| `- /xyz/openbmc_project/inventory/pcie_devices
| |- /xyz/openbmc_project/inventory/pcie_devices/Nvidia_ConnectX_0
| |- /xyz/openbmc_project/inventory/pcie_devices/Nvidia_ConnectX_1
| |- /xyz/openbmc_project/inventory/pcie_devices/Nvidia_ConnectX_2
| `- /xyz/openbmc_project/inventory/pcie_devices/Nvidia_ConnectX_3
root@nvl32-obmc:~# busctl introspect xyz.openbmc_project.GpuSensor /xyz/openbmc_project/inventory/pcie_devices/Nvidia_ConnectX_0
NAME TYPE SIGNATURE RESULT/VALUE FLAGS
org.freedesktop.DBus.Introspectable interface - - -
.Introspect method - s -
org.freedesktop.DBus.Peer interface - - -
.GetMachineId method - s -
.Ping method - - -
org.freedesktop.DBus.Properties interface - - -
.Get method ss v -
.GetAll method s a{sv} -
.Set method ssv - -
.PropertiesChanged signal sa{sv}as - -
xyz.openbmc_project.Inventory.Item.PCIeDevice interface - - -
.GenerationInUse property s "xyz.openbmc_project.Inventory.Item.P... emits-change
.GenerationSupported property s "xyz.openbmc_project.Inventory.Item.P... emits-change
.LanesInUse property u 8 emits-change
.MaxLanes property u 16 emits-change
xyz.openbmc_project.Inventory.Item.PCIeSwitch interface - - -
$ curl -s -k -u 'root:0penBmc' https://${bmc_ip}/redfish/v1/Systems/system/PCIeDevices/Nvidia_ConnectX_0
{
"@odata.id": "/redfish/v1/Systems/system/PCIeDevices/Nvidia_ConnectX_0",
"@odata.type": "#PCIeDevice.v1_19_0.PCIeDevice",
"Id": "Nvidia_ConnectX_0",
"Name": "PCIe Device",
"PCIeFunctions": {
"@odata.id": "/redfish/v1/Systems/system/PCIeDevices/Nvidia_ConnectX_0/PCIeFunctions"
},
"PCIeInterface": {
"LanesInUse": 8,
"MaxLanes": 16,
"MaxPCIeType": "Gen5",
"PCIeType": "Gen5"
},
"Status": {
"Health": "OK",
"State": "Enabled"
}
}%
```
Change-Id: Id89ce8a298ebb16934e94efcb9ca4679f91a7b26
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp b/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp
index acb6fec..b682345 100644
--- a/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp
+++ b/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp
@@ -6,6 +6,7 @@
#include "NvidiaDeviceDiscovery.hpp"
#include "NvidiaGpuDevice.hpp"
+#include "NvidiaPcieDevice.hpp"
#include "NvidiaSmaDevice.hpp"
#include "Utils.hpp"
@@ -25,6 +26,7 @@
#include <algorithm>
#include <array>
#include <cstdint>
+#include <format>
#include <memory>
#include <span>
#include <stdexcept>
@@ -42,6 +44,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& conn,
mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
const std::string& path, uint8_t eid,
@@ -91,7 +95,7 @@
configs, gpuName, path, conn, eid,
io, mctpRequester, objectServer)))
.first;
- (*gpu).second->init();
+ gpu->second->init();
break;
}
@@ -111,7 +115,28 @@
configs, smaName, path, conn, eid,
io, mctpRequester, objectServer)))
.first;
- (*sma).second->init();
+ sma->second->init();
+ break;
+ }
+
+ case gpu::DeviceIdentification::DEVICE_PCIE:
+ {
+ lg2::info(
+ "Found the PCIe Device with EID {EID}, DeviceType {DEVTYPE}, InstanceId {IID}.",
+ "EID", eid, "DEVTYPE", responseDeviceType, "IID",
+ responseInstanceId);
+
+ std::string pcieName =
+ std::format("Nvidia_ConnectX_{}", responseInstanceId);
+
+ auto pcieDevice =
+ pcieDevices
+ .insert(std::make_pair(
+ pcieName, std::make_shared<PcieDevice>(
+ configs, pcieName, path, conn, eid, io,
+ mctpRequester, objectServer)))
+ .first;
+ pcieDevice->second->init();
break;
}
}
@@ -123,6 +148,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& conn,
mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
const std::string& path, uint8_t eid)
@@ -142,12 +169,12 @@
mctpRequester.sendRecvMsg(
eid, *queryDeviceIdentificationRequest,
- [&io, &objectServer, &gpuDevices, &smaDevices, conn, &mctpRequester,
- configs, path, eid, queryDeviceIdentificationRequest](
+ [&io, &objectServer, &gpuDevices, &smaDevices, &pcieDevices, conn,
+ &mctpRequester, configs, path, eid, queryDeviceIdentificationRequest](
const std::error_code& ec, std::span<const uint8_t> response) {
- processQueryDeviceIdResponse(io, objectServer, gpuDevices,
- smaDevices, conn, mctpRequester,
- configs, path, eid, ec, response);
+ processQueryDeviceIdResponse(
+ io, objectServer, gpuDevices, smaDevices, pcieDevices, conn,
+ mctpRequester, configs, path, eid, ec, response);
});
}
@@ -157,6 +184,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& conn,
mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
const std::string& path, const boost::system::error_code& ec,
@@ -225,7 +254,8 @@
{
lg2::info("Found OCP MCTP VDM Endpoint with ID {EID}", "EID", eid);
queryDeviceIdentification(io, objectServer, gpuDevices, smaDevices,
- conn, mctpRequester, configs, path, eid);
+ pcieDevices, conn, mctpRequester, configs,
+ path, eid);
}
}
@@ -235,6 +265,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& conn,
mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
const std::string& path, const boost::system::error_code& ec,
@@ -261,13 +293,14 @@
if (iface == "xyz.openbmc_project.MCTP.Endpoint")
{
conn->async_method_call(
- [&io, &objectServer, &gpuDevices, &smaDevices, conn,
- &mctpRequester, configs,
+ [&io, &objectServer, &gpuDevices, &smaDevices,
+ &pcieDevices, conn, &mctpRequester, configs,
path](const boost::system::error_code& ec,
const SensorBaseConfigMap& endpoint) {
processEndpoint(io, objectServer, gpuDevices,
- smaDevices, conn, mctpRequester,
- configs, path, ec, endpoint);
+ smaDevices, pcieDevices, conn,
+ mctpRequester, configs, path, ec,
+ endpoint);
},
service, objPath, "org.freedesktop.DBus.Properties",
"GetAll", iface);
@@ -283,6 +316,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& conn,
mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
const std::string& path)
@@ -291,11 +326,12 @@
std::vector<std::string> ifaceList{{"xyz.openbmc_project.MCTP.Endpoint"}};
conn->async_method_call(
- [&io, &objectServer, &gpuDevices, &smaDevices, conn, &mctpRequester,
- configs,
+ [&io, &objectServer, &gpuDevices, &smaDevices, &pcieDevices, conn,
+ &mctpRequester, configs,
path](const boost::system::error_code& ec, const GetSubTreeType& ret) {
- queryEndpoints(io, objectServer, gpuDevices, smaDevices, conn,
- mctpRequester, configs, path, ec, ret);
+ queryEndpoints(io, objectServer, gpuDevices, smaDevices,
+ pcieDevices, conn, mctpRequester, configs, path, ec,
+ ret);
},
"xyz.openbmc_project.ObjectMapper",
"/xyz/openbmc_project/object_mapper",
@@ -309,6 +345,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
mctp::MctpRequester& mctpRequester, const ManagedObjectType& resp)
{
@@ -336,7 +374,8 @@
}
discoverDevices(io, objectServer, gpuDevices, smaDevices,
- dbusConnection, mctpRequester, configs, path);
+ pcieDevices, dbusConnection, mctpRequester, configs,
+ path);
lg2::info(
"Detected configuration {NAME} of type {TYPE} at path: {PATH}.",
@@ -351,6 +390,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
mctp::MctpRequester& mctpRequester)
{
@@ -360,9 +401,9 @@
return;
}
dbusConnection->async_method_call(
- [&gpuDevices, &smaDevices, &mctpRequester, dbusConnection, &io,
- &objectServer](boost::system::error_code ec,
- const ManagedObjectType& resp) {
+ [&gpuDevices, &smaDevices, &pcieDevices, &mctpRequester, dbusConnection,
+ &io, &objectServer](boost::system::error_code ec,
+ const ManagedObjectType& resp) {
if (ec)
{
lg2::error("Error contacting entity manager");
@@ -370,7 +411,8 @@
}
processSensorConfigs(io, objectServer, gpuDevices, smaDevices,
- dbusConnection, mctpRequester, resp);
+ pcieDevices, dbusConnection, mctpRequester,
+ resp);
},
entityManagerName, "/xyz/openbmc_project/inventory",
"org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
@@ -381,7 +423,9 @@
boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
- smaDevices)
+ smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices)
{
if (message.is_method_error())
{
@@ -425,4 +469,19 @@
smaSensorIt++;
}
}
+
+ auto pcieSensorIt = pcieDevices.begin();
+ while (pcieSensorIt != pcieDevices.end())
+ {
+ if ((pcieSensorIt->second->getPath() == removedPath) &&
+ (std::find(interfaces.begin(), interfaces.end(),
+ configInterfaceName(deviceType)) != interfaces.end()))
+ {
+ pcieSensorIt = pcieDevices.erase(pcieSensorIt);
+ }
+ else
+ {
+ pcieSensorIt++;
+ }
+ }
}
diff --git a/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp b/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp
index c1c1bb9..67f2419 100644
--- a/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp
+++ b/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp
@@ -28,6 +28,7 @@
class GpuDevice;
class SmaDevice;
+class PcieDevice;
void createSensors(
boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
@@ -35,6 +36,8 @@
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices,
const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
mctp::MctpRequester& mctpRequester);
@@ -43,4 +46,6 @@
boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
gpuDevices,
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
- smaDevices);
+ smaDevices,
+ boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>&
+ pcieDevices);
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
index 4af306e..e8b2db0 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
@@ -10,6 +10,7 @@
#include <endian.h>
#include <cerrno>
+#include <cstddef>
#include <cstdint>
#include <span>
#include <vector>
@@ -489,5 +490,89 @@
return 0;
}
+int encodeQueryScalarGroupTelemetryV2Request(
+ uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber,
+ uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf)
+{
+ if (buf.size() < sizeof(QueryScalarGroupTelemetryV2Request))
+ {
+ return EINVAL;
+ }
+
+ auto* msg =
+ reinterpret_cast<QueryScalarGroupTelemetryV2Request*>(buf.data());
+
+ ocp::accelerator_management::BindingPciVidInfo header{};
+ header.ocp_accelerator_management_msg_type =
+ static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
+ header.instance_id = instanceId &
+ ocp::accelerator_management::instanceIdBitMask;
+ header.msg_type = static_cast<uint8_t>(MessageType::PCIE_LINK);
+
+ auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
+
+ if (rc != 0)
+ {
+ return rc;
+ }
+
+ msg->hdr.command =
+ static_cast<uint8_t>(PcieLinkCommands::QueryScalarGroupTelemetryV2);
+ msg->hdr.data_size = 3;
+ msg->upstreamPortNumber =
+ (static_cast<uint8_t>(portType) << 7) | (upstreamPortNumber & 0x7F);
+ msg->portNumber = portNumber;
+ msg->groupId = groupId;
+
+ return 0;
+}
+
+int decodeQueryScalarGroupTelemetryV2Response(
+ std::span<const uint8_t> buf,
+ ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
+ size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues)
+{
+ auto rc =
+ ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
+
+ if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
+ {
+ return rc;
+ }
+
+ if (buf.size() < sizeof(ocp::accelerator_management::CommonResponse))
+ {
+ return EINVAL;
+ }
+
+ const auto* response =
+ reinterpret_cast<const ocp::accelerator_management::CommonResponse*>(
+ buf.data());
+
+ const uint16_t dataSize = le16toh(response->data_size);
+
+ if (buf.size() <
+ dataSize + sizeof(ocp::accelerator_management::CommonResponse))
+ {
+ return EINVAL;
+ }
+
+ numTelemetryValues = dataSize / sizeof(uint32_t);
+
+ if (telemetryValues.size() < numTelemetryValues)
+ {
+ telemetryValues.resize(numTelemetryValues);
+ }
+
+ const auto* telemetryDataPtr = reinterpret_cast<const uint32_t*>(
+ buf.data() + sizeof(ocp::accelerator_management::CommonResponse));
+
+ for (size_t i = 0; i < numTelemetryValues; i++)
+ {
+ telemetryValues[i] = le32toh(telemetryDataPtr[i]);
+ }
+
+ return 0;
+}
// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
} // namespace gpu
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
index 9bc70dc..5c298a7 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
@@ -26,6 +26,7 @@
enum class MessageType : uint8_t
{
DEVICE_CAPABILITY_DISCOVERY = 0,
+ PCIE_LINK = 2,
PLATFORM_ENVIRONMENTAL = 3
};
@@ -45,9 +46,15 @@
GET_VOLTAGE = 0x0F,
};
+enum class PcieLinkCommands : uint8_t
+{
+ QueryScalarGroupTelemetryV2 = 0x24,
+};
+
enum class DeviceIdentification : uint8_t
{
DEVICE_GPU = 0,
+ DEVICE_PCIE = 2,
DEVICE_SMA = 5
};
@@ -92,6 +99,12 @@
NVLINK_PEER_TYPE = 36
};
+enum class PciePortType : uint8_t
+{
+ UPSTREAM = 0,
+ DOWNSTREAM = 1,
+};
+
struct QueryDeviceIdentificationRequest
{
ocp::accelerator_management::CommonRequest hdr;
@@ -125,6 +138,14 @@
using GetVoltageRequest = GetNumericSensorReadingRequest;
+struct QueryScalarGroupTelemetryV2Request
+{
+ ocp::accelerator_management::CommonRequest hdr;
+ uint8_t upstreamPortNumber;
+ uint8_t portNumber;
+ uint8_t groupId;
+} __attribute__((packed));
+
struct GetTemperatureReadingResponse
{
ocp::accelerator_management::CommonResponse hdr;
@@ -225,4 +246,13 @@
ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
InventoryPropertyId propertyId, InventoryValue& value);
+int encodeQueryScalarGroupTelemetryV2Request(
+ uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber,
+ uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf);
+
+int decodeQueryScalarGroupTelemetryV2Response(
+ std::span<const uint8_t> buf,
+ ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
+ size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues);
+
} // namespace gpu
diff --git a/src/nvidia-gpu/NvidiaGpuSensorMain.cpp b/src/nvidia-gpu/NvidiaGpuSensorMain.cpp
index 18e5e1a..20c7f3a 100644
--- a/src/nvidia-gpu/NvidiaGpuSensorMain.cpp
+++ b/src/nvidia-gpu/NvidiaGpuSensorMain.cpp
@@ -7,6 +7,7 @@
#include "Utils.hpp"
#include <NvidiaDeviceDiscovery.hpp>
+#include <NvidiaPcieDevice.hpp>
#include <NvidiaSmaDevice.hpp>
#include <boost/asio/error.hpp>
#include <boost/asio/io_context.hpp>
@@ -31,6 +32,8 @@
boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>> gpuDevices;
boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>> smaDevices;
+boost::container::flat_map<std::string, std::shared_ptr<PcieDevice>>
+ pcieDevices;
void configTimerExpiryCallback(
boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
@@ -41,8 +44,8 @@
{
return; // we're being canceled
}
- createSensors(io, objectServer, gpuDevices, smaDevices, dbusConnection,
- mctpRequester);
+ createSensors(io, objectServer, gpuDevices, smaDevices, pcieDevices,
+ dbusConnection, mctpRequester);
}
int main()
@@ -57,8 +60,8 @@
mctp::MctpRequester mctpRequester(io);
boost::asio::post(io, [&]() {
- createSensors(io, objectServer, gpuDevices, smaDevices, systemBus,
- mctpRequester);
+ createSensors(io, objectServer, gpuDevices, smaDevices, pcieDevices,
+ systemBus, mctpRequester);
});
boost::asio::steady_timer configTimer(io);
@@ -84,7 +87,7 @@
sdbusplus::bus::match::rules::interfacesRemovedAtPath(
std::string(inventoryPath)),
[](sdbusplus::message_t& msg) {
- interfaceRemoved(msg, gpuDevices, smaDevices);
+ interfaceRemoved(msg, gpuDevices, smaDevices, pcieDevices);
});
try
diff --git a/src/nvidia-gpu/NvidiaPcieDevice.cpp b/src/nvidia-gpu/NvidiaPcieDevice.cpp
new file mode 100644
index 0000000..5df4c20
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaPcieDevice.cpp
@@ -0,0 +1,63 @@
+/*
+ * SPDX-FileCopyrightText: Copyright OpenBMC Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "NvidiaPcieDevice.hpp"
+
+#include "NvidiaDeviceDiscovery.hpp"
+#include "NvidiaPcieInterface.hpp"
+#include "Utils.hpp"
+
+#include <MctpRequester.hpp>
+#include <boost/asio/io_context.hpp>
+#include <phosphor-logging/lg2.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <chrono>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+PcieDevice::PcieDevice(const SensorConfigs& configs, const std::string& name,
+ const std::string& path,
+ const std::shared_ptr<sdbusplus::asio::connection>& conn,
+ uint8_t eid, boost::asio::io_context& io,
+ mctp::MctpRequester& mctpRequester,
+ sdbusplus::asio::object_server& objectServer) :
+ eid(eid), sensorPollMs(std::chrono::milliseconds{configs.pollRate}),
+ waitTimer(io, std::chrono::steady_clock::duration(0)),
+ mctpRequester(mctpRequester), conn(conn), objectServer(objectServer),
+ configs(configs), name(escapeName(name)), path(path)
+{}
+
+void PcieDevice::init()
+{
+ makeSensors();
+}
+
+void PcieDevice::makeSensors()
+{
+ pcieInterface = std::make_shared<NvidiaPcieInterface>(
+ conn, mctpRequester, name, path, eid, objectServer);
+
+ lg2::info("Added PCIe {NAME} Sensors with chassis path: {PATH}.", "NAME",
+ name, "PATH", path);
+
+ read();
+}
+
+void PcieDevice::read()
+{
+ pcieInterface->update();
+
+ waitTimer.expires_after(std::chrono::milliseconds(sensorPollMs));
+ waitTimer.async_wait([this](const boost::system::error_code& ec) {
+ if (ec)
+ {
+ return;
+ }
+ read();
+ });
+}
diff --git a/src/nvidia-gpu/NvidiaPcieDevice.hpp b/src/nvidia-gpu/NvidiaPcieDevice.hpp
new file mode 100644
index 0000000..9767060
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaPcieDevice.hpp
@@ -0,0 +1,66 @@
+/*
+ * SPDX-FileCopyrightText: Copyright OpenBMC Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+#include "NvidiaDeviceDiscovery.hpp"
+#include "NvidiaPcieInterface.hpp"
+
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/steady_timer.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <chrono>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+constexpr const char* pcieDevicePathPrefix =
+ "/xyz/openbmc_project/inventory/pcie_devices/";
+
+class PcieDevice
+{
+ public:
+ PcieDevice(const SensorConfigs& configs, const std::string& name,
+ const std::string& path,
+ const std::shared_ptr<sdbusplus::asio::connection>& conn,
+ uint8_t eid, boost::asio::io_context& io,
+ mctp::MctpRequester& mctpRequester,
+ sdbusplus::asio::object_server& objectServer);
+
+ const std::string& getPath() const
+ {
+ return path;
+ }
+
+ void init();
+
+ private:
+ void makeSensors();
+
+ void read();
+
+ uint8_t eid{};
+
+ std::chrono::milliseconds sensorPollMs;
+
+ boost::asio::steady_timer waitTimer;
+
+ mctp::MctpRequester& mctpRequester;
+
+ std::shared_ptr<sdbusplus::asio::connection> conn;
+
+ sdbusplus::asio::object_server& objectServer;
+
+ SensorConfigs configs;
+
+ std::string name;
+
+ std::string path;
+
+ std::shared_ptr<NvidiaPcieInterface> pcieInterface;
+};
diff --git a/src/nvidia-gpu/NvidiaPcieInterface.cpp b/src/nvidia-gpu/NvidiaPcieInterface.cpp
new file mode 100644
index 0000000..17c8926
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaPcieInterface.cpp
@@ -0,0 +1,185 @@
+/*
+ * SPDX-FileCopyrightText: Copyright OpenBMC Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "NvidiaPcieInterface.hpp"
+
+#include "Utils.hpp"
+
+#include <bits/basic_string.h>
+
+#include <MctpRequester.hpp>
+#include <NvidiaGpuMctpVdm.hpp>
+#include <NvidiaPcieDevice.hpp>
+#include <OcpMctpVdm.hpp>
+#include <phosphor-logging/lg2.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <span>
+#include <string>
+#include <system_error>
+#include <vector>
+
+using std::string;
+
+using namespace std::literals;
+
+NvidiaPcieInterface::NvidiaPcieInterface(
+ std::shared_ptr<sdbusplus::asio::connection>& conn,
+ mctp::MctpRequester& mctpRequester, const std::string& name,
+ const std::string& path, uint8_t eid,
+ sdbusplus::asio::object_server& objectServer) :
+ eid(eid), path(path), conn(conn), mctpRequester(mctpRequester)
+{
+ const std::string dbusPath = pcieDevicePathPrefix + escapeName(name);
+
+ pcieDeviceInterface = objectServer.add_interface(
+ dbusPath, "xyz.openbmc_project.Inventory.Item.PCIeDevice");
+
+ switchInterface = objectServer.add_interface(
+ dbusPath, "xyz.openbmc_project.Inventory.Item.PCIeSwitch");
+
+ pcieDeviceInterface->register_property(
+ "GenerationInUse",
+ std::string(
+ "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Unknown"));
+
+ pcieDeviceInterface->register_property("LanesInUse",
+ std::numeric_limits<size_t>::max());
+
+ pcieDeviceInterface->register_property(
+ "GenerationSupported",
+ std::string(
+ "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Unknown"));
+
+ pcieDeviceInterface->register_property("MaxLanes", static_cast<size_t>(0));
+
+ if (!pcieDeviceInterface->initialize())
+ {
+ lg2::error("Error initializing PCIe Device Interface for EID={EID}",
+ "EID", eid);
+ }
+
+ if (!switchInterface->initialize())
+ {
+ lg2::error("Error initializing Switch Interface for EID={EID}", "EID",
+ eid);
+ }
+}
+
+string NvidiaPcieInterface::mapPcieGeneration(uint32_t value)
+{
+ switch (value)
+ {
+ case 1:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen1";
+ case 2:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen2";
+ case 3:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen3";
+ case 4:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen4";
+ case 5:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen5";
+ case 6:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen6";
+ default:
+ return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Unknown";
+ }
+}
+
+size_t NvidiaPcieInterface::decodeLinkWidth(uint32_t value)
+{
+ return (value > 0) ? pow(2, value - 1) : 0;
+}
+
+void NvidiaPcieInterface::processResponse(const std::error_code& ec,
+ std::span<const uint8_t> response)
+{
+ if (ec)
+ {
+ lg2::error(
+ "Error updating PCIe Interface: sending message over MCTP failed, "
+ "rc={RC}, EID={EID}",
+ "RC", ec.value(), "EID", eid);
+ return;
+ }
+
+ ocp::accelerator_management::CompletionCode cc{};
+ uint16_t reasonCode = 0;
+ size_t numTelemetryValue = 0;
+
+ auto rc = gpu::decodeQueryScalarGroupTelemetryV2Response(
+ response, cc, reasonCode, numTelemetryValue, telemetryValues);
+
+ if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
+ {
+ lg2::error("Error updating PCIe Interface: decode failed, "
+ "rc={RC}, cc={CC}, reasonCode={RESC}, EID={EID}",
+ "RC", rc, "CC", static_cast<uint8_t>(cc), "RESC", reasonCode,
+ "EID", eid);
+ return;
+ }
+
+ if (!telemetryValues.empty())
+ {
+ pcieDeviceInterface->set_property(
+ "GenerationInUse", mapPcieGeneration(telemetryValues[0]));
+ }
+
+ if (telemetryValues.size() > 1)
+ {
+ pcieDeviceInterface->set_property(
+ "LanesInUse",
+ decodeLinkWidth(static_cast<size_t>(telemetryValues[1])));
+ }
+
+ if (telemetryValues.size() > 3)
+ {
+ pcieDeviceInterface->set_property(
+ "GenerationSupported", mapPcieGeneration(telemetryValues[3]));
+ }
+
+ if (telemetryValues.size() > 4)
+ {
+ pcieDeviceInterface->set_property(
+ "MaxLanes",
+ decodeLinkWidth(static_cast<size_t>(telemetryValues[4])));
+ }
+}
+
+void NvidiaPcieInterface::update()
+{
+ auto rc =
+ gpu::encodeQueryScalarGroupTelemetryV2Request(0, {}, 0, 0, 1, request);
+
+ if (rc != 0)
+ {
+ lg2::error("Error updating PCIe Interface: failed, rc={RC}, EID={EID}",
+ "RC", rc, "EID", eid);
+ return;
+ }
+
+ mctpRequester.sendRecvMsg(
+ eid, request,
+ [weak{weak_from_this()}](const std::error_code& ec,
+ std::span<const uint8_t> buffer) {
+ std::shared_ptr<NvidiaPcieInterface> self = weak.lock();
+ if (!self)
+ {
+ lg2::error(
+ "Invalid reference to NvidiaPcieInterface for EID {EID}",
+ "EID", self->eid);
+ return;
+ }
+ self->processResponse(ec, buffer);
+ });
+}
diff --git a/src/nvidia-gpu/NvidiaPcieInterface.hpp b/src/nvidia-gpu/NvidiaPcieInterface.hpp
new file mode 100644
index 0000000..7027f89
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaPcieInterface.hpp
@@ -0,0 +1,58 @@
+/*
+ * SPDX-FileCopyrightText: Copyright OpenBMC Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+
+#include <NvidiaGpuMctpVdm.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+struct NvidiaPcieInterface :
+ public std::enable_shared_from_this<NvidiaPcieInterface>
+{
+ public:
+ NvidiaPcieInterface(std::shared_ptr<sdbusplus::asio::connection>& conn,
+ mctp::MctpRequester& mctpRequester,
+ const std::string& name, const std::string& path,
+ uint8_t eid,
+ sdbusplus::asio::object_server& objectServer);
+
+ void update();
+
+ static size_t decodeLinkWidth(uint32_t value);
+
+ private:
+ static constexpr size_t maxTelemetryValues = 64;
+
+ void processResponse(const std::error_code& ec,
+ std::span<const uint8_t> response);
+
+ static std::string mapPcieGeneration(uint32_t value);
+
+ uint8_t eid{};
+
+ std::string path;
+
+ std::shared_ptr<sdbusplus::asio::connection> conn;
+
+ mctp::MctpRequester& mctpRequester;
+
+ std::array<uint8_t, sizeof(gpu::QueryScalarGroupTelemetryV2Request)>
+ request{};
+
+ std::shared_ptr<sdbusplus::asio::dbus_interface> pcieDeviceInterface;
+ std::shared_ptr<sdbusplus::asio::dbus_interface> switchInterface;
+
+ std::vector<uint32_t> telemetryValues{maxTelemetryValues};
+};
diff --git a/src/nvidia-gpu/meson.build b/src/nvidia-gpu/meson.build
index b1763c0..44c7773 100644
--- a/src/nvidia-gpu/meson.build
+++ b/src/nvidia-gpu/meson.build
@@ -9,6 +9,8 @@
'NvidiaGpuPowerSensor.cpp',
'NvidiaGpuSensor.cpp',
'NvidiaGpuVoltageSensor.cpp',
+ 'NvidiaPcieDevice.cpp',
+ 'NvidiaPcieInterface.cpp',
'NvidiaSmaDevice.cpp',
'OcpMctpVdm.cpp',
)