Nvidia-gpu: Encode/decode APIs for GPU Inventory
Add support for encoding and decoding the Get Inventory Information
command. The command fetches inventory properties such as Serial
Number, Part Number, and Marketing Name for NVIDIA GPUs.
Tested:
- Added new unit tests for the encode and decode APIs; all pass.
Change-Id: I9e5afbe356b64fd7ae4f7a2a65043f3eeffa3807
Signed-off-by: Rohit PAI <ropai@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
index 80f5bfe..ebfa76b 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
@@ -13,6 +13,7 @@
#include <cerrno>
#include <cstdint>
#include <span>
+#include <vector>
namespace gpu
{
@@ -412,5 +413,84 @@
return 0;
}
+
+// Encode a Get Inventory Information request for propertyId into buf.
+//
+// Returns EINVAL when buf cannot hold a GetInventoryInformationRequest, the
+// non-zero packHeader() result when header packing fails, and 0 otherwise.
+int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
+                                         std::span<uint8_t> buf)
+{
+    if (buf.size() < sizeof(GetInventoryInformationRequest))
+    {
+        return EINVAL;
+    }
+
+    // Describe the message header before touching the output buffer.
+    ocp::accelerator_management::BindingPciVidInfo hdrInfo{};
+    hdrInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
+    // Keep only the bits selected by instanceIdBitMask.
+    hdrInfo.instance_id = instanceId &
+                          ocp::accelerator_management::instanceIdBitMask;
+    hdrInfo.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
+
+    auto* req = reinterpret_cast<GetInventoryInformationRequest*>(buf.data());
+
+    const auto status = packHeader(hdrInfo, req->hdr.msgHdr.hdr);
+    if (status != 0)
+    {
+        return status;
+    }
+
+    // The payload consists of the single property-id byte.
+    req->hdr.data_size = sizeof(propertyId);
+    req->hdr.command = static_cast<uint8_t>(
+        PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
+    req->property_id = propertyId;
+
+    return 0;
+}
+
+// Decode a Get Inventory Information response.
+//
+// buf        - raw response message, beginning with the common response
+//              header
+// cc         - completion code extracted by decodeReasonCodeAndCC()
+// reasonCode - reason code extracted by decodeReasonCodeAndCC()
+// propertyId - property that was requested; selects how the payload bytes
+//              are interpreted
+// value      - receives the decoded property; written only when the payload
+//              was decoded successfully
+//
+// Returns the decodeReasonCodeAndCC() result when that call fails or when cc
+// is not SUCCESS (value is left untouched), EINVAL when the payload is
+// missing, oversized, truncated, or propertyId is not supported, and 0 on a
+// successful decode.
+int decodeGetInventoryInformationResponse(
+    std::span<const uint8_t> buf,
+    ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
+    InventoryPropertyId propertyId, InventoryValue& value)
+{
+    auto rc =
+        ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
+    if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
+    {
+        return rc;
+    }
+
+    constexpr auto headerSize =
+        sizeof(ocp::accelerator_management::CommonResponse);
+
+    // Expect at least one byte of inventory response data after common response
+    if (buf.size() < (headerSize + 1))
+    {
+        return EINVAL;
+    }
+
+    const auto* response =
+        reinterpret_cast<const GetInventoryInformationResponse*>(buf.data());
+    const uint16_t dataSize = le16toh(response->hdr.data_size);
+
+    if (dataSize == 0 || dataSize > maxInventoryDataSize)
+    {
+        return EINVAL;
+    }
+
+    // data_size is taken from the message itself, so it must also be checked
+    // against the number of payload bytes actually present in buf; otherwise
+    // a short message announcing a large data_size would make the copies
+    // below read past the end of the buffer. The subtraction cannot wrap
+    // because buf.size() > headerSize was established above.
+    if (dataSize > (buf.size() - headerSize))
+    {
+        return EINVAL;
+    }
+
+    const uint8_t* dataPtr = response->data.data();
+
+    switch (propertyId)
+    {
+        // Text properties: copy exactly dataSize bytes into a std::string.
+        case InventoryPropertyId::BOARD_PART_NUMBER:
+        case InventoryPropertyId::SERIAL_NUMBER:
+        case InventoryPropertyId::MARKETING_NAME:
+        case InventoryPropertyId::DEVICE_PART_NUMBER:
+            value =
+                std::string(reinterpret_cast<const char*>(dataPtr), dataSize);
+            break;
+        // Binary property: keep the raw bytes.
+        case InventoryPropertyId::DEVICE_GUID:
+            value = std::vector<uint8_t>(dataPtr, dataPtr + dataSize);
+            break;
+        // Every other property id is not decoded by this function.
+        default:
+            return EINVAL;
+    }
+    return 0;
+}
+
// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
} // namespace gpu
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
index 09df4e6..19b9929 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
@@ -8,12 +8,20 @@
#include <OcpMctpVdm.hpp>
+#include <array>
+#include <cstddef>
#include <cstdint>
#include <span>
+#include <string>
+#include <variant>
+#include <vector>
namespace gpu
{
+// Decoded inventory property value: decodeGetInventoryInformationResponse()
+// stores text properties as std::string and DEVICE_GUID as raw bytes.
+using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;
+// Capacity of GetInventoryInformationResponse::data; the decoder rejects a
+// response whose data_size exceeds this value.
+constexpr size_t maxInventoryDataSize = 256;
+
constexpr uint16_t nvidiaPciVendorId = 0x10de;
enum class MessageType : uint8_t
@@ -33,6 +41,7 @@
READ_THERMAL_PARAMETERS = 0x02,
GET_CURRENT_POWER_DRAW = 0x03,
GET_CURRENT_ENERGY_COUNTER = 0x06,
+ GET_INVENTORY_INFORMATION = 0x0C,
GET_VOLTAGE = 0x0F,
};
@@ -42,6 +51,47 @@
DEVICE_SMA = 5
};
+// Property identifiers for the Get Inventory Information command. A value is
+// cast to uint8_t and passed as the propertyId argument of
+// encodeGetInventoryInformationRequest(), which writes it into the request's
+// property_id field.
+//
+// decodeGetInventoryInformationResponse() decodes only BOARD_PART_NUMBER,
+// SERIAL_NUMBER, MARKETING_NAME, DEVICE_PART_NUMBER (as strings) and
+// DEVICE_GUID (as raw bytes); it returns EINVAL for every other id.
+enum class InventoryPropertyId : uint8_t
+{
+ BOARD_PART_NUMBER = 0,
+ SERIAL_NUMBER = 1,
+ MARKETING_NAME = 2,
+ DEVICE_PART_NUMBER = 3,
+ FRU_PART_NUMBER = 4,
+ MEMORY_VENDOR = 5,
+ MEMORY_PART_NUMBER = 6,
+ MAX_MEMORY_CAPACITY = 7,
+ BUILD_DATE = 8,
+ FIRMWARE_VERSION = 9,
+ DEVICE_GUID = 10,
+ INFOROM_VERSION = 11,
+ PRODUCT_LENGTH = 12,
+ PRODUCT_WIDTH = 13,
+ PRODUCT_HEIGHT = 14,
+ RATED_DEVICE_POWER_LIMIT = 15,
+ MIN_DEVICE_POWER_LIMIT = 16,
+ MAX_DEVICE_POWER_LIMIT = 17,
+ MAX_MODULE_POWER_LIMIT = 18,
+ MIN_MODULE_POWER_LIMIT = 19,
+ RATED_MODULE_POWER_LIMIT = 20,
+ DEFAULT_BOOST_CLOCKS = 21,
+ DEFAULT_BASE_CLOCKS = 22,
+ DEFAULT_EDPP_SCALING = 23,
+ MIN_EDPP_SCALING = 24,
+ MAX_EDPP_SCALING = 25,
+ MIN_GRAPHICS_CLOCK = 26,
+ MAX_GRAPHICS_CLOCK = 27,
+ MIN_MEMORY_CLOCK = 28,
+ MAX_MEMORY_CLOCK = 29,
+ INFINIBAND_GUID = 30,
+ RACK_GUID = 31,
+ RACK_SLOT_NUMBER = 32,
+ COMPUTE_SLOT_INDEX = 33,
+ NODE_INDEX = 34,
+ GPU_NODE_ID = 35,
+ NVLINK_PEER_TYPE = 36
+};
+
struct QueryDeviceIdentificationRequest
{
ocp::accelerator_management::CommonRequest hdr;
@@ -105,6 +155,18 @@
uint32_t voltage;
} __attribute__((packed));
+// Layout of a Get Inventory Information request: the common request header
+// followed by one byte naming the property to fetch. The struct is packed
+// and is overlaid directly on the message buffer by the encoder.
+struct GetInventoryInformationRequest
+{
+ ocp::accelerator_management::CommonRequest hdr;
+ // Requested property; see InventoryPropertyId.
+ uint8_t property_id;
+} __attribute__((packed));
+
+// Layout of a Get Inventory Information response: the common response header
+// followed by the property payload. The struct is packed and is overlaid
+// directly on the received message buffer by the decoder.
+struct GetInventoryInformationResponse
+{
+ ocp::accelerator_management::CommonResponse hdr;
+ // Property payload; the decoder reads hdr.data_size (little-endian) bytes
+ // from here, up to maxInventoryDataSize.
+ std::array<uint8_t, maxInventoryDataSize> data;
+} __attribute__((packed));
+
int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
ocp::accelerator_management::BindingPciVid& msg);
@@ -155,4 +217,13 @@
int decodeGetVoltageResponse(std::span<const uint8_t> buf,
ocp::accelerator_management::CompletionCode& cc,
uint16_t& reasonCode, uint32_t& voltage);
+
+// Encode a Get Inventory Information request for propertyId into buf.
+// Returns EINVAL when buf is smaller than GetInventoryInformationRequest,
+// the non-zero packHeader() result when header packing fails, and 0 on
+// success.
+int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
+ std::span<uint8_t> buf);
+
+// Decode a Get Inventory Information response held in buf.
+// cc and reasonCode are filled in by decodeReasonCodeAndCC(). If that call
+// fails or cc is not SUCCESS, its result is returned and value is left
+// untouched. Otherwise value receives a std::string for the part-number,
+// serial-number and marketing-name properties or raw bytes for DEVICE_GUID,
+// and 0 is returned. EINVAL is returned for a missing or oversized payload
+// and for any other propertyId.
+int decodeGetInventoryInformationResponse(
+ std::span<const uint8_t> buf,
+ ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
+ InventoryPropertyId propertyId, InventoryValue& value);
+
} // namespace gpu
diff --git a/src/nvidia-gpu/tests/NvidiaDeviceInventoryMctpVdm.cpp b/src/nvidia-gpu/tests/NvidiaDeviceInventoryMctpVdm.cpp
new file mode 100644
index 0000000..09bea75
--- /dev/null
+++ b/src/nvidia-gpu/tests/NvidiaDeviceInventoryMctpVdm.cpp
@@ -0,0 +1,115 @@
+#include "NvidiaGpuMctpVdm.hpp"
+#include "OcpMctpVdm.hpp"
+
+#include <endian.h>
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+using namespace gpu;
+// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
+// The encoder must succeed on a sufficiently large buffer and write the
+// command code, the payload size and the requested property id.
+TEST(NvidiaGpuMctpVdmTest, EncodeGetInventoryInformationRequest)
+{
+    const uint8_t instanceId = 1;
+    const auto propertyId =
+        static_cast<uint8_t>(InventoryPropertyId::BOARD_PART_NUMBER);
+    std::array<uint8_t, 256> buf{};
+
+    const auto rc =
+        encodeGetInventoryInformationRequest(instanceId, propertyId, buf);
+    EXPECT_EQ(rc, 0);
+
+    // Overlay the request layout on the encoded bytes to inspect the fields.
+    auto* request =
+        reinterpret_cast<GetInventoryInformationRequest*>(buf.data());
+    const auto expectedCommand = static_cast<uint8_t>(
+        PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
+
+    EXPECT_EQ(request->hdr.command, expectedCommand);
+    EXPECT_EQ(request->hdr.data_size, sizeof(propertyId));
+    EXPECT_EQ(request->property_id, propertyId);
+}
+
+// A successful response carrying "TEST1" for a string-valued property
+// (BOARD_PART_NUMBER) must decode into the std::string alternative of
+// InventoryValue.
+TEST(NvidiaGpuMctpVdmTest, DecodeInventoryString)
+{
+    std::array<uint8_t, 256> buf{};
+    auto* response =
+        reinterpret_cast<ocp::accelerator_management::CommonResponse*>(
+            buf.data());
+
+    // Fill header
+    response->msgHdr.hdr.pci_vendor_id = htobe16(0x10DE); // NVIDIA vendor ID
+    response->msgHdr.hdr.instance_id = 0x01;              // Instance ID
+    response->msgHdr.hdr.ocp_version = 0x89; // OCP version and type
+    response->msgHdr.hdr.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(
+            ocp::accelerator_management::MessageType::RESPONSE);
+
+    response->command = static_cast<uint8_t>(
+        PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
+    response->completion_code = static_cast<uint8_t>(
+        ocp::accelerator_management::CompletionCode::SUCCESS);
+    response->reserved = 0;
+    response->data_size = htole16(5); // 5 bytes for "TEST1"
+
+    // The payload starts immediately after the common response header.
+    const char* testStr = "TEST1";
+    std::memcpy(
+        buf.data() + sizeof(ocp::accelerator_management::CommonResponse),
+        testStr, 5);
+
+    ocp::accelerator_management::CompletionCode cc =
+        ocp::accelerator_management::CompletionCode::ERROR;
+    uint16_t reasonCode = 0;
+    InventoryValue info;
+
+    auto rc = decodeGetInventoryInformationResponse(
+        buf, cc, reasonCode, InventoryPropertyId::BOARD_PART_NUMBER, info);
+    // The decoder writes info only when it returns 0, so stop on failure
+    // instead of checking outputs that were never produced.
+    ASSERT_EQ(rc, 0);
+    EXPECT_EQ(cc, ocp::accelerator_management::CompletionCode::SUCCESS);
+    EXPECT_EQ(reasonCode, 0);
+    // std::get throws std::bad_variant_access on the wrong alternative, so
+    // the alternative check must be fatal.
+    ASSERT_TRUE(std::holds_alternative<std::string>(info));
+    EXPECT_EQ(std::get<std::string>(info), "TEST1");
+}
+
+// A successful DEVICE_GUID response must decode into the byte-vector
+// alternative of InventoryValue, preserving the payload bytes.
+TEST(NvidiaGpuMctpVdmTest, DecodeInventoryDeviceGuid)
+{
+    std::array<uint8_t, 256> buf{};
+    auto* response =
+        reinterpret_cast<ocp::accelerator_management::CommonResponse*>(
+            buf.data());
+
+    // Fill header
+    response->msgHdr.hdr.pci_vendor_id = htobe16(0x10DE); // NVIDIA vendor ID
+    response->msgHdr.hdr.instance_id = 0x01;              // Instance ID
+    response->msgHdr.hdr.ocp_version = 0x89; // OCP version and type
+    response->msgHdr.hdr.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(
+            ocp::accelerator_management::MessageType::RESPONSE);
+
+    response->command = static_cast<uint8_t>(
+        PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
+    response->completion_code = static_cast<uint8_t>(
+        ocp::accelerator_management::CompletionCode::SUCCESS);
+    response->reserved = 0;
+    response->data_size = htole16(8); // 8 bytes for DEVICE_GUID
+
+    // The payload starts immediately after the common response header.
+    std::vector<uint8_t> dummyGuid = {0xDE, 0xAD, 0xBE, 0xEF,
+                                      0x01, 0x23, 0x45, 0x67};
+    std::memcpy(
+        buf.data() + sizeof(ocp::accelerator_management::CommonResponse),
+        dummyGuid.data(), dummyGuid.size());
+
+    ocp::accelerator_management::CompletionCode cc =
+        ocp::accelerator_management::CompletionCode::ERROR;
+    uint16_t reasonCode = 0;
+    InventoryValue info;
+
+    auto rc = decodeGetInventoryInformationResponse(
+        buf, cc, reasonCode, InventoryPropertyId::DEVICE_GUID, info);
+    // The decoder writes info only when it returns 0, so stop on failure
+    // instead of checking outputs that were never produced.
+    ASSERT_EQ(rc, 0);
+    EXPECT_EQ(cc, ocp::accelerator_management::CompletionCode::SUCCESS);
+    EXPECT_EQ(reasonCode, 0);
+    // std::get throws std::bad_variant_access on the wrong alternative, so
+    // the alternative check must be fatal.
+    ASSERT_TRUE(std::holds_alternative<std::vector<uint8_t>>(info));
+    EXPECT_EQ(std::get<std::vector<uint8_t>>(info), dummyGuid);
+}
+// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
diff --git a/src/nvidia-gpu/tests/meson.build b/src/nvidia-gpu/tests/meson.build
index 4923868..99ed721 100644
--- a/src/nvidia-gpu/tests/meson.build
+++ b/src/nvidia-gpu/tests/meson.build
@@ -19,6 +19,7 @@
executable(
'nvidiagpusensor_test',
'NvidiaGpuSensorTest.cpp',
+ 'NvidiaDeviceInventoryMctpVdm.cpp',
'../OcpMctpVdm.cpp',
'../NvidiaGpuMctpVdm.cpp',
implicit_include_directories: false,