nvidia-gpu: add support for communication to the endpoint The commit uses MCTP VDM protocol to read temperature sensor value from the gpu. The MCTP VDM protocol is an extension of the OCP Accelerator Management Interface specification. [1] Tested: Build an image for gb200nvl-obmc machine with the following patches cherry picked. These patches are needed to enable the mctp stack. https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422 Restart the nvidiagpusensor service. ``` root@gb200nvl-obmc:~# systemctl start xyz.openbmc_project.nvidiagpusensor.service ``` The app is detecting entity-manager configuration on gb200nvl-obmc machine. The app is also able to detect all the endpoints from the mctp service dbus tree. The app is reading temperature sensor value from gpu correctly and the temperature sensor is also present on redfish. ``` $ curl -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU { "@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU", "@odata.type": "#Sensor.v1_2_0.Sensor", "Id": "temperature_NVIDIA_GB200_GPU", "Name": "NVIDIA GB200 GPU", "Reading": 36.4375, "ReadingRangeMax": 127.0, "ReadingRangeMin": -128.0, "ReadingType": "Temperature", "ReadingUnits": "Cel", "Status": { "Health": "OK", "State": "Enabled" } }% root@gb200nvl-obmc:~# busctl tree xyz.openbmc_project.GpuSensor └─ /xyz └─ /xyz/openbmc_project └─ /xyz/openbmc_project/sensors └─ /xyz/openbmc_project/sensors/temperature └─ /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU root@gb200nvl-obmc:~# busctl introspect xyz.openbmc_project.GpuSensor /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU NAME TYPE SIGNATURE RESULT/VALUE FLAGS org.freedesktop.DBus.Introspectable interface - - - .Introspect method - s - org.freedesktop.DBus.Peer interface - - - .GetMachineId method - s - .Ping method - - - org.freedesktop.DBus.Properties interface - - - .Get method ss v - .GetAll method s a{sv} - .Set 
method ssv - - .PropertiesChanged signal sa{sv}as - - xyz.openbmc_project.Association.Definitions interface - - - .Associations property a(sss) 1 "chassis" "all_sensors" "/xyz/openbmc… emits-change xyz.openbmc_project.Sensor.Value interface - - - .MaxValue property d 127 emits-change .MinValue property d -128 emits-change .Unit property s "xyz.openbmc_project.Sensor.Value.Unit.… emits-change .Value property d 36.3125 emits-change writable xyz.openbmc_project.Sensor.ValueMutability interface - - - .Mutable property b true emits-change xyz.openbmc_project.State.Decorator.Availability interface - - - .Available property b true emits-change writable xyz.openbmc_project.State.Decorator.OperationalStatus interface - - - .Functional property b true emits-change ``` [1] https://www.opencompute.org/documents/ocp-gpu-accelerator-management-interfaces-v1-pdf Change-Id: Ied938b9e5c19751ee283b4b948e16c905c78fb48 Signed-off-by: Harshit Aghera <haghera@nvidia.com>

commit: 560e6af7b1f74e9c020a0f82817f9d926e0c4f72 [log] [tgz]
author: Harshit Aghera <haghera@nvidia.com> Mon Apr 21 20:04:56 2025 +0530
committer: Ed Tanous <ed@tanous.net> Fri Jun 06 20:47:25 2025 +0000
tree: fd847f6ed009220327e016fd53caa4c4693a4a46
parent: 490fc51e44727704e4402cf94fc20a162e6c09c2 [diff]
diff --git a/src/nvidia-gpu/tests/NvidiaGpuSensorTest.cpp b/src/nvidia-gpu/tests/NvidiaGpuSensorTest.cpp
new file mode 100644
index 0000000..c630ffa
--- /dev/null
+++ b/src/nvidia-gpu/tests/NvidiaGpuSensorTest.cpp

@@ -0,0 +1,500 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "NvidiaGpuMctpVdm.hpp"
+#include "OcpMctpVdm.hpp"
+
+#include <endian.h>
+
+#include <array>
+#include <cerrno>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace ocp_mctp_tests
+{
+
+// Fixture for the OCP accelerator-management MCTP VDM tests. It carries no
+// state and needs no per-test setup; it only groups the tests in one suite.
+class OcpMctpVdmTests : public ::testing::Test
+{};
+
+// Tests for OcpMctpVdm::packHeader function
+// Packing a REQUEST header must succeed and produce the big-endian vendor id,
+// the masked instance id with the request bit set, the OCP type/version byte
+// and the caller-supplied message type.
+TEST_F(OcpMctpVdmTests, PackHeaderRequestSuccess)
+{
+    namespace oam = ocp::accelerator_management;
+
+    const uint16_t vendorId = 0x1234;
+
+    oam::BindingPciVidInfo info{};
+    info.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::REQUEST);
+    info.instance_id = 5;
+    info.msg_type = 0x7E;
+
+    oam::BindingPciVid packed{};
+    const int rc = oam::packHeader(vendorId, info, packed);
+
+    EXPECT_EQ(rc, 0);
+    EXPECT_EQ(packed.pci_vendor_id, htobe16(vendorId));
+    EXPECT_EQ(packed.instance_id & oam::instanceIdBitMask, 5);
+    EXPECT_NE(packed.instance_id & oam::requestBitMask, 0);
+    // Low nibble carries the OCP version, high nibble the OCP type
+    EXPECT_EQ(packed.ocp_version & 0x0F, oam::ocpVersion);
+    EXPECT_EQ((packed.ocp_version & 0xF0) >> oam::ocpTypeBitOffset,
+              oam::ocpType);
+    EXPECT_EQ(packed.ocp_accelerator_management_msg_type, 0x7E);
+}
+
+// Packing a RESPONSE header must succeed and leave the request bit clear while
+// still carrying the vendor id, instance id, OCP type/version and message type.
+TEST_F(OcpMctpVdmTests, PackHeaderResponseSuccess)
+{
+    namespace oam = ocp::accelerator_management;
+
+    const uint16_t vendorId = 0x1234;
+
+    oam::BindingPciVidInfo info{};
+    info.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    info.instance_id = 10;
+    info.msg_type = 0x7E;
+
+    oam::BindingPciVid packed{};
+    const int rc = oam::packHeader(vendorId, info, packed);
+
+    EXPECT_EQ(rc, 0);
+    EXPECT_EQ(packed.pci_vendor_id, htobe16(vendorId));
+    EXPECT_EQ(packed.instance_id & oam::instanceIdBitMask, 10);
+    EXPECT_EQ(packed.instance_id & oam::requestBitMask, 0);
+    // Low nibble carries the OCP version, high nibble the OCP type
+    EXPECT_EQ(packed.ocp_version & 0x0F, oam::ocpVersion);
+    EXPECT_EQ((packed.ocp_version & 0xF0) >> oam::ocpTypeBitOffset,
+              oam::ocpType);
+    EXPECT_EQ(packed.ocp_accelerator_management_msg_type, 0x7E);
+}
+
+// packHeader must reject an accelerator-management message type it does not
+// recognise by returning EINVAL.
+TEST_F(OcpMctpVdmTests, PackHeaderInvalidMessageType)
+{
+    namespace oam = ocp::accelerator_management;
+
+    const uint16_t vendorId = 0x1234;
+
+    oam::BindingPciVidInfo info{};
+    info.ocp_accelerator_management_msg_type = 3; // Invalid message type
+    info.instance_id = 5;
+    info.msg_type = 0x7E;
+
+    oam::BindingPciVid packed{};
+    const int rc = oam::packHeader(vendorId, info, packed);
+
+    EXPECT_EQ(rc, EINVAL);
+}
+
+// packHeader must reject an instance id beyond the valid range by returning
+// EINVAL.
+TEST_F(OcpMctpVdmTests, PackHeaderInvalidInstanceId)
+{
+    namespace oam = ocp::accelerator_management;
+
+    const uint16_t vendorId = 0x1234;
+
+    oam::BindingPciVidInfo info{};
+    info.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::REQUEST);
+    info.instance_id = 32; // Out of range (0-31 valid)
+    info.msg_type = 0x7E;
+
+    oam::BindingPciVid packed{};
+    const int rc = oam::packHeader(vendorId, info, packed);
+
+    EXPECT_EQ(rc, EINVAL);
+}
+
+// Tests for OcpMctpVdm::decodeReasonCodeAndCC function
+// With a SUCCESS completion code the decoder must report a reason code of 0,
+// even though the encoded message carries a non-zero reason code.
+TEST_F(OcpMctpVdmTests, DecodeReasonCodeAndCCSuccessCase)
+{
+    namespace oam = ocp::accelerator_management;
+
+    oam::CommonNonSuccessResponse wire{};
+    wire.command = 0x42;
+    wire.completion_code = static_cast<uint8_t>(oam::CompletionCode::SUCCESS);
+    wire.reason_code = htole16(0x1234);
+
+    // Serialise the struct into the byte buffer handed to the decoder
+    std::array<uint8_t, sizeof(wire)> raw{};
+    std::memcpy(raw.data(), &wire, sizeof(wire));
+
+    oam::CompletionCode decodedCc{};
+    uint16_t decodedReason{};
+    const int rc = oam::decodeReasonCodeAndCC(raw, decodedCc, decodedReason);
+
+    EXPECT_EQ(rc, 0);
+    EXPECT_EQ(decodedCc, oam::CompletionCode::SUCCESS);
+    EXPECT_EQ(decodedReason, 0); // Should be 0 for SUCCESS
+}
+
+TEST_F(OcpMctpVdmTests, DecodeReasonCodeAndCCErrorCase)
+{
+    ocp::accelerator_management::CommonNonSuccessResponse response{};
+    response.command = 0x42;
+    response.completion_code = static_cast<uint8_t>(
+        ocp::accelerator_management::CompletionCode::ERROR);
+    response.reason_code = htole16(0x5678);
+
+    ocp::accelerator_management::CompletionCode cc{};
+    uint16_t reasonCode{};
+
+    std::array<uint8_t, sizeof(response)> buf{};
+    std::memcpy(buf.data(), &response, sizeof(response));
+
+    int result =
+        ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
+
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(cc, ocp::accelerator_management::CompletionCode::ERROR);
+    EXPECT_EQ(reasonCode, 0x5678);
+}
+
+} // namespace ocp_mctp_tests
+
+namespace gpu_mctp_tests
+{
+
+// Fixture for the NVIDIA GPU MCTP VDM tests. It carries no state and needs no
+// per-test setup; it only groups the tests in one suite.
+class GpuMctpVdmTests : public ::testing::Test
+{};
+
+// Tests for GpuMctpVdm::packHeader function
+// gpu::packHeader must fill in the NVIDIA PCI vendor id on its own and
+// otherwise pack the header the same way as the generic OCP packHeader.
+TEST_F(GpuMctpVdmTests, PackHeaderSuccess)
+{
+    namespace oam = ocp::accelerator_management;
+
+    oam::BindingPciVidInfo info{};
+    info.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::REQUEST);
+    info.instance_id = 5;
+    info.msg_type = 0x7E;
+
+    oam::BindingPciVid packed{};
+    const int rc = gpu::packHeader(info, packed);
+
+    EXPECT_EQ(rc, 0);
+    EXPECT_EQ(packed.pci_vendor_id, htobe16(gpu::nvidiaPciVendorId));
+    EXPECT_EQ(packed.instance_id & oam::instanceIdBitMask, 5);
+    EXPECT_NE(packed.instance_id & oam::requestBitMask, 0);
+    // Low nibble carries the OCP version, high nibble the OCP type
+    EXPECT_EQ(packed.ocp_version & 0x0F, oam::ocpVersion);
+    EXPECT_EQ((packed.ocp_version & 0xF0) >> oam::ocpTypeBitOffset,
+              oam::ocpType);
+    EXPECT_EQ(packed.ocp_accelerator_management_msg_type, 0x7E);
+}
+
+// Tests for GpuMctpVdm::encodeQueryDeviceIdentificationRequest function
+// Encoding a Query Device Identification request must succeed and produce a
+// request header for the device-capability-discovery message type, followed by
+// the command byte and an empty payload.
+TEST_F(GpuMctpVdmTests, EncodeQueryDeviceIdentificationRequestSuccess)
+{
+    const uint8_t instanceId = 3;
+    std::vector<uint8_t> buf(256);
+
+    int result = gpu::encodeQueryDeviceIdentificationRequest(instanceId, buf);
+
+    EXPECT_EQ(result, 0);
+
+    // Copy the encoded bytes back into the request struct for inspection
+    gpu::QueryDeviceIdentificationRequest request{};
+    std::memcpy(&request, buf.data(), sizeof(request));
+
+    EXPECT_EQ(request.hdr.msgHdr.hdr.pci_vendor_id,
+              htobe16(gpu::nvidiaPciVendorId));
+    EXPECT_EQ(request.hdr.msgHdr.hdr.instance_id &
+                  ocp::accelerator_management::instanceIdBitMask,
+              instanceId & ocp::accelerator_management::instanceIdBitMask);
+    EXPECT_NE(request.hdr.msgHdr.hdr.instance_id &
+                  ocp::accelerator_management::requestBitMask,
+              0);
+    // Mirrors the message-type check in the temperature request test so a
+    // wrong message type in the encoder does not go unnoticed.
+    EXPECT_EQ(
+        request.hdr.msgHdr.hdr.ocp_accelerator_management_msg_type,
+        static_cast<uint8_t>(gpu::MessageType::DEVICE_CAPABILITY_DISCOVERY));
+
+    // Verify request data
+    EXPECT_EQ(request.hdr.command,
+              static_cast<uint8_t>(gpu::DeviceCapabilityDiscoveryCommands::
+                                       QUERY_DEVICE_IDENTIFICATION));
+    EXPECT_EQ(request.hdr.data_size, 0);
+}
+
+// Tests for GpuMctpVdm::decodeQueryDeviceIdentificationResponse function
+// Decoding a well-formed SUCCESS response must yield the device identification
+// and device instance carried in the payload, with a reason code of 0.
+TEST_F(GpuMctpVdmTests, DecodeQueryDeviceIdentificationResponseSuccess)
+{
+    namespace oam = ocp::accelerator_management;
+
+    // Create a mock successful response
+    gpu::QueryDeviceIdentificationResponse response{};
+    oam::BindingPciVidInfo headerInfo{};
+    headerInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    headerInfo.instance_id = 3;
+    headerInfo.msg_type =
+        static_cast<uint8_t>(gpu::MessageType::DEVICE_CAPABILITY_DISCOVERY);
+
+    // Stop here if the fixture header cannot be packed: decoding a
+    // half-built message would make the checks below meaningless.
+    ASSERT_EQ(gpu::packHeader(headerInfo, response.hdr.msgHdr.hdr), 0);
+
+    // Populate response data
+    response.hdr.command = static_cast<uint8_t>(
+        gpu::DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
+    response.hdr.completion_code =
+        static_cast<uint8_t>(oam::CompletionCode::SUCCESS);
+    response.hdr.reserved = 0;
+    response.hdr.data_size =
+        htole16(2); // Size of device_identification + instance_id
+    response.device_identification =
+        static_cast<uint8_t>(gpu::DeviceIdentification::DEVICE_GPU);
+    response.instance_id = 7;
+
+    std::vector<uint8_t> buf(sizeof(response));
+    std::memcpy(buf.data(), &response, sizeof(response));
+
+    // Test decoding
+    oam::CompletionCode cc{};
+    uint16_t reasonCode{};
+    uint8_t deviceIdentification{};
+    uint8_t deviceInstance{};
+
+    int result = gpu::decodeQueryDeviceIdentificationResponse(
+        buf, cc, reasonCode, deviceIdentification, deviceInstance);
+
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(cc, oam::CompletionCode::SUCCESS);
+    EXPECT_EQ(reasonCode, 0);
+    EXPECT_EQ(deviceIdentification,
+              static_cast<uint8_t>(gpu::DeviceIdentification::DEVICE_GPU));
+    EXPECT_EQ(deviceInstance, 7);
+}
+
+// Decoding a non-success response must still return 0 and surface the
+// completion code and reason code reported by the device.
+TEST_F(GpuMctpVdmTests, DecodeQueryDeviceIdentificationResponseError)
+{
+    namespace oam = ocp::accelerator_management;
+
+    // Create a mock error response
+    oam::CommonNonSuccessResponse response{};
+    oam::BindingPciVidInfo headerInfo{};
+    headerInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    headerInfo.instance_id = 3;
+    headerInfo.msg_type =
+        static_cast<uint8_t>(gpu::MessageType::DEVICE_CAPABILITY_DISCOVERY);
+
+    // Stop here if the fixture header cannot be packed: decoding a
+    // half-built message would make the checks below meaningless.
+    ASSERT_EQ(gpu::packHeader(headerInfo, response.msgHdr.hdr), 0);
+
+    // Populate response data
+    response.command = static_cast<uint8_t>(
+        gpu::DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
+    response.completion_code =
+        static_cast<uint8_t>(oam::CompletionCode::ERROR);
+    response.reason_code = htole16(0x1234);
+
+    std::vector<uint8_t> buf(sizeof(response));
+    std::memcpy(buf.data(), &response, sizeof(response));
+
+    // Test decoding
+    oam::CompletionCode cc{};
+    uint16_t reasonCode{};
+    uint8_t deviceIdentification{};
+    uint8_t deviceInstance{};
+
+    int result = gpu::decodeQueryDeviceIdentificationResponse(
+        buf, cc, reasonCode, deviceIdentification, deviceInstance);
+
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(cc, oam::CompletionCode::ERROR);
+    EXPECT_EQ(reasonCode, 0x1234);
+}
+
+// A buffer that holds little more than the message header is too short to be a
+// response; the decoder must reject it with EINVAL.
+TEST_F(GpuMctpVdmTests, DecodeQueryDeviceIdentificationResponseInvalidSize)
+{
+    namespace oam = ocp::accelerator_management;
+
+    // Create a too-small buffer
+    std::vector<uint8_t> buf(sizeof(oam::Message) + 2); // Too small
+
+    // Populate Message header only
+    oam::Message msg{};
+    oam::BindingPciVidInfo headerInfo{};
+    headerInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    headerInfo.instance_id = 3;
+    headerInfo.msg_type =
+        static_cast<uint8_t>(gpu::MessageType::DEVICE_CAPABILITY_DISCOVERY);
+
+    // Stop here if the fixture header cannot be packed, so that the EINVAL
+    // below can only come from the size check under test.
+    ASSERT_EQ(gpu::packHeader(headerInfo, msg.hdr), 0);
+    std::memcpy(buf.data(), &msg, sizeof(msg));
+
+    // Test decoding with insufficient data
+    oam::CompletionCode cc{};
+    uint16_t reasonCode{};
+    uint8_t deviceIdentification{};
+    uint8_t deviceInstance{};
+
+    int result = gpu::decodeQueryDeviceIdentificationResponse(
+        buf, cc, reasonCode, deviceIdentification, deviceInstance);
+
+    EXPECT_EQ(result, EINVAL); // Should indicate error for invalid size
+}
+
+// Tests for GpuMctpVdm::encodeGetTemperatureReadingRequest function
+// Encoding a Get Temperature Reading request must succeed and produce a
+// platform-environmental request header followed by the command byte, a
+// one-byte payload size and the sensor id.
+TEST_F(GpuMctpVdmTests, EncodeGetTemperatureReadingRequestSuccess)
+{
+    namespace oam = ocp::accelerator_management;
+
+    const uint8_t requestInstance = 4;
+    const uint8_t sensor = 0;
+    std::vector<uint8_t> encoded(256);
+
+    const int rc = gpu::encodeGetTemperatureReadingRequest(requestInstance,
+                                                           sensor, encoded);
+    EXPECT_EQ(rc, 0);
+
+    // Copy the encoded bytes back into the request struct for inspection
+    gpu::GetTemperatureReadingRequest decoded{};
+    std::memcpy(&decoded, encoded.data(), sizeof(decoded));
+
+    // Header checks
+    EXPECT_EQ(decoded.hdr.msgHdr.hdr.pci_vendor_id,
+              htobe16(gpu::nvidiaPciVendorId));
+    EXPECT_EQ(decoded.hdr.msgHdr.hdr.instance_id & oam::instanceIdBitMask,
+              requestInstance & oam::instanceIdBitMask);
+    EXPECT_NE(decoded.hdr.msgHdr.hdr.instance_id & oam::requestBitMask, 0);
+    EXPECT_EQ(decoded.hdr.msgHdr.hdr.ocp_accelerator_management_msg_type,
+              static_cast<uint8_t>(gpu::MessageType::PLATFORM_ENVIRONMENTAL));
+
+    // Payload checks
+    EXPECT_EQ(decoded.hdr.command,
+              static_cast<uint8_t>(
+                  gpu::PlatformEnvironmentalCommands::GET_TEMPERATURE_READING));
+    EXPECT_EQ(decoded.hdr.data_size, sizeof(sensor));
+    EXPECT_EQ(decoded.sensor_id, sensor);
+}
+
+// Tests for GpuMctpVdm::decodeGetTemperatureReadingResponse function
+// Decoding a well-formed SUCCESS response must convert the raw reading into
+// degrees Celsius (the test encodes 75.5 as 75.5 * 256) with a reason code of 0.
+TEST_F(GpuMctpVdmTests, DecodeGetTemperatureReadingResponseSuccess)
+{
+    namespace oam = ocp::accelerator_management;
+
+    // Create a mock successful response
+    gpu::GetTemperatureReadingResponse response{};
+    oam::BindingPciVidInfo headerInfo{};
+    headerInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    headerInfo.instance_id = 4;
+    headerInfo.msg_type =
+        static_cast<uint8_t>(gpu::MessageType::PLATFORM_ENVIRONMENTAL);
+
+    // Stop here if the fixture header cannot be packed: decoding a
+    // half-built message would make the checks below meaningless.
+    ASSERT_EQ(gpu::packHeader(headerInfo, response.hdr.msgHdr.hdr), 0);
+
+    // Populate response data
+    response.hdr.command = static_cast<uint8_t>(
+        gpu::PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
+    response.hdr.completion_code =
+        static_cast<uint8_t>(oam::CompletionCode::SUCCESS);
+    response.hdr.reserved = 0;
+    response.hdr.data_size = htole16(sizeof(int32_t));
+
+    // Set a temperature value of 75.5°C (75.5 * 256 = 19328)
+    response.reading = htole32(19328);
+
+    std::vector<uint8_t> buf(sizeof(response));
+    std::memcpy(buf.data(), &response, sizeof(response));
+
+    // Test decoding
+    oam::CompletionCode cc{};
+    uint16_t reasonCode{};
+    double temperatureReading{};
+
+    int result = gpu::decodeGetTemperatureReadingResponse(
+        buf, cc, reasonCode, temperatureReading);
+
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(cc, oam::CompletionCode::SUCCESS);
+    EXPECT_EQ(reasonCode, 0);
+    EXPECT_NEAR(temperatureReading, 75.5, 0.01);
+}
+
+// Decoding a non-success response must still return 0 and surface the
+// completion code and reason code reported by the device.
+TEST_F(GpuMctpVdmTests, DecodeGetTemperatureReadingResponseError)
+{
+    namespace oam = ocp::accelerator_management;
+
+    // Populate error response data
+    oam::CommonNonSuccessResponse errorResponse{};
+    oam::BindingPciVidInfo headerInfo{};
+    headerInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    headerInfo.instance_id = 3;
+    // The command below is a platform-environmental command, so the header
+    // carries the matching message type (as the other temperature tests do).
+    headerInfo.msg_type =
+        static_cast<uint8_t>(gpu::MessageType::PLATFORM_ENVIRONMENTAL);
+
+    // Stop here if the fixture header cannot be packed: decoding a
+    // half-built message would make the checks below meaningless.
+    ASSERT_EQ(gpu::packHeader(headerInfo, errorResponse.msgHdr.hdr), 0);
+
+    errorResponse.command = static_cast<uint8_t>(
+        gpu::PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
+    errorResponse.completion_code =
+        static_cast<uint8_t>(oam::CompletionCode::ERR_NOT_READY);
+    errorResponse.reason_code = htole16(0x4321);
+
+    std::vector<uint8_t> buf(sizeof(errorResponse));
+    std::memcpy(buf.data(), &errorResponse, sizeof(errorResponse));
+
+    // Test decoding
+    oam::CompletionCode cc{};
+    uint16_t reasonCode{};
+    double temperatureReading{};
+
+    int result = gpu::decodeGetTemperatureReadingResponse(
+        buf, cc, reasonCode, temperatureReading);
+
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(cc, oam::CompletionCode::ERR_NOT_READY);
+    EXPECT_EQ(reasonCode, 0x4321);
+}
+
+// A SUCCESS response whose data_size field does not match the size of the
+// reading must be rejected with EINVAL.
+TEST_F(GpuMctpVdmTests, DecodeGetTemperatureReadingResponseInvalidSize)
+{
+    namespace oam = ocp::accelerator_management;
+
+    // Create a mock response with invalid data_size
+    gpu::GetTemperatureReadingResponse response{};
+    oam::BindingPciVidInfo headerInfo{};
+    headerInfo.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::RESPONSE);
+    headerInfo.instance_id = 4;
+    headerInfo.msg_type =
+        static_cast<uint8_t>(gpu::MessageType::PLATFORM_ENVIRONMENTAL);
+
+    // Stop here if the fixture header cannot be packed, so that the EINVAL
+    // below can only come from the data_size check under test.
+    ASSERT_EQ(gpu::packHeader(headerInfo, response.hdr.msgHdr.hdr), 0);
+
+    response.hdr.command = static_cast<uint8_t>(
+        gpu::PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
+    response.hdr.completion_code =
+        static_cast<uint8_t>(oam::CompletionCode::SUCCESS);
+    response.hdr.reserved = 0;
+    response.hdr.data_size = htole16(1); // Invalid - should be sizeof(int32_t)
+    response.reading = htole32(19328);
+
+    std::vector<uint8_t> buf(sizeof(response));
+    std::memcpy(buf.data(), &response, sizeof(response));
+
+    // Test decoding
+    oam::CompletionCode cc{};
+    uint16_t reasonCode{};
+    double temperatureReading{};
+
+    int result = gpu::decodeGetTemperatureReadingResponse(
+        buf, cc, reasonCode, temperatureReading);
+
+    EXPECT_EQ(result, EINVAL); // Should indicate error for invalid data size
+}
+
+} // namespace gpu_mctp_tests
+
+// Test-runner entry point: hands the command line to GoogleTest, runs every
+// registered test and returns the aggregate result as the exit status.
+int main(int argc, char** argv)
+{
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exitStatus = RUN_ALL_TESTS();
+    return exitStatus;
+}

diff --git a/src/nvidia-gpu/tests/meson.build b/src/nvidia-gpu/tests/meson.build
new file mode 100644
index 0000000..4923868
--- /dev/null
+++ b/src/nvidia-gpu/tests/meson.build

@@ -0,0 +1,29 @@
+# Look up GoogleTest and GoogleMock. Both lookups are optional
+# (required: false) so a missing package falls through to the subproject
+# build below.
+gtest_dep = dependency('gtest', main: true, disabler: true, required: false)
+gmock_dep = dependency('gmock', disabler: true, required: false)
+if not gtest_dep.found() or not gmock_dep.found()
+    # At least one package is missing: build googletest through the CMake
+    # subproject and assemble the gtest dependency from threads, gtest and
+    # gtest_main.
+    gtest_proj = import('cmake').subproject('googletest', required: true)
+    gtest_dep = declare_dependency(
+        dependencies: [
+            dependency('threads'),
+            gtest_proj.dependency('gtest'),
+            gtest_proj.dependency('gtest_main'),
+        ],
+    )
+    gmock_dep = gtest_proj.dependency('gmock')
+endif
+
+# NOTE(review): gpusensor_include_dir is not defined in this file; presumably
+# it comes from a parent meson.build - confirm.
+gpusensor_test_include_dirs = [gpusensor_include_dir]
+
+# Unit-test executable: the test source is compiled together with the two
+# MCTP VDM implementation files it exercises.
+test(
+    'nvidiagpusensor_test',
+    executable(
+        'nvidiagpusensor_test',
+        'NvidiaGpuSensorTest.cpp',
+        '../OcpMctpVdm.cpp',
+        '../NvidiaGpuMctpVdm.cpp',
+        implicit_include_directories: false,
+        include_directories: gpusensor_test_include_dirs,
+        dependencies: [gtest_dep, gmock_dep],
+    ),
+    # Run the test from this source directory
+    workdir: meson.current_source_dir(),
+)
commit	560e6af7b1f74e9c020a0f82817f9d926e0c4f72	[log] [tgz]
author	Harshit Aghera <haghera@nvidia.com>	Mon Apr 21 20:04:56 2025 +0530
committer	Ed Tanous <ed@tanous.net>	Fri Jun 06 20:47:25 2025 +0000
tree	fd847f6ed009220327e016fd53caa4c4693a4a46
parent	490fc51e44727704e4402cf94fc20a162e6c09c2 [diff]