nvidia-gpu: add thresholds support to TLimit

This patch introduces support for retrieving GPU TLimit thresholds
directly from the GPU device. TLimit Temperature represents the
difference in degrees Celsius between the current GPU temperature and
the initial throttle threshold. The patch also enables the extraction of
three critical throttle thresholds — Warning Low, Critical Low, and Hard
Shutdown Low — from the GPU hardware.

Tested: Built an image for the gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP stack.

https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422

```
$ curl -s -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1
{
  "@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1",
  "@odata.type": "#Sensor.v1_2_0.Sensor",
  "Id": "temperature_NVIDIA_GB200_GPU_0_TEMP_1",
  "Name": "NVIDIA GB200 GPU 0 TEMP 1",
  "Reading": 57.3984375,
  "ReadingRangeMax": 127.0,
  "ReadingRangeMin": -128.0,
  "ReadingType": "Temperature",
  "ReadingUnits": "Cel",
  "Status": {
    "Health": "OK",
    "State": "Enabled"
  },
  "Thresholds": {
    "LowerCaution": {
      "Reading": 0.0
    },
    "LowerCritical": {
      "Reading": 0.0
    },
    "LowerFatal": {
      "Reading": 0.0
    }
  }
}
```

Change-Id: I6f2ff2652ce9246287f9bd63c4297d9ad3229963
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
index 17f71e0..7a48b30 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
@@ -152,5 +152,70 @@
 
     return 0;
 }
+
+// Serialize a READ_THERMAL_PARAMETERS request for `sensorId` into `buf`.
+// Returns 0 on success, EINVAL when `buf` is smaller than the request
+// structure, or the non-zero code reported by packHeader().
+int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
+                                       std::span<uint8_t> buf)
+{
+    namespace oam = ocp::accelerator_management;
+    if (sizeof(ReadThermalParametersRequest) > buf.size())
+    {
+        return EINVAL;
+    }
+
+    // Only the bits selected by instanceIdBitMask of instanceId are kept.
+    oam::BindingPciVidInfo info{};
+    info.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::REQUEST);
+    info.instance_id = instanceId & oam::instanceIdBitMask;
+    info.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
+
+    // The request is written in place over the caller's buffer.
+    auto* req = reinterpret_cast<ReadThermalParametersRequest*>(buf.data());
+    if (const auto rc = packHeader(info, req->hdr.msgHdr.hdr); rc != 0)
+    {
+        return rc;
+    }
+    req->hdr.command = static_cast<uint8_t>(
+        PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS);
+    req->hdr.data_size = sizeof(sensorId);
+    req->sensor_id = sensorId;
+    return 0;
+}
+
+// Parse a READ_THERMAL_PARAMETERS response held in `buf`.
+// `cc` and `reasonCode` are handed to decodeReasonCodeAndCC(); when it fails
+// or `cc` is not SUCCESS its result is returned unchanged and `threshold` is
+// left untouched, so a 0 return alone does not mean `threshold` was set.
+// Returns EINVAL for a short buffer or an unexpected payload size, else 0.
+int decodeReadThermalParametersResponse(
+    std::span<const uint8_t> buf,
+    ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
+    int32_t& threshold)
+{
+    namespace oam = ocp::accelerator_management;
+    const auto status = oam::decodeReasonCodeAndCC(buf, cc, reasonCode);
+    if (status != 0 || cc != oam::CompletionCode::SUCCESS)
+    {
+        return status;
+    }
+    if (sizeof(ReadThermalParametersResponse) > buf.size())
+    {
+        return EINVAL;
+    }
+
+    const auto* reply =
+        reinterpret_cast<const ReadThermalParametersResponse*>(buf.data());
+    // The payload length is converted with le16toh() before being checked.
+    const uint16_t payloadSize = le16toh(reply->hdr.data_size);
+    if (payloadSize != sizeof(int32_t))
+    {
+        return EINVAL;
+    }
+    threshold = le32toh(reply->threshold);
+    return 0;
+}
 // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
 } // namespace gpu