gpu: add TLimit sensor

This commit introduces a new thermal limit (TLimit) sensor for the GPU,
enhancing the existing temperature monitoring capabilities.

Tested.

The TEMP_0 update is disabled while testing this patch as it requires
MCTP request queueing, since the OCP MCTP VDM specification allows at
most one outstanding request to the device. MCTP request queueing is
being introduced by this patch:
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80023

Build an image for the gb200nvl-obmc machine with the following patches
cherry-picked. These patches are needed to enable the MCTP stack.

https://gerrit.openbmc.org/c/openbmc/openbmc/+/79312
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79410
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422

Copy the configuration file to the gb200nvl-obmc machine and restart
the entity-manager service.
```
root@gb200nvl-obmc:~# rm -rf /var/configuration/
root@gb200nvl-obmc:~# systemctl restart xyz.openbmc_project.EntityManager.service
```

Copy the gpusensor app and run it.
```
root@gb200nvl-obmc:~# ./gpusensor
```

```
$ curl -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_TEMP_1
{
  "@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_TEMP_1",
  "@odata.type": "#Sensor.v1_2_0.Sensor",
  "Id": "temperature_NVIDIA_GB200_GPU_TEMP_1",
  "Name": "NVIDIA GB200 GPU TEMP 1",
  "Reading": 47.875,
  "ReadingRangeMax": 127.0,
  "ReadingRangeMin": -128.0,
  "ReadingType": "Temperature",
  "ReadingUnits": "Cel",
  "Status": {
    "Health": "OK",
    "State": "Enabled"
  }
}%

root@gb200nvl-obmc:~# busctl tree xyz.openbmc_project.GpuSensor
└─ /xyz
  └─ /xyz/openbmc_project
    └─ /xyz/openbmc_project/sensors
      └─ /xyz/openbmc_project/sensors/temperature
        ├─ /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU_TEMP_0
        └─ /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU_TEMP_1

root@gb200nvl-obmc:~# busctl introspect xyz.openbmc_project.GpuSensor /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU_TEMP_1
NAME                                                  TYPE      SIGNATURE RESULT/VALUE                             FLAGS
org.freedesktop.DBus.Introspectable                   interface -         -                                        -
.Introspect                                           method    -         s                                        -
org.freedesktop.DBus.Peer                             interface -         -                                        -
.GetMachineId                                         method    -         s                                        -
.Ping                                                 method    -         -                                        -
org.freedesktop.DBus.Properties                       interface -         -                                        -
.Get                                                  method    ss        v                                        -
.GetAll                                               method    s         a{sv}                                    -
.Set                                                  method    ssv       -                                        -
.PropertiesChanged                                    signal    sa{sv}as  -                                        -
xyz.openbmc_project.Association.Definitions           interface -         -                                        -
.Associations                                         property  a(sss)    1 "chassis" "all_sensors" "/xyz/openbmc… emits-change
xyz.openbmc_project.Sensor.Value                      interface -         -                                        -
.MaxValue                                             property  d         127                                      emits-change
.MinValue                                             property  d         -128                                     emits-change
.Unit                                                 property  s         "xyz.openbmc_project.Sensor.Value.Unit.… emits-change
.Value                                                property  d         48                                       emits-change writable
xyz.openbmc_project.Sensor.ValueMutability            interface -         -                                        -
.Mutable                                              property  b         true                                     emits-change
xyz.openbmc_project.State.Decorator.Availability      interface -         -                                        -
.Available                                            property  b         true                                     emits-change writable
xyz.openbmc_project.State.Decorator.OperationalStatus interface -         -                                        -
.Functional                                           property  b         true                                     emits-change
```

Change-Id: Ib8e0ef93a4acbb8870671665b098fb61d0205cb2
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/gpu/GpuDevice.cpp b/src/gpu/GpuDevice.cpp
index dccd730..11423dd 100644
--- a/src/gpu/GpuDevice.cpp
+++ b/src/gpu/GpuDevice.cpp
@@ -6,6 +6,7 @@
 #include "GpuDevice.hpp"
 
 #include "GpuSensor.hpp"
+#include "GpuTLimitSensor.hpp"
 #include "Thresholds.hpp"
 #include "Utils.hpp"
 
@@ -58,13 +59,17 @@
         conn, mctpRequester, name + "_TEMP_0", path, eid, objectServer,
         std::vector<thresholds::Threshold>{}));
 
-    lg2::info("Added GPU Temperature Sensor {NAME} with chassis path: {PATH}.",
-              "NAME", name, "PATH", path);
+    sensors.push_back(std::make_shared<GpuTLimitSensor>(
+        conn, mctpRequester, name + "_TEMP_1", path, eid, objectServer,
+        std::vector<thresholds::Threshold>{}));
+
+    lg2::info("Added GPU {NAME} Sensors with chassis path: {PATH}.", "NAME",
+              name, "PATH", path);
 }
 
 void GpuDevice::read()
 {
-    for ([[maybe_unused]] const auto& sensor : sensors)
+    for (const auto& sensor : sensors)
     {
         sensor->update();
     }
diff --git a/src/gpu/GpuSensor.hpp b/src/gpu/GpuSensor.hpp
index 2961404..c74c57e 100644
--- a/src/gpu/GpuSensor.hpp
+++ b/src/gpu/GpuSensor.hpp
@@ -18,16 +18,6 @@
 #include <vector>
 
 /**
- * @struct DeviceInfo
- * @brief Contains information about a device
- */
-struct DeviceInfo
-{
-    uint8_t deviceType;
-    uint8_t instanceId;
-};
-
-/**
  * @struct GpuTempSensor
  * @brief Implements a GPU temperature sensor that monitors temperature values
  * @details Inherits from Sensor base class and enables shared pointer
diff --git a/src/gpu/GpuTLimitSensor.cpp b/src/gpu/GpuTLimitSensor.cpp
new file mode 100644
index 0000000..5a02326
--- /dev/null
+++ b/src/gpu/GpuTLimitSensor.cpp
@@ -0,0 +1,137 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "GpuTLimitSensor.hpp"
+
+#include "SensorPaths.hpp"
+#include "Thresholds.hpp"
+#include "UpdatableSensor.hpp"
+#include "Utils.hpp"
+
+#include <bits/basic_string.h>
+
+#include <GpuDevice.hpp>
+#include <GpuMctpVdm.hpp>
+#include <MctpRequester.hpp>
+#include <OcpMctpVdm.hpp>
+#include <phosphor-logging/lg2.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace std::literals;
+
+constexpr uint8_t gpuTLimitSensorIdm{2}; // sensor ID sent in each request; NOTE(review): "Idm" looks like a typo for "Id"
+static constexpr double gpuTLimitSensorMaxReading = 127; // max reading handed to the GpuSensor base
+static constexpr double gpuTLimitSensorMinReading = -128; // min reading handed to the GpuSensor base
+
+GpuTLimitSensor::GpuTLimitSensor(
+    std::shared_ptr<sdbusplus::asio::connection>& conn,
+    mctp::MctpRequester& mctpRequester, const std::string& name,
+    const std::string& sensorConfiguration, uint8_t eid,
+    sdbusplus::asio::object_server& objectServer,
+    std::vector<thresholds::Threshold>&& thresholdData) :
+    GpuSensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
+              "temperature", false, true, gpuTLimitSensorMaxReading,
+              gpuTLimitSensorMinReading, conn),
+    eid(eid), sensorId{gpuTLimitSensorIdm}, mctpRequester(mctpRequester),
+    objectServer(objectServer)
+{
+    // Every interface of this sensor is published under one object path.
+    const std::string objectPath =
+        sensorPathPrefix + "temperature/"s + escapeName(name);
+    sensorInterface = objectServer.add_interface(
+        objectPath, "xyz.openbmc_project.Sensor.Value");
+
+    // One interface per configured threshold, stored at its level's index.
+    for (const auto& entry : thresholds)
+    {
+        const std::string levelInterface = thresholds::getInterface(entry.level);
+        thresholdInterfaces[static_cast<size_t>(entry.level)] =
+            objectServer.add_interface(objectPath, levelInterface);
+    }
+
+    association = objectServer.add_interface(objectPath, association::interface);
+    setInitialProperties(sensor_paths::unitDegreesC);
+}
+
+GpuTLimitSensor::~GpuTLimitSensor()
+{
+    for (const auto& thresholdIface : thresholdInterfaces)
+    {
+        objectServer.remove_interface(thresholdIface); // per-level threshold
+    }
+    objectServer.remove_interface(sensorInterface); // Sensor.Value
+    objectServer.remove_interface(association);     // association interface
+}
+
+void GpuTLimitSensor::checkThresholds()
+{
+    thresholds::checkThresholds(this); // forward this sensor to the shared threshold helper
+}
+
+void GpuTLimitSensor::update()
+{
+    std::vector<uint8_t> reqMsg(
+        sizeof(ocp::accelerator_management::BindingPciVid) +
+        sizeof(gpu::GetTemperatureReadingRequest));
+    auto* msg = new (reqMsg.data()) ocp::accelerator_management::Message;
+    auto rc = gpu::encodeGetTemperatureReadingRequest(0, sensorId, *msg);
+    if (rc != ocp::accelerator_management::CompletionCode::SUCCESS)
+    {
+        lg2::error(
+            "GpuTLimitSensor::update(): gpuEncodeGetTemperatureReadingRequest failed, rc={RC}",
+            "RC", static_cast<int>(rc));
+        return; // nothing was sent
+    }
+
+    // Capture weakly: the reply may arrive after this sensor is destroyed.
+    mctpRequester.sendRecvMsg(
+        eid, reqMsg,
+        [weakThis = weak_from_this()](int sendRecvMsgResult,
+                                      std::vector<uint8_t> respMsg) {
+            const auto self = weakThis.lock();
+            if (!self)
+            {
+                return; // sensor already gone; drop the reply
+            }
+            if (sendRecvMsgResult != 0)
+            {
+                lg2::error(
+                    "GpuTLimitSensor::update(): MctpRequester::sendRecvMsg() failed, rc={RC}",
+                    "RC", sendRecvMsgResult);
+                return;
+            }
+            if (respMsg.empty())
+            {
+                lg2::error(
+                    "GpuTLimitSensor::update(): MctpRequester::sendRecvMsg() failed, respMsgLen=0");
+                return;
+            }
+            uint8_t cc = 0;
+            uint16_t reasonCode = 0;
+            double tempValue = 0;
+            auto rc = gpu::decodeGetTemperatureReadingResponse(
+                *new (respMsg.data()) ocp::accelerator_management::Message,
+                respMsg.size(), cc, reasonCode, tempValue);
+            if (rc != ocp::accelerator_management::CompletionCode::SUCCESS ||
+                cc != static_cast<uint8_t>(
+                          ocp::accelerator_management::CompletionCode::SUCCESS))
+            {
+                lg2::error(
+                    "GpuTLimitSensor::update(): gpuDecodeGetTemperatureReadingResponse() failed, rc={RC} cc={CC} reasonCode={RESC}",
+                    "RC", static_cast<int>(rc), "CC", cc, "RESC", reasonCode);
+                return;
+            }
+            self->updateValue(tempValue); // publish the decoded reading
+        });
+}
diff --git a/src/gpu/GpuTLimitSensor.hpp b/src/gpu/GpuTLimitSensor.hpp
new file mode 100644
index 0000000..0407814
--- /dev/null
+++ b/src/gpu/GpuTLimitSensor.hpp
@@ -0,0 +1,86 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+#include "Thresholds.hpp"
+#include "UpdatableSensor.hpp"
+
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+/**
+ * @struct GpuTLimitSensor
+ * @brief Implements a GPU thermal limit (TLimit) sensor
+ * @details Inherits from the GpuSensor base class and enables shared pointer
+ * management via std::enable_shared_from_this
+ */
+struct GpuTLimitSensor :
+    public GpuSensor,
+    public std::enable_shared_from_this<GpuTLimitSensor>
+{
+  public:
+    /**
+     * @brief Constructor for GpuTLimitSensor
+     * @param conn D-Bus connection, forwarded to the GpuSensor base class
+     * @param mctpRequester MCTP requester used to send requests to the GPU
+     * @param name Sensor name; escaped to form the D-Bus object path
+     * @param sensorConfiguration Configuration path of the sensor, forwarded
+     * to the GpuSensor base class
+     * @param eid EID of the device endpoint
+     * @param objectServer D-Bus object server for exposing sensor interfaces
+     * @param thresholdData Vector of threshold configurations, moved into the
+     * GpuSensor base class
+     */
+    GpuTLimitSensor(std::shared_ptr<sdbusplus::asio::connection>& conn,
+                    mctp::MctpRequester& mctpRequester, const std::string& name,
+                    const std::string& sensorConfiguration, uint8_t eid,
+                    sdbusplus::asio::object_server& objectServer,
+                    std::vector<thresholds::Threshold>&& thresholdData);
+
+    /**
+     * @brief Destructor; removes the interfaces added by the constructor
+     */
+    ~GpuTLimitSensor() override;
+
+    /**
+     * @brief Check if any thresholds have been crossed
+     * @details Overrides the base class method and forwards this sensor to
+     * thresholds::checkThresholds()
+     */
+    void checkThresholds() override;
+
+  private:
+    /**
+     * @brief Request a reading over MCTP; the reply updates the value
+     */
+    void update() final;
+
+    /**
+     * @brief MCTP endpoint ID (EID) the requests are sent to
+     */
+    uint8_t eid{};
+
+    /**
+     * @brief Sensor ID placed in the temperature reading request
+     */
+    uint8_t sensorId;
+
+    /**
+     * @brief Reference to the MCTP requester for communication
+     */
+    mctp::MctpRequester& mctpRequester;
+
+    /**
+     * @brief D-Bus object server
+     */
+    sdbusplus::asio::object_server& objectServer;
+};
diff --git a/src/gpu/meson.build b/src/gpu/meson.build
index f8cfe39..c38d254 100644
--- a/src/gpu/meson.build
+++ b/src/gpu/meson.build
@@ -3,6 +3,7 @@
     'GpuMctpVdm.cpp',
     'GpuSensor.cpp',
     'GpuSensorMain.cpp',
+    'GpuTLimitSensor.cpp',
     'MctpRequester.cpp',
     'OcpMctpVdm.cpp',
 )