gpu : add thresholds support to TLimit
This patch adds support for fetching TLimit thresholds from the GPU.
Tested.
The TEMP_0 update is disabled while testing this patch because it
requires MCTP request queueing: OCP MCTP VDM specifies at most one
outstanding request to the device. MCTP request queueing is being
introduced by this patch -
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80023
Build an image for the gb200nvl-obmc machine with the following patches
cherry-picked. These patches are needed to enable the MCTP stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79312
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79410
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422
Copy the configuration file to the gb200nvl-obmc machine and restart the
entity-manager service.
```
root@gb200nvl-obmc:~# rm -rf /var/configuration/
root@gb200nvl-obmc:~# systemctl restart xyz.openbmc_project.EntityManager.service
```
Copy the gpusensor app and run it.
```
root@gb200nvl-obmc:~# ./gpusensor
```
```
$ curl -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_TEMP_1
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_TEMP_1",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "temperature_NVIDIA_GB200_GPU_TEMP_1",
"Name": "NVIDIA GB200 GPU TEMP 1",
"Reading": 49.0,
"ReadingRangeMax": 127.0,
"ReadingRangeMin": -128.0,
"ReadingType": "Temperature",
"ReadingUnits": "Cel",
"Status": {
"Health": "OK",
"State": "Enabled"
},
"Thresholds": {
"LowerCaution": {
"Reading": 0.0
},
"LowerCritical": {
"Reading": 0.0
}
}
}%
root@gb200nvl-obmc:~# busctl introspect xyz.openbmc_project.GpuSensor /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU_TEMP_1
NAME TYPE SIGNATURE RESULT/VALUE FLAGS
org.freedesktop.DBus.Introspectable interface - - -
.Introspect method - s -
org.freedesktop.DBus.Peer interface - - -
.GetMachineId method - s -
.Ping method - - -
org.freedesktop.DBus.Properties interface - - -
.Get method ss v -
.GetAll method s a{sv} -
.Set method ssv - -
.PropertiesChanged signal sa{sv}as - -
xyz.openbmc_project.Association.Definitions interface - - -
.Associations property a(sss) 1 "chassis" "all_sensors" "/xyz/openbmc… emits-change
xyz.openbmc_project.Inventory.Item interface - - -
.PrettyName property s "Thermal Limit(TLIMIT) Temperature is t… emits-change
xyz.openbmc_project.Sensor.Threshold.Critical interface - - -
.CriticalAlarmHigh property b false emits-change
.CriticalAlarmLow property b false emits-change
.CriticalHigh property d nan emits-change writable
.CriticalLow property d 0 emits-change writable
xyz.openbmc_project.Sensor.Threshold.HardShutdown interface - - -
.HardShutdownAlarmHigh property b false emits-change
.HardShutdownAlarmLow property b false emits-change
.HardShutdownHigh property d nan emits-change writable
.HardShutdownLow property d 0 emits-change writable
xyz.openbmc_project.Sensor.Threshold.Warning interface - - -
.WarningAlarmHigh property b false emits-change
.WarningAlarmLow property b false emits-change
.WarningHigh property d nan emits-change writable
.WarningLow property d 0 emits-change writable
xyz.openbmc_project.Sensor.Value interface - - -
.MaxValue property d 127 emits-change
.MinValue property d -128 emits-change
.Unit property s "xyz.openbmc_project.Sensor.Value.Unit.… emits-change
.Value property d 48.9688 emits-change writable
xyz.openbmc_project.Sensor.ValueMutability interface - - -
.Mutable property b true emits-change
xyz.openbmc_project.State.Decorator.Availability interface - - -
.Available property b true emits-change writable
xyz.openbmc_project.State.Decorator.OperationalStatus interface - - -
.Functional property b true emits-change
```
Change-Id: I6f2ff2652ce9246287f9bd63c4297d9ad3229963
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/gpu/GpuThresholds.hpp b/src/gpu/GpuThresholds.hpp
new file mode 100644
index 0000000..2b426b4
--- /dev/null
+++ b/src/gpu/GpuThresholds.hpp
@@ -0,0 +1,26 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+
+/** @brief Read thermal parameters for multiple sensors in a batch operation
+ *
+ * @param[in] eid - MCTP endpoint ID
+ * @param[in] ids - Shared pointer to vector of sensor IDs (uint8_t) to read
+ * @param[in] mctpRequester - Reference to the MCTP requester to use
+ * @param[in] callback - Callback function to process results; takes a
+ *            sensor ID and a vector of int32_t threshold values
+ */
+void readThermalParametersBatched(
+ uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids,
+ mctp::MctpRequester& mctpRequester,
+ const std::function<void(uint8_t, std::vector<int32_t>)>& callback);