nvidia-gpu: add TLimit sensor properties
Add support for the DMTF Redfish properties ReadingBasis and
Implementation for the GPU TLimit sensor [1].
The Implementation property for TLimit is set to Synthesized because
the GPU incorporates intelligent logic that determines the temperature
delta from the first thermal management software slowdown event. TLimit
is derived from other reported GPU sensors, such as HBM and Tavg.
DBus Interface definition -
https://gerrit.openbmc.org/c/openbmc/phosphor-dbus-interfaces/+/81658
Tested: Built an image for the gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP
stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422
```
> curl -s -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Description": "Thermal Limit(TLIMIT) Temperature is the distance in deg C from the GPU temperature to the first throttle limit.",
"Id": "temperature_NVIDIA_GB200_GPU_0_TEMP_1",
"Implementation": "Synthesized",
"Name": "NVIDIA GB200 GPU 0 TEMP 1",
"Reading": 56.59375,
"ReadingBasis": "Headroom",
"ReadingRangeMax": 127.0,
"ReadingRangeMin": -128.0,
"ReadingType": "Temperature",
"ReadingUnits": "Cel",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}%
root@gb200nvl-obmc:~# busctl introspect xyz.openbmc_project.GpuSensor /xyz/openbmc_project/sensors/temperature/NVIDIA_GB200_GPU_0_TEMP_1
NAME TYPE SIGNATURE RESULT/VALUE FLAGS
org.freedesktop.DBus.Introspectable interface - - -
.Introspect method - s -
org.freedesktop.DBus.Peer interface - - -
.GetMachineId method - s -
.Ping method - - -
org.freedesktop.DBus.Properties interface - - -
.Get method ss v -
.GetAll method s a{sv} -
.Set method ssv - -
.PropertiesChanged signal sa{sv}as - -
xyz.openbmc_project.Association.Definitions interface - - -
.Associations property a(sss) 1 "chassis" "all_sensors" "/xyz/openb... emits-change
xyz.openbmc_project.Inventory.Item interface - - -
.PrettyName property s "Thermal Limit(TLIMIT) Temperature is... emits-change
xyz.openbmc_project.Sensor.Type interface - - -
.Implementation property s "xyz.openbmc_project.Sensor.Type.Impl... emits-change
.ReadingBasis property s "xyz.openbmc_project.Sensor.Type.Read... emits-change
xyz.openbmc_project.Sensor.Value interface - - -
.MaxValue property d 127 emits-change
.MinValue property d -128 emits-change
.Unit property s "xyz.openbmc_project.Sensor.Value.Uni... emits-change
.Value property d 56.6836 emits-change writable
xyz.openbmc_project.Sensor.ValueMutability interface - - -
.Mutable property b true emits-change
xyz.openbmc_project.State.Decorator.Availability interface - - -
.Available property b true emits-change writable
xyz.openbmc_project.State.Decorator.OperationalStatus interface - - -
.Functional property b true emits-change
```
[1] : https://redfish.dmtf.org/schemas/v1/Sensor.v1_11_0.yaml
Change-Id: I1a16ced44c563794d561d26232a5e5fba041b875
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaGpuSensor.cpp b/src/nvidia-gpu/NvidiaGpuSensor.cpp
index 7cc1fe0..42d03ba 100644
--- a/src/nvidia-gpu/NvidiaGpuSensor.cpp
+++ b/src/nvidia-gpu/NvidiaGpuSensor.cpp
@@ -64,6 +64,26 @@
association = objectServer.add_interface(dbusPath, association::interface);
setInitialProperties(sensor_paths::unitDegreesC);
+
+ if (sensorId == gpuTLimitSensorId)
+ {
+ sensorTypeInterface = objectServer.add_interface(
+ dbusPath, "xyz.openbmc_project.Sensor.Type");
+
+ sensorTypeInterface->register_property(
+ "ReadingBasis",
+ "xyz.openbmc_project.Sensor.Type.ReadingBasisType.Headroom"s);
+ sensorTypeInterface->register_property(
+ "Implementation",
+ "xyz.openbmc_project.Sensor.Type.ImplementationType.Synthesized"s);
+
+ if (!sensorTypeInterface->initialize())
+ {
+ lg2::error(
+ "Error initializing Type Interface for Temperature Sensor for eid {EID} and sensor id {SID}",
+ "EID", eid, "SID", sensorId);
+ }
+ }
}
NvidiaGpuTempSensor::~NvidiaGpuTempSensor()
@@ -74,6 +94,10 @@
}
objectServer.remove_interface(association);
objectServer.remove_interface(sensorInterface);
+ if (sensorTypeInterface)
+ {
+ objectServer.remove_interface(sensorTypeInterface);
+ }
}
void NvidiaGpuTempSensor::checkThresholds()
diff --git a/src/nvidia-gpu/NvidiaGpuSensor.hpp b/src/nvidia-gpu/NvidiaGpuSensor.hpp
index 2b0c49c..4d6861f 100644
--- a/src/nvidia-gpu/NvidiaGpuSensor.hpp
+++ b/src/nvidia-gpu/NvidiaGpuSensor.hpp
@@ -59,4 +59,6 @@
std::array<uint8_t, sizeof(gpu::GetTemperatureReadingRequest)>
getTemperatureReadingRequest{};
+
+ std::shared_ptr<sdbusplus::asio::dbus_interface> sensorTypeInterface;
};