nvidia-gpu: add power sensor
This patch adds support for fetching the power sensor value from the GPU.
Tested: Built an image for the gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422
```
$ curl -s -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "power_NVIDIA_GB200_GPU_0_Power_0",
"Name": "NVIDIA GB200 GPU 0 Power 0",
"Reading": 27.181,
"ReadingRangeMax": 4294967.295,
"ReadingRangeMin": 0.0,
"ReadingType": "Power",
"ReadingUnits": "W",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
```
Change-Id: Ic227a0056daa68ab2239a609ed20c7ed2f6bd2c5
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
index f7c78b8..c7f7511 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp
@@ -31,6 +31,7 @@
{
GET_TEMPERATURE_READING = 0x00,
READ_THERMAL_PARAMETERS = 0x02,
+ GET_CURRENT_POWER_DRAW = 0x03,
};
enum class DeviceIdentification : uint8_t
@@ -60,6 +61,13 @@
using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
+struct GetCurrentPowerDrawRequest // Request message; presumably sent with GET_CURRENT_POWER_DRAW (0x03) - confirm in the encoder.
+{
+ ocp::accelerator_management::CommonRequest hdr; // Leading header of type CommonRequest.
+ uint8_t sensorId; // One-byte sensor selector; valid IDs are not visible here.
+ uint8_t averagingInterval; // One-byte averaging-interval selector; encoding/units not visible here - TODO confirm.
+} __attribute__((packed)); // Packed: no padding is inserted between the fields.
+
struct GetTemperatureReadingResponse
{
ocp::accelerator_management::CommonResponse hdr;
@@ -72,6 +80,12 @@
int32_t threshold;
} __attribute__((packed));
+struct GetCurrentPowerDrawResponse // Response message carrying a single power value after the header.
+{
+ ocp::accelerator_management::CommonResponse hdr; // Leading header of type CommonResponse.
+ uint32_t power; // Raw reading; presumably milliwatts (a 4294967.295 W range max equals UINT32_MAX / 1000) - TODO confirm.
+} __attribute__((packed)); // Packed: no padding is inserted between hdr and power.
+
int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
ocp::accelerator_management::BindingPciVid& msg);
@@ -99,4 +113,12 @@
ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
int32_t& threshold);
+int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
+ uint8_t averagingInterval,
+ std::span<uint8_t> buf); // Declaration only; presumably writes a GetCurrentPowerDrawRequest into buf and returns a status code - confirm in the definition.
+
+int decodeGetCurrentPowerDrawResponse(
+ std::span<const uint8_t> buf, // Read-only view of the bytes to decode.
+ ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, // Non-const references; presumably filled in by the decoder - confirm in the definition.
+ uint32_t& power); // Non-const reference; presumably receives the response's power field - confirm units in the definition.
} // namespace gpu