nvidia-gpu: add power sensor
This patch adds support for fetching the power sensor value from the GPU.
Tested: Built an image for the gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422
```
$ curl -s -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "power_NVIDIA_GB200_GPU_0_Power_0",
"Name": "NVIDIA GB200 GPU 0 Power 0",
"Reading": 27.181,
"ReadingRangeMax": 4294967.295,
"ReadingRangeMin": 0.0,
"ReadingType": "Power",
"ReadingUnits": "W",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}%
```
Change-Id: Ic227a0056daa68ab2239a609ed20c7ed2f6bd2c5
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
index 7a48b30..2355bf9 100644
--- a/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
+++ b/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp
@@ -217,5 +217,72 @@
return 0;
}
+
+/** Serialize a Get Current Power Draw request for `sensorId` into `buf`.
+ *  Returns 0 on success, EINVAL when `buf` is too small for the request, or
+ *  the non-zero code reported by packHeader(). */
+int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
+                                     uint8_t averagingInterval,
+                                     std::span<uint8_t> buf)
+{
+    namespace oam = ocp::accelerator_management;
+    if (sizeof(GetCurrentPowerDrawRequest) > buf.size())
+    {
+        return EINVAL;
+    }
+    auto* request = reinterpret_cast<GetCurrentPowerDrawRequest*>(buf.data());
+
+    // Describe the message as a platform-environmental request.
+    oam::BindingPciVidInfo info{};
+    info.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
+    info.instance_id = instanceId & oam::instanceIdBitMask;
+    info.ocp_accelerator_management_msg_type =
+        static_cast<uint8_t>(oam::MessageType::REQUEST);
+
+    // The command fields are written only after the header packs cleanly.
+    if (auto rc = packHeader(info, request->hdr.msgHdr.hdr); rc != 0)
+    {
+        return rc;
+    }
+    request->hdr.command = static_cast<uint8_t>(
+        PlatformEnvironmentalCommands::GET_CURRENT_POWER_DRAW);
+    request->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval);
+    request->sensorId = sensorId;
+    request->averagingInterval = averagingInterval;
+    return 0;
+}
+
+/** Decode a Get Current Power Draw response from `buf` into `power`.
+ *  Returns decodeReasonCodeAndCC()'s result, with `power` untouched, if it is
+ *  non-zero or `cc` is not SUCCESS; EINVAL on a bad size; otherwise 0. */
+int decodeGetCurrentPowerDrawResponse(
+    std::span<const uint8_t> buf,
+    ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
+    uint32_t& power)
+{
+    namespace oam = ocp::accelerator_management;
+
+    const auto status = oam::decodeReasonCodeAndCC(buf, cc, reasonCode);
+    // Stop here if the common fields failed to decode or cc is not SUCCESS.
+    if (!(status == 0 && cc == oam::CompletionCode::SUCCESS))
+    {
+        return status;
+    }
+    if (sizeof(GetCurrentPowerDrawResponse) > buf.size())
+    {
+        return EINVAL;
+    }
+
+    const auto* reply =
+        reinterpret_cast<const GetCurrentPowerDrawResponse*>(buf.data());
+    // The reported data_size must be exactly sizeof(uint32_t).
+    const uint16_t payloadLen = le16toh(reply->hdr.data_size);
+    if (payloadLen != sizeof(uint32_t))
+    {
+        return EINVAL;
+    }
+    power = le32toh(reply->power);
+    return 0;
+}
// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
} // namespace gpu