nvidia-gpu: add energy sensor
This commit introduces an energy sensor for the GPU.
Tested: Build an image for gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422
```
$ curl -s -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/energy_NVIDIA_GB200_GPU_0_Energy_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/energy_NVIDIA_GB200_GPU_0_Energy_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "energy_NVIDIA_GB200_GPU_0_Energy_0",
"Name": "NVIDIA GB200 GPU 0 Energy 0",
"Reading": 269947.856,
"ReadingRangeMax": 1.8446744073709552e+16,
"ReadingRangeMin": 0.0,
"ReadingType": "EnergyJoules",
"ReadingUnits": "J",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
```
Change-Id: I6f53ab2a83eedd54005bbdcd781dc8d320d7f26a
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaGpuEnergySensor.hpp b/src/nvidia-gpu/NvidiaGpuEnergySensor.hpp
new file mode 100644
index 0000000..19bb982
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaGpuEnergySensor.hpp
@@ -0,0 +1,58 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+#include "Thresholds.hpp"
+#include "sensor.hpp"
+
+#include <NvidiaGpuMctpVdm.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+constexpr uint8_t gpuEnergySensorId{0}; // energy sensor ID (usage not in view)
+/// Energy sensor for an NVIDIA GPU, built on the Sensor base class.
+struct NvidiaGpuEnergySensor : public Sensor
+{
+ public:
+ NvidiaGpuEnergySensor(
+ std::shared_ptr<sdbusplus::asio::connection>& conn,
+ mctp::MctpRequester& mctpRequester, const std::string& name,
+ const std::string& sensorConfiguration, uint8_t eid, uint8_t sensorId,
+ sdbusplus::asio::object_server& objectServer,
+ std::vector<thresholds::Threshold>&& thresholdData);
+ /// Destructor override; declared here, defined out of line.
+ ~NvidiaGpuEnergySensor() override;
+ /// Overrides the inherited checkThresholds() hook.
+ void checkThresholds() override;
+ /// NOTE(review): presumably starts a new energy reading; body not in view.
+ void update();
+
+ private:
+ void processResponse(int sendRecvMsgResult);
+ /// NOTE(review): presumably the MCTP endpoint ID of the GPU - confirm.
+ uint8_t eid{};
+ /// Sensor identifier; value-initialized to 0 by default, matching eid.
+ uint8_t sensorId{};
+ /// Shared ownership of the sdbusplus asio connection.
+ std::shared_ptr<sdbusplus::asio::connection> conn;
+ /// Non-owning reference; the requester must outlive this sensor.
+ mctp::MctpRequester& mctpRequester;
+ /// Non-owning reference; the object server must outlive this sensor.
+ sdbusplus::asio::object_server& objectServer;
+ /// Byte buffer sized to hold one gpu::GetCurrentEnergyCounterRequest.
+ std::array<uint8_t, sizeof(gpu::GetCurrentEnergyCounterRequest)> request{};
+ /// Byte buffer sized to hold one gpu::GetCurrentEnergyCounterResponse.
+ std::array<uint8_t, sizeof(gpu::GetCurrentEnergyCounterResponse)>
+ response{};
+};