blob: 6d0fcd8b4eb2cbb707387a84a420abc9ee754dc1 [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
6
7#include "NvidiaGpuEnergySensor.hpp"
8
9#include "SensorPaths.hpp"
10#include "Thresholds.hpp"
11#include "Utils.hpp"
12#include "sensor.hpp"
13
14#include <bits/basic_string.h>
15
16#include <MctpRequester.hpp>
17#include <NvidiaDeviceDiscovery.hpp>
18#include <NvidiaGpuMctpVdm.hpp>
19#include <OcpMctpVdm.hpp>
20#include <phosphor-logging/lg2.hpp>
21#include <sdbusplus/asio/connection.hpp>
22#include <sdbusplus/asio/object_server.hpp>
23
24#include <cstddef>
25#include <cstdint>
26#include <limits>
27#include <memory>
28#include <string>
29#include <utility>
30#include <vector>
31
32using namespace std::literals;
33
// The device reports energy in millijoules as a 64-bit counter, while the
// unit published on D-Bus is Joules, so the upper bound is the largest raw
// counter value scaled down by 1000.
// The integer-to-double conversion is spelled out because uint64_t max is not
// exactly representable as a double (it rounds to 2^64); an implicit
// conversion here draws a compiler warning.
static constexpr double gpuEnergySensorMaxReading =
    static_cast<double>(std::numeric_limits<uint64_t>::max()) / 1000.0;
static constexpr double gpuEnergySensorMinReading = 0.0;
38
39NvidiaGpuEnergySensor::NvidiaGpuEnergySensor(
40 std::shared_ptr<sdbusplus::asio::connection>& conn,
41 mctp::MctpRequester& mctpRequester, const std::string& name,
42 const std::string& sensorConfiguration, const uint8_t eid, uint8_t sensorId,
43 sdbusplus::asio::object_server& objectServer,
44 std::vector<thresholds::Threshold>&& thresholdData) :
45 Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
46 "energy", false, true, gpuEnergySensorMaxReading,
47 gpuEnergySensorMinReading, conn),
48 eid(eid), sensorId{sensorId}, mctpRequester(mctpRequester),
49 objectServer(objectServer)
50{
51 std::string dbusPath = sensorPathPrefix + "energy/"s + escapeName(name);
52
53 sensorInterface = objectServer.add_interface(
54 dbusPath, "xyz.openbmc_project.Sensor.Value");
55
56 for (const auto& threshold : thresholds)
57 {
58 std::string interface = thresholds::getInterface(threshold.level);
59 thresholdInterfaces[static_cast<size_t>(threshold.level)] =
60 objectServer.add_interface(dbusPath, interface);
61 }
62
63 association = objectServer.add_interface(dbusPath, association::interface);
64
65 setInitialProperties(sensor_paths::unitJoules);
66}
67
68NvidiaGpuEnergySensor::~NvidiaGpuEnergySensor()
69{
70 for (const auto& iface : thresholdInterfaces)
71 {
72 objectServer.remove_interface(iface);
73 }
74 objectServer.remove_interface(sensorInterface);
75 objectServer.remove_interface(association);
76}
77
78void NvidiaGpuEnergySensor::checkThresholds()
79{
80 thresholds::checkThresholds(this);
81}
82
83void NvidiaGpuEnergySensor::processResponse(int sendRecvMsgResult)
84{
85 if (sendRecvMsgResult != 0)
86 {
87 lg2::error(
88 "Error updating Energy Sensor for eid {EID} and sensor id {SID} : sending message over MCTP failed, rc={RC}",
89 "EID", eid, "SID", sensorId, "RC", sendRecvMsgResult);
90 return;
91 }
92
93 ocp::accelerator_management::CompletionCode cc{};
94 uint16_t reasonCode = 0;
95 uint64_t energyValue = 0;
96
97 auto rc = gpu::decodeGetCurrentEnergyCounterResponse(
98 response, cc, reasonCode, energyValue);
99
100 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
101 {
102 lg2::error(
103 "Error updating Energy Sensor for eid {EID} and sensor id {SID} : decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
104 "EID", eid, "SID", sensorId, "RC", rc, "CC", cc, "RESC",
105 reasonCode);
106 return;
107 }
108
109 // Reading from the device is in millijoules and unit set on the dbus
110 // is Joules.
111 updateValue(energyValue / 1000.0);
112}
113
114void NvidiaGpuEnergySensor::update()
115{
116 auto rc = gpu::encodeGetCurrentEnergyCounterRequest(0, sensorId, request);
117
118 if (rc != 0)
119 {
120 lg2::error(
121 "Error updating Energy Sensor for eid {EID} and sensor id {SID} : encode failed, rc={RC}",
122 "EID", eid, "SID", sensorId, "RC", rc);
123 return;
124 }
125
126 mctpRequester.sendRecvMsg(
127 eid, request, response,
128 [this](int sendRecvMsgResult) { processResponse(sendRecvMsgResult); });
129}