/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
6
7#include "NvidiaGpuPowerSensor.hpp"
8
9#include "MctpRequester.hpp"
10#include "SensorPaths.hpp"
11#include "Thresholds.hpp"
12#include "Utils.hpp"
13#include "sensor.hpp"
14
15#include <bits/basic_string.h>
16
17#include <NvidiaDeviceDiscovery.hpp>
18#include <NvidiaGpuMctpVdm.hpp>
19#include <OcpMctpVdm.hpp>
20#include <phosphor-logging/lg2.hpp>
21#include <sdbusplus/asio/connection.hpp>
22#include <sdbusplus/asio/object_server.hpp>
23
24#include <cstddef>
25#include <cstdint>
26#include <functional>
27#include <limits>
28#include <memory>
29#include <string>
30#include <utility>
31#include <vector>
32
33using namespace std::literals;
34
// Averaging interval, in seconds, sent with every power-draw request.
// A value of 0 asks for the default interval.
static constexpr uint8_t gpuPowerAveragingIntervalInSec = 0;

// Reading bounds handed to the Sensor base class. The upper bound is the
// largest uint32_t value divided by 1000 (raw readings are scaled by the same
// factor before being published); the lower bound is the smallest uint32_t
// value, i.e. zero.
static constexpr double gpuPowerSensorMaxReading{
    std::numeric_limits<uint32_t>::max() / 1000.0};
static constexpr double gpuPowerSensorMinReading{
    std::numeric_limits<uint32_t>::min()};
42
43NvidiaGpuPowerSensor::NvidiaGpuPowerSensor(
44 std::shared_ptr<sdbusplus::asio::connection>& conn,
45 mctp::MctpRequester& mctpRequester, const std::string& name,
46 const std::string& sensorConfiguration, uint8_t eid, uint8_t sensorId,
47 sdbusplus::asio::object_server& objectServer,
48 std::vector<thresholds::Threshold>&& thresholdData) :
49 Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
50 "power", false, true, gpuPowerSensorMaxReading,
51 gpuPowerSensorMinReading, conn),
52 eid(eid), sensorId{sensorId},
53 averagingInterval{gpuPowerAveragingIntervalInSec},
54 mctpRequester(mctpRequester), objectServer(objectServer)
55
56{
57 std::string dbusPath = sensorPathPrefix + "power/"s + escapeName(name);
58
59 sensorInterface = objectServer.add_interface(
60 dbusPath, "xyz.openbmc_project.Sensor.Value");
61
62 for (const auto& threshold : thresholds)
63 {
64 std::string interface = thresholds::getInterface(threshold.level);
65 thresholdInterfaces[static_cast<size_t>(threshold.level)] =
66 objectServer.add_interface(dbusPath, interface);
67 }
68
69 association = objectServer.add_interface(dbusPath, association::interface);
70
71 setInitialProperties(sensor_paths::unitWatts);
72}
73
74NvidiaGpuPowerSensor::~NvidiaGpuPowerSensor()
75{
76 for (const auto& iface : thresholdInterfaces)
77 {
78 objectServer.remove_interface(iface);
79 }
80 objectServer.remove_interface(association);
81 objectServer.remove_interface(sensorInterface);
82}
83
84void NvidiaGpuPowerSensor::checkThresholds()
85{
86 thresholds::checkThresholds(this);
87}
88
89void NvidiaGpuPowerSensor::processResponse(int sendRecvMsgResult)
90{
91 if (sendRecvMsgResult != 0)
92 {
93 lg2::error(
94 "Error updating Power Sensor for eid {EID} and sensor id {SID} : sending message over MCTP failed, rc={RC}",
95 "EID", eid, "SID", sensorId, "RC", sendRecvMsgResult);
96 return;
97 }
98
99 ocp::accelerator_management::CompletionCode cc{};
100 uint16_t reasonCode = 0;
101 uint32_t power = 0;
102
103 const int rc =
104 gpu::decodeGetCurrentPowerDrawResponse(response, cc, reasonCode, power);
105
106 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
107 {
108 lg2::error(
109 "Error updating Power Sensor eid {EID} and sensor id {SID} : decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
110 "EID", eid, "SID", sensorId, "RC", rc, "CC", cc, "RESC",
111 reasonCode);
112 return;
113 }
114
115 // Reading from the device is in milliwatts and unit set on the dbus
116 // is watts.
117 updateValue(power / 1000.0);
118}
119
120void NvidiaGpuPowerSensor::update()
121{
122 const int rc = gpu::encodeGetCurrentPowerDrawRequest(
123 0, sensorId, averagingInterval, request);
124
125 if (rc != 0)
126 {
127 lg2::error(
128 "Error updating Temperature Sensor for eid {EID} and sensor id {SID} : encode failed, rc={RC}",
129 "EID", eid, "SID", sensorId, "RC", rc);
130 }
131
132 mctpRequester.sendRecvMsg(
133 eid, request, response,
134 [this](int sendRecvMsgResult) { processResponse(sendRecvMsgResult); });
135}