blob: a857862654361311662b3cfd25432e7afcb36e9c [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
 */
5
6#include "GpuSensor.hpp"
7
Harshit Agheraa3f24f42025-04-21 20:04:56 +05308#include "SensorPaths.hpp"
Harshit Agheraacd375a2025-04-21 19:50:10 +05309#include "Thresholds.hpp"
Harshit Aghera11b9c1a2025-04-29 17:34:25 +053010#include "UpdatableSensor.hpp"
Harshit Agheraacd375a2025-04-21 19:50:10 +053011#include "Utils.hpp"
Harshit Agheraacd375a2025-04-21 19:50:10 +053012
13#include <bits/basic_string.h>
14
Harshit Aghera11b9c1a2025-04-29 17:34:25 +053015#include <GpuDevice.hpp>
Harshit Agheraa3f24f42025-04-21 20:04:56 +053016#include <GpuMctpVdm.hpp>
17#include <MctpRequester.hpp>
18#include <OcpMctpVdm.hpp>
Harshit Agheraacd375a2025-04-21 19:50:10 +053019#include <phosphor-logging/lg2.hpp>
20#include <sdbusplus/asio/connection.hpp>
21#include <sdbusplus/asio/object_server.hpp>
Harshit Agheraacd375a2025-04-21 19:50:10 +053022
Harshit Agheraacd375a2025-04-21 19:50:10 +053023#include <cstddef>
24#include <cstdint>
Harshit Agheraa3f24f42025-04-21 20:04:56 +053025#include <functional>
Harshit Agheraacd375a2025-04-21 19:50:10 +053026#include <memory>
27#include <string>
28#include <utility>
Harshit Agheraacd375a2025-04-21 19:50:10 +053029#include <vector>
30
31using namespace std::literals;
32
/// Sensor ID placed in the GetTemperatureReading request sent by
/// GpuTempSensor::update().
constexpr uint8_t gpuTempSensorId = 0;

/// Upper bound of the reading range handed to the GpuSensor base class.
static constexpr double gpuTempSensorMaxReading{127.0};

/// Lower bound of the reading range handed to the GpuSensor base class.
static constexpr double gpuTempSensorMinReading{-128.0};
36
37GpuTempSensor::GpuTempSensor(
38 std::shared_ptr<sdbusplus::asio::connection>& conn,
Harshit Aghera11b9c1a2025-04-29 17:34:25 +053039 mctp::MctpRequester& mctpRequester, const std::string& name,
40 const std::string& sensorConfiguration, const uint8_t eid,
Harshit Agheraacd375a2025-04-21 19:50:10 +053041 sdbusplus::asio::object_server& objectServer,
Harshit Aghera11b9c1a2025-04-29 17:34:25 +053042 std::vector<thresholds::Threshold>&& thresholdData) :
43 GpuSensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
44 "temperature", false, true, gpuTempSensorMaxReading,
45 gpuTempSensorMinReading, conn),
46 eid(eid), sensorId{gpuTempSensorId}, mctpRequester(mctpRequester),
47 objectServer(objectServer)
Harshit Agheraacd375a2025-04-21 19:50:10 +053048{
49 std::string dbusPath =
50 sensorPathPrefix + "temperature/"s + escapeName(name);
51
52 sensorInterface = objectServer.add_interface(
53 dbusPath, "xyz.openbmc_project.Sensor.Value");
54
55 for (const auto& threshold : thresholds)
56 {
57 std::string interface = thresholds::getInterface(threshold.level);
58 thresholdInterfaces[static_cast<size_t>(threshold.level)] =
59 objectServer.add_interface(dbusPath, interface);
60 }
61
62 association = objectServer.add_interface(dbusPath, association::interface);
63
Harshit Aghera11b9c1a2025-04-29 17:34:25 +053064 setInitialProperties(sensor_paths::unitDegreesC);
Harshit Agheraacd375a2025-04-21 19:50:10 +053065}
66
67GpuTempSensor::~GpuTempSensor()
68{
Harshit Agheraacd375a2025-04-21 19:50:10 +053069 for (const auto& iface : thresholdInterfaces)
70 {
71 objectServer.remove_interface(iface);
72 }
73 objectServer.remove_interface(sensorInterface);
74 objectServer.remove_interface(association);
75}
76
77void GpuTempSensor::checkThresholds()
78{
79 thresholds::checkThresholds(this);
80}
81
Harshit Agheraa3f24f42025-04-21 20:04:56 +053082void GpuTempSensor::update()
83{
84 std::vector<uint8_t> reqMsg(
85 sizeof(ocp::accelerator_management::BindingPciVid) +
86 sizeof(gpu::GetTemperatureReadingRequest));
87
88 auto* msg = new (reqMsg.data()) ocp::accelerator_management::Message;
89
90 auto rc = gpu::encodeGetTemperatureReadingRequest(0, sensorId, *msg);
91 if (rc != ocp::accelerator_management::CompletionCode::SUCCESS)
92 {
93 lg2::error(
94 "GpuTempSensor::update(): gpuEncodeGetTemperatureReadingRequest failed, rc={RC}",
95 "RC", static_cast<int>(rc));
96 return;
97 }
98
99 mctpRequester.sendRecvMsg(
100 eid, reqMsg,
101 [this](int sendRecvMsgResult, std::vector<uint8_t> respMsg) {
102 if (sendRecvMsgResult != 0)
103 {
104 lg2::error(
105 "GpuTempSensor::update(): MctpRequester::sendRecvMsg() failed, rc={RC}",
106 "RC", sendRecvMsgResult);
107 return;
108 }
109
110 if (respMsg.empty())
111 {
112 lg2::error(
113 "GpuTempSensor::update(): MctpRequester::sendRecvMsg() failed, respMsgLen=0");
114 return;
115 }
116
117 uint8_t cc = 0;
118 uint16_t reasonCode = 0;
119 double tempValue = 0;
120
121 auto rc = gpu::decodeGetTemperatureReadingResponse(
122 *new (respMsg.data()) ocp::accelerator_management::Message,
123 respMsg.size(), cc, reasonCode, tempValue);
124
125 if (rc != ocp::accelerator_management::CompletionCode::SUCCESS ||
126 cc != static_cast<uint8_t>(
127 ocp::accelerator_management::CompletionCode::SUCCESS))
128 {
129 lg2::error(
130 "GpuTempSensor::update(): gpuDecodeGetTemperatureReadingResponse() failed, rc={RC} cc={CC} reasonCode={RESC}",
131 "RC", static_cast<int>(rc), "CC", cc, "RESC", reasonCode);
132 return;
133 }
134
135 updateValue(tempValue);
136 });
137}