blob: 16141f1175228e9d21493cdac75579f62c387e97 [file] [log] [blame]
Harshit Aghera5e7decc2025-05-07 16:20:16 +05301/*
2 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3 * AFFILIATES. All rights reserved.
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7#include "NvidiaGpuThresholds.hpp"
8
9#include <MctpRequester.hpp>
10#include <NvidiaGpuMctpVdm.hpp>
11#include <OcpMctpVdm.hpp>
12#include <phosphor-logging/lg2.hpp>
13
14#include <array>
15#include <cerrno>
16#include <cstddef>
17#include <cstdint>
18#include <functional>
19#include <memory>
20#include <span>
21#include <vector>
22
23void processReadThermalParameterResponse(
24 const std::function<void(uint8_t, int32_t)>& callback,
25 const std::span<const uint8_t> respMsg, int sendRecvMsgResult)
26{
27 if (sendRecvMsgResult != 0)
28 {
29 lg2::error(
30 "Error reading thermal parameter: sending message over MCTP failed, rc={RC}",
31 "RC", sendRecvMsgResult);
32 callback(EPROTO, 0);
33 return;
34 }
35
36 ocp::accelerator_management::CompletionCode cc{};
37 uint16_t reasonCode = 0;
38 int32_t threshold = 0;
39
40 auto rc = gpu::decodeReadThermalParametersResponse(respMsg, cc, reasonCode,
41 threshold);
42
43 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
44 {
45 lg2::error(
46 "Error reading thermal parameter: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
47 "RC", rc, "CC", cc, "RESC", reasonCode);
48 callback(EPROTO, 0);
49 return;
50 }
51
52 callback(0, threshold);
53};
54
55void readThermalParameter(uint8_t eid, uint8_t id,
56 mctp::MctpRequester& mctpRequester,
57 const std::function<void(uint8_t, int32_t)>& callback)
58{
59 auto reqMsg = std::make_shared<
60 std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>>();
61
62 auto respMsg = std::make_shared<
63 std::array<uint8_t, sizeof(gpu::ReadThermalParametersResponse)>>();
64
65 auto rc = gpu::encodeReadThermalParametersRequest(0, id, *reqMsg);
66 if (rc != 0)
67 {
68 lg2::error(
69 "Error reading thermal parameter for eid {EID} and parameter id {PID} : encode failed. rc={RC}",
70 "EID", eid, "PID", id, "RC", rc);
71 callback(rc, 0);
72 return;
73 }
74
75 mctpRequester.sendRecvMsg(
76 eid, *reqMsg, *respMsg,
77 [reqMsg, respMsg, callback](int sendRecvMsgResult) {
78 processReadThermalParameterResponse(callback, *respMsg,
79 sendRecvMsgResult);
80 });
81}
82
83void readThermalParameterCallback(
84 uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids,
85 mctp::MctpRequester& mctpRequester,
86 const std::function<void(uint8_t, std::vector<int32_t>)>& callback,
87 size_t index, const std::shared_ptr<std::vector<int32_t>>& thresholds,
88 uint8_t rc, int32_t threshold)
89{
90 if (rc != 0)
91 {
92 lg2::error(
93 "Error reading thermal parameter for eid {EID} and parameter id {PID}. rc={RC}",
94 "EID", eid, "PID", (*ids)[index], "RC", rc);
95 callback(rc, *thresholds);
96 return;
97 }
98
99 thresholds->push_back(threshold);
100
101 ++index;
102 if (index == ids->size())
103 {
104 callback(rc, *thresholds);
105 }
106 else
107 {
108 readThermalParameter(eid, (*ids)[index], mctpRequester,
109 std::bind_front(readThermalParameterCallback, eid,
110 ids, std::ref(mctpRequester),
111 callback, index, thresholds));
112 }
113}
114
115void readThermalParameters(
116 uint8_t eid, const std::vector<uint8_t>& ids,
117 mctp::MctpRequester& mctpRequester,
118 const std::function<void(uint8_t, std::vector<int32_t>)>& callback)
119{
120 auto thresholds = std::make_shared<std::vector<int32_t>>();
121 size_t index = 0;
122
123 readThermalParameter(
124 eid, ids[index], mctpRequester,
125 std::bind_front(readThermalParameterCallback, eid,
126 std::make_shared<std::vector<uint8_t>>(ids),
127 std::ref(mctpRequester), callback, index, thresholds));
128}