blob: 80f5bfef0ba77d35e608e428b6c7b02832ccb588 [file] [log] [blame]
Harshit Aghera560e6af2025-04-21 20:04:56 +05301/*
2 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3 * AFFILIATES. All rights reserved.
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7#include "NvidiaGpuMctpVdm.hpp"
8
9#include "OcpMctpVdm.hpp"
10
11#include <endian.h>
12
13#include <cerrno>
14#include <cstdint>
Harshit Aghera560e6af2025-04-21 20:04:56 +053015#include <span>
16
17namespace gpu
18{
19// These functions encode/decode data communicated over the network
20// The use of reinterpret_cast enables direct memory access to raw byte buffers
21// without doing unnecessary data copying
22// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
23int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
24 ocp::accelerator_management::BindingPciVid& msg)
25{
26 return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg);
27}
28
29int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
30 const std::span<uint8_t> buf)
31{
32 if (buf.size() < sizeof(QueryDeviceIdentificationRequest))
33 {
34 return EINVAL;
35 }
36
37 auto* msg = reinterpret_cast<QueryDeviceIdentificationRequest*>(buf.data());
38
39 ocp::accelerator_management::BindingPciVidInfo header{};
40
41 header.ocp_accelerator_management_msg_type =
42 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
43 header.instance_id = instanceId &
44 ocp::accelerator_management::instanceIdBitMask;
45 header.msg_type =
46 static_cast<uint8_t>(MessageType::DEVICE_CAPABILITY_DISCOVERY);
47
48 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
49
50 if (rc != 0)
51 {
52 return rc;
53 }
54
55 msg->hdr.command = static_cast<uint8_t>(
56 DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
57 msg->hdr.data_size = 0;
58
59 return 0;
60}
61
62int decodeQueryDeviceIdentificationResponse(
63 const std::span<const uint8_t> buf,
64 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
65 uint8_t& deviceIdentification, uint8_t& deviceInstance)
66{
67 auto rc =
68 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
69
70 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
71 {
72 return rc;
73 }
74
75 if (buf.size() < sizeof(QueryDeviceIdentificationResponse))
76 {
77 return EINVAL;
78 }
79
80 const auto* response =
81 reinterpret_cast<const QueryDeviceIdentificationResponse*>(buf.data());
82
83 deviceIdentification = response->device_identification;
84 deviceInstance = response->instance_id;
85
86 return 0;
87}
88
89int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
90 std::span<uint8_t> buf)
91{
92 if (buf.size() < sizeof(GetTemperatureReadingRequest))
93 {
94 return EINVAL;
95 }
96
97 auto* msg = reinterpret_cast<GetTemperatureReadingRequest*>(buf.data());
98
99 ocp::accelerator_management::BindingPciVidInfo header{};
100 header.ocp_accelerator_management_msg_type =
101 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
102 header.instance_id = instanceId &
103 ocp::accelerator_management::instanceIdBitMask;
104 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
105
106 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
107
108 if (rc != 0)
109 {
110 return rc;
111 }
112
113 msg->hdr.command = static_cast<uint8_t>(
114 PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
115 msg->hdr.data_size = sizeof(sensorId);
116 msg->sensor_id = sensorId;
117
118 return 0;
119}
120
121int decodeGetTemperatureReadingResponse(
122 const std::span<const uint8_t> buf,
123 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
124 double& temperatureReading)
125{
126 auto rc =
127 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
128
129 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
130 {
131 return rc;
132 }
133
134 if (buf.size() < sizeof(GetTemperatureReadingResponse))
135 {
136 return EINVAL;
137 }
138
139 const auto* response =
140 reinterpret_cast<const GetTemperatureReadingResponse*>(buf.data());
141
142 uint16_t dataSize = le16toh(response->hdr.data_size);
143
144 if (dataSize != sizeof(int32_t))
145 {
146 return EINVAL;
147 }
148
149 int32_t reading = le32toh(response->reading);
150 temperatureReading = reading / static_cast<double>(1 << 8);
151
152 return 0;
153}
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530154
155int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
156 std::span<uint8_t> buf)
157{
158 if (buf.size() < sizeof(ReadThermalParametersRequest))
159 {
160 return EINVAL;
161 }
162
163 auto* msg = reinterpret_cast<ReadThermalParametersRequest*>(buf.data());
164
165 ocp::accelerator_management::BindingPciVidInfo header{};
166 header.ocp_accelerator_management_msg_type =
167 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
168 header.instance_id = instanceId &
169 ocp::accelerator_management::instanceIdBitMask;
170 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
171
172 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
173
174 if (rc != 0)
175 {
176 return rc;
177 }
178
179 msg->hdr.command = static_cast<uint8_t>(
180 PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS);
181 msg->hdr.data_size = sizeof(sensorId);
182 msg->sensor_id = sensorId;
183
184 return 0;
185}
186
187int decodeReadThermalParametersResponse(
188 std::span<const uint8_t> buf,
189 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
190 int32_t& threshold)
191{
192 auto rc =
193 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
194
195 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
196 {
197 return rc;
198 }
199
200 if (buf.size() < sizeof(ReadThermalParametersResponse))
201 {
202 return EINVAL;
203 }
204
205 const auto* response =
206 reinterpret_cast<const ReadThermalParametersResponse*>(buf.data());
207
208 uint16_t dataSize = le16toh(response->hdr.data_size);
209
210 if (dataSize != sizeof(int32_t))
211 {
212 return EINVAL;
213 }
214
215 threshold = le32toh(response->threshold);
216
217 return 0;
218}
Harshit Aghera902c6492025-05-08 15:57:42 +0530219
220int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
221 uint8_t averagingInterval,
222 std::span<uint8_t> buf)
223{
224 if (buf.size() < sizeof(GetCurrentPowerDrawRequest))
225 {
226 return EINVAL;
227 }
228
229 auto* msg = reinterpret_cast<GetCurrentPowerDrawRequest*>(buf.data());
230
231 ocp::accelerator_management::BindingPciVidInfo header{};
232 header.ocp_accelerator_management_msg_type =
233 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
234 header.instance_id = instanceId &
235 ocp::accelerator_management::instanceIdBitMask;
236 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
237
238 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
239
240 if (rc != 0)
241 {
242 return rc;
243 }
244
245 msg->hdr.command = static_cast<uint8_t>(
246 PlatformEnvironmentalCommands::GET_CURRENT_POWER_DRAW);
247 msg->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval);
248 msg->sensorId = sensorId;
249 msg->averagingInterval = averagingInterval;
250
251 return 0;
252}
253
254int decodeGetCurrentPowerDrawResponse(
255 std::span<const uint8_t> buf,
256 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
257 uint32_t& power)
258{
259 auto rc =
260 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
261
262 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
263 {
264 return rc;
265 }
266
267 if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
268 {
269 return EINVAL;
270 }
271
272 const auto* response =
273 reinterpret_cast<const GetCurrentPowerDrawResponse*>(buf.data());
274
275 const uint16_t dataSize = le16toh(response->hdr.data_size);
276
277 if (dataSize != sizeof(uint32_t))
278 {
279 return EINVAL;
280 }
281
282 power = le32toh(response->power);
283
284 return 0;
285}
Harshit Aghera775199d2025-05-27 14:20:24 +0530286
287int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
288 std::span<uint8_t> buf)
289{
290 if (buf.size() < sizeof(GetTemperatureReadingRequest))
291 {
292 return EINVAL;
293 }
294
295 auto* msg = reinterpret_cast<GetCurrentEnergyCounterRequest*>(buf.data());
296
297 ocp::accelerator_management::BindingPciVidInfo header{};
298 header.ocp_accelerator_management_msg_type =
299 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
300 header.instance_id = instanceId &
301 ocp::accelerator_management::instanceIdBitMask;
302 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
303
304 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
305
306 if (rc != 0)
307 {
308 return rc;
309 }
310
311 msg->hdr.command = static_cast<uint8_t>(
312 PlatformEnvironmentalCommands::GET_CURRENT_ENERGY_COUNTER);
313 msg->hdr.data_size = sizeof(sensorId);
314 msg->sensor_id = sensorId;
315
316 return 0;
317}
318
319int decodeGetCurrentEnergyCounterResponse(
320 std::span<const uint8_t> buf,
321 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
322 uint64_t& energy)
323{
324 auto rc =
325 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
326
327 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
328 {
329 return rc;
330 }
331
332 if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
333 {
334 return EINVAL;
335 }
336
337 const auto* response =
338 reinterpret_cast<const GetCurrentEnergyCounterResponse*>(buf.data());
339
340 const uint16_t dataSize = le16toh(response->hdr.data_size);
341
342 if (dataSize != sizeof(uint64_t))
343 {
344 return EINVAL;
345 }
346
347 energy = le32toh(response->energy);
348
349 return 0;
350}
Harshit Agherabef4d412025-05-27 14:53:56 +0530351
352int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
353 std::span<uint8_t> buf)
354{
355 if (buf.size() < sizeof(GetVoltageRequest))
356 {
357 return EINVAL;
358 }
359
360 auto* msg = reinterpret_cast<GetVoltageRequest*>(buf.data());
361
362 ocp::accelerator_management::BindingPciVidInfo header{};
363 header.ocp_accelerator_management_msg_type =
364 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
365 header.instance_id = instanceId &
366 ocp::accelerator_management::instanceIdBitMask;
367 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
368
369 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
370
371 if (rc != 0)
372 {
373 return rc;
374 }
375
376 msg->hdr.command =
377 static_cast<uint8_t>(PlatformEnvironmentalCommands::GET_VOLTAGE);
378 msg->hdr.data_size = sizeof(sensorId);
379 msg->sensor_id = sensorId;
380
381 return 0;
382}
383
384int decodeGetVoltageResponse(std::span<const uint8_t> buf,
385 ocp::accelerator_management::CompletionCode& cc,
386 uint16_t& reasonCode, uint32_t& voltage)
387{
388 auto rc =
389 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
390
391 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
392 {
393 return rc;
394 }
395
396 if (buf.size() < sizeof(GetVoltageResponse))
397 {
398 return EINVAL;
399 }
400
401 const auto* response =
402 reinterpret_cast<const GetVoltageResponse*>(buf.data());
403
404 const uint16_t dataSize = le16toh(response->hdr.data_size);
405
406 if (dataSize != sizeof(uint32_t))
407 {
408 return EINVAL;
409 }
410
411 voltage = le32toh(response->voltage);
412
413 return 0;
414}
Harshit Aghera560e6af2025-04-21 20:04:56 +0530415// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
416} // namespace gpu