blob: ebfa76b8267f88cd01b1ead56b4071bd57aec907 [file] [log] [blame]
Harshit Aghera560e6af2025-04-21 20:04:56 +05301/*
2 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3 * AFFILIATES. All rights reserved.
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7#include "NvidiaGpuMctpVdm.hpp"
8
9#include "OcpMctpVdm.hpp"
10
11#include <endian.h>
12
13#include <cerrno>
14#include <cstdint>
Harshit Aghera560e6af2025-04-21 20:04:56 +053015#include <span>
Rohit PAI86786b62025-06-10 09:46:33 +053016#include <vector>
Harshit Aghera560e6af2025-04-21 20:04:56 +053017
18namespace gpu
19{
20// These functions encode/decode data communicated over the network
21// The use of reinterpret_cast enables direct memory access to raw byte buffers
22// without doing unnecessary data copying
23// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
24int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
25 ocp::accelerator_management::BindingPciVid& msg)
26{
27 return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg);
28}
29
30int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
31 const std::span<uint8_t> buf)
32{
33 if (buf.size() < sizeof(QueryDeviceIdentificationRequest))
34 {
35 return EINVAL;
36 }
37
38 auto* msg = reinterpret_cast<QueryDeviceIdentificationRequest*>(buf.data());
39
40 ocp::accelerator_management::BindingPciVidInfo header{};
41
42 header.ocp_accelerator_management_msg_type =
43 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
44 header.instance_id = instanceId &
45 ocp::accelerator_management::instanceIdBitMask;
46 header.msg_type =
47 static_cast<uint8_t>(MessageType::DEVICE_CAPABILITY_DISCOVERY);
48
49 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
50
51 if (rc != 0)
52 {
53 return rc;
54 }
55
56 msg->hdr.command = static_cast<uint8_t>(
57 DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
58 msg->hdr.data_size = 0;
59
60 return 0;
61}
62
63int decodeQueryDeviceIdentificationResponse(
64 const std::span<const uint8_t> buf,
65 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
66 uint8_t& deviceIdentification, uint8_t& deviceInstance)
67{
68 auto rc =
69 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
70
71 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
72 {
73 return rc;
74 }
75
76 if (buf.size() < sizeof(QueryDeviceIdentificationResponse))
77 {
78 return EINVAL;
79 }
80
81 const auto* response =
82 reinterpret_cast<const QueryDeviceIdentificationResponse*>(buf.data());
83
84 deviceIdentification = response->device_identification;
85 deviceInstance = response->instance_id;
86
87 return 0;
88}
89
90int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
91 std::span<uint8_t> buf)
92{
93 if (buf.size() < sizeof(GetTemperatureReadingRequest))
94 {
95 return EINVAL;
96 }
97
98 auto* msg = reinterpret_cast<GetTemperatureReadingRequest*>(buf.data());
99
100 ocp::accelerator_management::BindingPciVidInfo header{};
101 header.ocp_accelerator_management_msg_type =
102 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
103 header.instance_id = instanceId &
104 ocp::accelerator_management::instanceIdBitMask;
105 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
106
107 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
108
109 if (rc != 0)
110 {
111 return rc;
112 }
113
114 msg->hdr.command = static_cast<uint8_t>(
115 PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
116 msg->hdr.data_size = sizeof(sensorId);
117 msg->sensor_id = sensorId;
118
119 return 0;
120}
121
122int decodeGetTemperatureReadingResponse(
123 const std::span<const uint8_t> buf,
124 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
125 double& temperatureReading)
126{
127 auto rc =
128 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
129
130 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
131 {
132 return rc;
133 }
134
135 if (buf.size() < sizeof(GetTemperatureReadingResponse))
136 {
137 return EINVAL;
138 }
139
140 const auto* response =
141 reinterpret_cast<const GetTemperatureReadingResponse*>(buf.data());
142
143 uint16_t dataSize = le16toh(response->hdr.data_size);
144
145 if (dataSize != sizeof(int32_t))
146 {
147 return EINVAL;
148 }
149
150 int32_t reading = le32toh(response->reading);
151 temperatureReading = reading / static_cast<double>(1 << 8);
152
153 return 0;
154}
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530155
156int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
157 std::span<uint8_t> buf)
158{
159 if (buf.size() < sizeof(ReadThermalParametersRequest))
160 {
161 return EINVAL;
162 }
163
164 auto* msg = reinterpret_cast<ReadThermalParametersRequest*>(buf.data());
165
166 ocp::accelerator_management::BindingPciVidInfo header{};
167 header.ocp_accelerator_management_msg_type =
168 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
169 header.instance_id = instanceId &
170 ocp::accelerator_management::instanceIdBitMask;
171 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
172
173 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
174
175 if (rc != 0)
176 {
177 return rc;
178 }
179
180 msg->hdr.command = static_cast<uint8_t>(
181 PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS);
182 msg->hdr.data_size = sizeof(sensorId);
183 msg->sensor_id = sensorId;
184
185 return 0;
186}
187
188int decodeReadThermalParametersResponse(
189 std::span<const uint8_t> buf,
190 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
191 int32_t& threshold)
192{
193 auto rc =
194 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
195
196 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
197 {
198 return rc;
199 }
200
201 if (buf.size() < sizeof(ReadThermalParametersResponse))
202 {
203 return EINVAL;
204 }
205
206 const auto* response =
207 reinterpret_cast<const ReadThermalParametersResponse*>(buf.data());
208
209 uint16_t dataSize = le16toh(response->hdr.data_size);
210
211 if (dataSize != sizeof(int32_t))
212 {
213 return EINVAL;
214 }
215
216 threshold = le32toh(response->threshold);
217
218 return 0;
219}
Harshit Aghera902c6492025-05-08 15:57:42 +0530220
221int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
222 uint8_t averagingInterval,
223 std::span<uint8_t> buf)
224{
225 if (buf.size() < sizeof(GetCurrentPowerDrawRequest))
226 {
227 return EINVAL;
228 }
229
230 auto* msg = reinterpret_cast<GetCurrentPowerDrawRequest*>(buf.data());
231
232 ocp::accelerator_management::BindingPciVidInfo header{};
233 header.ocp_accelerator_management_msg_type =
234 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
235 header.instance_id = instanceId &
236 ocp::accelerator_management::instanceIdBitMask;
237 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
238
239 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
240
241 if (rc != 0)
242 {
243 return rc;
244 }
245
246 msg->hdr.command = static_cast<uint8_t>(
247 PlatformEnvironmentalCommands::GET_CURRENT_POWER_DRAW);
248 msg->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval);
249 msg->sensorId = sensorId;
250 msg->averagingInterval = averagingInterval;
251
252 return 0;
253}
254
255int decodeGetCurrentPowerDrawResponse(
256 std::span<const uint8_t> buf,
257 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
258 uint32_t& power)
259{
260 auto rc =
261 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
262
263 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
264 {
265 return rc;
266 }
267
268 if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
269 {
270 return EINVAL;
271 }
272
273 const auto* response =
274 reinterpret_cast<const GetCurrentPowerDrawResponse*>(buf.data());
275
276 const uint16_t dataSize = le16toh(response->hdr.data_size);
277
278 if (dataSize != sizeof(uint32_t))
279 {
280 return EINVAL;
281 }
282
283 power = le32toh(response->power);
284
285 return 0;
286}
Harshit Aghera775199d2025-05-27 14:20:24 +0530287
288int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
289 std::span<uint8_t> buf)
290{
291 if (buf.size() < sizeof(GetTemperatureReadingRequest))
292 {
293 return EINVAL;
294 }
295
296 auto* msg = reinterpret_cast<GetCurrentEnergyCounterRequest*>(buf.data());
297
298 ocp::accelerator_management::BindingPciVidInfo header{};
299 header.ocp_accelerator_management_msg_type =
300 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
301 header.instance_id = instanceId &
302 ocp::accelerator_management::instanceIdBitMask;
303 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
304
305 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
306
307 if (rc != 0)
308 {
309 return rc;
310 }
311
312 msg->hdr.command = static_cast<uint8_t>(
313 PlatformEnvironmentalCommands::GET_CURRENT_ENERGY_COUNTER);
314 msg->hdr.data_size = sizeof(sensorId);
315 msg->sensor_id = sensorId;
316
317 return 0;
318}
319
320int decodeGetCurrentEnergyCounterResponse(
321 std::span<const uint8_t> buf,
322 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
323 uint64_t& energy)
324{
325 auto rc =
326 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
327
328 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
329 {
330 return rc;
331 }
332
333 if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
334 {
335 return EINVAL;
336 }
337
338 const auto* response =
339 reinterpret_cast<const GetCurrentEnergyCounterResponse*>(buf.data());
340
341 const uint16_t dataSize = le16toh(response->hdr.data_size);
342
343 if (dataSize != sizeof(uint64_t))
344 {
345 return EINVAL;
346 }
347
348 energy = le32toh(response->energy);
349
350 return 0;
351}
Harshit Agherabef4d412025-05-27 14:53:56 +0530352
353int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
354 std::span<uint8_t> buf)
355{
356 if (buf.size() < sizeof(GetVoltageRequest))
357 {
358 return EINVAL;
359 }
360
361 auto* msg = reinterpret_cast<GetVoltageRequest*>(buf.data());
362
363 ocp::accelerator_management::BindingPciVidInfo header{};
364 header.ocp_accelerator_management_msg_type =
365 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
366 header.instance_id = instanceId &
367 ocp::accelerator_management::instanceIdBitMask;
368 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
369
370 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
371
372 if (rc != 0)
373 {
374 return rc;
375 }
376
377 msg->hdr.command =
378 static_cast<uint8_t>(PlatformEnvironmentalCommands::GET_VOLTAGE);
379 msg->hdr.data_size = sizeof(sensorId);
380 msg->sensor_id = sensorId;
381
382 return 0;
383}
384
385int decodeGetVoltageResponse(std::span<const uint8_t> buf,
386 ocp::accelerator_management::CompletionCode& cc,
387 uint16_t& reasonCode, uint32_t& voltage)
388{
389 auto rc =
390 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
391
392 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
393 {
394 return rc;
395 }
396
397 if (buf.size() < sizeof(GetVoltageResponse))
398 {
399 return EINVAL;
400 }
401
402 const auto* response =
403 reinterpret_cast<const GetVoltageResponse*>(buf.data());
404
405 const uint16_t dataSize = le16toh(response->hdr.data_size);
406
407 if (dataSize != sizeof(uint32_t))
408 {
409 return EINVAL;
410 }
411
412 voltage = le32toh(response->voltage);
413
414 return 0;
415}
Rohit PAI86786b62025-06-10 09:46:33 +0530416
417int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
418 std::span<uint8_t> buf)
419{
420 if (buf.size() < sizeof(GetInventoryInformationRequest))
421 {
422 return EINVAL;
423 }
424
425 auto* msg = reinterpret_cast<GetInventoryInformationRequest*>(buf.data());
426
427 ocp::accelerator_management::BindingPciVidInfo header{};
428 header.ocp_accelerator_management_msg_type =
429 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
430 header.instance_id = instanceId &
431 ocp::accelerator_management::instanceIdBitMask;
432 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
433
434 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
435
436 if (rc != 0)
437 {
438 return rc;
439 }
440
441 msg->hdr.command = static_cast<uint8_t>(
442 PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
443 msg->hdr.data_size = sizeof(propertyId);
444 msg->property_id = propertyId;
445
446 return 0;
447}
448
449int decodeGetInventoryInformationResponse(
450 std::span<const uint8_t> buf,
451 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
452 InventoryPropertyId propertyId, InventoryValue& value)
453{
454 auto rc =
455 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
456 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
457 {
458 return rc;
459 }
460 // Expect at least one byte of inventory response data after common response
461 if (buf.size() < (sizeof(ocp::accelerator_management::CommonResponse) + 1))
462 {
463 return EINVAL;
464 }
465
466 const auto* response =
467 reinterpret_cast<const GetInventoryInformationResponse*>(buf.data());
468 uint16_t dataSize = le16toh(response->hdr.data_size);
469
470 if (dataSize == 0 || dataSize > maxInventoryDataSize)
471 {
472 return EINVAL;
473 }
474
475 const uint8_t* dataPtr = response->data.data();
476
477 switch (propertyId)
478 {
479 case InventoryPropertyId::BOARD_PART_NUMBER:
480 case InventoryPropertyId::SERIAL_NUMBER:
481 case InventoryPropertyId::MARKETING_NAME:
482 case InventoryPropertyId::DEVICE_PART_NUMBER:
483 value =
484 std::string(reinterpret_cast<const char*>(dataPtr), dataSize);
485 break;
486 case InventoryPropertyId::DEVICE_GUID:
487 value = std::vector<uint8_t>(dataPtr, dataPtr + dataSize);
488 break;
489 default:
490 return EINVAL;
491 }
492 return 0;
493}
494
Harshit Aghera560e6af2025-04-21 20:04:56 +0530495// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
496} // namespace gpu