blob: 6ebf9631211dc751112e7bba6226b40d924524d3 [file] [log] [blame]
Harshit Aghera560e6af2025-04-21 20:04:56 +05301/*
2 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3 * AFFILIATES. All rights reserved.
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7#include "NvidiaGpuMctpVdm.hpp"
8
9#include "OcpMctpVdm.hpp"
10
11#include <endian.h>
12
13#include <cerrno>
14#include <cstdint>
15#include <cstring>
16#include <span>
Rohit PAIe8918842025-06-10 09:46:33 +053017#include <string>
18#include <variant>
19#include <vector>
Harshit Aghera560e6af2025-04-21 20:04:56 +053020
21namespace gpu
22{
23// These functions encode/decode data communicated over the network
24// The use of reinterpret_cast enables direct memory access to raw byte buffers
25// without doing unnecessary data copying
26// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
27int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
28 ocp::accelerator_management::BindingPciVid& msg)
29{
30 return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg);
31}
32
33int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
34 const std::span<uint8_t> buf)
35{
36 if (buf.size() < sizeof(QueryDeviceIdentificationRequest))
37 {
38 return EINVAL;
39 }
40
41 auto* msg = reinterpret_cast<QueryDeviceIdentificationRequest*>(buf.data());
42
43 ocp::accelerator_management::BindingPciVidInfo header{};
44
45 header.ocp_accelerator_management_msg_type =
46 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
47 header.instance_id = instanceId &
48 ocp::accelerator_management::instanceIdBitMask;
49 header.msg_type =
50 static_cast<uint8_t>(MessageType::DEVICE_CAPABILITY_DISCOVERY);
51
52 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
53
54 if (rc != 0)
55 {
56 return rc;
57 }
58
59 msg->hdr.command = static_cast<uint8_t>(
60 DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
61 msg->hdr.data_size = 0;
62
63 return 0;
64}
65
66int decodeQueryDeviceIdentificationResponse(
67 const std::span<const uint8_t> buf,
68 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
69 uint8_t& deviceIdentification, uint8_t& deviceInstance)
70{
71 auto rc =
72 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
73
74 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
75 {
76 return rc;
77 }
78
79 if (buf.size() < sizeof(QueryDeviceIdentificationResponse))
80 {
81 return EINVAL;
82 }
83
84 const auto* response =
85 reinterpret_cast<const QueryDeviceIdentificationResponse*>(buf.data());
86
87 deviceIdentification = response->device_identification;
88 deviceInstance = response->instance_id;
89
90 return 0;
91}
92
93int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
94 std::span<uint8_t> buf)
95{
96 if (buf.size() < sizeof(GetTemperatureReadingRequest))
97 {
98 return EINVAL;
99 }
100
101 auto* msg = reinterpret_cast<GetTemperatureReadingRequest*>(buf.data());
102
103 ocp::accelerator_management::BindingPciVidInfo header{};
104 header.ocp_accelerator_management_msg_type =
105 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
106 header.instance_id = instanceId &
107 ocp::accelerator_management::instanceIdBitMask;
108 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
109
110 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
111
112 if (rc != 0)
113 {
114 return rc;
115 }
116
117 msg->hdr.command = static_cast<uint8_t>(
118 PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
119 msg->hdr.data_size = sizeof(sensorId);
120 msg->sensor_id = sensorId;
121
122 return 0;
123}
124
125int decodeGetTemperatureReadingResponse(
126 const std::span<const uint8_t> buf,
127 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
128 double& temperatureReading)
129{
130 auto rc =
131 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
132
133 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
134 {
135 return rc;
136 }
137
138 if (buf.size() < sizeof(GetTemperatureReadingResponse))
139 {
140 return EINVAL;
141 }
142
143 const auto* response =
144 reinterpret_cast<const GetTemperatureReadingResponse*>(buf.data());
145
146 uint16_t dataSize = le16toh(response->hdr.data_size);
147
148 if (dataSize != sizeof(int32_t))
149 {
150 return EINVAL;
151 }
152
153 int32_t reading = le32toh(response->reading);
154 temperatureReading = reading / static_cast<double>(1 << 8);
155
156 return 0;
157}
Harshit Agherac20108d2025-05-07 16:20:16 +0530158
159int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
160 std::span<uint8_t> buf)
161{
162 if (buf.size() < sizeof(ReadThermalParametersRequest))
163 {
164 return EINVAL;
165 }
166
167 auto* msg = reinterpret_cast<ReadThermalParametersRequest*>(buf.data());
168
169 ocp::accelerator_management::BindingPciVidInfo header{};
170 header.ocp_accelerator_management_msg_type =
171 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
172 header.instance_id = instanceId &
173 ocp::accelerator_management::instanceIdBitMask;
174 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
175
176 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
177
178 if (rc != 0)
179 {
180 return rc;
181 }
182
183 msg->hdr.command = static_cast<uint8_t>(
184 PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS);
185 msg->hdr.data_size = sizeof(sensorId);
186 msg->sensor_id = sensorId;
187
188 return 0;
189}
190
191int decodeReadThermalParametersResponse(
192 std::span<const uint8_t> buf,
193 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
194 int32_t& threshold)
195{
196 auto rc =
197 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
198
199 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
200 {
201 return rc;
202 }
203
204 if (buf.size() < sizeof(ReadThermalParametersResponse))
205 {
206 return EINVAL;
207 }
208
209 const auto* response =
210 reinterpret_cast<const ReadThermalParametersResponse*>(buf.data());
211
212 uint16_t dataSize = le16toh(response->hdr.data_size);
213
214 if (dataSize != sizeof(int32_t))
215 {
216 return EINVAL;
217 }
218
219 threshold = le32toh(response->threshold);
220
221 return 0;
222}
Harshit Agherac8dab722025-05-08 15:57:42 +0530223
224int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
225 uint8_t averagingInterval,
226 std::span<uint8_t> buf)
227{
228 if (buf.size() < sizeof(GetCurrentPowerDrawRequest))
229 {
230 return EINVAL;
231 }
232
233 auto* msg = reinterpret_cast<GetCurrentPowerDrawRequest*>(buf.data());
234
235 ocp::accelerator_management::BindingPciVidInfo header{};
236 header.ocp_accelerator_management_msg_type =
237 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
238 header.instance_id = instanceId &
239 ocp::accelerator_management::instanceIdBitMask;
240 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
241
242 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
243
244 if (rc != 0)
245 {
246 return rc;
247 }
248
249 msg->hdr.command = static_cast<uint8_t>(
250 PlatformEnvironmentalCommands::GET_CURRENT_POWER_DRAW);
251 msg->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval);
252 msg->sensorId = sensorId;
253 msg->averagingInterval = averagingInterval;
254
255 return 0;
256}
257
258int decodeGetCurrentPowerDrawResponse(
259 std::span<const uint8_t> buf,
260 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
261 uint32_t& power)
262{
263 auto rc =
264 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
265
266 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
267 {
268 return rc;
269 }
270
271 if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
272 {
273 return EINVAL;
274 }
275
276 const auto* response =
277 reinterpret_cast<const GetCurrentPowerDrawResponse*>(buf.data());
278
279 const uint16_t dataSize = le16toh(response->hdr.data_size);
280
281 if (dataSize != sizeof(uint32_t))
282 {
283 return EINVAL;
284 }
285
286 power = le32toh(response->power);
287
288 return 0;
289}
Harshit Aghera128c91d2025-05-27 14:20:24 +0530290
291int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
292 std::span<uint8_t> buf)
293{
294 if (buf.size() < sizeof(GetTemperatureReadingRequest))
295 {
296 return EINVAL;
297 }
298
299 auto* msg = reinterpret_cast<GetCurrentEnergyCounterRequest*>(buf.data());
300
301 ocp::accelerator_management::BindingPciVidInfo header{};
302 header.ocp_accelerator_management_msg_type =
303 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
304 header.instance_id = instanceId &
305 ocp::accelerator_management::instanceIdBitMask;
306 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
307
308 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
309
310 if (rc != 0)
311 {
312 return rc;
313 }
314
315 msg->hdr.command = static_cast<uint8_t>(
316 PlatformEnvironmentalCommands::GET_CURRENT_ENERGY_COUNTER);
317 msg->hdr.data_size = sizeof(sensorId);
318 msg->sensor_id = sensorId;
319
320 return 0;
321}
322
323int decodeGetCurrentEnergyCounterResponse(
324 std::span<const uint8_t> buf,
325 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
326 uint64_t& energy)
327{
328 auto rc =
329 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
330
331 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
332 {
333 return rc;
334 }
335
336 if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
337 {
338 return EINVAL;
339 }
340
341 const auto* response =
342 reinterpret_cast<const GetCurrentEnergyCounterResponse*>(buf.data());
343
344 const uint16_t dataSize = le16toh(response->hdr.data_size);
345
346 if (dataSize != sizeof(uint64_t))
347 {
348 return EINVAL;
349 }
350
351 energy = le32toh(response->energy);
352
353 return 0;
354}
Harshit Agherab55847f2025-05-27 14:53:56 +0530355
356int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
357 std::span<uint8_t> buf)
358{
359 if (buf.size() < sizeof(GetVoltageRequest))
360 {
361 return EINVAL;
362 }
363
364 auto* msg = reinterpret_cast<GetVoltageRequest*>(buf.data());
365
366 ocp::accelerator_management::BindingPciVidInfo header{};
367 header.ocp_accelerator_management_msg_type =
368 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
369 header.instance_id = instanceId &
370 ocp::accelerator_management::instanceIdBitMask;
371 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
372
373 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
374
375 if (rc != 0)
376 {
377 return rc;
378 }
379
380 msg->hdr.command =
381 static_cast<uint8_t>(PlatformEnvironmentalCommands::GET_VOLTAGE);
382 msg->hdr.data_size = sizeof(sensorId);
383 msg->sensor_id = sensorId;
384
385 return 0;
386}
387
388int decodeGetVoltageResponse(std::span<const uint8_t> buf,
389 ocp::accelerator_management::CompletionCode& cc,
390 uint16_t& reasonCode, uint32_t& voltage)
391{
392 auto rc =
393 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
394
395 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
396 {
397 return rc;
398 }
399
400 if (buf.size() < sizeof(GetVoltageResponse))
401 {
402 return EINVAL;
403 }
404
405 const auto* response =
406 reinterpret_cast<const GetVoltageResponse*>(buf.data());
407
408 const uint16_t dataSize = le16toh(response->hdr.data_size);
409
410 if (dataSize != sizeof(uint32_t))
411 {
412 return EINVAL;
413 }
414
415 voltage = le32toh(response->voltage);
416
417 return 0;
418}
Rohit PAIe8918842025-06-10 09:46:33 +0530419
Rohit PAIe8918842025-06-10 09:46:33 +0530420int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
421 std::span<uint8_t> buf)
422{
423 if (buf.size() < sizeof(GetInventoryInformationRequest))
424 {
425 return EINVAL;
426 }
427
428 auto* msg = reinterpret_cast<GetInventoryInformationRequest*>(buf.data());
429
430 ocp::accelerator_management::BindingPciVidInfo header{};
431 header.ocp_accelerator_management_msg_type =
432 static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
433 header.instance_id = instanceId &
434 ocp::accelerator_management::instanceIdBitMask;
435 header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
436
437 auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
438
439 if (rc != 0)
440 {
441 return rc;
442 }
443
444 msg->hdr.command = static_cast<uint8_t>(
445 PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
446 msg->hdr.data_size = sizeof(propertyId);
447 msg->property_id = propertyId;
448
449 return 0;
450}
451
452int decodeGetInventoryInformationResponse(
453 std::span<const uint8_t> buf,
454 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
455 InventoryPropertyId propertyId, InventoryInfo& info)
456{
457 auto rc =
458 ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
459 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
460 {
461 return rc;
462 }
463
464 if (buf.size() < (sizeof(ocp::accelerator_management::CommonResponse) + 1))
465 {
466 return EINVAL;
467 }
468
469 const auto* response =
470 reinterpret_cast<const GetInventoryInformationResponse*>(buf.data());
471 uint16_t dataSize = le16toh(response->hdr.data_size);
472
473 if (dataSize == 0 || dataSize > MAX_INVENTORY_DATA_SIZE)
474 {
475 return EINVAL;
476 }
477
478 switch (propertyId)
479 {
480 case InventoryPropertyId::BOARD_PART_NUMBER:
481 case InventoryPropertyId::SERIAL_NUMBER:
482 case InventoryPropertyId::MARKETING_NAME:
483 case InventoryPropertyId::DEVICE_PART_NUMBER:
484 info = std::string(reinterpret_cast<const char*>(response->data),
485 dataSize);
486 break;
487 case InventoryPropertyId::DEVICE_GUID:
488 info =
489 std::vector<uint8_t>(response->data, response->data + dataSize);
490 break;
491 default:
492 return EINVAL;
493 }
494 return 0;
495}
496
Harshit Aghera560e6af2025-04-21 20:04:56 +0530497// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
498} // namespace gpu