/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include <OcpMctpVdm.hpp>

#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
#include <variant>
#include <vector>

namespace gpu
{

/** @brief PCI vendor ID constant (0x10de) used for NVIDIA devices. */
// `inline` gives the header-defined constant a single definition across all
// translation units instead of one internal-linkage copy per includer.
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;

/**
 * @brief Message type codes, stored as 8-bit values.
 *
 * Only the two types listed here are defined in this header; the numbering
 * is sparse (0 and 3).
 */
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0,
    PLATFORM_ENVIRONMENTAL = 3
};

/**
 * @brief Command codes grouped under the device-capability-discovery name.
 *
 * Presumably paired with MessageType::DEVICE_CAPABILITY_DISCOVERY; confirm
 * against the encoder implementation.
 */
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09,
};

/**
 * @brief Command codes grouped under the platform-environmental name.
 *
 * Presumably paired with MessageType::PLATFORM_ENVIRONMENTAL; confirm
 * against the encoder implementation. The numbering is sparse, so do not
 * assume the values are contiguous.
 */
enum class PlatformEnvironmentalCommands : uint8_t
{
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_CURRENT_ENERGY_COUNTER = 0x06,
    GET_INVENTORY_INFORMATION = 0x0C,
    GET_VOLTAGE = 0x0F,
};

/**
 * @brief Device identification codes.
 *
 * Only the GPU value is defined in this header. Presumably compared against
 * QueryDeviceIdentificationResponse::device_identification; confirm with
 * the callers.
 */
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0
};

/**
 * @brief Identifiers for the inventory properties that can be requested.
 *
 * Values 0-36 are contiguous; the two FPGA entries start at 128. The
 * underlying type is uint8_t, matching the one-byte property_id field of
 * GetInventoryInformationRequest.
 */
enum class InventoryPropertyId : uint8_t
{
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36,
    FPGA_IMAGE_VERSION = 128,
    FPGA_MCTP_BRIDGE_UUID = 129,
};

Harshit Aghera560e6af2025-04-21 20:04:56 +053091struct QueryDeviceIdentificationRequest
92{
93 ocp::accelerator_management::CommonRequest hdr;
94} __attribute__((packed));
95
96struct QueryDeviceIdentificationResponse
97{
98 ocp::accelerator_management::CommonResponse hdr;
99 uint8_t device_identification;
100 uint8_t instance_id;
101} __attribute__((packed));
102
103struct GetNumericSensorReadingRequest
104{
105 ocp::accelerator_management::CommonRequest hdr;
106 uint8_t sensor_id;
107} __attribute__((packed));
108
109using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
110
Harshit Agherac20108d2025-05-07 16:20:16 +0530111using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
112
Harshit Agherac8dab722025-05-08 15:57:42 +0530113struct GetCurrentPowerDrawRequest
114{
115 ocp::accelerator_management::CommonRequest hdr;
116 uint8_t sensorId;
117 uint8_t averagingInterval;
118} __attribute__((packed));
119
Harshit Aghera128c91d2025-05-27 14:20:24 +0530120using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
121
Harshit Agherab55847f2025-05-27 14:53:56 +0530122using GetVoltageRequest = GetNumericSensorReadingRequest;
123
Harshit Aghera560e6af2025-04-21 20:04:56 +0530124struct GetTemperatureReadingResponse
125{
126 ocp::accelerator_management::CommonResponse hdr;
127 int32_t reading;
128} __attribute__((packed));
129
Harshit Agherac20108d2025-05-07 16:20:16 +0530130struct ReadThermalParametersResponse
131{
132 ocp::accelerator_management::CommonResponse hdr;
133 int32_t threshold;
134} __attribute__((packed));
135
Harshit Agherac8dab722025-05-08 15:57:42 +0530136struct GetCurrentPowerDrawResponse
137{
138 ocp::accelerator_management::CommonResponse hdr;
139 uint32_t power;
140} __attribute__((packed));
141
Harshit Aghera128c91d2025-05-27 14:20:24 +0530142struct GetCurrentEnergyCounterResponse
143{
144 ocp::accelerator_management::CommonResponse hdr;
145 uint64_t energy;
146} __attribute__((packed));
147
Harshit Agherab55847f2025-05-27 14:53:56 +0530148struct GetVoltageResponse
149{
150 ocp::accelerator_management::CommonResponse hdr;
151 uint32_t voltage;
152} __attribute__((packed));
Rohit PAIe8918842025-06-10 09:46:33 +0530153struct GetInventoryInformationRequest
154{
155 ocp::accelerator_management::CommonRequest hdr;
156 uint8_t property_id;
157} __attribute__((packed));
158
/**
 * @brief Capacity, in bytes, of the data array in
 *        GetInventoryInformationResponse.
 */
// `inline` gives the header-defined constant a single definition across all
// translation units; std::size_t is the name <cstddef> guarantees.
inline constexpr std::size_t MAX_INVENTORY_DATA_SIZE = 256;

161struct GetInventoryInformationResponse
162{
163 ocp::accelerator_management::CommonResponse hdr;
164 uint8_t data[MAX_INVENTORY_DATA_SIZE];
165} __attribute__((packed));
166
/**
 * @brief Decoded inventory value: either text or a raw byte sequence.
 *
 * Which alternative is produced for a given property is decided by the
 * decoder implementation, which is not visible in this header.
 */
using InventoryInfo = std::variant<std::string, std::vector<uint8_t>>;

Harshit Aghera560e6af2025-04-21 20:04:56 +0530169int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
170 ocp::accelerator_management::BindingPciVid& msg);
171
172int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
173 std::span<uint8_t> buf);
174
175int decodeQueryDeviceIdentificationResponse(
176 std::span<const uint8_t> buf,
177 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
178 uint8_t& deviceIdentification, uint8_t& deviceInstance);
179
180int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
181 std::span<uint8_t> buf);
182
183int decodeGetTemperatureReadingResponse(
184 std::span<const uint8_t> buf,
185 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
186 double& temperatureReading);
187
Harshit Agherac20108d2025-05-07 16:20:16 +0530188int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
189 std::span<uint8_t> buf);
190
191int decodeReadThermalParametersResponse(
192 std::span<const uint8_t> buf,
193 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
194 int32_t& threshold);
195
Harshit Agherac8dab722025-05-08 15:57:42 +0530196int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
197 uint8_t averagingInterval,
198 std::span<uint8_t> buf);
199
200int decodeGetCurrentPowerDrawResponse(
201 std::span<const uint8_t> buf,
202 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
203 uint32_t& power);
Harshit Aghera128c91d2025-05-27 14:20:24 +0530204
205int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
206 std::span<uint8_t> buf);
207
208int decodeGetCurrentEnergyCounterResponse(
209 std::span<const uint8_t> buf,
210 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
211 uint64_t& energy);
Harshit Agherab55847f2025-05-27 14:53:56 +0530212
213int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
214 std::span<uint8_t> buf);
215
216int decodeGetVoltageResponse(std::span<const uint8_t> buf,
217 ocp::accelerator_management::CompletionCode& cc,
218 uint16_t& reasonCode, uint32_t& voltage);
Rohit PAIe8918842025-06-10 09:46:33 +0530219int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
220 std::span<uint8_t> buf);
221
222int decodeGetInventoryInformationResponse(
223 std::span<const uint8_t> buf,
224 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
225 InventoryPropertyId propertyId, InventoryInfo& info);
226
} // namespace gpu