blob: 1e8e9866829f14c69bef53915bb5c23ad742fd54 [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
6
7#pragma once
8
9#include <OcpMctpVdm.hpp>
10
Rohit PAI86786b62025-06-10 09:46:33 +053011#include <array>
12#include <cstddef>
Harshit Aghera560e6af2025-04-21 20:04:56 +053013#include <cstdint>
14#include <span>
Rohit PAI86786b62025-06-10 09:46:33 +053015#include <string>
16#include <variant>
17#include <vector>
Harshit Aghera560e6af2025-04-21 20:04:56 +053018
19namespace gpu
20{
21
/**
 * @brief Value produced when an inventory response is decoded: either a text
 *        string or a raw byte sequence.
 */
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

/**
 * @brief Capacity, in bytes, of the data field of
 *        GetInventoryInformationResponse.
 */
constexpr size_t maxInventoryDataSize = 256;
24
/** @brief PCI vendor ID for NVIDIA (0x10de). */
constexpr uint16_t nvidiaPciVendorId = 0x10de;
26
/**
 * @brief Message type codes, stored as a single byte.
 *
 * NOTE(review): each type presumably selects one of the command sets declared
 * in this header (e.g. PlatformEnvironmentalCommands) — confirm against the
 * encoder implementation.
 */
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0,
    PLATFORM_ENVIRONMENTAL = 3
};
32
/**
 * @brief Command codes for the device capability discovery command set,
 *        stored as a single byte.
 */
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09,
};
37
/**
 * @brief Command codes for the platform environmental command set, stored as
 *        a single byte.
 *
 * The values are not contiguous; only the commands listed here are declared.
 */
enum class PlatformEnvironmentalCommands : uint8_t
{
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_MAX_OBSERVED_POWER = 0x04,
    GET_CURRENT_ENERGY_COUNTER = 0x06,
    GET_INVENTORY_INFORMATION = 0x0C,
    GET_VOLTAGE = 0x0F,
};
48
/**
 * @brief Device identification codes, stored as a single byte.
 *
 * NOTE(review): presumably the values carried in
 * QueryDeviceIdentificationResponse::device_identification — confirm against
 * the decoder.
 */
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0,
    DEVICE_SMA = 5
};
54
/**
 * @brief Identifiers of the inventory properties, stored as a single byte.
 *
 * Values run contiguously from 0 (BOARD_PART_NUMBER) to 36 (NVLINK_PEER_TYPE).
 */
enum class InventoryPropertyId : uint8_t
{
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36
};
95
Harshit Aghera560e6af2025-04-21 20:04:56 +053096struct QueryDeviceIdentificationRequest
97{
98 ocp::accelerator_management::CommonRequest hdr;
99} __attribute__((packed));
100
101struct QueryDeviceIdentificationResponse
102{
103 ocp::accelerator_management::CommonResponse hdr;
104 uint8_t device_identification;
105 uint8_t instance_id;
106} __attribute__((packed));
107
108struct GetNumericSensorReadingRequest
109{
110 ocp::accelerator_management::CommonRequest hdr;
111 uint8_t sensor_id;
112} __attribute__((packed));
113
114using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
115
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530116using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
117
Harshit Aghera6b712322025-07-31 19:25:12 +0530118struct GetPowerDrawRequest
Harshit Aghera902c6492025-05-08 15:57:42 +0530119{
120 ocp::accelerator_management::CommonRequest hdr;
121 uint8_t sensorId;
122 uint8_t averagingInterval;
123} __attribute__((packed));
124
Harshit Aghera775199d2025-05-27 14:20:24 +0530125using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
126
Harshit Agherabef4d412025-05-27 14:53:56 +0530127using GetVoltageRequest = GetNumericSensorReadingRequest;
128
Harshit Aghera560e6af2025-04-21 20:04:56 +0530129struct GetTemperatureReadingResponse
130{
131 ocp::accelerator_management::CommonResponse hdr;
132 int32_t reading;
133} __attribute__((packed));
134
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530135struct ReadThermalParametersResponse
136{
137 ocp::accelerator_management::CommonResponse hdr;
138 int32_t threshold;
139} __attribute__((packed));
140
Harshit Aghera6b712322025-07-31 19:25:12 +0530141struct GetPowerDrawResponse
Harshit Aghera902c6492025-05-08 15:57:42 +0530142{
143 ocp::accelerator_management::CommonResponse hdr;
144 uint32_t power;
145} __attribute__((packed));
146
Harshit Aghera775199d2025-05-27 14:20:24 +0530147struct GetCurrentEnergyCounterResponse
148{
149 ocp::accelerator_management::CommonResponse hdr;
150 uint64_t energy;
151} __attribute__((packed));
152
Harshit Agherabef4d412025-05-27 14:53:56 +0530153struct GetVoltageResponse
154{
155 ocp::accelerator_management::CommonResponse hdr;
156 uint32_t voltage;
157} __attribute__((packed));
158
Rohit PAI86786b62025-06-10 09:46:33 +0530159struct GetInventoryInformationRequest
160{
161 ocp::accelerator_management::CommonRequest hdr;
162 uint8_t property_id;
163} __attribute__((packed));
164
165struct GetInventoryInformationResponse
166{
167 ocp::accelerator_management::CommonResponse hdr;
168 std::array<uint8_t, maxInventoryDataSize> data;
169} __attribute__((packed));
170
Harshit Aghera560e6af2025-04-21 20:04:56 +0530171int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
172 ocp::accelerator_management::BindingPciVid& msg);
173
174int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
175 std::span<uint8_t> buf);
176
177int decodeQueryDeviceIdentificationResponse(
178 std::span<const uint8_t> buf,
179 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
180 uint8_t& deviceIdentification, uint8_t& deviceInstance);
181
182int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
183 std::span<uint8_t> buf);
184
185int decodeGetTemperatureReadingResponse(
186 std::span<const uint8_t> buf,
187 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
188 double& temperatureReading);
189
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530190int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
191 std::span<uint8_t> buf);
192
193int decodeReadThermalParametersResponse(
194 std::span<const uint8_t> buf,
195 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
196 int32_t& threshold);
197
Harshit Aghera6b712322025-07-31 19:25:12 +0530198int encodeGetPowerDrawRequest(
199 PlatformEnvironmentalCommands commandCode, uint8_t instanceId,
200 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf);
Harshit Aghera902c6492025-05-08 15:57:42 +0530201
Harshit Aghera6b712322025-07-31 19:25:12 +0530202int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
203 ocp::accelerator_management::CompletionCode& cc,
204 uint16_t& reasonCode, uint32_t& power);
Harshit Aghera775199d2025-05-27 14:20:24 +0530205
206int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
207 std::span<uint8_t> buf);
208
209int decodeGetCurrentEnergyCounterResponse(
210 std::span<const uint8_t> buf,
211 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
212 uint64_t& energy);
Harshit Agherabef4d412025-05-27 14:53:56 +0530213
214int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
215 std::span<uint8_t> buf);
216
217int decodeGetVoltageResponse(std::span<const uint8_t> buf,
218 ocp::accelerator_management::CompletionCode& cc,
219 uint16_t& reasonCode, uint32_t& voltage);
Rohit PAI86786b62025-06-10 09:46:33 +0530220
221int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
222 std::span<uint8_t> buf);
223
224int decodeGetInventoryInformationResponse(
225 std::span<const uint8_t> buf,
226 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
227 InventoryPropertyId propertyId, InventoryValue& value);
228
Harshit Aghera560e6af2025-04-21 20:04:56 +0530229} // namespace gpu