/*
 * SPDX-FileCopyrightText: Copyright OpenBMC Authors
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include <OcpMctpVdm.hpp>

#include <array>
#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
#include <variant>
#include <vector>

namespace gpu
{

// Value of an inventory property: either text or a raw byte sequence.
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

// Size, in bytes, of the fixed data array in GetInventoryInformationResponse.
constexpr size_t maxInventoryDataSize = 256;

// PCI vendor ID constant for NVIDIA (0x10de).
constexpr uint16_t nvidiaPciVendorId = 0x10de;

// Message type codes, stored as a single byte.
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0,
    PLATFORM_ENVIRONMENTAL = 3
};

// Command codes grouped under the device-capability-discovery name
// (presumably used with MessageType::DEVICE_CAPABILITY_DISCOVERY — confirm
// against the encoder implementation).
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09,
};

// Command codes grouped under the platform-environmental name (presumably
// used with MessageType::PLATFORM_ENVIRONMENTAL — confirm against the encoder
// implementation). The values are not contiguous.
enum class PlatformEnvironmentalCommands : uint8_t
{
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_MAX_OBSERVED_POWER = 0x04,
    GET_CURRENT_ENERGY_COUNTER = 0x06,
    GET_INVENTORY_INFORMATION = 0x0C,
    GET_VOLTAGE = 0x0F,
};

// Device identification codes. QueryDeviceIdentificationResponse carries a
// one-byte device_identification field (presumably one of these values —
// confirm against the decoder implementation).
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0,
    DEVICE_SMA = 5
};

// Identifiers of inventory properties, numbered contiguously from 0 to 36.
// GetInventoryInformationRequest carries a one-byte property_id field
// (presumably one of these values — confirm against the encoder).
enum class InventoryPropertyId : uint8_t
{
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36
};

Harshit Aghera560e6af2025-04-21 20:04:56 +053095struct QueryDeviceIdentificationRequest
96{
97 ocp::accelerator_management::CommonRequest hdr;
98} __attribute__((packed));
99
100struct QueryDeviceIdentificationResponse
101{
102 ocp::accelerator_management::CommonResponse hdr;
103 uint8_t device_identification;
104 uint8_t instance_id;
105} __attribute__((packed));
106
107struct GetNumericSensorReadingRequest
108{
109 ocp::accelerator_management::CommonRequest hdr;
110 uint8_t sensor_id;
111} __attribute__((packed));
112
113using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
114
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530115using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
116
Harshit Aghera6b712322025-07-31 19:25:12 +0530117struct GetPowerDrawRequest
Harshit Aghera902c6492025-05-08 15:57:42 +0530118{
119 ocp::accelerator_management::CommonRequest hdr;
120 uint8_t sensorId;
121 uint8_t averagingInterval;
122} __attribute__((packed));
123
Harshit Aghera775199d2025-05-27 14:20:24 +0530124using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
125
Harshit Agherabef4d412025-05-27 14:53:56 +0530126using GetVoltageRequest = GetNumericSensorReadingRequest;
127
Harshit Aghera560e6af2025-04-21 20:04:56 +0530128struct GetTemperatureReadingResponse
129{
130 ocp::accelerator_management::CommonResponse hdr;
131 int32_t reading;
132} __attribute__((packed));
133
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530134struct ReadThermalParametersResponse
135{
136 ocp::accelerator_management::CommonResponse hdr;
137 int32_t threshold;
138} __attribute__((packed));
139
Harshit Aghera6b712322025-07-31 19:25:12 +0530140struct GetPowerDrawResponse
Harshit Aghera902c6492025-05-08 15:57:42 +0530141{
142 ocp::accelerator_management::CommonResponse hdr;
143 uint32_t power;
144} __attribute__((packed));
145
Harshit Aghera775199d2025-05-27 14:20:24 +0530146struct GetCurrentEnergyCounterResponse
147{
148 ocp::accelerator_management::CommonResponse hdr;
149 uint64_t energy;
150} __attribute__((packed));
151
Harshit Agherabef4d412025-05-27 14:53:56 +0530152struct GetVoltageResponse
153{
154 ocp::accelerator_management::CommonResponse hdr;
155 uint32_t voltage;
156} __attribute__((packed));
157
Rohit PAI86786b62025-06-10 09:46:33 +0530158struct GetInventoryInformationRequest
159{
160 ocp::accelerator_management::CommonRequest hdr;
161 uint8_t property_id;
162} __attribute__((packed));
163
164struct GetInventoryInformationResponse
165{
166 ocp::accelerator_management::CommonResponse hdr;
167 std::array<uint8_t, maxInventoryDataSize> data;
168} __attribute__((packed));
169
Harshit Aghera560e6af2025-04-21 20:04:56 +0530170int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
171 ocp::accelerator_management::BindingPciVid& msg);
172
173int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
174 std::span<uint8_t> buf);
175
176int decodeQueryDeviceIdentificationResponse(
177 std::span<const uint8_t> buf,
178 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
179 uint8_t& deviceIdentification, uint8_t& deviceInstance);
180
181int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
182 std::span<uint8_t> buf);
183
184int decodeGetTemperatureReadingResponse(
185 std::span<const uint8_t> buf,
186 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
187 double& temperatureReading);
188
Harshit Aghera5e7decc2025-05-07 16:20:16 +0530189int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
190 std::span<uint8_t> buf);
191
192int decodeReadThermalParametersResponse(
193 std::span<const uint8_t> buf,
194 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
195 int32_t& threshold);
196
Harshit Aghera6b712322025-07-31 19:25:12 +0530197int encodeGetPowerDrawRequest(
198 PlatformEnvironmentalCommands commandCode, uint8_t instanceId,
199 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf);
Harshit Aghera902c6492025-05-08 15:57:42 +0530200
Harshit Aghera6b712322025-07-31 19:25:12 +0530201int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
202 ocp::accelerator_management::CompletionCode& cc,
203 uint16_t& reasonCode, uint32_t& power);
Harshit Aghera775199d2025-05-27 14:20:24 +0530204
205int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
206 std::span<uint8_t> buf);
207
208int decodeGetCurrentEnergyCounterResponse(
209 std::span<const uint8_t> buf,
210 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
211 uint64_t& energy);
Harshit Agherabef4d412025-05-27 14:53:56 +0530212
213int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
214 std::span<uint8_t> buf);
215
216int decodeGetVoltageResponse(std::span<const uint8_t> buf,
217 ocp::accelerator_management::CompletionCode& cc,
218 uint16_t& reasonCode, uint32_t& voltage);
Rohit PAI86786b62025-06-10 09:46:33 +0530219
220int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
221 std::span<uint8_t> buf);
222
223int decodeGetInventoryInformationResponse(
224 std::span<const uint8_t> buf,
225 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
226 InventoryPropertyId propertyId, InventoryValue& value);
227
} // namespace gpu