/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
5
6#pragma once
7
8#include <asm/byteorder.h>
9
10#include <OcpMctpVdm.hpp>
11
12#include <cstddef>
13#include <cstdint>
14
15namespace gpu
16{
17
/** @brief NVIDIA PCI vendor ID
 *
 * Declared `inline` so that every translation unit including this header
 * refers to one single definition of the constant instead of a private
 * per-translation-unit copy.
 */
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;
20
/** @brief Message type identifiers of the GPU protocol
 *
 * Each enumerator names one class of messages used for device management
 * and monitoring. The value is stored in a single byte.
 */
enum class MessageType : uint8_t
{
    /** Device capability discovery messages (type 0) */
    DEVICE_CAPABILITY_DISCOVERY = 0x00,
    /** Platform environmental messages (type 3) */
    PLATFORM_ENVIRONMENTAL = 0x03,
};
32
/** @brief Command codes of message type 0 (device capability discovery)
 */
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    /** Query the identification of a device */
    QUERY_DEVICE_IDENTIFICATION = 0x09
};
39
/** @brief Command codes of message type 3 (platform environmental)
 */
enum class PlatformEnvironmentalCommands : uint8_t
{
    /** Get the temperature reading of a sensor */
    GET_TEMPERATURE_READING = 0x00,
    /** Read the thermal parameters of a sensor */
    READ_THERMAL_PARAMETERS = 0x02
};
47
/** @brief Device identification types
 *
 * Kinds of devices that can be identified in the system. The value is used
 * during device discovery to tell the various components apart.
 */
enum class DeviceIdentification : uint8_t
{
    /** The device is a GPU */
    DEVICE_GPU = 0x00,
};
58
59/** @struct QueryDeviceIdentificationRequest
60 *
61 * Structure representing query device identification request
62 */
63struct QueryDeviceIdentificationRequest
64{
65 ocp::accelerator_management::CommonRequest hdr;
66} __attribute__((packed));
67
68/** @struct QueryDeviceIdentificationResponse
69 *
70 * Structure representing query device identification response.
71 */
72struct QueryDeviceIdentificationResponse
73{
74 ocp::accelerator_management::CommonResponse hdr;
75 uint8_t device_identification;
76 uint8_t instance_id;
77} __attribute__((packed));
78
79/** @struct GetNumericSensorReadingRequest
80 *
81 * Structure representing request to get reading of certain numeric
82 * sensors.
83 */
84struct GetNumericSensorReadingRequest
85{
86 ocp::accelerator_management::CommonRequest hdr;
87 uint8_t sensor_id;
88} __attribute__((packed));
89
90/** @struct GetTemperatureReadingRequest
91 *
92 * Structure representing get temperature reading request.
93 */
94using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
95
Harshit Aghera09f6f2c2025-05-07 16:20:16 +053096/** @struct ReadThermalParametersRequest
97 *
98 * Structure representing request to read thermal parameters.
99 */
100using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
101
Harshit Agheraa3f24f42025-04-21 20:04:56 +0530102/** @struct GetTemperatureReadingResponse
103 *
104 * Structure representing get temperature reading response.
105 */
106struct GetTemperatureReadingResponse
107{
108 ocp::accelerator_management::CommonResponse hdr;
109 int32_t reading;
110} __attribute__((packed));
111
Harshit Aghera09f6f2c2025-05-07 16:20:16 +0530112/** @struct ReadThermalParametersResponse
113 *
114 * Structure representing response to read thermal parameters request.
115 * Contains the thermal threshold value for the requested sensor.
116 */
117struct ReadThermalParametersResponse
118{
119 ocp::accelerator_management::CommonResponse hdr;
120 int32_t threshold;
121} __attribute__((packed));
122
Harshit Agheraa3f24f42025-04-21 20:04:56 +0530123/**
124 * @brief Populate the GPU message with the GPU header.
125 * The caller of this API allocates buffer for the GPU header
126 * when forming the GPU message.
127 * The buffer is passed to this API to pack the GPU header.
128 *
129 * @param[in] hdr - Reference to the OCP MCTP VDM header information
130 * @param[out] msg - Reference to GPU message header
131 *
132 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
133 * otherwise appropriate error code.
134 * @note Caller is responsible for alloc and dealloc of msg
135 * and hdr params
136 */
137ocp::accelerator_management::CompletionCode packHeader(
138 const ocp::accelerator_management::BindingPciVidInfo& hdr,
139 ocp::accelerator_management::BindingPciVid& msg);
140
141/** @brief Encode reason code
142 *
143 * @param[in] cc - Completion Code
144 * @param[in] reason_code - reason code
145 * @param[in] command_code - command code
146 * @param[out] msg - Reference to message
147 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
148 * otherwise appropriate error code.
149 */
150ocp::accelerator_management::CompletionCode encodeReasonCode(
151 uint8_t cc, uint16_t reasonCode, uint8_t commandCode,
152 ocp::accelerator_management::Message& msg);
153
154/** @brief Decode to get reason code
155 *
156 * @param[in] msg - response message
157 * @param[in] msg_len - Length of response message
158 * @param[out] cc - reference to completion code
159 * @param[out] reason_code - reference to reason_code
160 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
161 * otherwise appropriate error code.
162 */
163ocp::accelerator_management::CompletionCode decodeReasonCodeAndCC(
164 const ocp::accelerator_management::Message& msg, size_t msgLen, uint8_t& cc,
165 uint16_t& reasonCode);
166
167/** @brief Create a Query device identification request message
168 *
169 * @param[in] instance_id - instance ID
170 * @param[out] msg - Reference to message that will be written to
171 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
172 * otherwise appropriate error code.
173 */
174ocp::accelerator_management::CompletionCode
175 encodeQueryDeviceIdentificationRequest(
176 uint8_t instanceId, ocp::accelerator_management::Message& msg);
177
178/** @brief Encode a Query device identification response message
179 *
180 * @param[in] instance_id - instance ID
181 * @param[in] cc - completion code
182 * @param[in] reason_code - reason code
183 * @param[in] device_identification - device identification
184 * @param[in] device_instance - device instance id
185 * @param[out] msg - Reference to message that will be written to
186 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
187 * otherwise appropriate error code.
188 */
189ocp::accelerator_management::CompletionCode
190 encodeQueryDeviceIdentificationResponse(
191 uint8_t instanceId, uint8_t cc, uint16_t reasonCode,
192 uint8_t deviceIdentification, uint8_t deviceInstance,
193 ocp::accelerator_management::Message& msg);
194
195/** @brief Decode a Query device identification response message
196 *
197 * @param[in] msg - response message
198 * @param[in] msg_len - Length of response message
199 * @param[out] cc - reference to completion code
200 * @param[out] reason_code - reference to reason code
201 * @param[out] device_identification - reference to device_identification
202 * @param[out] device_instance - reference to instance id
203 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
204 * otherwise appropriate error code.
205 */
206ocp::accelerator_management::CompletionCode
207 decodeQueryDeviceIdentificationResponse(
208 const ocp::accelerator_management::Message& msg, size_t msgLen,
209 uint8_t& cc, uint16_t& reasonCode, uint8_t& deviceIdentification,
210 uint8_t& deviceInstance);
211
212/** @brief Encode a Get temperature readings request message
213 *
214 * @param[in] instance_id - instance ID
215 * @param[in] sensor_id - sensor id
216 * @param[out] msg - Reference to message that will be written to
217 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
218 * otherwise appropriate error code.
219 */
220ocp::accelerator_management::CompletionCode encodeGetTemperatureReadingRequest(
221 uint8_t instanceId, uint8_t sensorId,
222 ocp::accelerator_management::Message& msg);
223
224/** @brief Decode a Get temperature readings request message
225 *
226 * @param[in] msg - request message
227 * @param[in] msg_len - Length of request message
228 * @param[out] sensor_id - reference to sensor id
229 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
230 * otherwise appropriate error code.
231 */
232ocp::accelerator_management::CompletionCode decodeGetTemperatureReadingRequest(
233 const ocp::accelerator_management::Message& msg, size_t msgLen,
234 uint8_t& sensorId);
235
236/** @brief Encode a Get temperature readings response message
237 *
238 * @param[in] instance_id - instance ID
239 * @param[in] cc - pointer to response message completion code
240 * @param[in] reason_code - reason code
241 * @param[in] temperature_reading - temperature reading
242 * @param[out] msg - Reference to message that will be written to
243 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
244 * otherwise appropriate error code.
245 */
246ocp::accelerator_management::CompletionCode encodeGetTemperatureReadingResponse(
247 uint8_t instanceId, uint8_t cc, uint16_t reasonCode,
248 double temperatureReading, ocp::accelerator_management::Message& msg);
249
250/** @brief Decode a Get temperature readings response message
251 *
252 * @param[in] msg - response message
253 * @param[in] msg_len - Length of response message
254 * @param[out] cc - reference to response message completion code
255 * @param[out] reason_code - reference to reason code
256 * @param[out] temperature_reading - reference to temperature_reading
257 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
258 * otherwise appropriate error code.
259 */
260ocp::accelerator_management::CompletionCode decodeGetTemperatureReadingResponse(
261 const ocp::accelerator_management::Message& msg, size_t msgLen, uint8_t& cc,
262 uint16_t& reasonCode, double& temperatureReading);
263
Harshit Aghera09f6f2c2025-05-07 16:20:16 +0530264/** @brief Encode a Read thermal parameters request message
265 *
266 * @param[in] instance_id - instance ID
267 * @param[in] sensor_id - sensor id
268 * @param[out] msg - Reference to message that will be written to
269 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
270 * otherwise appropriate error code.
271 */
272ocp::accelerator_management::CompletionCode encodeReadThermalParametersRequest(
273 uint8_t instanceId, uint8_t sensorId,
274 ocp::accelerator_management::Message& msg);
275
276/** @brief Decode a Read thermal parameters request message
277 *
278 * @param[in] msg - request message
279 * @param[in] msg_len - Length of request message
280 * @param[out] sensor_id - reference to sensor id
281 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
282 * otherwise appropriate error code.
283 */
284ocp::accelerator_management::CompletionCode decodeReadThermalParametersRequest(
285 const ocp::accelerator_management::Message& msg, size_t msgLen,
286 uint8_t& sensorId);
287
288/** @brief Encode a Read thermal parameters response message
289 *
290 * @param[in] instance_id - instance ID
291 * @param[in] cc - completion code
292 * @param[in] reason_code - reason code
293 * @param[in] threshold - thermal threshold
294 * @param[out] msg - Reference to message that will be written to
295 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
296 * otherwise appropriate error code.
297 */
298ocp::accelerator_management::CompletionCode encodeReadThermalParametersResponse(
299 uint8_t instanceId, uint8_t cc, uint16_t reasonCode, int32_t threshold,
300 ocp::accelerator_management::Message& msg);
301
302/** @brief Decode a Read thermal parameters response message
303 *
304 * @param[in] msg - response message
305 * @param[in] msg_len - Length of response message
306 * @param[out] cc - reference to completion code
307 * @param[out] reason_code - reference to reason code
308 * @param[out] threshold - reference to thermal threshold
309 * @return ocp::accelerator_management::CompletionCode::SUCCESS on success,
310 * otherwise appropriate error code.
311 */
312ocp::accelerator_management::CompletionCode decodeReadThermalParametersResponse(
313 const ocp::accelerator_management::Message& msg, size_t msgLen, uint8_t& cc,
314 uint16_t& reasonCode, int32_t& threshold);
315
Harshit Agheraa3f24f42025-04-21 20:04:56 +0530316} // namespace gpu