/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
 | 6 |  | 
 | 7 | #pragma once | 
 | 8 |  | 
 | 9 | #include <OcpMctpVdm.hpp> | 
 | 10 |  | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 11 | #include <array> | 
 | 12 | #include <cstddef> | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 13 | #include <cstdint> | 
 | 14 | #include <span> | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 15 | #include <string> | 
 | 16 | #include <variant> | 
 | 17 | #include <vector> | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 18 |  | 
 | 19 | namespace gpu | 
 | 20 | { | 
 | 21 |  | 
/// Decoded inventory property value: either text or a raw byte sequence.
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

/// Capacity, in bytes, of the data array in GetInventoryInformationResponse.
/// `inline` gives this header-defined constant a single definition across
/// translation units.
inline constexpr size_t maxInventoryDataSize = 256;

/// PCI vendor id constant (0x10de) used for NVIDIA.
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;
 | 26 |  | 
 | 27 | enum class MessageType : uint8_t | 
 | 28 | { | 
 | 29 |     DEVICE_CAPABILITY_DISCOVERY = 0, | 
 | 30 |     PLATFORM_ENVIRONMENTAL = 3 | 
 | 31 | }; | 
 | 32 |  | 
 | 33 | enum class DeviceCapabilityDiscoveryCommands : uint8_t | 
 | 34 | { | 
 | 35 |     QUERY_DEVICE_IDENTIFICATION = 0x09, | 
 | 36 | }; | 
 | 37 |  | 
 | 38 | enum class PlatformEnvironmentalCommands : uint8_t | 
 | 39 | { | 
 | 40 |     GET_TEMPERATURE_READING = 0x00, | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 41 |     READ_THERMAL_PARAMETERS = 0x02, | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 42 |     GET_CURRENT_POWER_DRAW = 0x03, | 
| Harshit Aghera | 6b71232 | 2025-07-31 19:25:12 +0530 | [diff] [blame] | 43 |     GET_MAX_OBSERVED_POWER = 0x04, | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 44 |     GET_CURRENT_ENERGY_COUNTER = 0x06, | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 45 |     GET_INVENTORY_INFORMATION = 0x0C, | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 46 |     GET_VOLTAGE = 0x0F, | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 47 | }; | 
 | 48 |  | 
 | 49 | enum class DeviceIdentification : uint8_t | 
 | 50 | { | 
| Harshit Aghera | 8951c87 | 2025-06-25 15:25:33 +0530 | [diff] [blame] | 51 |     DEVICE_GPU = 0, | 
 | 52 |     DEVICE_SMA = 5 | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 53 | }; | 
 | 54 |  | 
/// Identifiers of inventory properties, numbered contiguously from 0 to 36.
/// The type matches the propertyId parameter of
/// decodeGetInventoryInformationResponse.
enum class InventoryPropertyId : uint8_t
{
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36
};
 | 95 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 96 | struct QueryDeviceIdentificationRequest | 
 | 97 | { | 
 | 98 |     ocp::accelerator_management::CommonRequest hdr; | 
 | 99 | } __attribute__((packed)); | 
 | 100 |  | 
 | 101 | struct QueryDeviceIdentificationResponse | 
 | 102 | { | 
 | 103 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 104 |     uint8_t device_identification; | 
 | 105 |     uint8_t instance_id; | 
 | 106 | } __attribute__((packed)); | 
 | 107 |  | 
 | 108 | struct GetNumericSensorReadingRequest | 
 | 109 | { | 
 | 110 |     ocp::accelerator_management::CommonRequest hdr; | 
 | 111 |     uint8_t sensor_id; | 
 | 112 | } __attribute__((packed)); | 
 | 113 |  | 
 | 114 | using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; | 
 | 115 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 116 | using ReadThermalParametersRequest = GetNumericSensorReadingRequest; | 
 | 117 |  | 
| Harshit Aghera | 6b71232 | 2025-07-31 19:25:12 +0530 | [diff] [blame] | 118 | struct GetPowerDrawRequest | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 119 | { | 
 | 120 |     ocp::accelerator_management::CommonRequest hdr; | 
 | 121 |     uint8_t sensorId; | 
 | 122 |     uint8_t averagingInterval; | 
 | 123 | } __attribute__((packed)); | 
 | 124 |  | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 125 | using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; | 
 | 126 |  | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 127 | using GetVoltageRequest = GetNumericSensorReadingRequest; | 
 | 128 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 129 | struct GetTemperatureReadingResponse | 
 | 130 | { | 
 | 131 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 132 |     int32_t reading; | 
 | 133 | } __attribute__((packed)); | 
 | 134 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 135 | struct ReadThermalParametersResponse | 
 | 136 | { | 
 | 137 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 138 |     int32_t threshold; | 
 | 139 | } __attribute__((packed)); | 
 | 140 |  | 
| Harshit Aghera | 6b71232 | 2025-07-31 19:25:12 +0530 | [diff] [blame] | 141 | struct GetPowerDrawResponse | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 142 | { | 
 | 143 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 144 |     uint32_t power; | 
 | 145 | } __attribute__((packed)); | 
 | 146 |  | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 147 | struct GetCurrentEnergyCounterResponse | 
 | 148 | { | 
 | 149 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 150 |     uint64_t energy; | 
 | 151 | } __attribute__((packed)); | 
 | 152 |  | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 153 | struct GetVoltageResponse | 
 | 154 | { | 
 | 155 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 156 |     uint32_t voltage; | 
 | 157 | } __attribute__((packed)); | 
 | 158 |  | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 159 | struct GetInventoryInformationRequest | 
 | 160 | { | 
 | 161 |     ocp::accelerator_management::CommonRequest hdr; | 
 | 162 |     uint8_t property_id; | 
 | 163 | } __attribute__((packed)); | 
 | 164 |  | 
 | 165 | struct GetInventoryInformationResponse | 
 | 166 | { | 
 | 167 |     ocp::accelerator_management::CommonResponse hdr; | 
 | 168 |     std::array<uint8_t, maxInventoryDataSize> data; | 
 | 169 | } __attribute__((packed)); | 
 | 170 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 171 | int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, | 
 | 172 |                ocp::accelerator_management::BindingPciVid& msg); | 
 | 173 |  | 
 | 174 | int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, | 
 | 175 |                                            std::span<uint8_t> buf); | 
 | 176 |  | 
 | 177 | int decodeQueryDeviceIdentificationResponse( | 
 | 178 |     std::span<const uint8_t> buf, | 
 | 179 |     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
 | 180 |     uint8_t& deviceIdentification, uint8_t& deviceInstance); | 
 | 181 |  | 
 | 182 | int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, | 
 | 183 |                                        std::span<uint8_t> buf); | 
 | 184 |  | 
 | 185 | int decodeGetTemperatureReadingResponse( | 
 | 186 |     std::span<const uint8_t> buf, | 
 | 187 |     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
 | 188 |     double& temperatureReading); | 
 | 189 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 190 | int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, | 
 | 191 |                                        std::span<uint8_t> buf); | 
 | 192 |  | 
 | 193 | int decodeReadThermalParametersResponse( | 
 | 194 |     std::span<const uint8_t> buf, | 
 | 195 |     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
 | 196 |     int32_t& threshold); | 
 | 197 |  | 
| Harshit Aghera | 6b71232 | 2025-07-31 19:25:12 +0530 | [diff] [blame] | 198 | int encodeGetPowerDrawRequest( | 
 | 199 |     PlatformEnvironmentalCommands commandCode, uint8_t instanceId, | 
 | 200 |     uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 201 |  | 
| Harshit Aghera | 6b71232 | 2025-07-31 19:25:12 +0530 | [diff] [blame] | 202 | int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, | 
 | 203 |                                ocp::accelerator_management::CompletionCode& cc, | 
 | 204 |                                uint16_t& reasonCode, uint32_t& power); | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 205 |  | 
 | 206 | int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, | 
 | 207 |                                          std::span<uint8_t> buf); | 
 | 208 |  | 
 | 209 | int decodeGetCurrentEnergyCounterResponse( | 
 | 210 |     std::span<const uint8_t> buf, | 
 | 211 |     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
 | 212 |     uint64_t& energy); | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 213 |  | 
 | 214 | int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, | 
 | 215 |                             std::span<uint8_t> buf); | 
 | 216 |  | 
 | 217 | int decodeGetVoltageResponse(std::span<const uint8_t> buf, | 
 | 218 |                              ocp::accelerator_management::CompletionCode& cc, | 
 | 219 |                              uint16_t& reasonCode, uint32_t& voltage); | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 220 |  | 
 | 221 | int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, | 
 | 222 |                                          std::span<uint8_t> buf); | 
 | 223 |  | 
 | 224 | int decodeGetInventoryInformationResponse( | 
 | 225 |     std::span<const uint8_t> buf, | 
 | 226 |     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
 | 227 |     InventoryPropertyId propertyId, InventoryValue& value); | 
 | 228 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 229 | } // namespace gpu |