/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
|  | 6 |  | 
|  | 7 | #pragma once | 
|  | 8 |  | 
|  | 9 | #include <OcpMctpVdm.hpp> | 
|  | 10 |  | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 11 | #include <array> | 
|  | 12 | #include <cstddef> | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 13 | #include <cstdint> | 
|  | 14 | #include <span> | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 15 | #include <string> | 
|  | 16 | #include <variant> | 
|  | 17 | #include <vector> | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 18 |  | 
|  | 19 | namespace gpu | 
|  | 20 | { | 
|  | 21 |  | 
/** Inventory property value: holds either a string or a byte vector. */
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

/**
 * Number of elements in the `data` array of GetInventoryInformationResponse.
 * Declared `inline` so every translation unit including this header shares
 * a single definition.
 */
inline constexpr std::size_t maxInventoryDataSize = 256;

/** PCI vendor ID assigned to NVIDIA. */
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;
|  | 26 |  | 
/** Message type codes, stored in a single byte. */
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0,
    PLATFORM_ENVIRONMENTAL = 3
};
|  | 32 |  | 
/**
 * Command codes, stored in a single byte.
 * NOTE(review): presumably the commands valid under
 * MessageType::DEVICE_CAPABILITY_DISCOVERY — confirm against the protocol
 * specification.
 */
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09,
};
|  | 37 |  | 
/**
 * Command codes, stored in a single byte.
 * NOTE(review): presumably the commands valid under
 * MessageType::PLATFORM_ENVIRONMENTAL — confirm against the protocol
 * specification. The numbering is sparse; only the listed codes are defined
 * here.
 */
enum class PlatformEnvironmentalCommands : uint8_t
{
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_CURRENT_ENERGY_COUNTER = 0x06,
    GET_INVENTORY_INFORMATION = 0x0C,
    GET_VOLTAGE = 0x0F,
};
|  | 47 |  | 
/**
 * Device identification codes, stored in a single byte.
 * NOTE(review): presumably the values carried in
 * QueryDeviceIdentificationResponse::device_identification — confirm against
 * the decoder.
 */
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0,
    DEVICE_SMA = 5
};
|  | 53 |  | 
/**
 * Inventory property identifiers, stored in a single byte.
 * Values are contiguous from 0 (BOARD_PART_NUMBER) to 36 (NVLINK_PEER_TYPE).
 * NOTE(review): presumably the value sent as
 * GetInventoryInformationRequest::property_id — confirm against the encoder.
 */
enum class InventoryPropertyId : uint8_t
{
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36
};
|  | 94 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 95 | struct QueryDeviceIdentificationRequest | 
|  | 96 | { | 
|  | 97 | ocp::accelerator_management::CommonRequest hdr; | 
|  | 98 | } __attribute__((packed)); | 
|  | 99 |  | 
|  | 100 | struct QueryDeviceIdentificationResponse | 
|  | 101 | { | 
|  | 102 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 103 | uint8_t device_identification; | 
|  | 104 | uint8_t instance_id; | 
|  | 105 | } __attribute__((packed)); | 
|  | 106 |  | 
|  | 107 | struct GetNumericSensorReadingRequest | 
|  | 108 | { | 
|  | 109 | ocp::accelerator_management::CommonRequest hdr; | 
|  | 110 | uint8_t sensor_id; | 
|  | 111 | } __attribute__((packed)); | 
|  | 112 |  | 
|  | 113 | using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; | 
|  | 114 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 115 | using ReadThermalParametersRequest = GetNumericSensorReadingRequest; | 
|  | 116 |  | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 117 | struct GetCurrentPowerDrawRequest | 
|  | 118 | { | 
|  | 119 | ocp::accelerator_management::CommonRequest hdr; | 
|  | 120 | uint8_t sensorId; | 
|  | 121 | uint8_t averagingInterval; | 
|  | 122 | } __attribute__((packed)); | 
|  | 123 |  | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 124 | using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; | 
|  | 125 |  | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 126 | using GetVoltageRequest = GetNumericSensorReadingRequest; | 
|  | 127 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 128 | struct GetTemperatureReadingResponse | 
|  | 129 | { | 
|  | 130 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 131 | int32_t reading; | 
|  | 132 | } __attribute__((packed)); | 
|  | 133 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 134 | struct ReadThermalParametersResponse | 
|  | 135 | { | 
|  | 136 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 137 | int32_t threshold; | 
|  | 138 | } __attribute__((packed)); | 
|  | 139 |  | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 140 | struct GetCurrentPowerDrawResponse | 
|  | 141 | { | 
|  | 142 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 143 | uint32_t power; | 
|  | 144 | } __attribute__((packed)); | 
|  | 145 |  | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 146 | struct GetCurrentEnergyCounterResponse | 
|  | 147 | { | 
|  | 148 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 149 | uint64_t energy; | 
|  | 150 | } __attribute__((packed)); | 
|  | 151 |  | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 152 | struct GetVoltageResponse | 
|  | 153 | { | 
|  | 154 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 155 | uint32_t voltage; | 
|  | 156 | } __attribute__((packed)); | 
|  | 157 |  | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 158 | struct GetInventoryInformationRequest | 
|  | 159 | { | 
|  | 160 | ocp::accelerator_management::CommonRequest hdr; | 
|  | 161 | uint8_t property_id; | 
|  | 162 | } __attribute__((packed)); | 
|  | 163 |  | 
|  | 164 | struct GetInventoryInformationResponse | 
|  | 165 | { | 
|  | 166 | ocp::accelerator_management::CommonResponse hdr; | 
|  | 167 | std::array<uint8_t, maxInventoryDataSize> data; | 
|  | 168 | } __attribute__((packed)); | 
|  | 169 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 170 | int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, | 
|  | 171 | ocp::accelerator_management::BindingPciVid& msg); | 
|  | 172 |  | 
|  | 173 | int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, | 
|  | 174 | std::span<uint8_t> buf); | 
|  | 175 |  | 
|  | 176 | int decodeQueryDeviceIdentificationResponse( | 
|  | 177 | std::span<const uint8_t> buf, | 
|  | 178 | ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
|  | 179 | uint8_t& deviceIdentification, uint8_t& deviceInstance); | 
|  | 180 |  | 
|  | 181 | int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, | 
|  | 182 | std::span<uint8_t> buf); | 
|  | 183 |  | 
|  | 184 | int decodeGetTemperatureReadingResponse( | 
|  | 185 | std::span<const uint8_t> buf, | 
|  | 186 | ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
|  | 187 | double& temperatureReading); | 
|  | 188 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 189 | int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, | 
|  | 190 | std::span<uint8_t> buf); | 
|  | 191 |  | 
|  | 192 | int decodeReadThermalParametersResponse( | 
|  | 193 | std::span<const uint8_t> buf, | 
|  | 194 | ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
|  | 195 | int32_t& threshold); | 
|  | 196 |  | 
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 197 | int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId, | 
|  | 198 | uint8_t averagingInterval, | 
|  | 199 | std::span<uint8_t> buf); | 
|  | 200 |  | 
|  | 201 | int decodeGetCurrentPowerDrawResponse( | 
|  | 202 | std::span<const uint8_t> buf, | 
|  | 203 | ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
|  | 204 | uint32_t& power); | 
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 205 |  | 
|  | 206 | int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, | 
|  | 207 | std::span<uint8_t> buf); | 
|  | 208 |  | 
|  | 209 | int decodeGetCurrentEnergyCounterResponse( | 
|  | 210 | std::span<const uint8_t> buf, | 
|  | 211 | ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
|  | 212 | uint64_t& energy); | 
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 213 |  | 
|  | 214 | int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, | 
|  | 215 | std::span<uint8_t> buf); | 
|  | 216 |  | 
|  | 217 | int decodeGetVoltageResponse(std::span<const uint8_t> buf, | 
|  | 218 | ocp::accelerator_management::CompletionCode& cc, | 
|  | 219 | uint16_t& reasonCode, uint32_t& voltage); | 
| Rohit PAI | 86786b6 | 2025-06-10 09:46:33 +0530 | [diff] [blame] | 220 |  | 
|  | 221 | int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, | 
|  | 222 | std::span<uint8_t> buf); | 
|  | 223 |  | 
|  | 224 | int decodeGetInventoryInformationResponse( | 
|  | 225 | std::span<const uint8_t> buf, | 
|  | 226 | ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, | 
|  | 227 | InventoryPropertyId propertyId, InventoryValue& value); | 
|  | 228 |  | 
| Harshit Aghera | 560e6af | 2025-04-21 20:04:56 +0530 | [diff] [blame] | 229 | } // namespace gpu |