/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include "Inventory.hpp"
#include "MctpRequester.hpp"
#include "NvidiaDeviceDiscovery.hpp"
#include "NvidiaGpuPowerSensor.hpp"
#include "NvidiaGpuSensor.hpp"

#include <NvidiaGpuEnergySensor.hpp>
#include <NvidiaGpuPowerPeakReading.hpp>
#include <NvidiaGpuVoltageSensor.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/steady_timer.hpp>
#include <sdbusplus/asio/connection.hpp>
#include <sdbusplus/asio/object_server.hpp>

#include <array>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <span>
#include <string>
#include <system_error>
#include <vector>

| Marc Olberding | 6282a45 | 2025-09-28 22:00:09 -0700 | [diff] [blame] | 29 | class GpuDevice : public std::enable_shared_from_this<GpuDevice> |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 30 | { |
| 31 | public: |
| 32 | GpuDevice(const SensorConfigs& configs, const std::string& name, |
| 33 | const std::string& path, |
| 34 | const std::shared_ptr<sdbusplus::asio::connection>& conn, |
| 35 | uint8_t eid, boost::asio::io_context& io, |
| 36 | mctp::MctpRequester& mctpRequester, |
| 37 | sdbusplus::asio::object_server& objectServer); |
| 38 | |
| 39 | const std::string& getPath() const |
| 40 | { |
| 41 | return path; |
| 42 | } |
| 43 | |
| Marc Olberding | ac92073 | 2025-09-28 21:56:54 -0700 | [diff] [blame] | 44 | void init(); |
| 45 | |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 46 | private: |
| 47 | void makeSensors(); |
| 48 | |
| 49 | void read(); |
| 50 | |
| Marc Olberding | 6282a45 | 2025-09-28 22:00:09 -0700 | [diff] [blame] | 51 | void processTLimitThresholds(const std::error_code& ec); |
| 52 | |
| 53 | void getTLimitThresholds(); |
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 54 | |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 55 | uint8_t eid{}; |
| 56 | |
| Marc Olberding | 6282a45 | 2025-09-28 22:00:09 -0700 | [diff] [blame] | 57 | void getNextThermalParameter(); |
| 58 | void readThermalParameterCallback(const std::error_code& ec, |
| 59 | std::span<const uint8_t> buffer); |
| 60 | |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 61 | std::chrono::milliseconds sensorPollMs; |
| 62 | |
| 63 | boost::asio::steady_timer waitTimer; |
| 64 | |
| 65 | mctp::MctpRequester& mctpRequester; |
| 66 | |
| 67 | std::shared_ptr<sdbusplus::asio::connection> conn; |
| 68 | |
| 69 | sdbusplus::asio::object_server& objectServer; |
| 70 | |
| 71 | std::shared_ptr<NvidiaGpuTempSensor> tempSensor; |
| Harshit Aghera | ba138da | 2025-05-05 12:26:35 +0530 | [diff] [blame] | 72 | std::shared_ptr<NvidiaGpuTempSensor> tLimitSensor; |
| Harshit Aghera | b10a67b | 2025-05-27 12:19:29 +0530 | [diff] [blame] | 73 | std::shared_ptr<NvidiaGpuTempSensor> dramTempSensor; |
| Harshit Aghera | 902c649 | 2025-05-08 15:57:42 +0530 | [diff] [blame] | 74 | std::shared_ptr<NvidiaGpuPowerSensor> powerSensor; |
| Harshit Aghera | 6b71232 | 2025-07-31 19:25:12 +0530 | [diff] [blame] | 75 | std::shared_ptr<NvidiaGpuPowerPeakReading> peakPower; |
| Harshit Aghera | 775199d | 2025-05-27 14:20:24 +0530 | [diff] [blame] | 76 | std::shared_ptr<NvidiaGpuEnergySensor> energySensor; |
| Harshit Aghera | bef4d41 | 2025-05-27 14:53:56 +0530 | [diff] [blame] | 77 | std::shared_ptr<NvidiaGpuVoltageSensor> voltageSensor; |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 78 | |
| Marc Olberding | 6282a45 | 2025-09-28 22:00:09 -0700 | [diff] [blame] | 79 | std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)> |
| 80 | thermalParamReqMsg{}; |
| Marc Olberding | 1851f64 | 2025-09-29 10:44:46 -0700 | [diff] [blame] | 81 | std::array<int32_t, 3> thresholds{}; |
| Marc Olberding | 6282a45 | 2025-09-28 22:00:09 -0700 | [diff] [blame] | 82 | size_t current_threshold_index{}; |
| 83 | |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 84 | SensorConfigs configs; |
| 85 | |
| 86 | std::string name; |
| 87 | |
| 88 | std::string path; |
| Rohit PAI | 0a88826 | 2025-06-11 08:52:29 +0530 | [diff] [blame] | 89 | |
| 90 | std::shared_ptr<Inventory> inventory; |
| Harshit Aghera | 4ecdfaa | 2025-05-22 11:35:39 +0530 | [diff] [blame] | 91 | }; |