/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include "Inventory.hpp"
#include "MctpRequester.hpp"
#include "NvidiaDeviceDiscovery.hpp"
#include "NvidiaGpuPowerSensor.hpp"
#include "NvidiaGpuSensor.hpp"

#include <NvidiaGpuEnergySensor.hpp>
#include <NvidiaGpuPowerPeakReading.hpp>
#include <NvidiaGpuVoltageSensor.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/steady_timer.hpp>
#include <sdbusplus/asio/connection.hpp>
#include <sdbusplus/asio/object_server.hpp>

#include <array>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <span>
#include <string>
#include <system_error>
#include <vector>

Marc Olberding6282a452025-09-28 22:00:09 -070029class GpuDevice : public std::enable_shared_from_this<GpuDevice>
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053030{
31 public:
32 GpuDevice(const SensorConfigs& configs, const std::string& name,
33 const std::string& path,
34 const std::shared_ptr<sdbusplus::asio::connection>& conn,
35 uint8_t eid, boost::asio::io_context& io,
36 mctp::MctpRequester& mctpRequester,
37 sdbusplus::asio::object_server& objectServer);
38
39 const std::string& getPath() const
40 {
41 return path;
42 }
43
Marc Olberdingac920732025-09-28 21:56:54 -070044 void init();
45
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053046 private:
47 void makeSensors();
48
49 void read();
50
Marc Olberding6282a452025-09-28 22:00:09 -070051 void processTLimitThresholds(const std::error_code& ec);
52
53 void getTLimitThresholds();
Harshit Aghera5e7decc2025-05-07 16:20:16 +053054
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053055 uint8_t eid{};
56
Marc Olberding6282a452025-09-28 22:00:09 -070057 void getNextThermalParameter();
58 void readThermalParameterCallback(const std::error_code& ec,
59 std::span<const uint8_t> buffer);
60
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053061 std::chrono::milliseconds sensorPollMs;
62
63 boost::asio::steady_timer waitTimer;
64
65 mctp::MctpRequester& mctpRequester;
66
67 std::shared_ptr<sdbusplus::asio::connection> conn;
68
69 sdbusplus::asio::object_server& objectServer;
70
71 std::shared_ptr<NvidiaGpuTempSensor> tempSensor;
Harshit Agheraba138da2025-05-05 12:26:35 +053072 std::shared_ptr<NvidiaGpuTempSensor> tLimitSensor;
Harshit Agherab10a67b2025-05-27 12:19:29 +053073 std::shared_ptr<NvidiaGpuTempSensor> dramTempSensor;
Harshit Aghera902c6492025-05-08 15:57:42 +053074 std::shared_ptr<NvidiaGpuPowerSensor> powerSensor;
Harshit Aghera6b712322025-07-31 19:25:12 +053075 std::shared_ptr<NvidiaGpuPowerPeakReading> peakPower;
Harshit Aghera775199d2025-05-27 14:20:24 +053076 std::shared_ptr<NvidiaGpuEnergySensor> energySensor;
Harshit Agherabef4d412025-05-27 14:53:56 +053077 std::shared_ptr<NvidiaGpuVoltageSensor> voltageSensor;
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053078
Marc Olberding6282a452025-09-28 22:00:09 -070079 std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>
80 thermalParamReqMsg{};
Marc Olberding1851f642025-09-29 10:44:46 -070081 std::array<int32_t, 3> thresholds{};
Marc Olberding6282a452025-09-28 22:00:09 -070082 size_t current_threshold_index{};
83
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053084 SensorConfigs configs;
85
86 std::string name;
87
88 std::string path;
Rohit PAI0a888262025-06-11 08:52:29 +053089
90 std::shared_ptr<Inventory> inventory;
Harshit Aghera4ecdfaa2025-05-22 11:35:39 +053091};