blob: 158dc41925108a0b1c68c73b112f5703ec4426a0 [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
6
7#pragma once
8
Harshit Aghera560e6af2025-04-21 20:04:56 +05309#include "MctpRequester.hpp"
Harshit Agherad837b562025-04-21 19:50:10 +053010#include "Thresholds.hpp"
11#include "Utils.hpp"
12#include "sensor.hpp"
13
Harshit Aghera560e6af2025-04-21 20:04:56 +053014#include <NvidiaGpuMctpVdm.hpp>
15#include <OcpMctpVdm.hpp>
Harshit Agherad837b562025-04-21 19:50:10 +053016#include <boost/asio/io_context.hpp>
17#include <boost/asio/steady_timer.hpp>
18#include <boost/container/flat_map.hpp>
19#include <sdbusplus/asio/connection.hpp>
20#include <sdbusplus/asio/object_server.hpp>
21#include <sdbusplus/message.hpp>
22
Harshit Aghera560e6af2025-04-21 20:04:56 +053023#include <array>
24#include <chrono>
25#include <cstdint>
Harshit Agherad837b562025-04-21 19:50:10 +053026#include <memory>
27#include <string>
28#include <vector>
29
/// D-Bus object path prefix under which sensor objects are placed.
/// `inline` gives the header-defined constant a single definition across all
/// translation units instead of one internal-linkage copy per includer.
inline constexpr const char* sensorPathPrefix =
    "/xyz/openbmc_project/sensors/";

/// Type string associated with this sensor family.
/// NOTE(review): presumably matched against the entity-manager configuration
/// type; confirm against the code that consumes it.
inline constexpr const char* sensorType = "NvidiaMctpVdm";
32
33struct GpuTempSensor :
34 public Sensor,
35 public std::enable_shared_from_this<GpuTempSensor>
36{
37 public:
Harshit Agherad837b562025-04-21 19:50:10 +053038 GpuTempSensor(std::shared_ptr<sdbusplus::asio::connection>& conn,
Harshit Aghera560e6af2025-04-21 20:04:56 +053039 boost::asio::io_context& io,
40 mctp::MctpRequester& mctpRequester, const std::string& name,
Harshit Agherad837b562025-04-21 19:50:10 +053041 const std::string& sensorConfiguration,
42 sdbusplus::asio::object_server& objectServer,
Harshit Aghera560e6af2025-04-21 20:04:56 +053043 std::vector<thresholds::Threshold>&& thresholdData,
44 std::chrono::milliseconds pollRate);
Harshit Agherad837b562025-04-21 19:50:10 +053045
Harshit Agherad837b562025-04-21 19:50:10 +053046 ~GpuTempSensor() override;
47
Harshit Agherad837b562025-04-21 19:50:10 +053048 void checkThresholds() override;
49
50 private:
Harshit Aghera560e6af2025-04-21 20:04:56 +053051 void read();
52
53 void update();
54
Harshit Agherad837b562025-04-21 19:50:10 +053055 void discoverGpus();
56
Harshit Aghera560e6af2025-04-21 20:04:56 +053057 void processResponse(int sendRecvMsgResult);
58
59 void processQueryDeviceIdResponse(uint8_t eid, int sendRecvMsgResult);
60
Harshit Agherad837b562025-04-21 19:50:10 +053061 void queryEndpoints(const boost::system::error_code& ec,
62 const GetSubTreeType& ret);
63
Harshit Agherad837b562025-04-21 19:50:10 +053064 void processEndpoint(const boost::system::error_code& ec,
65 const SensorBaseConfigMap& endpoint);
Harshit Aghera560e6af2025-04-21 20:04:56 +053066 void processGpuEndpoint(uint8_t eid);
Harshit Agherad837b562025-04-21 19:50:10 +053067
Harshit Aghera560e6af2025-04-21 20:04:56 +053068 uint8_t eid{};
69
70 uint8_t sensorId;
71
72 std::chrono::milliseconds sensorPollMs;
73
Harshit Agherad837b562025-04-21 19:50:10 +053074 boost::asio::steady_timer waitTimer;
75
Harshit Aghera560e6af2025-04-21 20:04:56 +053076 mctp::MctpRequester& mctpRequester;
77
Harshit Agherad837b562025-04-21 19:50:10 +053078 std::shared_ptr<sdbusplus::asio::connection> conn;
79
Harshit Agherad837b562025-04-21 19:50:10 +053080 sdbusplus::asio::object_server& objectServer;
Harshit Aghera560e6af2025-04-21 20:04:56 +053081
82 std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
83 sizeof(gpu::GetTemperatureReadingRequest)>
84 getTemperatureReadingRequest{};
85
86 std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
87 sizeof(gpu::GetTemperatureReadingResponse)>
88 getTemperatureReadingResponse{};
89
90 std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
91 sizeof(gpu::QueryDeviceIdentificationRequest)>
92 queryDeviceIdentificationRequest{};
93
94 std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
95 sizeof(gpu::QueryDeviceIdentificationResponse)>
96 queryDeviceIdentificationResponse{};
Harshit Agherad837b562025-04-21 19:50:10 +053097};
98
Harshit Agherad837b562025-04-21 19:50:10 +053099void createSensors(
100 boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
101 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
102 sensors,
Harshit Aghera560e6af2025-04-21 20:04:56 +0530103 std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
104 mctp::MctpRequester& mctpRequester);
Harshit Agherad837b562025-04-21 19:50:10 +0530105
Harshit Agherad837b562025-04-21 19:50:10 +0530106void interfaceRemoved(
107 sdbusplus::message_t& message,
108 boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
109 sensors);