nvidia-gpu: introduce notion of a device

Perform device discovery tasks only once per device to prepare for
introducing additional gpu sensors.

In the current implementation, sensor updates and device discovery via
MCTP are managed within a single class for simplicity. However, since a
GPU device typically includes multiple sensors, performing device
discovery for each individual sensor is inefficient. Instead, it would
be more effective to execute device discovery once per device.

Tested: Built an image for the gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422

```
$ curl -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_0
{
  "@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_0",
  "@odata.type": "#Sensor.v1_2_0.Sensor",
  "Id": "temperature_NVIDIA_GB200_GPU_0_TEMP_0",
  "Name": "NVIDIA GB200 GPU 0 TEMP 0",
  "Reading": 37.6875,
  "ReadingRangeMax": 127.0,
  "ReadingRangeMin": -128.0,
  "ReadingType": "Temperature",
  "ReadingUnits": "Cel",
  "Status": {
    "Health": "OK",
    "State": "Enabled"
  }
}
```

Change-Id: Ie3dcd43caa031b4aaa61d8be3f5d71aefd53bc9a
Signed-off-by: Harshit Aghera <haghera@nvidia.com>
diff --git a/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp b/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp
new file mode 100644
index 0000000..adb21ea
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaDeviceDiscovery.cpp
@@ -0,0 +1,357 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "NvidiaDeviceDiscovery.hpp"
+
+#include "NvidiaGpuDevice.hpp"
+#include "Utils.hpp"
+
+#include <bits/basic_string.h>
+
+#include <MctpRequester.hpp>
+#include <NvidiaGpuMctpVdm.hpp>
+#include <OcpMctpVdm.hpp>
+#include <boost/asio/io_context.hpp>
+#include <boost/container/flat_map.hpp>
+#include <phosphor-logging/lg2.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+#include <sdbusplus/message.hpp>
+#include <sdbusplus/message/native_types.hpp>
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <span>
+#include <string>
+#include <utility>
+#include <variant>
+#include <vector>
+
+// Handles the reply to a Query Device Identification request sent to eid.
+// Failures are logged and ignored. If the endpoint reports itself as a GPU,
+// a GpuDevice is stored in gpuDevices as "<configs.name>_<instance id>".
+void processQueryDeviceIdResponse(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& conn,
+    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
+    const std::string& path, uint8_t eid, int sendRecvMsgResult,
+    std::span<uint8_t> queryDeviceIdentificationResponse)
+{
+    if (sendRecvMsgResult != 0)
+    {
+        lg2::error(
+            "Error processing MCTP endpoint with eid {EID} : sending message over MCTP failed, rc={RC}",
+            "EID", eid, "RC", sendRecvMsgResult);
+        return;
+    }
+
+    ocp::accelerator_management::CompletionCode completionCode{};
+    uint16_t reason = 0;
+    uint8_t deviceKind = 0;
+    uint8_t instanceId = 0;
+    auto decodeRc = gpu::decodeQueryDeviceIdentificationResponse(
+        queryDeviceIdentificationResponse, completionCode, reason, deviceKind,
+        instanceId);
+    if (decodeRc != 0 ||
+        completionCode != ocp::accelerator_management::CompletionCode::SUCCESS)
+    {
+        lg2::error(
+            "Error processing MCTP endpoint with eid {EID} : decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
+            "EID", eid, "RC", decodeRc, "CC", completionCode, "RESC", reason);
+        return;
+    }
+
+    if (deviceKind !=
+        static_cast<uint8_t>(gpu::DeviceIdentification::DEVICE_GPU))
+    {
+        return;
+    }
+    lg2::info(
+        "Found the GPU with EID {EID}, DeviceType {DEVTYPE}, InstanceId {IID}.",
+        "EID", eid, "DEVTYPE", deviceKind, "IID", instanceId);
+    std::string gpuName = configs.name + '_' + std::to_string(instanceId);
+    gpuDevices[gpuName] = std::make_shared<GpuDevice>(
+        configs, gpuName, path, conn, eid, io, mctpRequester, objectServer);
+}
+
+// Sends a Query Device Identification request to eid. The request and
+// response buffers are shared_ptr-owned and captured by the completion
+// handler, which passes the response to processQueryDeviceIdResponse().
+void queryDeviceIdentification(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& conn,
+    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
+    const std::string& path, uint8_t eid)
+{
+    using RequestBuffer =
+        std::array<uint8_t, sizeof(gpu::QueryDeviceIdentificationRequest)>;
+    using ResponseBuffer =
+        std::array<uint8_t, sizeof(gpu::QueryDeviceIdentificationResponse)>;
+    auto request = std::make_shared<RequestBuffer>();
+    auto response = std::make_shared<ResponseBuffer>();
+
+    auto rc = gpu::encodeQueryDeviceIdentificationRequest(0, *request);
+    if (rc != 0)
+    {
+        lg2::error(
+            "Error processing MCTP endpoint with eid {EID} : encode failed, rc={RC}",
+            "EID", eid, "RC", rc);
+        return;
+    }
+
+    mctpRequester.sendRecvMsg(
+        eid, *request, *response,
+        [&io, &objectServer, &gpuDevices, conn, &mctpRequester, configs, path,
+         eid, request, response](int sendRecvMsgResult) {
+            processQueryDeviceIdResponse(
+                io, objectServer, gpuDevices, conn, mctpRequester, configs,
+                path, eid, sendRecvMsgResult, *response);
+        });
+}
+
+// Examines the properties of a single MCTP endpoint (the result of a D-Bus
+// Properties.GetAll call). If the endpoint lists the OCP accelerator
+// management message type among its supported types, device identification
+// is started for it.
+//
+// "EID" and "SupportedMessageTypes" must both be present with the expected
+// variant type; otherwise an error is logged and the endpoint is skipped.
+void processEndpoint(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& conn,
+    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
+    const std::string& path, const boost::system::error_code& ec,
+    const SensorBaseConfigMap& endpoint)
+{
+    if (ec)
+    {
+        lg2::error("Error processing MCTP endpoint: Error:{ERROR}", "ERROR",
+                   ec.message());
+        return;
+    }
+
+    // "EID" must be present and hold a uint8_t.
+    const auto eidIt = endpoint.find("EID");
+    if (eidIt == endpoint.end())
+    {
+        lg2::error(
+            "Error processing MCTP endpoint: Property EID not found in the configuration.");
+        return;
+    }
+    const auto* eidValue = std::get_if<uint8_t>(&eidIt->second);
+    if (eidValue == nullptr)
+    {
+        lg2::error(
+            "Error processing MCTP endpoint: Property EID does not have valid type.");
+        return;
+    }
+
+    uint8_t eid = *eidValue;
+
+    // "SupportedMessageTypes" must be present and hold a vector of uint8_t.
+    const auto typesIt = endpoint.find("SupportedMessageTypes");
+    if (typesIt == endpoint.end())
+    {
+        lg2::error(
+            "Error processing MCTP endpoint with eid {EID} : Property SupportedMessageTypes not found in the configuration.",
+            "EID", eid);
+        return;
+    }
+    const auto* typesValue =
+        std::get_if<std::vector<uint8_t>>(&typesIt->second);
+    if (typesValue == nullptr)
+    {
+        lg2::error(
+            "Error processing MCTP endpoint with eid {EID} : Property SupportedMessageTypes does not have valid type.",
+            "EID", eid);
+        return;
+    }
+
+    // Only endpoints advertising the OCP accelerator-management message
+    // type are queried for their device identification.
+    const bool speaksOcpVdm =
+        std::find(typesValue->begin(), typesValue->end(),
+                  ocp::accelerator_management::messageType) !=
+        typesValue->end();
+    if (!speaksOcpVdm)
+    {
+        return;
+    }
+
+    lg2::info("Found OCP MCTP VDM Endpoint with ID {EID}", "EID", eid);
+    queryDeviceIdentification(io, objectServer, gpuDevices, conn,
+                              mctpRequester, configs, path, eid);
+}
+
+// Walks an ObjectMapper GetSubTree reply and, for every object implementing
+// xyz.openbmc_project.MCTP.Endpoint, asynchronously fetches that interface's
+// properties (GetAll) and hands them to processEndpoint().
+void queryEndpoints(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& conn,
+    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
+    const std::string& path, const boost::system::error_code& ec,
+    const GetSubTreeType& ret)
+{
+    if (ec)
+    {
+        lg2::error("Error processing MCTP endpoints: {ERROR}", "ERROR",
+                   ec.message());
+        return;
+    }
+
+    for (const auto& [objPath, services] : ret)
+    {
+        for (const auto& [service, ifaces] : services)
+        {
+            for (const auto& iface : ifaces)
+            {
+                if (iface != "xyz.openbmc_project.MCTP.Endpoint")
+                {
+                    continue;
+                }
+                // conn, configs and path are copied into the handler.
+                conn->async_method_call(
+                    [&io, &objectServer, &gpuDevices, conn, &mctpRequester,
+                     configs, path](const boost::system::error_code& propsEc,
+                                    const SensorBaseConfigMap& endpoint) {
+                        processEndpoint(io, objectServer, gpuDevices, conn,
+                                        mctpRequester, configs, path, propsEc,
+                                        endpoint);
+                    },
+                    service, objPath, "org.freedesktop.DBus.Properties",
+                    "GetAll", iface);
+            }
+        }
+    }
+}
+
+// Queries the ObjectMapper for MCTP endpoint objects under
+// /au/com/codeconstruct/ and passes the GetSubTree reply to queryEndpoints().
+void discoverDevices(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& conn,
+    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
+    const std::string& path)
+{
+    std::string root{"/au/com/codeconstruct/"};
+    std::vector<std::string> ifaces{"xyz.openbmc_project.MCTP.Endpoint"};
+    conn->async_method_call(
+        [&io, &objectServer, &gpuDevices, conn, &mctpRequester, configs,
+         path](const boost::system::error_code& ec, const GetSubTreeType& ret) {
+            queryEndpoints(io, objectServer, gpuDevices, conn, mctpRequester,
+                           configs, path, ec, ret);
+        },
+        "xyz.openbmc_project.ObjectMapper",
+        "/xyz/openbmc_project/object_mapper",
+        "xyz.openbmc_project.ObjectMapper", "GetSubTree", root, 0, ifaces);
+}
+
+// Starts MCTP device discovery for every object in resp that carries the
+// configuration interface for deviceType.
+void processSensorConfigs(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
+    mctp::MctpRequester& mctpRequester, const ManagedObjectType& resp)
+{
+    const auto wantedIntf = configInterfaceName(deviceType);
+    for (const auto& [path, interfaces] : resp)
+    {
+        for (const auto& [intf, cfg] : interfaces)
+        {
+            if (intf != wantedIntf)
+            {
+                continue;
+            }
+
+            SensorConfigs configs;
+            configs.name = loadVariant<std::string>(cfg, "Name");
+            configs.pollRate = loadVariant<uint64_t>(cfg, "PollRate");
+
+            discoverDevices(io, objectServer, gpuDevices, dbusConnection,
+                            mctpRequester, configs, path);
+            lg2::info(
+                "Detected configuration {NAME} of type {TYPE} at path: {PATH}.",
+                "NAME", configs.name, "TYPE", deviceType, "PATH", path);
+        }
+    }
+}
+
+// Fetches all managed objects from entity-manager for processSensorConfigs().
+void createSensors(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
+    mctp::MctpRequester& mctpRequester)
+{
+    if (dbusConnection == nullptr)
+    {
+        lg2::error("Connection not created");
+        return;
+    }
+    dbusConnection->async_method_call(
+        [&gpuDevices, &mctpRequester, dbusConnection, &io, &objectServer](
+            boost::system::error_code err, const ManagedObjectType& objects) {
+            if (err)
+            {
+                lg2::error("Error contacting entity manager");
+                return;
+            }
+            processSensorConfigs(io, objectServer, gpuDevices, dbusConnection,
+                                 mctpRequester, objects);
+        },
+        entityManagerName, "/xyz/openbmc_project/inventory",
+        "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
+}
+
+// InterfacesRemoved handler: erases every GpuDevice whose path equals the
+// removed object path, provided deviceType's config interface was removed.
+void interfaceRemoved(
+    sdbusplus::message_t& message,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices)
+{
+    if (message.is_method_error())
+    {
+        lg2::error("interfacesRemoved callback method error");
+        return;
+    }
+
+    sdbusplus::message::object_path removedPath;
+    std::vector<std::string> interfaces;
+    message.read(removedPath, interfaces);
+
+    // Devices only go away if their Configuration interface was removed.
+    const bool configRemoved =
+        std::find(interfaces.begin(), interfaces.end(),
+                  configInterfaceName(deviceType)) != interfaces.end();
+    for (auto it = gpuDevices.begin(); it != gpuDevices.end();)
+    {
+        if (configRemoved && (it->second->getPath() == removedPath))
+        {
+            it = gpuDevices.erase(it);
+        }
+        else
+        {
+            ++it;
+        }
+    }
+}
diff --git a/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp b/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp
new file mode 100644
index 0000000..86211a9
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaDeviceDiscovery.hpp
@@ -0,0 +1,42 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+
+#include <boost/asio/io_context.hpp>
+#include <boost/container/flat_map.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+#include <sdbusplus/message.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+// D-Bus sensor path prefix and the type passed to configInterfaceName().
+constexpr const char* sensorPathPrefix = "/xyz/openbmc_project/sensors/";
+constexpr const char* deviceType = "NvidiaMctpVdm";
+
+struct SensorConfigs
+{
+    std::string name;    // "Name" property of the configuration
+    uint64_t pollRate{}; // "PollRate" property; used as a millisecond count
+};
+
+class GpuDevice;
+// Asynchronously discovers GPUs for each configuration; fills gpuDevices.
+void createSensors(
+    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices,
+    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
+    mctp::MctpRequester& mctpRequester);
+// InterfacesRemoved handler; erases devices whose configuration was removed.
+void interfaceRemoved(
+    sdbusplus::message_t& message,
+    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
+        gpuDevices);
diff --git a/src/nvidia-gpu/NvidiaGpuDevice.cpp b/src/nvidia-gpu/NvidiaGpuDevice.cpp
new file mode 100644
index 0000000..cd39b56
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaGpuDevice.cpp
@@ -0,0 +1,66 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "NvidiaGpuDevice.hpp"
+
+#include "NvidiaDeviceDiscovery.hpp"
+#include "NvidiaGpuSensor.hpp"
+#include "Thresholds.hpp"
+#include "Utils.hpp"
+
+#include <bits/basic_string.h>
+
+#include <MctpRequester.hpp>
+#include <boost/asio/io_context.hpp>
+#include <phosphor-logging/lg2.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <chrono>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+GpuDevice::GpuDevice(const SensorConfigs& configs, const std::string& name,
+                     const std::string& path,
+                     const std::shared_ptr<sdbusplus::asio::connection>& conn,
+                     uint8_t eid, boost::asio::io_context& io,
+                     mctp::MctpRequester& mctpRequester,
+                     sdbusplus::asio::object_server& objectServer) :
+    eid(eid), sensorPollMs(std::chrono::milliseconds{configs.pollRate}),
+    waitTimer(io, std::chrono::steady_clock::duration(0)),
+    mctpRequester(mctpRequester), conn(conn), objectServer(objectServer),
+    configs(configs), name(escapeName(name)), path(path)
+{
+    makeSensors(); // creates the sensors and starts the polling loop
+}
+
+// Creates the GPU's temperature sensor, then starts polling via read().
+void GpuDevice::makeSensors()
+{
+    const std::string sensorName = name + "_TEMP_0";
+    tempSensor = std::make_shared<NvidiaGpuTempSensor>(
+        conn, mctpRequester, sensorName, path, eid, objectServer,
+        std::vector<thresholds::Threshold>{});
+    lg2::info("Added GPU {NAME} Sensors with chassis path: {PATH}.", "NAME",
+              name, "PATH", path);
+    read();
+}
+
+void GpuDevice::read()
+{
+    tempSensor->update();
+    // Re-arm the poll timer; a failed (e.g. cancelled) wait ends the loop.
+    // NOTE(review): handler captures raw this - confirm device lifetime.
+    waitTimer.expires_after(sensorPollMs);
+    waitTimer.async_wait([this](const boost::system::error_code& ec) {
+        if (!ec)
+        {
+            read();
+        }
+    });
+}
diff --git a/src/nvidia-gpu/NvidiaGpuDevice.hpp b/src/nvidia-gpu/NvidiaGpuDevice.hpp
new file mode 100644
index 0000000..3653928
--- /dev/null
+++ b/src/nvidia-gpu/NvidiaGpuDevice.hpp
@@ -0,0 +1,62 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "MctpRequester.hpp"
+#include "NvidiaDeviceDiscovery.hpp"
+#include "NvidiaGpuSensor.hpp"
+
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/steady_timer.hpp>
+#include <sdbusplus/asio/connection.hpp>
+#include <sdbusplus/asio/object_server.hpp>
+
+#include <chrono>
+#include <cstdint>
+#include <memory>
+#include <string>
+// A discovered GPU: owns its sensor(s) and polls them on a timer.
+class GpuDevice
+{
+  public:
+    GpuDevice(const SensorConfigs& configs, const std::string& name,
+              const std::string& path,
+              const std::shared_ptr<sdbusplus::asio::connection>& conn,
+              uint8_t eid, boost::asio::io_context& io,
+              mctp::MctpRequester& mctpRequester,
+              sdbusplus::asio::object_server& objectServer);
+    // Path given at construction; interfaceRemoved() matches against it.
+    const std::string& getPath() const
+    {
+        return path;
+    }
+
+  private:
+    void makeSensors(); // creates tempSensor, then calls read()
+    // Updates tempSensor and re-arms waitTimer to call read() again.
+    void read();
+    // MCTP endpoint ID passed on to the sensors.
+    uint8_t eid{};
+    // Delay between polls, taken from SensorConfigs::pollRate.
+    std::chrono::milliseconds sensorPollMs;
+    // Timer that drives the periodic read() loop.
+    boost::asio::steady_timer waitTimer;
+    // NOTE(review): held by reference; presumably must outlive this device.
+    mctp::MctpRequester& mctpRequester;
+    // D-Bus connection handed to the sensors.
+    std::shared_ptr<sdbusplus::asio::connection> conn;
+    // NOTE(review): held by reference; presumably must outlive this device.
+    sdbusplus::asio::object_server& objectServer;
+    // Temperature sensor created by makeSensors().
+    std::shared_ptr<NvidiaGpuTempSensor> tempSensor;
+    // Copy of the configuration this device was created from.
+    SensorConfigs configs;
+    // Device name after escapeName().
+    std::string name;
+    // Path supplied at construction; returned by getPath().
+    std::string path;
+};
diff --git a/src/nvidia-gpu/NvidiaGpuSensor.cpp b/src/nvidia-gpu/NvidiaGpuSensor.cpp
index 86b356b..1626545 100644
--- a/src/nvidia-gpu/NvidiaGpuSensor.cpp
+++ b/src/nvidia-gpu/NvidiaGpuSensor.cpp
@@ -14,25 +14,19 @@
 #include <bits/basic_string.h>
 
 #include <MctpRequester.hpp>
+#include <NvidiaDeviceDiscovery.hpp>
 #include <NvidiaGpuMctpVdm.hpp>
 #include <OcpMctpVdm.hpp>
-#include <boost/asio/io_context.hpp>
-#include <boost/container/flat_map.hpp>
 #include <phosphor-logging/lg2.hpp>
 #include <sdbusplus/asio/connection.hpp>
 #include <sdbusplus/asio/object_server.hpp>
-#include <sdbusplus/message.hpp>
-#include <sdbusplus/message/native_types.hpp>
 
-#include <algorithm>
-#include <chrono>
 #include <cstddef>
 #include <cstdint>
 #include <functional>
 #include <memory>
 #include <string>
 #include <utility>
-#include <variant>
 #include <vector>
 
 using namespace std::literals;
@@ -41,19 +35,17 @@
 static constexpr double gpuTempSensorMaxReading = 127;
 static constexpr double gpuTempSensorMinReading = -128;
 
-GpuTempSensor::GpuTempSensor(
+NvidiaGpuTempSensor::NvidiaGpuTempSensor(
     std::shared_ptr<sdbusplus::asio::connection>& conn,
-    boost::asio::io_context& io, mctp::MctpRequester& mctpRequester,
-    const std::string& name, const std::string& sensorConfiguration,
+    mctp::MctpRequester& mctpRequester, const std::string& name,
+    const std::string& sensorConfiguration, const uint8_t eid,
     sdbusplus::asio::object_server& objectServer,
-    std::vector<thresholds::Threshold>&& thresholdData,
-    std::chrono::milliseconds pollRate) :
+    std::vector<thresholds::Threshold>&& thresholdData) :
     Sensor(escapeName(name), std::move(thresholdData), sensorConfiguration,
            "temperature", false, true, gpuTempSensorMaxReading,
            gpuTempSensorMinReading, conn),
-    sensorId{gpuTempSensorId}, sensorPollMs(pollRate),
-    waitTimer(io, std::chrono::steady_clock::duration(0)),
-    mctpRequester(mctpRequester), conn(conn), objectServer(objectServer)
+    eid(eid), sensorId{gpuTempSensorId}, mctpRequester(mctpRequester),
+    objectServer(objectServer)
 {
     std::string dbusPath =
         sensorPathPrefix + "temperature/"s + escapeName(name);
@@ -70,12 +62,11 @@
 
     association = objectServer.add_interface(dbusPath, association::interface);
 
-    discoverGpus();
+    setInitialProperties(sensor_paths::unitDegreesC);
 }
 
-GpuTempSensor::~GpuTempSensor()
+NvidiaGpuTempSensor::~NvidiaGpuTempSensor()
 {
-    waitTimer.cancel();
     for (const auto& iface : thresholdInterfaces)
     {
         objectServer.remove_interface(iface);
@@ -84,72 +75,18 @@
     objectServer.remove_interface(sensorInterface);
 }
 
-void GpuTempSensor::checkThresholds()
+void NvidiaGpuTempSensor::checkThresholds()
 {
     thresholds::checkThresholds(this);
 }
 
-void GpuTempSensor::queryEndpoints(const boost::system::error_code& ec,
-                                   const GetSubTreeType& ret)
-{
-    if (ec)
-    {
-        lg2::error("Error querying endoints :{ERROR}", "ERROR", ec.message());
-        return;
-    }
-
-    if (ret.empty())
-    {
-        return;
-    }
-
-    for (const auto& [objPath, services] : ret)
-    {
-        for (const auto& [service, ifaces] : services)
-        {
-            for (const auto& iface : ifaces)
-            {
-                if (iface == "xyz.openbmc_project.MCTP.Endpoint")
-                {
-                    conn->async_method_call(
-                        [this](const boost::system::error_code& ec,
-                               const SensorBaseConfigMap& configs) {
-                            this->processEndpoint(ec, configs);
-                        },
-                        service, objPath, "org.freedesktop.DBus.Properties",
-                        "GetAll", iface);
-                }
-            }
-        }
-    }
-}
-
-void GpuTempSensor::read()
-{
-    update();
-
-    waitTimer.expires_after(std::chrono::milliseconds(sensorPollMs));
-    waitTimer.async_wait(
-        [weakPtrToThis = std::weak_ptr<GpuTempSensor>{shared_from_this()}](
-            const boost::system::error_code& ec) {
-            if (ec)
-            {
-                return;
-            }
-            if (auto ptr = weakPtrToThis.lock())
-            {
-                ptr->read();
-            }
-        });
-}
-
-void GpuTempSensor::processResponse(int sendRecvMsgResult)
+void NvidiaGpuTempSensor::processResponse(int sendRecvMsgResult)
 {
     if (sendRecvMsgResult != 0)
     {
         lg2::error(
-            "Error updating Temperature Sensor: sending message over MCTP failed, rc={RC}",
-            "RC", sendRecvMsgResult);
+            "Error updating Temperature Sensor for eid {EID} and sensor id {SID} : sending message over MCTP failed, rc={RC}",
+            "EID", eid, "SID", sensorId, "RC", sendRecvMsgResult);
         return;
     }
 
@@ -163,264 +100,29 @@
     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
     {
         lg2::error(
-            "Error updating Temperature Sensor: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
-            "RC", rc, "CC", cc, "RESC", reasonCode);
+            "Error updating Temperature Sensor for eid {EID} and sensor id {SID} : decode failed. "
+            "rc={RC}, cc={CC}, reasonCode={RESC}",
+            "EID", eid, "SID", sensorId, "RC", rc, "CC", cc, "RESC",
+            reasonCode);
         return;
     }
 
     updateValue(tempValue);
 }
 
-void GpuTempSensor::update()
+void NvidiaGpuTempSensor::update()
 {
     auto rc = gpu::encodeGetTemperatureReadingRequest(
         0, sensorId, getTemperatureReadingRequest);
     if (rc != 0)
     {
-        lg2::error("Error updating Temperature Sensor: encode failed, rc={RC}",
-                   "RC", rc);
-        return;
+        lg2::error(
+            "Error updating Temperature Sensor for eid {EID} and sensor id {SID} : encode failed, rc={RC}",
+            "EID", eid, "SID", sensorId, "RC", rc);
+        return; // never send a request that failed to encode
     }
 
     mctpRequester.sendRecvMsg(
         eid, getTemperatureReadingRequest, getTemperatureReadingResponse,
         [this](int sendRecvMsgResult) { processResponse(sendRecvMsgResult); });
 }
-
-void GpuTempSensor::processQueryDeviceIdResponse(uint8_t eid,
-                                                 int sendRecvMsgResult)
-{
-    if (sendRecvMsgResult != 0)
-    {
-        lg2::error(
-            "Error processing GPU endpoint: sending message over MCTP failed, rc={RC}",
-            "RC", sendRecvMsgResult);
-        return;
-    }
-
-    ocp::accelerator_management::CompletionCode cc{};
-    uint16_t reasonCode = 0;
-    uint8_t responseDeviceType = 0;
-    uint8_t responseInstanceId = 0;
-
-    auto rc = gpu::decodeQueryDeviceIdentificationResponse(
-        queryDeviceIdentificationResponse, cc, reasonCode, responseDeviceType,
-        responseInstanceId);
-
-    if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
-    {
-        lg2::error(
-            "Error processing GPU endpoint: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
-            "RC", rc, "CC", cc, "RESC", reasonCode);
-        return;
-    }
-
-    if (responseDeviceType ==
-        static_cast<uint8_t>(gpu::DeviceIdentification::DEVICE_GPU))
-    {
-        lg2::info(
-            "Found the GPU with EID {EID}, DeviceType {DEVTYPE}, InstanceId {IID}.",
-            "EID", eid, "DEVTYPE", responseDeviceType, "IID",
-            responseInstanceId);
-
-        this->eid = eid;
-        setInitialProperties(sensor_paths::unitDegreesC);
-        read();
-    }
-}
-
-void GpuTempSensor::processGpuEndpoint(uint8_t eid)
-{
-    auto rc = gpu::encodeQueryDeviceIdentificationRequest(
-        0, queryDeviceIdentificationRequest);
-    if (rc != 0)
-    {
-        lg2::error("Error processing GPU endpoint: encode failed, rc={RC}",
-                   "RC", rc);
-        return;
-    }
-
-    mctpRequester.sendRecvMsg(
-        eid, queryDeviceIdentificationRequest,
-        queryDeviceIdentificationResponse, [this, eid](int sendRecvMsgResult) {
-            processQueryDeviceIdResponse(eid, sendRecvMsgResult);
-        });
-}
-
-void GpuTempSensor::processEndpoint(const boost::system::error_code& ec,
-                                    const SensorBaseConfigMap& endpoint)
-{
-    if (ec)
-    {
-        lg2::error("Error processing MCTP endpoint: {ERROR}", "ERROR",
-                   ec.message());
-        return;
-    }
-
-    uint8_t eid{};
-    std::vector<uint8_t> mctpTypes{};
-
-    auto hasEid = endpoint.find("EID");
-    if (hasEid != endpoint.end())
-    {
-        const auto* eidPtr = std::get_if<uint8_t>(&hasEid->second);
-        if (eidPtr != nullptr)
-        {
-            eid = *eidPtr;
-        }
-        else
-        {
-            lg2::error(
-                "Error processing MCTP endpoint: Property EID does not have valid type.");
-            return;
-        }
-    }
-    else
-    {
-        lg2::error(
-            "Error processing MCTP endpoint: Property EID not found in the configuration.");
-        return;
-    }
-
-    auto hasMctpTypes = endpoint.find("SupportedMessageTypes");
-    if (hasMctpTypes != endpoint.end())
-    {
-        const auto* mctpTypePtr =
-            std::get_if<std::vector<uint8_t>>(&hasMctpTypes->second);
-        if (mctpTypePtr != nullptr)
-        {
-            mctpTypes = *mctpTypePtr;
-        }
-        else
-        {
-            lg2::error(
-                "Error processing MCTP endpoint: Property SupportedMessageTypes does not have valid type.");
-            return;
-        }
-    }
-    else
-    {
-        lg2::error(
-            "Error processing MCTP endpoint: Property SupportedMessageTypes not found in the configuration.");
-        return;
-    }
-
-    if (std::find(mctpTypes.begin(), mctpTypes.end(),
-                  ocp::accelerator_management::messageType) != mctpTypes.end())
-    {
-        lg2::info(
-            "GpuTempSensor::discoverGpus(): Found OCP MCTP VDM Endpoint with ID {EID}",
-            "EID", eid);
-        this->processGpuEndpoint(eid);
-    }
-}
-
-void GpuTempSensor::discoverGpus()
-{
-    std::string searchPath{"/au/com/codeconstruct/"};
-    std::vector<std::string> ifaceList{{"xyz.openbmc_project.MCTP.Endpoint"}};
-
-    conn->async_method_call(
-        [this](const boost::system::error_code& ec, const GetSubTreeType& ret) {
-            queryEndpoints(ec, ret);
-        },
-        "xyz.openbmc_project.ObjectMapper",
-        "/xyz/openbmc_project/object_mapper",
-        "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath, 0,
-        ifaceList);
-}
-
-void processSensorConfigs(
-    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
-    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
-        sensors,
-    std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
-    mctp::MctpRequester& mctpRequester, const ManagedObjectType& resp)
-{
-    for (const auto& [path, interfaces] : resp)
-    {
-        for (const auto& [intf, cfg] : interfaces)
-        {
-            if (intf != configInterfaceName(sensorType))
-            {
-                continue;
-            }
-
-            std::string name = loadVariant<std::string>(cfg, "Name");
-
-            uint64_t pollRate = loadVariant<uint64_t>(cfg, "PollRate");
-
-            sensors[name] = std::make_shared<GpuTempSensor>(
-                dbusConnection, io, mctpRequester, name, path, objectServer,
-                std::vector<thresholds::Threshold>{},
-                std::chrono::milliseconds{pollRate});
-
-            lg2::info(
-                "Added GPU Temperature Sensor {NAME} with chassis path: {PATH}.",
-                "NAME", name, "PATH", path);
-        }
-    }
-}
-
-void createSensors(
-    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
-    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
-        sensors,
-    std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
-    mctp::MctpRequester& mctpRequester)
-{
-    if (!dbusConnection)
-    {
-        lg2::error("Connection not created");
-        return;
-    }
-    dbusConnection->async_method_call(
-        [&sensors, &mctpRequester, &dbusConnection, &io,
-         &objectServer](const boost::system::error_code& ec,
-                        const ManagedObjectType& resp) {
-            if (ec)
-            {
-                lg2::error("Error contacting entity manager");
-                return;
-            }
-
-            processSensorConfigs(io, objectServer, sensors, dbusConnection,
-                                 mctpRequester, resp);
-        },
-        entityManagerName, "/xyz/openbmc_project/inventory",
-        "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
-}
-
-void interfaceRemoved(
-    sdbusplus::message_t& message,
-    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
-        sensors)
-{
-    if (message.is_method_error())
-    {
-        lg2::error("interfacesRemoved callback method error");
-        return;
-    }
-
-    sdbusplus::message::object_path removedPath;
-    std::vector<std::string> interfaces;
-
-    message.read(removedPath, interfaces);
-
-    // If the xyz.openbmc_project.Confguration.X interface was removed
-    // for one or more sensors, delete those sensor objects.
-    auto sensorIt = sensors.begin();
-    while (sensorIt != sensors.end())
-    {
-        if ((sensorIt->second->configurationPath == removedPath) &&
-            (std::find(interfaces.begin(), interfaces.end(),
-                       configInterfaceName(sensorType)) != interfaces.end()))
-        {
-            sensorIt = sensors.erase(sensorIt);
-        }
-        else
-        {
-            sensorIt++;
-        }
-    }
-}
diff --git a/src/nvidia-gpu/NvidiaGpuSensor.hpp b/src/nvidia-gpu/NvidiaGpuSensor.hpp
index 158dc41..25fe069 100644
--- a/src/nvidia-gpu/NvidiaGpuSensor.hpp
+++ b/src/nvidia-gpu/NvidiaGpuSensor.hpp
@@ -8,102 +8,52 @@
 
 #include "MctpRequester.hpp"
 #include "Thresholds.hpp"
-#include "Utils.hpp"
 #include "sensor.hpp"
 
 #include <NvidiaGpuMctpVdm.hpp>
-#include <OcpMctpVdm.hpp>
-#include <boost/asio/io_context.hpp>
-#include <boost/asio/steady_timer.hpp>
-#include <boost/container/flat_map.hpp>
 #include <sdbusplus/asio/connection.hpp>
 #include <sdbusplus/asio/object_server.hpp>
-#include <sdbusplus/message.hpp>
 
 #include <array>
-#include <chrono>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
-constexpr const char* sensorPathPrefix = "/xyz/openbmc_project/sensors/";
-constexpr const char* sensorType = "NvidiaMctpVdm";
-
-struct GpuTempSensor :
+struct NvidiaGpuTempSensor :
     public Sensor,
-    public std::enable_shared_from_this<GpuTempSensor>
+    public std::enable_shared_from_this<NvidiaGpuTempSensor>
 {
   public:
-    GpuTempSensor(std::shared_ptr<sdbusplus::asio::connection>& conn,
-                  boost::asio::io_context& io,
-                  mctp::MctpRequester& mctpRequester, const std::string& name,
-                  const std::string& sensorConfiguration,
-                  sdbusplus::asio::object_server& objectServer,
-                  std::vector<thresholds::Threshold>&& thresholdData,
-                  std::chrono::milliseconds pollRate);
+    NvidiaGpuTempSensor(std::shared_ptr<sdbusplus::asio::connection>& conn,
+                        mctp::MctpRequester& mctpRequester,
+                        const std::string& name,
+                        const std::string& sensorConfiguration, uint8_t eid,
+                        sdbusplus::asio::object_server& objectServer,
+                        std::vector<thresholds::Threshold>&& thresholdData);
 
-    ~GpuTempSensor() override;
+    ~NvidiaGpuTempSensor() override;
 
     void checkThresholds() override;
 
-  private:
-    void read();
-
     void update();
 
-    void discoverGpus();
-
+  private:
     void processResponse(int sendRecvMsgResult);
 
-    void processQueryDeviceIdResponse(uint8_t eid, int sendRecvMsgResult);
-
-    void queryEndpoints(const boost::system::error_code& ec,
-                        const GetSubTreeType& ret);
-
-    void processEndpoint(const boost::system::error_code& ec,
-                         const SensorBaseConfigMap& endpoint);
-    void processGpuEndpoint(uint8_t eid);
-
     uint8_t eid{};
 
     uint8_t sensorId;
 
-    std::chrono::milliseconds sensorPollMs;
-
-    boost::asio::steady_timer waitTimer;
+    std::shared_ptr<sdbusplus::asio::connection> conn;
 
     mctp::MctpRequester& mctpRequester;
 
-    std::shared_ptr<sdbusplus::asio::connection> conn;
-
     sdbusplus::asio::object_server& objectServer;
 
-    std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
-                            sizeof(gpu::GetTemperatureReadingRequest)>
+    std::array<uint8_t, sizeof(gpu::GetTemperatureReadingRequest)>
         getTemperatureReadingRequest{};
 
-    std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
-                            sizeof(gpu::GetTemperatureReadingResponse)>
+    std::array<uint8_t, sizeof(gpu::GetTemperatureReadingResponse)>
         getTemperatureReadingResponse{};
-
-    std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
-                            sizeof(gpu::QueryDeviceIdentificationRequest)>
-        queryDeviceIdentificationRequest{};
-
-    std::array<uint8_t, sizeof(ocp::accelerator_management::Message) +
-                            sizeof(gpu::QueryDeviceIdentificationResponse)>
-        queryDeviceIdentificationResponse{};
 };
-
-void createSensors(
-    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
-    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
-        sensors,
-    std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
-    mctp::MctpRequester& mctpRequester);
-
-void interfaceRemoved(
-    sdbusplus::message_t& message,
-    boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>>&
-        sensors);
diff --git a/src/nvidia-gpu/NvidiaGpuSensorMain.cpp b/src/nvidia-gpu/NvidiaGpuSensorMain.cpp
index cf764d3..6ccbb05 100644
--- a/src/nvidia-gpu/NvidiaGpuSensorMain.cpp
+++ b/src/nvidia-gpu/NvidiaGpuSensorMain.cpp
@@ -5,15 +5,14 @@
  */
 
 #include "MctpRequester.hpp"
-#include "NvidiaGpuSensor.hpp"
 #include "Utils.hpp"
 
+#include <NvidiaDeviceDiscovery.hpp>
 #include <boost/asio/error.hpp>
 #include <boost/asio/io_context.hpp>
 #include <boost/asio/post.hpp>
 #include <boost/asio/steady_timer.hpp>
 #include <boost/container/flat_map.hpp>
-#include <phosphor-logging/lg2.hpp>
 #include <sdbusplus/asio/connection.hpp>
 #include <sdbusplus/asio/object_server.hpp>
 #include <sdbusplus/bus.hpp>
@@ -27,7 +26,7 @@
 #include <string>
 #include <vector>
 
-boost::container::flat_map<std::string, std::shared_ptr<GpuTempSensor>> sensors;
+boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>> gpuDevices;
 
 void configTimerExpiryCallback(
     boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
@@ -38,11 +37,7 @@
     {
         return; // we're being canceled
     }
-    createSensors(io, objectServer, sensors, dbusConnection, mctpRequester);
-    if (sensors.empty())
-    {
-        lg2::info("Configuration not detected");
-    }
+    createSensors(io, objectServer, gpuDevices, dbusConnection, mctpRequester);
 }
 
 int main()
@@ -56,7 +51,7 @@
     mctp::MctpRequester mctpRequester(io);
 
     boost::asio::post(io, [&]() {
-        createSensors(io, objectServer, sensors, systemBus, mctpRequester);
+        createSensors(io, objectServer, gpuDevices, systemBus, mctpRequester);
     });
 
     boost::asio::steady_timer configTimer(io);
@@ -73,7 +68,7 @@
 
     std::vector<std::unique_ptr<sdbusplus::bus::match_t>> matches =
         setupPropertiesChangedMatches(
-            *systemBus, std::to_array<const char*>({sensorType}), eventHandler);
+            *systemBus, std::to_array<const char*>({deviceType}), eventHandler);
 
     // Watch for entity-manager to remove configuration interfaces
     // so the corresponding sensors can be removed.
@@ -81,7 +76,7 @@
         static_cast<sdbusplus::bus_t&>(*systemBus),
         sdbusplus::bus::match::rules::interfacesRemovedAtPath(
             std::string(inventoryPath)),
-        [](sdbusplus::message_t& msg) { interfaceRemoved(msg, sensors); });
+        [](sdbusplus::message_t& msg) { interfaceRemoved(msg, gpuDevices); });
 
     io.run();
     return 0;
diff --git a/src/nvidia-gpu/meson.build b/src/nvidia-gpu/meson.build
index cf7c8c5..57bab75 100644
--- a/src/nvidia-gpu/meson.build
+++ b/src/nvidia-gpu/meson.build
@@ -1,5 +1,7 @@
 gpusensor_sources = files(
     'MctpRequester.cpp',
+    'NvidiaDeviceDiscovery.cpp',
+    'NvidiaGpuDevice.cpp',
     'NvidiaGpuMctpVdm.cpp',
     'NvidiaGpuSensor.cpp',
     'NvidiaGpuSensorMain.cpp',