Nvidia-Gpu: Support for Nvidia GPU Serial Number, Part Number

Support for fetching the serial number and part number is added to the
Inventory class, which uses the Get Inventory Information command.
Currently each property request is attempted up to 3 times to account
for any failures to get a response from the GPU device.

Tested
- Serial Number and Part Number are updated from the GPU device

```
 busctl introspect xyz.openbmc_project.GpuSensor  /xyz/openbmc_project/inventory/NVIDIA_GB200_GPU_0
NAME                                                 TYPE      SIGNATURE RESULT/VALUE            FLAGS
org.freedesktop.DBus.Introspectable                  interface -         -                       -
.Introspect                                          method    -         s                       -
org.freedesktop.DBus.Peer                            interface -         -                       -
.GetMachineId                                        method    -         s                       -
.Ping                                                method    -         -                       -
org.freedesktop.DBus.Properties                      interface -         -                       -
.Get                                                 method    ss        v                       -
.GetAll                                              method    s         a{sv}                   -
.Set                                                 method    ssv       -                       -
.PropertiesChanged                                   signal    sa{sv}as  -                       -
xyz.openbmc_project.Inventory.Decorator.Asset        interface -         -                       -
.PartNumber                                          property  s         "699-2G153-0210-TS1"                     emits-change
.SerialNumber                                        property  s         "1330325220002"                          emits-change
xyz.openbmc_project.Inventory.Item.Accelerator       interface -         -                       -
.Type                                                property  s         "GPU"                   emits-change

```

Change-Id: Id2b33a66ff6d5480f8e229fa233528afc0bdcfc0
Signed-off-by: Rohit PAI <ropai@nvidia.com>
diff --git a/src/nvidia-gpu/Inventory.cpp b/src/nvidia-gpu/Inventory.cpp
index 901eca1..c7aa153 100644
--- a/src/nvidia-gpu/Inventory.cpp
+++ b/src/nvidia-gpu/Inventory.cpp
@@ -2,41 +2,225 @@
 
 #include "Utils.hpp"
 
+#include <MctpRequester.hpp>
 #include <NvidiaGpuMctpVdm.hpp>
+#include <OcpMctpVdm.hpp>
+#include <boost/asio/io_context.hpp>
 #include <phosphor-logging/lg2.hpp>
 #include <sdbusplus/asio/connection.hpp>
 #include <sdbusplus/asio/object_server.hpp>
 
-#include <exception>
+#include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>
+#include <unordered_map>
+#include <variant>
 
 constexpr const char* inventoryPrefix = "/xyz/openbmc_project/inventory/";
 constexpr const char* acceleratorIfaceName =
     "xyz.openbmc_project.Inventory.Item.Accelerator";
+static constexpr const char* assetIfaceName =
+    "xyz.openbmc_project.Inventory.Decorator.Asset";
 
 Inventory::Inventory(
     const std::shared_ptr<sdbusplus::asio::connection>& /*conn*/,
     sdbusplus::asio::object_server& objectServer,
-    const std::string& inventoryName,
-    const gpu::DeviceIdentification deviceType) :
-    name(escapeName(inventoryName))
+    const std::string& inventoryName, mctp::MctpRequester& mctpRequester,
+    const gpu::DeviceIdentification deviceTypeIn, const uint8_t eid,
+    boost::asio::io_context& io) :
+    name(escapeName(inventoryName)), mctpRequester(mctpRequester),
+    deviceType(deviceTypeIn), eid(eid), retryTimer(io)
 {
+    requestBuffer = std::make_shared<InventoryRequestBuffer>();
+    responseBuffer = std::make_shared<InventoryResponseBuffer>();
+
+    std::string path = inventoryPrefix + name;
+    assetIface = objectServer.add_interface(path, assetIfaceName);
+    assetIface->register_property("Manufacturer", std::string("NVIDIA"));
+    // Register properties which need to be fetched from the device
+    registerProperty(gpu::InventoryPropertyId::SERIAL_NUMBER, assetIface,
+                     "SerialNumber");
+    registerProperty(gpu::InventoryPropertyId::BOARD_PART_NUMBER, assetIface,
+                     "PartNumber");
+    assetIface->initialize();
+
+    // Static properties
     if (deviceType == gpu::DeviceIdentification::DEVICE_GPU)
     {
-        std::string path = inventoryPrefix + name;
-        try
+        acceleratorInterface =
+            objectServer.add_interface(path, acceleratorIfaceName);
+        acceleratorInterface->register_property("Type", std::string("GPU"));
+        acceleratorInterface->initialize();
+    }
+
+    processNextProperty();
+}
+
+void Inventory::registerProperty(
+    gpu::InventoryPropertyId propertyId,
+    const std::shared_ptr<sdbusplus::asio::dbus_interface>& interface,
+    const std::string& propertyName)
+{
+    if (interface)
+    {
+        interface->register_property(propertyName, std::string{});
+        properties[propertyId] = {interface, propertyName, 0, true};
+    }
+}
+
+void Inventory::processInventoryProperty(gpu::InventoryPropertyId propertyId)
+{
+    auto it = properties.find(propertyId);
+    if (it != properties.end())
+    {
+        markPropertyPending(it);
+        std::optional<gpu::InventoryPropertyId> nextProperty =
+            getNextPendingProperty();
+        if (nextProperty && *nextProperty == propertyId)
         {
-            acceleratorInterface =
-                objectServer.add_interface(path, acceleratorIfaceName);
-            acceleratorInterface->register_property("Type", std::string("GPU"));
-            acceleratorInterface->initialize();
+            processNextProperty();
         }
-        catch (const std::exception& e)
+    }
+}
+
+void Inventory::markPropertyPending(
+    std::unordered_map<gpu::InventoryPropertyId, PropertyInfo>::iterator it)
+{
+    it->second.isPending = true;
+    it->second.retryCount = 0;
+}
+
+void Inventory::markPropertyProcessed(
+    std::unordered_map<gpu::InventoryPropertyId, PropertyInfo>::iterator it)
+{
+    it->second.isPending = false;
+}
+
+std::optional<gpu::InventoryPropertyId> Inventory::getNextPendingProperty()
+    const
+{
+    for (const auto& [propertyId, info] : properties)
+    {
+        if (info.isPending)
+        {
+            return propertyId;
+        }
+    }
+    return std::nullopt;
+}
+
+void Inventory::sendInventoryPropertyRequest(
+    gpu::InventoryPropertyId propertyId)
+{
+    int rc = gpu::encodeGetInventoryInformationRequest(
+        0, static_cast<uint8_t>(propertyId), *requestBuffer);
+    if (rc != 0)
+    {
+        lg2::error(
+            "Failed to encode property ID {PROP_ID} request for {NAME}: rc={RC}",
+            "PROP_ID", static_cast<uint8_t>(propertyId), "NAME", name, "RC",
+            rc);
+        return;
+    }
+
+    lg2::info(
+        "Sending inventory request for property ID {PROP_ID} to EID {EID} for {NAME}",
+        "PROP_ID", static_cast<uint8_t>(propertyId), "EID", eid, "NAME", name);
+
+    mctpRequester.sendRecvMsg(eid, *requestBuffer, *responseBuffer,
+                              [this, propertyId](int sendRecvMsgResult) {
+                                  this->handleInventoryPropertyResponse(
+                                      propertyId, sendRecvMsgResult);
+                              });
+}
+
+void Inventory::handleInventoryPropertyResponse(
+    gpu::InventoryPropertyId propertyId, int sendRecvMsgResult)
+{
+    auto it = properties.find(propertyId);
+    if (it == properties.end())
+    {
+        lg2::error("Property ID {PROP_ID} for {NAME} not found", "PROP_ID",
+                   static_cast<uint8_t>(propertyId), "NAME", name);
+        processNextProperty();
+        return;
+    }
+
+    bool success = false;
+    if (sendRecvMsgResult == 0)
+    {
+        ocp::accelerator_management::CompletionCode cc{};
+        uint16_t reasonCode = 0;
+        gpu::InventoryValue info;
+        int rc = gpu::decodeGetInventoryInformationResponse(
+            *responseBuffer, cc, reasonCode, propertyId, info);
+
+        lg2::info(
+            "Response for property ID {PROP_ID} from {NAME}, sendRecvMsgResult: {RESULT}, decode_rc: {RC}, completion_code: {CC}, reason_code: {REASON}",
+            "PROP_ID", static_cast<uint8_t>(propertyId), "NAME", name, "RESULT",
+            sendRecvMsgResult, "RC", rc, "CC", static_cast<uint8_t>(cc),
+            "REASON", reasonCode);
+
+        if (rc == 0 &&
+            cc == ocp::accelerator_management::CompletionCode::SUCCESS &&
+            std::holds_alternative<std::string>(info))
+        {
+            std::string value = std::get<std::string>(info);
+            it->second.interface->set_property(it->second.propertyName, value);
+            lg2::info(
+                "Successfully received property ID {PROP_ID} for {NAME} with value: {VALUE}",
+                "PROP_ID", static_cast<uint8_t>(propertyId), "NAME", name,
+                "VALUE", value);
+            success = true;
+        }
+    }
+
+    if (!success)
+    {
+        it->second.retryCount++;
+        if (it->second.retryCount >= maxRetryAttempts)
         {
             lg2::error(
-                "Failed to add accelerator interface. path='{PATH}', error='{ERROR}'",
-                "PATH", path, "ERROR", e.what());
+                "Property ID {PROP_ID} for {NAME} failed after {ATTEMPTS} attempts",
+                "PROP_ID", static_cast<uint8_t>(propertyId), "NAME", name,
+                "ATTEMPTS", maxRetryAttempts);
+            markPropertyProcessed(it);
         }
+        else
+        {
+            retryTimer.expires_after(retryDelay);
+            retryTimer.async_wait([this](const boost::system::error_code& ec) {
+                if (ec)
+                {
+                    lg2::error("Retry timer error for {NAME}: {ERROR}", "NAME",
+                               name, "ERROR", ec.message());
+                    return;
+                }
+                this->processNextProperty();
+            });
+            return;
+        }
+    }
+    else
+    {
+        markPropertyProcessed(it);
+    }
+
+    processNextProperty();
+}
+
+void Inventory::processNextProperty()
+{
+    std::optional<gpu::InventoryPropertyId> nextProperty =
+        getNextPendingProperty();
+    if (nextProperty)
+    {
+        sendInventoryPropertyRequest(*nextProperty);
+    }
+    else
+    {
+        lg2::info("No pending properties found to process for {NAME}", "NAME",
+                  name);
     }
 }
diff --git a/src/nvidia-gpu/Inventory.hpp b/src/nvidia-gpu/Inventory.hpp
index 8de490d..1d2587b 100644
--- a/src/nvidia-gpu/Inventory.hpp
+++ b/src/nvidia-gpu/Inventory.hpp
@@ -1,23 +1,72 @@
 #pragma once
 
+#include "MctpRequester.hpp"
 #include "NvidiaGpuMctpVdm.hpp"
 
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/steady_timer.hpp>
 #include <sdbusplus/asio/connection.hpp>
 #include <sdbusplus/asio/object_server.hpp>
 
+#include <array>
+#include <chrono>
+#include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>
+#include <unordered_map>
 
-class Inventory
+using InventoryRequestBuffer =
+    std::array<uint8_t, sizeof(gpu::GetInventoryInformationRequest)>;
+using InventoryResponseBuffer =
+    std::array<uint8_t, sizeof(gpu::GetInventoryInformationResponse)>;
+
+class Inventory : public std::enable_shared_from_this<Inventory>
 {
   public:
     Inventory(const std::shared_ptr<sdbusplus::asio::connection>& conn,
               sdbusplus::asio::object_server& objectServer,
               const std::string& inventoryName,
-              gpu::DeviceIdentification deviceType);
+              mctp::MctpRequester& mctpRequester,
+              gpu::DeviceIdentification deviceType, uint8_t eid,
+              boost::asio::io_context& io);
 
   private:
+    struct PropertyInfo
+    {
+        std::shared_ptr<sdbusplus::asio::dbus_interface> interface;
+        std::string propertyName;
+        int retryCount{0};
+        bool isPending{false};
+    };
+    void sendInventoryPropertyRequest(gpu::InventoryPropertyId propertyId);
+    void handleInventoryPropertyResponse(gpu::InventoryPropertyId propertyId,
+                                         int sendRecvMsgResult);
+    void processNextProperty();
+    void processInventoryProperty(gpu::InventoryPropertyId propertyId);
+    void registerProperty(
+        gpu::InventoryPropertyId propertyId,
+        const std::shared_ptr<sdbusplus::asio::dbus_interface>& interface,
+        const std::string& propertyName);
+    std::optional<gpu::InventoryPropertyId> getNextPendingProperty() const;
+    static void markPropertyPending(
+        std::unordered_map<gpu::InventoryPropertyId, PropertyInfo>::iterator
+            it);
+    static void markPropertyProcessed(
+        std::unordered_map<gpu::InventoryPropertyId, PropertyInfo>::iterator
+            it);
+
+    std::shared_ptr<sdbusplus::asio::dbus_interface> assetIface;
     std::shared_ptr<sdbusplus::asio::dbus_interface> acceleratorInterface;
 
     std::string name;
+    mctp::MctpRequester& mctpRequester;
+    gpu::DeviceIdentification deviceType;
+    uint8_t eid;
+    boost::asio::steady_timer retryTimer;
+    std::unordered_map<gpu::InventoryPropertyId, PropertyInfo> properties;
+    std::shared_ptr<InventoryRequestBuffer> requestBuffer;
+    std::shared_ptr<InventoryResponseBuffer> responseBuffer;
+    static constexpr std::chrono::seconds retryDelay{5};
+    static constexpr int maxRetryAttempts = 3;
 };
diff --git a/src/nvidia-gpu/NvidiaGpuDevice.cpp b/src/nvidia-gpu/NvidiaGpuDevice.cpp
index d7ad846..a13bcfe 100644
--- a/src/nvidia-gpu/NvidiaGpuDevice.cpp
+++ b/src/nvidia-gpu/NvidiaGpuDevice.cpp
@@ -45,7 +45,8 @@
     configs(configs), name(escapeName(name)), path(path)
 {
     inventory = std::make_shared<Inventory>(
-        conn, objectServer, name, gpu::DeviceIdentification::DEVICE_GPU);
+        conn, objectServer, name, mctpRequester,
+        gpu::DeviceIdentification::DEVICE_GPU, eid, io);
     makeSensors();
 }