gpu: add support for per-EID request queuing
The NVIDIA extension of the OCP MCTP VDM protocol specifies that there
should be only one outstanding request message to a GPU device
implementing the VDM protocol. This introduces a requirement for
per-EID request queuing, which this patch implements.
This patch renames MctpRequester to Requester and adds a new
QueuingRequester that composes on top of Requester and provides
per-EID queuing. Each call to `sendRecvMsg` now enqueues the request
instead of sending it immediately. If no request is in progress for
that EID, the requester sends the new request right away; otherwise it
waits for the in-progress request to finish before sending the next
queued request. This serializes the requests and ensures that only one
request is "in flight" per EID at a time. To keep client changes
minimal, QueuingRequester is type-aliased to MctpRequester.
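The queuing flow boils down to the sketch below. This is illustrative
only: the QueuingSketch type, its synchronous dispatch/complete hooks,
and the std::deque are stand-ins for this explanation and are not part
of this patch, which uses Requester, std::move_only_function and
boost::container::devector instead.
'''
// Minimal sketch of the per-EID queuing pattern (not the patch code).
#include <cstdint>
#include <deque>
#include <functional>
#include <iostream>
#include <unordered_map>

struct QueuingSketch
{
    // One FIFO of pending completion callbacks per EID.
    std::unordered_map<uint8_t, std::deque<std::function<void(int)>>> queues;

    void sendRecvMsg(uint8_t eid, std::function<void(int)> callback)
    {
        auto& queue = queues[eid];
        queue.push_back(std::move(callback));
        // Dispatch only if nothing is already in flight for this EID.
        if (queue.size() == 1)
        {
            dispatch(eid);
        }
    }

    // Called when the in-flight request for `eid` finishes.
    void complete(uint8_t eid, int result)
    {
        auto& queue = queues[eid];
        queue.front()(result); // invoke the original callback
        queue.pop_front();
        dispatch(eid); // send the next queued request, if any
    }

  private:
    void dispatch(uint8_t eid)
    {
        auto& queue = queues[eid];
        if (queue.empty())
        {
            return;
        }
        // In the real code this calls Requester::sendRecvMsg, whose
        // completion handler eventually invokes complete().
        std::cout << "sending request to EID " << static_cast<int>(eid)
                  << '\n';
    }
};

int main()
{
    QueuingSketch sketch;
    sketch.sendRecvMsg(10, [](int rc) { std::cout << "first done: " << rc << '\n'; });
    sketch.sendRecvMsg(10, [](int rc) { std::cout << "second done: " << rc << '\n'; });
    // The second request stays queued until the first completes.
    sketch.complete(10, 0);
    sketch.complete(10, 0);
}
'''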
Tested:
Build an image for the gb200nvl-obmc machine with the following
patches cherry-picked. These patches are needed to enable the MCTP
stack.
https://gerrit.openbmc.org/c/openbmc/openbmc/+/79422
Pick the following changes (in order) to enable multiple GPU sensors:
'''
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/79970
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80031
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80078
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80099
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80566
https://gerrit.openbmc.org/c/openbmc/dbus-sensors/+/80567
'''
Check that all sensors are available on Redfish:
'''
~ % curl -s -k -u 'root:0penBmc' https://10.137.203.137/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors",
"@odata.type": "#SensorCollection.SensorCollection",
"Description": "Collection of Sensors for this Chassis",
"Members": [
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/energy_NVIDIA_GB200_GPU_0_Energy_0"
},
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0"
},
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_0"
},
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1"
},
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/voltage_NVIDIA_GB200_GPU_0_Voltage_0"
}
],
"Members@odata.count": 5,
"Name": "Sensors"
}
'''
Check individual sensor updates:
'''
curl -s -k -u 'root:0penBmc' https://10.137.203.245/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "temperature_NVIDIA_GB200_GPU_0_TEMP_0",
"Name": "NVIDIA GB200 GPU 0 TEMP 0",
"Reading": 27.71875,
"ReadingRangeMax": 127.0,
"ReadingRangeMin": -128.0,
"ReadingType": "Temperature",
"ReadingUnits": "Cel",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
curl -s -k -u 'root:0penBmc' https://10.137.203.245/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/temperature_NVIDIA_GB200_GPU_0_TEMP_1",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "temperature_NVIDIA_GB200_GPU_0_TEMP_1",
"Name": "NVIDIA GB200 GPU 0 TEMP 1",
"Reading": 57.28125,
"ReadingRangeMax": 127.0,
"ReadingRangeMin": -128.0,
"ReadingType": "Temperature",
"ReadingUnits": "Cel",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
curl -s -k -u 'root:0penBmc' https://10.137.203.245/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/power_NVIDIA_GB200_GPU_0_Power_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "power_NVIDIA_GB200_GPU_0_Power_0",
"Name": "NVIDIA GB200 GPU 0 Power 0",
"Reading": 27.468,
"ReadingRangeMax": 4294967.295,
"ReadingRangeMin": 0.0,
"ReadingType": "Power",
"ReadingUnits": "W",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
curl -s -k -u 'root:0penBmc' https://10.137.203.245/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/energy_NVIDIA_GB200_GPU_0_Energy_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/energy_NVIDIA_GB200_GPU_0_Energy_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "energy_NVIDIA_GB200_GPU_0_Energy_0",
"Name": "NVIDIA GB200 GPU 0 Energy 0",
"Reading": 45058.545,
"ReadingRangeMax": 1.8446744073709552e+16,
"ReadingRangeMin": 0.0,
"ReadingType": "EnergyJoules",
"ReadingUnits": "J",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
curl -s -k -u 'root:0penBmc' https://10.137.203.245/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/voltage_NVIDIA_GB200_GPU_0_Voltage_0
{
"@odata.id": "/redfish/v1/Chassis/NVIDIA_GB200_1/Sensors/voltage_NVIDIA_GB200_GPU_0_Voltage_0",
"@odata.type": "#Sensor.v1_2_0.Sensor",
"Id": "voltage_NVIDIA_GB200_GPU_0_Voltage_0",
"Name": "NVIDIA GB200 GPU 0 Voltage 0",
"Reading": 0.735,
"ReadingRangeMax": 4294.967295,
"ReadingRangeMin": 0.0,
"ReadingType": "Voltage",
"ReadingUnits": "V",
"Status": {
"Health": "OK",
"State": "Enabled"
}
}
'''
Change-Id: Ic3b892ef2c76c4c703aa55f5b2a66c22a5d71bdf
Signed-off-by: Aditya Kurdunkar <akurdunkar@nvidia.com>
diff --git a/src/nvidia-gpu/MctpRequester.cpp b/src/nvidia-gpu/MctpRequester.cpp
index 024f8cc..765859e 100644
--- a/src/nvidia-gpu/MctpRequester.cpp
+++ b/src/nvidia-gpu/MctpRequester.cpp
@@ -15,6 +15,7 @@
#include <boost/asio/generic/datagram_protocol.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/steady_timer.hpp>
+#include <boost/container/devector.hpp>
#include <phosphor-logging/lg2.hpp>
#include <cerrno>
@@ -22,6 +23,7 @@
#include <cstdint>
#include <cstring>
#include <functional>
+#include <memory>
#include <span>
#include <utility>
@@ -30,16 +32,38 @@
namespace mctp
{
-MctpRequester::MctpRequester(boost::asio::io_context& ctx) :
+Requester::Requester(boost::asio::io_context& ctx) :
mctpSocket(ctx, boost::asio::generic::datagram_protocol{AF_MCTP, 0}),
expiryTimer(ctx)
{}
-void MctpRequester::processRecvMsg(
- uint8_t eid, const std::span<const uint8_t> reqMsg,
- const std::span<uint8_t> respMsg, const boost::system::error_code& ec,
- const size_t /*length*/)
+void Requester::processRecvMsg(
+ const std::span<const uint8_t> reqMsg, const std::span<uint8_t> respMsg,
+ const boost::system::error_code& ec, const size_t /*length*/)
{
+ const auto* respAddr =
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+ reinterpret_cast<const struct sockaddr_mctp*>(recvEndPoint.data());
+
+ uint8_t eid = respAddr->smctp_addr.s_addr;
+
+ if (!completionCallbacks.contains(eid))
+ {
+ lg2::error(
+ "MctpRequester failed to get the callback for the EID: {EID}",
+ "EID", static_cast<int>(eid));
+ return;
+ }
+
+ auto& callback = completionCallbacks.at(eid);
+
+ if (respAddr->smctp_type != msgType)
+ {
+ lg2::error("MctpRequester: Message type mismatch");
+ callback(EPROTO);
+ return;
+ }
+
expiryTimer.cancel();
if (ec)
@@ -47,29 +71,7 @@
lg2::error(
"MctpRequester failed to receive data from the MCTP socket - ErrorCode={EC}, Error={ER}.",
"EC", ec.value(), "ER", ec.message());
- completionCallback(EIO);
- return;
- }
-
- const auto* respAddr =
- // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
- reinterpret_cast<const struct sockaddr_mctp*>(recvEndPoint.data());
-
- if (respAddr->smctp_type != msgType)
- {
- lg2::error("MctpRequester: Message type mismatch");
- completionCallback(EPROTO);
- return;
- }
-
- uint8_t respEid = respAddr->smctp_addr.s_addr;
-
- if (respEid != eid)
- {
- lg2::error(
- "MctpRequester: EID mismatch - expected={EID}, received={REID}",
- "EID", eid, "REID", respEid);
- completionCallback(EPROTO);
+ callback(EIO);
return;
}
@@ -96,46 +98,56 @@
"MctpRequester: Instance ID mismatch - request={REQ}, response={RESP}",
"REQ", static_cast<int>(reqInstanceId), "RESP",
static_cast<int>(respInstanceId));
- completionCallback(EPROTO);
+ callback(EPROTO);
return;
}
}
- completionCallback(0);
+ callback(0);
}
-void MctpRequester::handleSendMsgCompletion(
+void Requester::handleSendMsgCompletion(
uint8_t eid, const std::span<const uint8_t> reqMsg,
std::span<uint8_t> respMsg, const boost::system::error_code& ec,
size_t /* length */)
{
+ if (!completionCallbacks.contains(eid))
+ {
+ lg2::error(
+ "MctpRequester failed to get the callback for the EID: {EID}",
+ "EID", static_cast<int>(eid));
+ return;
+ }
+
+ auto& callback = completionCallbacks.at(eid);
+
if (ec)
{
lg2::error(
"MctpRequester failed to send data from the MCTP socket - ErrorCode={EC}, Error={ER}.",
"EC", ec.value(), "ER", ec.message());
- completionCallback(EIO);
+ callback(EIO);
return;
}
expiryTimer.expires_after(2s);
- expiryTimer.async_wait([this](const boost::system::error_code& ec) {
+ expiryTimer.async_wait([this, eid](const boost::system::error_code& ec) {
if (ec != boost::asio::error::operation_aborted)
{
- completionCallback(ETIME);
+ auto& callback = completionCallbacks.at(eid);
+ callback(ETIME);
}
});
mctpSocket.async_receive_from(
boost::asio::mutable_buffer(respMsg), recvEndPoint,
- std::bind_front(&MctpRequester::processRecvMsg, this, eid, reqMsg,
- respMsg));
+ std::bind_front(&Requester::processRecvMsg, this, reqMsg, respMsg));
}
-void MctpRequester::sendRecvMsg(
- uint8_t eid, const std::span<const uint8_t> reqMsg,
- std::span<uint8_t> respMsg, std::move_only_function<void(int)> callback)
+void Requester::sendRecvMsg(uint8_t eid, const std::span<const uint8_t> reqMsg,
+ std::span<uint8_t> respMsg,
+ std::move_only_function<void(int)> callback)
{
if (reqMsg.size() < sizeof(ocp::accelerator_management::BindingPciVid))
{
@@ -144,7 +156,7 @@
return;
}
- completionCallback = std::move(callback);
+ completionCallbacks[eid] = std::move(callback);
struct sockaddr_mctp addr{};
addr.smctp_family = AF_MCTP;
@@ -156,7 +168,53 @@
mctpSocket.async_send_to(
boost::asio::const_buffer(reqMsg), sendEndPoint,
- std::bind_front(&MctpRequester::handleSendMsgCompletion, this, eid,
- reqMsg, respMsg));
+ std::bind_front(&Requester::handleSendMsgCompletion, this, eid, reqMsg,
+ respMsg));
}
+
+void QueuingRequester::sendRecvMsg(uint8_t eid, std::span<const uint8_t> reqMsg,
+ std::span<uint8_t> respMsg,
+ std::move_only_function<void(int)> callback)
+{
+ auto reqCtx =
+ std::make_unique<RequestContext>(reqMsg, respMsg, std::move(callback));
+
+ // Add request to queue
+ auto& queue = requestContextQueues[eid];
+ queue.push_back(std::move(reqCtx));
+
+ if (queue.size() == 1)
+ {
+ processQueue(eid);
+ }
+}
+
+void QueuingRequester::handleResult(uint8_t eid, int result)
+{
+ auto& queue = requestContextQueues[eid];
+ const auto& reqCtx = queue.front();
+
+ reqCtx->callback(result); // Call the original callback
+
+ queue.pop_front();
+
+ processQueue(eid);
+}
+
+void QueuingRequester::processQueue(uint8_t eid)
+{
+ auto& queue = requestContextQueues[eid];
+
+ if (queue.empty())
+ {
+ return;
+ }
+
+ const auto& reqCtx = queue.front();
+
+ requester.sendRecvMsg(
+ eid, reqCtx->reqMsg, reqCtx->respMsg,
+ std::bind_front(&QueuingRequester::handleResult, this, eid));
+}
+
} // namespace mctp
diff --git a/src/nvidia-gpu/MctpRequester.hpp b/src/nvidia-gpu/MctpRequester.hpp
index 289e800..9d05ebd 100644
--- a/src/nvidia-gpu/MctpRequester.hpp
+++ b/src/nvidia-gpu/MctpRequester.hpp
@@ -10,35 +10,39 @@
#include <boost/asio/generic/datagram_protocol.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/steady_timer.hpp>
+#include <boost/container/devector.hpp>
#include <cstddef>
#include <cstdint>
#include <functional>
+#include <memory>
#include <span>
+#include <unordered_map>
+#include <utility>
namespace mctp
{
-class MctpRequester
+class Requester
{
public:
- MctpRequester() = delete;
+ Requester() = delete;
- MctpRequester(const MctpRequester&) = delete;
+ Requester(const Requester&) = delete;
- MctpRequester(MctpRequester&&) = delete;
+ Requester(Requester&&) = delete;
- MctpRequester& operator=(const MctpRequester&) = delete;
+ Requester& operator=(const Requester&) = delete;
- MctpRequester& operator=(MctpRequester&&) = delete;
+ Requester& operator=(Requester&&) = delete;
- explicit MctpRequester(boost::asio::io_context& ctx);
+ explicit Requester(boost::asio::io_context& ctx);
void sendRecvMsg(uint8_t eid, std::span<const uint8_t> reqMsg,
std::span<uint8_t> respMsg,
std::move_only_function<void(int)> callback);
private:
- void processRecvMsg(uint8_t eid, std::span<const uint8_t> reqMsg,
+ void processRecvMsg(std::span<const uint8_t> reqMsg,
std::span<uint8_t> respMsg,
const boost::system::error_code& ec, size_t length);
@@ -57,8 +61,56 @@
boost::asio::steady_timer expiryTimer;
- std::move_only_function<void(int)> completionCallback;
+ std::unordered_map<uint8_t, std::move_only_function<void(int)>>
+ completionCallbacks;
static constexpr uint8_t msgType = ocp::accelerator_management::messageType;
};
+
+class QueuingRequester
+{
+ public:
+ QueuingRequester() = delete;
+ QueuingRequester(const QueuingRequester&) = delete;
+ QueuingRequester(QueuingRequester&&) = delete;
+ QueuingRequester& operator=(const QueuingRequester&) = delete;
+ QueuingRequester& operator=(QueuingRequester&&) = delete;
+
+ explicit QueuingRequester(boost::asio::io_context& ctx) : requester(ctx) {}
+
+ void sendRecvMsg(uint8_t eid, std::span<const uint8_t> reqMsg,
+ std::span<uint8_t> respMsg,
+ std::move_only_function<void(int)> callback);
+
+ private:
+ struct RequestContext
+ {
+ std::span<const uint8_t> reqMsg;
+ std::span<uint8_t> respMsg;
+ std::move_only_function<void(int)> callback;
+
+ RequestContext(const RequestContext&) = delete;
+ RequestContext& operator=(const RequestContext&) = delete;
+
+ RequestContext(RequestContext&&) = default;
+ RequestContext& operator=(RequestContext&&) = default;
+ ~RequestContext() = default;
+
+ explicit RequestContext(std::span<const uint8_t> req,
+ std::span<uint8_t> resp,
+ std::move_only_function<void(int)>&& cb) :
+ reqMsg(req), respMsg(resp), callback(std::move(cb))
+ {}
+ };
+
+ void handleResult(uint8_t eid, int result);
+ void processQueue(uint8_t eid);
+
+ Requester requester;
+ std::unordered_map<
+ uint8_t, boost::container::devector<std::unique_ptr<RequestContext>>>
+ requestContextQueues;
+};
+
+using MctpRequester = QueuingRequester;
} // namespace mctp