core: Reuse buffers for tx, allow message pools

Use new m_msg_alloc/m_msg_free operations for whole-message
MCTP buffers. m_realloc is no longer used; instead, a maximum-sized
buffer is allocated for each reassembly.
This allows applications to keep a pool of MCTP message buffers.

Don't create a queue of packets to transmit. Instead, reuse a single
binding-provided tx_storage buffer for each transmitted packet; it
can be static for bindings that have a known maximum packet size.

Asynchronous users/bindings can no longer rely on the core for queueing
TX packets. Instead, they should test mctp_is_tx_ready() prior to calling
mctp_message_tx(). The stack will return -EBUSY from mctp_message_tx()
if there is already a message pending to send.

Bindings must be updated to add the tx_storage member, and the core will
no longer free packets passed to mctp_bus_rx().

Change-Id: I2598bb91026ccef01b268c52b06c0f8e20bebb1e
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
diff --git a/alloc.c b/alloc.c
index 84c3f65..0639e87 100644
--- a/alloc.c
+++ b/alloc.c
@@ -9,15 +9,33 @@
 #include "config.h"
 #endif
 
+#include "compiler.h"
+
+#ifdef MCTP_DEFAULT_ALLOC
+static void *default_msg_malloc(size_t size, void *ctx __unused)
+{
+	void *ptr = __mctp_alloc(size);
+	return ptr;
+}
+
+static void default_msg_free(void *msg, void *ctx __unused)
+{
+	__mctp_free(msg);
+}
+#endif
+
 struct {
 	void *(*m_alloc)(size_t);
 	void (*m_free)(void *);
-	void *(*m_realloc)(void *, size_t);
+	/* Final argument is ctx */
+	void *(*m_msg_alloc)(size_t, void *);
+	void (*m_msg_free)(void *, void *);
 } alloc_ops = {
 #ifdef MCTP_DEFAULT_ALLOC
 	malloc,
 	free,
-	realloc,
+	default_msg_malloc,
+	default_msg_free,
 #endif
 };
 
@@ -26,8 +44,6 @@
 {
 	if (alloc_ops.m_alloc)
 		return alloc_ops.m_alloc(size);
-	if (alloc_ops.m_realloc)
-		return alloc_ops.m_realloc(NULL, size);
 	assert(0);
 	return NULL;
 }
@@ -36,24 +52,32 @@
 {
 	if (alloc_ops.m_free)
 		alloc_ops.m_free(ptr);
-	else if (alloc_ops.m_realloc)
-		alloc_ops.m_realloc(ptr, 0);
 	else
 		assert(0);
 }
 
-void *__mctp_realloc(void *ptr, size_t size)
+void *__mctp_msg_alloc(size_t size, struct mctp *mctp)
 {
-	if (alloc_ops.m_realloc)
-		return alloc_ops.m_realloc(ptr, size);
+	void *ctx = mctp_get_alloc_ctx(mctp);
+	if (alloc_ops.m_msg_alloc)
+		return alloc_ops.m_msg_alloc(size, ctx);
 	assert(0);
 	return NULL;
 }
 
+void __mctp_msg_free(void *ptr, struct mctp *mctp)
+{
+	void *ctx = mctp_get_alloc_ctx(mctp);
+	if (alloc_ops.m_msg_free)
+		alloc_ops.m_msg_free(ptr, ctx);
+}
+
 void mctp_set_alloc_ops(void *(*m_alloc)(size_t), void (*m_free)(void *),
-			void *(m_realloc)(void *, size_t))
+			void *(*m_msg_alloc)(size_t, void *),
+			void (*m_msg_free)(void *, void *))
 {
 	alloc_ops.m_alloc = m_alloc;
 	alloc_ops.m_free = m_free;
-	alloc_ops.m_realloc = m_realloc;
+	alloc_ops.m_msg_alloc = m_msg_alloc;
+	alloc_ops.m_msg_free = m_msg_free;
 }
diff --git a/astlpc.c b/astlpc.c
index d322b7c..0acc39b 100644
--- a/astlpc.c
+++ b/astlpc.c
@@ -870,6 +870,26 @@
 	return rc == -EBUSY ? 0 : rc;
 }
 
+/* Update binding pkt_size and reallocate tx_storage */
+static int mctp_astlpc_set_pkt_size(struct mctp_binding_astlpc *astlpc,
+				    size_t pkt_size)
+{
+	size_t body = MCTP_BODY_SIZE(pkt_size);
+	body += astlpc->binding.pkt_header + astlpc->binding.pkt_trailer;
+	size_t pktbuf_size = MCTP_PKTBUF_SIZE(body);
+	/* Reallocate TX storage */
+	if (astlpc->binding.tx_storage) {
+		__mctp_free(astlpc->binding.tx_storage);
+	}
+	astlpc->binding.tx_storage = __mctp_alloc(pktbuf_size);
+	if (!astlpc->binding.tx_storage) {
+		return -ENOMEM;
+	}
+
+	astlpc->binding.pkt_size = pkt_size;
+	return 0;
+}
+
 static uint32_t mctp_astlpc_calculate_mtu(struct mctp_binding_astlpc *astlpc,
 					  struct mctp_astlpc_layout *layout)
 {
@@ -940,8 +960,13 @@
 		return -EINVAL;
 	}
 
-	if (astlpc->proto->version >= 2)
-		astlpc->binding.pkt_size = MCTP_PACKET_SIZE(mtu);
+	if (astlpc->proto->version >= 2) {
+		rc = mctp_astlpc_set_pkt_size(astlpc, MCTP_PACKET_SIZE(mtu));
+		if (rc) {
+			astlpc_prwarn(astlpc, "Allocation error");
+			return rc;
+		}
+	}
 
 	return 0;
 }
@@ -1056,9 +1081,9 @@
 		mctp_bus_rx(&astlpc->binding, pkt);
 	} else {
 		/* TODO: Drop any associated assembly */
-		mctp_pktbuf_free(pkt);
 		astlpc_prdebug(astlpc, "Dropped corrupt packet");
 	}
+	mctp_pktbuf_free(pkt);
 }
 
 static void mctp_astlpc_tx_complete(struct mctp_binding_astlpc *astlpc)
@@ -1266,8 +1291,6 @@
 	astlpc->requested_mtu = mtu;
 	astlpc->binding.name = "astlpc";
 	astlpc->binding.version = 1;
-	astlpc->binding.pkt_size =
-		MCTP_PACKET_SIZE(mtu > MCTP_BTU ? mtu : MCTP_BTU);
 	astlpc->binding.pkt_header = 4;
 	astlpc->binding.pkt_trailer = 4;
 	astlpc->binding.tx = mctp_binding_astlpc_tx;
@@ -1281,6 +1304,14 @@
 		return NULL;
 	}
 
+	if (mctp_astlpc_set_pkt_size(
+		    astlpc,
+		    MCTP_PACKET_SIZE(mtu > MCTP_BTU ? mtu : MCTP_BTU)) != 0) {
+		astlpc_prerr(astlpc, "%s: Allocation error", __func__);
+		__mctp_free(astlpc);
+		return NULL;
+	}
+
 	return astlpc;
 }
 
@@ -1326,6 +1357,7 @@
 	/* Clear channel-active and bmc-ready */
 	if (astlpc->mode == MCTP_BINDING_ASTLPC_MODE_BMC)
 		mctp_astlpc_kcs_set_status(astlpc, 0);
+	__mctp_free(astlpc->binding.tx_storage);
 	__mctp_free(astlpc);
 }
 
diff --git a/core.c b/core.c
index 33f5093..dd8376a 100644
--- a/core.c
+++ b/core.c
@@ -17,6 +17,7 @@
 #include "libmctp-log.h"
 #include "libmctp-cmds.h"
 #include "range.h"
+#include "compiler.h"
 
 /* Internal data structures */
 
@@ -30,19 +31,33 @@
 	mctp_eid_t eid;
 	struct mctp_binding *binding;
 	enum mctp_bus_state state;
+	struct mctp *mctp;
 
-	struct mctp_pktbuf *tx_queue_head;
-	struct mctp_pktbuf *tx_queue_tail;
+	/* Current message to transmit */
+	void *tx_msg;
+	/* Position in tx_msg */
+	size_t tx_msgpos;
+	/* Length of tx_msg */
+	size_t tx_msglen;
+	/* Length of current packet payload */
+	size_t tx_pktlen;
+	uint8_t tx_seq;
+	uint8_t tx_src;
+	uint8_t tx_dest;
+	bool tx_to;
+	uint8_t tx_tag;
 
 	/* todo: routing */
 };
 
 struct mctp_msg_ctx {
+	/* NULL buf indicates an unused mctp_msg_ctx */
+	void *buf;
+
 	uint8_t src;
 	uint8_t dest;
 	uint8_t tag;
 	uint8_t last_seq;
-	void *buf;
 	size_t buf_size;
 	size_t buf_alloc_size;
 	size_t fragment_size;
@@ -70,6 +85,8 @@
 		ROUTE_BRIDGE,
 	} route_policy;
 	size_t max_message_size;
+
+	void *alloc_ctx;
 };
 
 #ifndef ARRAY_SIZE
@@ -88,32 +105,44 @@
 
 struct mctp_pktbuf *mctp_pktbuf_alloc(struct mctp_binding *binding, size_t len)
 {
-	struct mctp_pktbuf *buf;
-	size_t size;
-
-	size = binding->pkt_size + binding->pkt_header + binding->pkt_trailer;
+	size_t size =
+		binding->pkt_size + binding->pkt_header + binding->pkt_trailer;
 	if (len > size) {
 		return NULL;
 	}
 
-	/* todo: pools */
-	buf = __mctp_alloc(sizeof(*buf) + size);
-
-	if (!buf)
+	void *storage = __mctp_alloc(size + sizeof(struct mctp_pktbuf));
+	if (!storage) {
 		return NULL;
-
-	buf->size = size;
-	buf->start = binding->pkt_header;
-	buf->end = buf->start + len;
-	buf->mctp_hdr_off = buf->start;
-	buf->next = NULL;
-
-	return buf;
+	}
+	struct mctp_pktbuf *pkt = mctp_pktbuf_init(binding, storage);
+	pkt->alloc = true;
+	pkt->end = pkt->start + len;
+	return pkt;
 }
 
 void mctp_pktbuf_free(struct mctp_pktbuf *pkt)
 {
-	__mctp_free(pkt);
+	if (pkt->alloc) {
+		__mctp_free(pkt);
+	} else {
+		mctp_prdebug("pktbuf_free called for non-alloced");
+	}
+}
+
+struct mctp_pktbuf *mctp_pktbuf_init(struct mctp_binding *binding,
+				     void *storage)
+{
+	size_t size =
+		binding->pkt_size + binding->pkt_header + binding->pkt_trailer;
+	struct mctp_pktbuf *buf = (struct mctp_pktbuf *)storage;
+	buf->size = size;
+	buf->start = binding->pkt_header;
+	buf->end = buf->start;
+	buf->mctp_hdr_off = buf->start;
+	buf->alloc = false;
+
+	return buf;
 }
 
 struct mctp_hdr *mctp_pktbuf_hdr(struct mctp_pktbuf *pkt)
@@ -126,7 +155,7 @@
 	return pkt->data + pkt->mctp_hdr_off + sizeof(struct mctp_hdr);
 }
 
-size_t mctp_pktbuf_size(struct mctp_pktbuf *pkt)
+size_t mctp_pktbuf_size(const struct mctp_pktbuf *pkt)
 {
 	return pkt->end - pkt->start;
 }
@@ -172,6 +201,19 @@
 	return pkt->data + pkt->end;
 }
 
+/* Allocate a duplicate of the message and copy it */
+static void *mctp_msg_dup(const void *msg, size_t msg_len, struct mctp *mctp)
+{
+	void *copy = __mctp_msg_alloc(msg_len, mctp);
+	if (!copy) {
+		mctp_prdebug("msg dup len %zu failed", msg_len);
+		return NULL;
+	}
+
+	memcpy(copy, msg, msg_len);
+	return copy;
+}
+
 /* Message reassembly */
 static struct mctp_msg_ctx *mctp_msg_ctx_lookup(struct mctp *mctp, uint8_t src,
 						uint8_t dest, uint8_t tag)
@@ -182,7 +224,8 @@
 	 * message contexts */
 	for (i = 0; i < ARRAY_SIZE(mctp->msg_ctxs); i++) {
 		struct mctp_msg_ctx *ctx = &mctp->msg_ctxs[i];
-		if (ctx->src == src && ctx->dest == dest && ctx->tag == tag)
+		if (ctx->buf && ctx->src == src && ctx->dest == dest &&
+		    ctx->tag == tag)
 			return ctx;
 	}
 
@@ -197,7 +240,7 @@
 
 	for (i = 0; i < ARRAY_SIZE(mctp->msg_ctxs); i++) {
 		struct mctp_msg_ctx *tmp = &mctp->msg_ctxs[i];
-		if (!tmp->src) {
+		if (!tmp->buf) {
 			ctx = tmp;
 			break;
 		}
@@ -209,14 +252,22 @@
 	ctx->src = src;
 	ctx->dest = dest;
 	ctx->tag = tag;
+
 	ctx->buf_size = 0;
+	ctx->buf_alloc_size = mctp->max_message_size;
+	ctx->buf = __mctp_msg_alloc(ctx->buf_alloc_size, mctp);
+	if (!ctx->buf) {
+		return NULL;
+	}
 
 	return ctx;
 }
 
-static void mctp_msg_ctx_drop(struct mctp_msg_ctx *ctx)
+static void mctp_msg_ctx_drop(struct mctp_bus *bus, struct mctp_msg_ctx *ctx)
 {
-	ctx->src = 0;
+	/* Free and mark as unused */
+	__mctp_msg_free(ctx->buf, bus->mctp);
+	ctx->buf = NULL;
 }
 
 static void mctp_msg_ctx_reset(struct mctp_msg_ctx *ctx)
@@ -226,7 +277,7 @@
 }
 
 static int mctp_msg_ctx_add_pkt(struct mctp_msg_ctx *ctx,
-				struct mctp_pktbuf *pkt, size_t max_size)
+				struct mctp_pktbuf *pkt)
 {
 	size_t len;
 
@@ -237,29 +288,7 @@
 	}
 
 	if (ctx->buf_size + len > ctx->buf_alloc_size) {
-		size_t new_alloc_size;
-		void *lbuf;
-
-		/* @todo: finer-grained allocation */
-		if (!ctx->buf_alloc_size) {
-			new_alloc_size = MAX(len, 4096UL);
-		} else {
-			new_alloc_size = MAX(ctx->buf_alloc_size * 2,
-					     len + ctx->buf_size);
-		}
-
-		/* Don't allow heap to grow beyond a limit */
-		if (new_alloc_size > max_size)
-			return -1;
-
-		lbuf = __mctp_realloc(ctx->buf, new_alloc_size);
-		if (lbuf) {
-			ctx->buf = lbuf;
-			ctx->buf_alloc_size = new_alloc_size;
-		} else {
-			__mctp_free(ctx->buf);
-			return -1;
-		}
+		return -1;
 	}
 
 	memcpy((uint8_t *)ctx->buf + ctx->buf_size, mctp_pktbuf_data(pkt), len);
@@ -295,13 +324,11 @@
 	mctp->capture_data = user;
 }
 
-static void mctp_bus_destroy(struct mctp_bus *bus)
+static void mctp_bus_destroy(struct mctp_bus *bus, struct mctp *mctp)
 {
-	while (bus->tx_queue_head) {
-		struct mctp_pktbuf *curr = bus->tx_queue_head;
-
-		bus->tx_queue_head = curr->next;
-		mctp_pktbuf_free(curr);
+	if (bus->tx_msg) {
+		__mctp_msg_free(bus->tx_msg, mctp);
+		bus->tx_msg = NULL;
 	}
 }
 
@@ -314,11 +341,11 @@
 	for (i = 0; i < ARRAY_SIZE(mctp->msg_ctxs); i++) {
 		struct mctp_msg_ctx *tmp = &mctp->msg_ctxs[i];
 		if (tmp->buf)
-			__mctp_free(tmp->buf);
+			__mctp_msg_free(tmp->buf, mctp);
 	}
 
 	while (mctp->n_busses--)
-		mctp_bus_destroy(&mctp->busses[mctp->n_busses]);
+		mctp_bus_destroy(&mctp->busses[mctp->n_busses], mctp);
 
 	__mctp_free(mctp->busses);
 	__mctp_free(mctp);
@@ -351,11 +378,14 @@
 	assert(mctp->n_busses == 0);
 	mctp->n_busses = 1;
 
+	assert(binding->tx_storage);
+
 	mctp->busses = __mctp_alloc(sizeof(struct mctp_bus));
 	if (!mctp->busses)
 		return -ENOMEM;
 
 	memset(mctp->busses, 0, sizeof(struct mctp_bus));
+	mctp->busses[0].mctp = mctp;
 	mctp->busses[0].binding = binding;
 	mctp->busses[0].eid = eid;
 	binding->bus = &mctp->busses[0];
@@ -393,6 +423,9 @@
 {
 	int rc = 0;
 
+	assert(b1->tx_storage);
+	assert(b2->tx_storage);
+
 	assert(mctp->n_busses == 0);
 	mctp->busses = __mctp_alloc(2 * sizeof(struct mctp_bus));
 	if (!mctp->busses)
@@ -524,8 +557,13 @@
 			if (dest_bus == bus)
 				continue;
 
+			void *copy = mctp_msg_dup(buf, len, mctp);
+			if (!copy) {
+				return;
+			}
+
 			mctp_message_tx_on_bus(dest_bus, src, dest, tag_owner,
-					       msg_tag, buf, len);
+					       msg_tag, copy, len);
 		}
 	}
 }
@@ -571,8 +609,14 @@
 		/* single-packet message - send straight up to rx function,
 		 * no need to create a message context */
 		len = pkt->end - pkt->mctp_hdr_off - sizeof(struct mctp_hdr);
-		p = pkt->data + pkt->mctp_hdr_off + sizeof(struct mctp_hdr);
-		mctp_rx(mctp, bus, hdr->src, hdr->dest, tag_owner, tag, p, len);
+		p = mctp_msg_dup(pkt->data + pkt->mctp_hdr_off +
+					 sizeof(struct mctp_hdr),
+				 len, mctp);
+		if (p) {
+			mctp_rx(mctp, bus, hdr->src, hdr->dest, tag_owner, tag,
+				p, len);
+			__mctp_msg_free(p, mctp);
+		}
 		break;
 
 	case MCTP_HDR_FLAG_SOM:
@@ -597,9 +641,9 @@
 		 * should of the same size */
 		ctx->fragment_size = mctp_pktbuf_size(pkt);
 
-		rc = mctp_msg_ctx_add_pkt(ctx, pkt, mctp->max_message_size);
+		rc = mctp_msg_ctx_add_pkt(ctx, pkt);
 		if (rc) {
-			mctp_msg_ctx_drop(ctx);
+			mctp_msg_ctx_drop(bus, ctx);
 		} else {
 			ctx->last_seq = seq;
 		}
@@ -617,7 +661,7 @@
 			mctp_prdebug(
 				"Sequence number %d does not match expected %d",
 				seq, exp_seq);
-			mctp_msg_ctx_drop(ctx);
+			mctp_msg_ctx_drop(bus, ctx);
 			goto out;
 		}
 
@@ -627,16 +671,16 @@
 			mctp_prdebug("Unexpected fragment size. Expected"
 				     " less than %zu, received = %zu",
 				     ctx->fragment_size, len);
-			mctp_msg_ctx_drop(ctx);
+			mctp_msg_ctx_drop(bus, ctx);
 			goto out;
 		}
 
-		rc = mctp_msg_ctx_add_pkt(ctx, pkt, mctp->max_message_size);
+		rc = mctp_msg_ctx_add_pkt(ctx, pkt);
 		if (!rc)
 			mctp_rx(mctp, bus, ctx->src, ctx->dest, tag_owner, tag,
 				ctx->buf, ctx->buf_size);
 
-		mctp_msg_ctx_drop(ctx);
+		mctp_msg_ctx_drop(bus, ctx);
 		break;
 
 	case 0:
@@ -650,7 +694,7 @@
 			mctp_prdebug(
 				"Sequence number %d does not match expected %d",
 				seq, exp_seq);
-			mctp_msg_ctx_drop(ctx);
+			mctp_msg_ctx_drop(bus, ctx);
 			goto out;
 		}
 
@@ -660,13 +704,13 @@
 			mctp_prdebug("Unexpected fragment size. Expected = %zu "
 				     "received = %zu",
 				     ctx->fragment_size, len);
-			mctp_msg_ctx_drop(ctx);
+			mctp_msg_ctx_drop(bus, ctx);
 			goto out;
 		}
 
-		rc = mctp_msg_ctx_add_pkt(ctx, pkt, mctp->max_message_size);
+		rc = mctp_msg_ctx_add_pkt(ctx, pkt);
 		if (rc) {
-			mctp_msg_ctx_drop(ctx);
+			mctp_msg_ctx_drop(bus, ctx);
 			goto out;
 		}
 		ctx->last_seq = seq;
@@ -674,15 +718,17 @@
 		break;
 	}
 out:
-	mctp_pktbuf_free(pkt);
+	return;
 }
 
 static int mctp_packet_tx(struct mctp_bus *bus, struct mctp_pktbuf *pkt)
 {
 	struct mctp *mctp = bus->binding->mctp;
 
-	if (bus->state != mctp_bus_state_tx_enabled)
+	if (bus->state != mctp_bus_state_tx_enabled) {
+		mctp_prdebug("tx with bus disabled");
 		return -1;
+	}
 
 	if (mctp->capture)
 		mctp->capture(pkt, MCTP_MESSAGE_CAPTURE_OUTGOING,
@@ -691,36 +737,95 @@
 	return bus->binding->tx(bus->binding, pkt);
 }
 
+/* Returns a pointer to the binding's tx_storage */
+static struct mctp_pktbuf *mctp_next_tx_pkt(struct mctp_bus *bus)
+{
+	if (!bus->tx_msg) {
+		return NULL;
+	}
+
+	size_t p = bus->tx_msgpos;
+	size_t msg_len = bus->tx_msglen;
+	size_t payload_len = msg_len - p;
+	size_t max_payload_len = MCTP_BODY_SIZE(bus->binding->pkt_size);
+	if (payload_len > max_payload_len)
+		payload_len = max_payload_len;
+
+	struct mctp_pktbuf *pkt =
+		mctp_pktbuf_init(bus->binding, bus->binding->tx_storage);
+	struct mctp_hdr *hdr = mctp_pktbuf_hdr(pkt);
+
+	hdr->ver = bus->binding->version & 0xf;
+	hdr->dest = bus->tx_dest;
+	hdr->src = bus->tx_src;
+	hdr->flags_seq_tag = (bus->tx_to << MCTP_HDR_TO_SHIFT) |
+			     (bus->tx_tag << MCTP_HDR_TAG_SHIFT);
+
+	if (p == 0)
+		hdr->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
+	if (p + payload_len >= msg_len)
+		hdr->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
+	hdr->flags_seq_tag |= bus->tx_seq << MCTP_HDR_SEQ_SHIFT;
+
+	memcpy(mctp_pktbuf_data(pkt), (uint8_t *)bus->tx_msg + p, payload_len);
+	pkt->end = pkt->start + sizeof(*hdr) + payload_len;
+	bus->tx_pktlen = payload_len;
+
+	mctp_prdebug(
+		"tx dst %d tag %d payload len %zu seq %d. msg pos %zu len %zu",
+		hdr->dest, bus->tx_tag, payload_len, bus->tx_seq, p, msg_len);
+
+	return pkt;
+}
+
+/* Called when a packet has successfully been sent */
+static void mctp_tx_complete(struct mctp_bus *bus)
+{
+	if (!bus->tx_msg) {
+		mctp_prdebug("tx complete no message");
+		return;
+	}
+
+	bus->tx_seq = (bus->tx_seq + 1) & MCTP_HDR_SEQ_MASK;
+	bus->tx_msgpos += bus->tx_pktlen;
+
+	if (bus->tx_msgpos >= bus->tx_msglen) {
+		__mctp_msg_free(bus->tx_msg, bus->binding->mctp);
+		bus->tx_msg = NULL;
+	}
+}
+
 static void mctp_send_tx_queue(struct mctp_bus *bus)
 {
 	struct mctp_pktbuf *pkt;
 
-	while ((pkt = bus->tx_queue_head)) {
+	while (bus->tx_msg && bus->state == mctp_bus_state_tx_enabled) {
 		int rc;
 
+		pkt = mctp_next_tx_pkt(bus);
+
 		rc = mctp_packet_tx(bus, pkt);
 		switch (rc) {
-		/* If transmission succeded, or */
+		/* If transmission succeeded */
 		case 0:
-		/* If the packet is somehow too large */
-		case -EMSGSIZE:
 			/* Drop the packet */
-			bus->tx_queue_head = pkt->next;
-			mctp_pktbuf_free(pkt);
+			mctp_tx_complete(bus);
 			break;
 
-		/* If the binding was busy, or */
+		/* If the binding was busy */
 		case -EBUSY:
+			/* Keep the packet for next try */
+			mctp_prdebug("tx EBUSY");
+			return;
+
 		/* Some other unknown error occurred */
 		default:
-			/* Make sure the tail pointer is consistent and retry later */
-			goto cleanup_tail;
+			/* Drop the packet */
+			mctp_prdebug("tx drop %d", rc);
+			mctp_tx_complete(bus);
+			return;
 		};
 	}
-
-cleanup_tail:
-	if (!bus->tx_queue_head)
-		bus->tx_queue_tail = NULL;
 }
 
 void mctp_binding_set_tx_enabled(struct mctp_binding *binding, bool enable)
@@ -765,74 +870,60 @@
 				  mctp_eid_t dest, bool tag_owner,
 				  uint8_t msg_tag, void *msg, size_t msg_len)
 {
-	size_t max_payload_len, payload_len, p;
-	struct mctp_pktbuf *pkt;
-	struct mctp_hdr *hdr;
-	int i;
+	size_t max_payload_len;
+	int rc;
 
-	if (bus->state == mctp_bus_state_constructed)
-		return -ENXIO;
+	if (bus->state == mctp_bus_state_constructed) {
+		rc = -ENXIO;
+		goto err;
+	}
 
-	if ((msg_tag & MCTP_HDR_TAG_MASK) != msg_tag)
-		return -EINVAL;
+	if ((msg_tag & MCTP_HDR_TAG_MASK) != msg_tag) {
+		rc = -EINVAL;
+		goto err;
+	}
 
 	max_payload_len = MCTP_BODY_SIZE(bus->binding->pkt_size);
 
 	{
 		const bool valid_mtu = max_payload_len >= MCTP_BTU;
 		assert(valid_mtu);
-		if (!valid_mtu)
-			return -EINVAL;
+		if (!valid_mtu) {
+			rc = -EINVAL;
+			goto err;
+		}
 	}
 
 	mctp_prdebug(
 		"%s: Generating packets for transmission of %zu byte message from %hhu to %hhu",
 		__func__, msg_len, src, dest);
 
-	/* queue up packets, each of max MCTP_MTU size */
-	for (p = 0, i = 0; p < msg_len; i++) {
-		payload_len = msg_len - p;
-		if (payload_len > max_payload_len)
-			payload_len = max_payload_len;
-
-		pkt = mctp_pktbuf_alloc(bus->binding,
-					payload_len + sizeof(*hdr));
-		hdr = mctp_pktbuf_hdr(pkt);
-
-		hdr->ver = bus->binding->version & 0xf;
-		hdr->dest = dest;
-		hdr->src = src;
-		hdr->flags_seq_tag = (tag_owner << MCTP_HDR_TO_SHIFT) |
-				     (msg_tag << MCTP_HDR_TAG_SHIFT);
-
-		if (i == 0)
-			hdr->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
-		if (p + payload_len >= msg_len)
-			hdr->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
-		hdr->flags_seq_tag |= (i & MCTP_HDR_SEQ_MASK)
-				      << MCTP_HDR_SEQ_SHIFT;
-
-		memcpy(mctp_pktbuf_data(pkt), (uint8_t *)msg + p, payload_len);
-
-		/* add to tx queue */
-		if (bus->tx_queue_tail)
-			bus->tx_queue_tail->next = pkt;
-		else
-			bus->tx_queue_head = pkt;
-		bus->tx_queue_tail = pkt;
-
-		p += payload_len;
+	if (bus->tx_msg) {
+		mctp_prdebug("Bus busy");
+		rc = -EBUSY;
+		goto err;
 	}
 
-	mctp_prdebug("%s: Enqueued %d packets", __func__, i);
+	/* Take the message to send */
+	bus->tx_msg = msg;
+	bus->tx_msglen = msg_len;
+	bus->tx_msgpos = 0;
+	/* bus->tx_seq is allowed to continue from previous message */
+	bus->tx_src = src;
+	bus->tx_dest = dest;
+	bus->tx_to = tag_owner;
+	bus->tx_tag = msg_tag;
 
 	mctp_send_tx_queue(bus);
-
 	return 0;
+
+err:
+	__mctp_msg_free(msg, bus->binding->mctp);
+	return rc;
 }
 
-int mctp_message_tx(struct mctp *mctp, mctp_eid_t eid, bool tag_owner,
-		    uint8_t msg_tag, void *msg, size_t msg_len)
+int mctp_message_tx_alloced(struct mctp *mctp, mctp_eid_t eid, bool tag_owner,
+			    uint8_t msg_tag, void *msg, size_t msg_len)
 {
 	struct mctp_bus *bus;
 
@@ -840,13 +931,49 @@
 	 * different callers */
 	if ((msg_tag & MCTP_HDR_TAG_MASK) != msg_tag) {
 		mctp_prerr("Incorrect message tag %u passed.", msg_tag);
+		__mctp_msg_free(msg, mctp);
 		return -EINVAL;
 	}
 
 	bus = find_bus_for_eid(mctp, eid);
-	if (!bus)
+	if (!bus) {
+		__mctp_msg_free(msg, mctp);
 		return 0;
+	}
 
 	return mctp_message_tx_on_bus(bus, bus->eid, eid, tag_owner, msg_tag,
 				      msg, msg_len);
 }
+
+int mctp_message_tx(struct mctp *mctp, mctp_eid_t eid, bool tag_owner,
+		    uint8_t msg_tag, const void *msg, size_t msg_len)
+{
+	void *copy = mctp_msg_dup(msg, msg_len, mctp);
+	if (!copy) {
+		return -ENOMEM;
+	}
+
+	return mctp_message_tx_alloced(mctp, eid, tag_owner, msg_tag, copy,
+				       msg_len);
+}
+
+bool mctp_is_tx_ready(struct mctp *mctp, mctp_eid_t eid)
+{
+	struct mctp_bus *bus;
+
+	bus = find_bus_for_eid(mctp, eid);
+	if (!bus) {
+		return true;
+	}
+	return bus->tx_msg == NULL;
+}
+
+void *mctp_get_alloc_ctx(struct mctp *mctp)
+{
+	return mctp->alloc_ctx;
+}
+
+void mctp_set_alloc_ctx(struct mctp *mctp, void *ctx)
+{
+	mctp->alloc_ctx = ctx;
+}
diff --git a/libmctp-alloc.h b/libmctp-alloc.h
index 2532cfa..0167454 100644
--- a/libmctp-alloc.h
+++ b/libmctp-alloc.h
@@ -5,8 +5,12 @@
 
 #include <stdlib.h>
 
+struct mctp;
+
 void *__mctp_alloc(size_t size);
 void __mctp_free(void *ptr);
-void *__mctp_realloc(void *ptr, size_t size);
+
+void *__mctp_msg_alloc(size_t size, struct mctp *mctp);
+void __mctp_msg_free(void *ptr, struct mctp *mctp);
 
 #endif /* _LIBMCTP_ALLOC_H */
diff --git a/libmctp.h b/libmctp.h
index d3c5ed1..a3e1331 100644
--- a/libmctp.h
+++ b/libmctp.h
@@ -52,25 +52,35 @@
 struct mctp_pktbuf {
 	size_t start, end, size;
 	size_t mctp_hdr_off;
-	struct mctp_pktbuf *next;
+	bool alloc;
 	unsigned char data[];
 };
 
+#define MCTP_PKTBUF_SIZE(payload)                                              \
+	(MCTP_PACKET_SIZE(payload) + sizeof(struct mctp_pktbuf))
+
+struct mctp;
+struct mctp_bus;
 struct mctp_binding;
 
-struct mctp_pktbuf *mctp_pktbuf_alloc(struct mctp_binding *hw, size_t len);
+/* Initialise a mctp_pktbuf in static storage. Should not be freed.
+ * Storage must be sized to fit the binding,
+ * MCTP_PKTBUF_SIZE(binding->pkt_size + binding->pkt_header + binding->pkt_trailer) */
+struct mctp_pktbuf *mctp_pktbuf_init(struct mctp_binding *binding,
+				     void *storage);
+/* Allocate and initialise a mctp_pktbuf. Should be freed with
+ * mctp_pktbuf_free */
+struct mctp_pktbuf *mctp_pktbuf_alloc(struct mctp_binding *binding, size_t len);
 void mctp_pktbuf_free(struct mctp_pktbuf *pkt);
 struct mctp_hdr *mctp_pktbuf_hdr(struct mctp_pktbuf *pkt);
 void *mctp_pktbuf_data(struct mctp_pktbuf *pkt);
-size_t mctp_pktbuf_size(struct mctp_pktbuf *pkt);
+size_t mctp_pktbuf_size(const struct mctp_pktbuf *pkt);
 void *mctp_pktbuf_alloc_start(struct mctp_pktbuf *pkt, size_t size);
 void *mctp_pktbuf_alloc_end(struct mctp_pktbuf *pkt, size_t size);
 int mctp_pktbuf_push(struct mctp_pktbuf *pkt, const void *data, size_t len);
 void *mctp_pktbuf_pop(struct mctp_pktbuf *pkt, size_t len);
 
 /* MCTP core */
-struct mctp;
-struct mctp_bus;
 
 struct mctp *mctp_init(void);
 void mctp_set_max_message_size(struct mctp *mctp, size_t message_size);
@@ -106,13 +116,40 @@
 
 int mctp_set_rx_all(struct mctp *mctp, mctp_rx_fn fn, void *data);
 
+/* Transmit a message.
+ * @msg: The message buffer to send. Must be suitable for
+ * free(), or the custom mctp_set_alloc_ops() m_msg_free.
+ * The mctp stack will take ownership of the buffer
+ * and release it when message transmission is complete or fails.
+ *
+ * If an asynchronous binding is being used, it will return -EBUSY if
+ * a message is already pending for transmission (msg will be freed as usual).
+ * Asynchronous users can test mctp_is_tx_ready() prior to sending.
+ */
+int mctp_message_tx_alloced(struct mctp *mctp, mctp_eid_t eid, bool tag_owner,
+			    uint8_t msg_tag, void *msg, size_t msg_len);
+
+/* Transmit a message.
+ * @msg: The message buffer to send. Ownership of this buffer
+ * remains with the caller (a copy is made internally with __mctp_msg_alloc).
+ *
+ * If an asynchronous binding is being used, it will return -EBUSY if
+ * a message is already pending for transmission.
+ * Asynchronous users can test mctp_is_tx_ready() prior to sending.
+ *
+ * This is equivalent to duplicating `msg` then calling mctp_message_tx_alloced().
+ */
 int mctp_message_tx(struct mctp *mctp, mctp_eid_t eid, bool tag_owner,
-		    uint8_t msg_tag, void *msg, size_t msg_len);
+		    uint8_t msg_tag, const void *msg, size_t msg_len);
+
+bool mctp_is_tx_ready(struct mctp *mctp, mctp_eid_t eid);
 
 /* hardware bindings */
 
 /**
  * @tx: Binding function to transmit one packet on the interface
+ * @tx_storage: A buffer for transmitting packets. Must be sized as
+ * MCTP_PKTBUF_SIZE(mtu + pkt_header + pkt_trailer).
  *      Return:
  *      * 0 - Success, pktbuf can be released
  *	* -EMSGSIZE - Packet exceeds binding MTU, pktbuf must be dropped
@@ -126,6 +163,7 @@
 	size_t pkt_size;
 	size_t pkt_header;
 	size_t pkt_trailer;
+	void *tx_storage;
 	int (*start)(struct mctp_binding *binding);
 	int (*tx)(struct mctp_binding *binding, struct mctp_pktbuf *pkt);
 	mctp_rx_fn control_rx;
@@ -141,8 +179,12 @@
 void mctp_bus_rx(struct mctp_binding *binding, struct mctp_pktbuf *pkt);
 
 /* environment-specific allocation */
-void mctp_set_alloc_ops(void *(*alloc)(size_t), void (*free)(void *),
-			void *(realloc)(void *, size_t));
+void mctp_set_alloc_ops(void *(*m_alloc)(size_t), void (*m_free)(void *),
+			void *(*m_msg_alloc)(size_t, void *),
+			void (*m_msg_free)(void *, void *));
+/* Gets/sets context that will be passed to custom m_msg_ ops */
+void *mctp_get_alloc_ctx(struct mctp *mctp);
+void mctp_set_alloc_ctx(struct mctp *mctp, void *ctx);
 
 /* environment-specific logging */
 
diff --git a/serial.c b/serial.c
index 77b6bae..d79c99c 100644
--- a/serial.c
+++ b/serial.c
@@ -25,6 +25,8 @@
 
 #define pr_fmt(x) "serial: " x
 
+#define SERIAL_BTU MCTP_BTU
+
 #include "libmctp.h"
 #include "libmctp-alloc.h"
 #include "libmctp-log.h"
@@ -42,6 +44,7 @@
 	/* receive buffer and state */
 	uint8_t rxbuf[1024];
 	struct mctp_pktbuf *rx_pkt;
+	uint8_t rx_storage[MCTP_PKTBUF_SIZE(SERIAL_BTU)];
 	uint8_t rx_exp_len;
 	uint16_t rx_fcs;
 	uint16_t rx_fcs_calc;
@@ -58,6 +61,8 @@
 
 	/* temporary transmit buffer */
 	uint8_t txbuf[256];
+	/* used by the MCTP stack */
+	uint8_t tx_storage[MCTP_PKTBUF_SIZE(SERIAL_BTU)];
 };
 
 #define binding_to_serial(b)                                                   \
@@ -198,10 +203,9 @@
 	serial->rx_pkt = NULL;
 }
 
-static void mctp_serial_start_packet(struct mctp_binding_serial *serial,
-				     uint8_t len)
+static void mctp_serial_start_packet(struct mctp_binding_serial *serial)
 {
-	serial->rx_pkt = mctp_pktbuf_alloc(&serial->binding, len);
+	serial->rx_pkt = mctp_pktbuf_init(&serial->binding, serial->rx_storage);
 }
 
 static void mctp_rx_consume_one(struct mctp_binding_serial *serial, uint8_t c)
@@ -253,7 +257,7 @@
 			mctp_prdebug("invalid size %d", c);
 			serial->rx_state = STATE_WAIT_SYNC_START;
 		} else {
-			mctp_serial_start_packet(serial, 0);
+			mctp_serial_start_packet(serial);
 			pkt = serial->rx_pkt;
 			serial->rx_exp_len = c;
 			serial->rx_state = STATE_DATA;
@@ -404,9 +408,10 @@
 	serial->rx_pkt = NULL;
 	serial->binding.name = "serial";
 	serial->binding.version = 1;
-	serial->binding.pkt_size = MCTP_PACKET_SIZE(MCTP_BTU);
+	serial->binding.pkt_size = MCTP_PACKET_SIZE(SERIAL_BTU);
 	serial->binding.pkt_header = 0;
 	serial->binding.pkt_trailer = 0;
+	serial->binding.tx_storage = serial->tx_storage;
 
 	serial->binding.start = mctp_serial_core_start;
 	serial->binding.tx = mctp_binding_serial_tx;
diff --git a/tests/test-utils.c b/tests/test-utils.c
index cbb931b..ccbe382 100644
--- a/tests/test-utils.c
+++ b/tests/test-utils.c
@@ -14,6 +14,7 @@
 
 struct mctp_binding_test {
 	struct mctp_binding binding;
+	uint8_t tx_storage[MCTP_PKTBUF_SIZE(MCTP_BTU)];
 };
 
 static int mctp_binding_test_tx(struct mctp_binding *b __attribute__((unused)),
@@ -35,6 +36,7 @@
 	test->binding.pkt_size = MCTP_PACKET_SIZE(MCTP_BTU);
 	test->binding.pkt_header = 0;
 	test->binding.pkt_trailer = 0;
+	test->binding.tx_storage = test->tx_storage;
 	return test;
 }
 
@@ -52,6 +54,7 @@
 	assert(pkt);
 	memcpy(mctp_pktbuf_hdr(pkt), buf, len);
 	mctp_bus_rx(&test->binding, pkt);
+	mctp_pktbuf_free(pkt);
 }
 
 void mctp_binding_test_register_bus(struct mctp_binding_test *binding,
diff --git a/tests/test_bridge.c b/tests/test_bridge.c
index 35625ca..73705ba 100644
--- a/tests/test_bridge.c
+++ b/tests/test_bridge.c
@@ -19,6 +19,7 @@
 	int rx_count;
 	int tx_count;
 	uint8_t last_pkt_data;
+	uint8_t tx_storage[MCTP_PKTBUF_SIZE(MCTP_BTU)];
 };
 
 struct test_ctx {
@@ -61,6 +62,7 @@
 
 	binding->rx_count++;
 	mctp_bus_rx(&binding->binding, pkt);
+	mctp_pktbuf_free(pkt);
 }
 
 static struct mctp_binding_bridge *mctp_binding_bridge_init(char *name)
@@ -75,6 +77,7 @@
 	binding->binding.pkt_size = MCTP_PACKET_SIZE(MCTP_BTU);
 	binding->binding.pkt_header = 0;
 	binding->binding.pkt_trailer = 0;
+	binding->binding.tx_storage = binding->tx_storage;
 	return binding;
 }
 
diff --git a/tests/test_cmds.c b/tests/test_cmds.c
index ca5e838..2646b8c 100644
--- a/tests/test_cmds.c
+++ b/tests/test_cmds.c
@@ -50,6 +50,7 @@
 	struct mctp_pktbuf *pkt = mctp_pktbuf_alloc(b, len);
 	memcpy(mctp_pktbuf_hdr(pkt), buf, len);
 	mctp_bus_rx(b, pkt);
+	mctp_pktbuf_free(pkt);
 }
 
 static void setup_test_binding(struct mctp_binding *test_binding,
@@ -59,6 +60,7 @@
 	assert(test_endpoint != NULL);
 	assert(callback_ctx != NULL);
 
+	static uint8_t tx_storage[MCTP_PKTBUF_SIZE(MCTP_BTU)]; /* persistent */
 	memset(test_binding, 0, sizeof(*test_binding));
 	test_binding->name = "test";
 	test_binding->version = 1;
@@ -68,6 +70,7 @@
 	test_binding->pkt_trailer = 0;
 	test_binding->control_rx = control_message_transport_callback;
 	test_binding->control_rx_data = callback_ctx;
+	test_binding->tx_storage = tx_storage;
 
 	mctp_register_bus(test_endpoint, test_binding, eid_1);
 	mctp_binding_set_tx_enabled(test_binding, true);
diff --git a/tests/test_core.c b/tests/test_core.c
index ead3990..2f5d2c7 100644
--- a/tests/test_core.c
+++ b/tests/test_core.c
@@ -97,11 +97,11 @@
 	rx_pkt->start = 0;
 	rx_pkt->end = MCTP_PACKET_SIZE(len);
 	rx_pkt->mctp_hdr_off = 0;
-	rx_pkt->next = NULL;
 	memcpy(rx_pkt->data, &pktbuf->hdr, sizeof(pktbuf->hdr));
 	memcpy(rx_pkt->data + sizeof(pktbuf->hdr), pktbuf->payload, alloc_size);
 
 	mctp_bus_rx((struct mctp_binding *)binding, rx_pkt);
+	__mctp_free(rx_pkt);
 }
 
 static void receive_one_fragment(struct mctp_binding_test *binding,