Add --trace support (in blktrace format)
In an effort to understand what PNOR requests come from the host, it'd be
good to be able to trace what requests come in and visualise them.
blktrace is some Linux infrastructure for tracing block device activity
all the way through the linux block layer, for which there is a variety
of existing tooling. These tools process the (typically) kernel produced
blktrace output. We can produce this same output programmatically from
mboxd though.
This patch gives us the option to start mboxd in a mode where it will
write a blktrace file out, which can be fed into tools like blkparse(1)
or tools like iowatcher[1] to generate charts (and video).
A quirk of the blktrace format is that it's very geared towards a full
IO subsystem, so we can't directly map window operations (what we know
in mboxd) to specific IO ops (i.e. we don't get "firmware read one page
out of this window before closing it"). So, for each Window opening (or
reusing a cached one), we write THREE blktrace events: a Queue,
Dispatch, and Complete.
We can use tools like blkparse to do everything from getting a detailed list
of what windows were opened and for how long:
0,0 0 1 0.000000000 0 Q R 0 + 8 [(null)]
0,0 0 2 0.000000000 0 D R 0 + 8 [(null)]
0,0 0 3 0.000182022 0 C R 0 + 8 [0]
0,0 0 4 0.042416351 0 Q R 4144 + 2040 [(null)]
0,0 0 5 0.042416351 0 D R 4144 + 2040 [(null)]
0,0 0 6 0.060802662 0 C R 4144 + 2040 [0]
0,0 0 7 0.084775813 0 Q R 64 + 288 [(null)]
0,0 0 8 0.084775813 0 D R 64 + 288 [(null)]
0,0 0 9 0.087835720 0 C R 64 + 288 [0]
0,0 0 10 1.429234244 0 Q R 8488 + 2048 [(null)]
to getting a simple summary at the end of how many windows were opened
read and read/write:
CPU0 (0,0):
Reads Queued: 90, 74,040KiB Writes Queued: 6, 2,664KiB
Read Dispatches: 90, 74,040KiB Write Dispatches: 6, 2,664KiB
Reads Requeued: 0 Writes Requeued: 0
Reads Completed: 90, 74,040KiB Writes Completed: 6, 2,664KiB
Read Merges: 0, 0KiB Write Merges: 0, 0KiB
Read depth: 1 Write depth: 1
IO unplugs: 0 Timer unplugs: 0
If you change the window size to something tiny, like 4096 bytes, you
can get detailed paging information for hostboot at the expense of IPL
time.
Pretty graphs and animations:
https://www.flamingspork.com/blog/?p=4419
[1] iowatcher: http://masoncoding.com/iowatcher/
Change-Id: I5dd02b6bc616c441abf54d87a5d67c972cbaf228
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
[AJ: Resolve merge conflicts, some tidy ups]
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
diff --git a/protocol.c b/protocol.c
index 7158bfc..ab1c332 100644
--- a/protocol.c
+++ b/protocol.c
@@ -5,6 +5,7 @@
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
+#include <unistd.h>
#include "backend.h"
#include "common.h"
@@ -13,6 +14,7 @@
#include "protocol.h"
#include "windows.h"
+
#define BLOCK_SIZE_SHIFT_V1 12 /* 4K */
static inline uint8_t protocol_get_bmc_event_mask(struct mbox_context *context)
@@ -155,6 +157,109 @@
return lpc_addr >> context->backend.block_size_shift;
}
+/* Nanoseconds from CLOCK_MONOTONIC, which wall-clock steps cannot skew. */
+static inline int64_t blktrace_gettime(void)
+{
+	struct timespec ts = { 0 }; /* stays zero if clock_gettime() fails */
+
+	/* Visible callers only subtract two readings, so the epoch is moot */
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+
+	return (int64_t)ts.tv_sec * INT64_C(1000000000) + (int64_t)ts.tv_nsec;
+}
+
+/* Emit the Queue and Dispatch (issue) events for a flush of context->current */
+static void blktrace_flush_start(struct mbox_context *context)
+{
+	struct blk_io_trace *trace = &context->trace;
+
+	if (!context->blktracefd) /* a zero fd is treated as "tracing off" */
+		return;
+
+	/* The first event traced becomes time zero for every later event */
+	if (!context->blktrace_start)
+		context->blktrace_start = blktrace_gettime();
+
+	trace->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+	trace->sequence++;
+	trace->time = blktrace_gettime() - context->blktrace_start;
+	trace->sector = context->current->flash_offset / 512;
+	trace->bytes = context->current->size;
+	if (context->current_is_write)
+		trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_WRITE);
+	else
+		trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_READ);
+	trace->pid = 0;
+	trace->device = 0;
+	trace->cpu = 0;
+	trace->error = 0;
+	trace->pdu_len = 0;
+	write(context->blktracefd, trace, sizeof(*trace));
+	/* Re-stamp the same record and send it again as the Dispatch event */
+	trace->sequence++;
+	trace->time = blktrace_gettime() - context->blktrace_start;
+	trace->action &= ~BLK_TA_QUEUE;
+	trace->action |= BLK_TA_ISSUE;
+	write(context->blktracefd, trace, sizeof(*trace));
+}
+
+/* Write the Complete event for the record blktrace_flush_start() left behind */
+static void blktrace_flush_done(struct mbox_context *context)
+{
+	struct blk_io_trace *rec = &context->trace;
+
+	if (context->blktracefd == 0)
+		return;
+
+	rec->sequence += 1;
+	rec->action = (rec->action & ~BLK_TA_ISSUE) | BLK_TA_COMPLETE;
+	rec->time = blktrace_gettime() - context->blktrace_start;
+	write(context->blktracefd, rec, sizeof(*rec));
+}
+
+/* Stage a read Queue record stamped with the current time; nothing is written */
+static void blktrace_window_start(struct mbox_context *context)
+{
+	struct blk_io_trace *rec = &context->trace;
+
+	if (context->blktracefd == 0)
+		return;
+
+	/* Latch the trace's time zero the first time any event is recorded */
+	if (context->blktrace_start == 0)
+		context->blktrace_start = blktrace_gettime();
+
+	rec->sequence += 1;
+	rec->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+	rec->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_READ);
+	rec->time = blktrace_gettime() - context->blktrace_start;
+	/* pid, device, cpu, error and pdu_len are always reported as zero */
+	rec->pid = rec->device = rec->cpu = 0;
+	rec->error = rec->pdu_len = 0;
+}
+
+/* Emit Queue, Dispatch and Complete events describing context->current */
+static void blktrace_window_done(struct mbox_context *context)
+{
+	struct blk_io_trace *rec = &context->trace;
+
+	if (context->blktracefd == 0)
+		return;
+
+	/* Queue event: the time field is not touched here before this write */
+	rec->bytes = context->current->size;
+	rec->sector = context->current->flash_offset / 512;
+	write(context->blktracefd, rec, sizeof(*rec));
+	rec->sequence += 1; /* Dispatch event: time field still unchanged */
+	rec->action = (rec->action & ~BLK_TA_QUEUE) | BLK_TA_ISSUE;
+	write(context->blktracefd, rec, sizeof(*rec));
+
+	rec->sequence += 1; /* Complete event: the only one re-stamped here */
+	rec->action = (rec->action & ~BLK_TA_ISSUE) | BLK_TA_COMPLETE;
+	rec->time = blktrace_gettime() - context->blktrace_start;
+	write(context->blktracefd, rec, sizeof(*rec));
+}
+
static int protocol_v1_create_window(struct mbox_context *context,
struct protocol_create_window *io)
{
@@ -180,7 +285,9 @@
* write_flush() to make sure we pick the right one.
*/
if (context->current_is_write) {
+ blktrace_flush_start(context);
rc = context->protocol->flush(context, NULL);
+ blktrace_flush_done(context);
if (rc < 0) {
MSG_ERR("Couldn't Flush Write Window\n");
return rc;
@@ -192,6 +299,7 @@
/* Offset the host has requested */
MSG_INFO("Host requested flash @ 0x%.8x\n", offset);
/* Check if we have an existing window */
+ blktrace_window_start(context);
context->current = windows_search(context, offset,
context->version == API_VERSION_1);
@@ -206,6 +314,7 @@
return rc;
}
}
+ blktrace_window_done(context);
context->current_is_write = !io->req.ro;