From 9e0fd22b14805a3a3219a99bc5eccebf70f5484a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 25 Feb 2011 21:34:34 -0500 Subject: trace-cmd: Add hack to report out blktrace The blktrace never exported the ftrace events via the /debug/tracing/events directory. Not to mention, that the blktrace is much more complex data to read out. Add a hack into the trace-input that creates a event format that parse-events can read for the blktrace file, and also create a plugin to parse the complex data. Signed-off-by: Steven Rostedt --- Makefile | 6 +- plugin_blk.c | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ trace-blk-hack.c | 175 ++++++++++++++++++++++++++ trace-cmd.h | 3 + trace-input.c | 2 + 5 files changed, 561 insertions(+), 2 deletions(-) create mode 100644 plugin_blk.c create mode 100644 trace-blk-hack.c diff --git a/Makefile b/Makefile index 277be99..169fcbc 100644 --- a/Makefile +++ b/Makefile @@ -272,10 +272,12 @@ KERNEL_SHARK_OBJS = $(TRACE_VIEW_OBJS) $(TRACE_GRAPH_OBJS) $(TRACE_GUI_OBJS) \ PEVENT_LIB_OBJS = parse-events.o trace-seq.o parse-filter.o parse-utils.o TCMD_LIB_OBJS = $(PEVENT_LIB_OBJS) trace-util.o trace-input.o trace-ftrace.o \ - trace-output.o trace-recorder.o trace-restore.o trace-usage.o + trace-output.o trace-recorder.o trace-restore.o trace-usage.o \ + trace-blk-hack.o PLUGIN_OBJS = plugin_hrtimer.o plugin_kmem.o plugin_sched_switch.o \ - plugin_mac80211.o plugin_jbd2.o plugin_function.o plugin_kvm.o + plugin_mac80211.o plugin_jbd2.o plugin_function.o plugin_kvm.o \ + plugin_blk.o PLUGINS := $(PLUGIN_OBJS:.o=.so) diff --git a/plugin_blk.c b/plugin_blk.c new file mode 100644 index 0000000..9327b17 --- /dev/null +++ b/plugin_blk.c @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include +#include +#include + +#include + +#include "trace-cmd.h" + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct blk_data { + unsigned long long sector; + struct event_format *event; + unsigned int action; + unsigned int pid; + unsigned int device; + unsigned int bytes; + unsigned int error; + void *pdu_data; + unsigned short pdu_len; +}; + +static void fill_rwbs(char *rwbs, int action, unsigned int bytes) +{ + int i = 0; + int tc = action >> BLK_TC_SHIFT; + + if (action == BLK_TN_MESSAGE) { + rwbs[i++] = 'N'; + goto out; + } + + if (tc & BLK_TC_DISCARD) + rwbs[i++] = 'D'; + else if (tc & BLK_TC_WRITE) + rwbs[i++] = 'W'; + else if (bytes) + rwbs[i++] = 'R'; + else + rwbs[i++] = 'N'; + + if (tc & BLK_TC_AHEAD) + rwbs[i++] = 'A'; + if (tc & BLK_TC_BARRIER) + rwbs[i++] = 'B'; + if (tc & BLK_TC_SYNC) + rwbs[i++] = 'S'; + if (tc & BLK_TC_META) + rwbs[i++] = 'M'; +out: + rwbs[i] = '\0'; +} + +static int log_action(struct trace_seq *s, struct blk_data *data, + const char *act) +{ + char rwbs[6]; + + fill_rwbs(rwbs, data->action, data->bytes); + return trace_seq_printf(s, "%3d,%-3d %2s %3s ", + MAJOR(data->device), + MINOR(data->device), act, rwbs); +} + +static void blk_log_msg(struct trace_seq *s, void *data, int len) +{ + trace_seq_printf(s, "%.*s", len, (char *)data); +} + +static int blk_log_dump_pdu(struct trace_seq *s, const unsigned char *pdu_buf, + int pdu_len) +{ + int i, end, ret; + + if (!pdu_len) + return 1; + + /* find the last zero that needs to be printed */ + for (end = pdu_len - 1; end >= 0; end--) + if (pdu_buf[end]) + break; + end++; + + if (!trace_seq_putc(s, '(')) + return 0; + + for (i = 0; i < pdu_len; i++) { + + ret = trace_seq_printf(s, "%s%02x", + i == 0 ? "" : " ", pdu_buf[i]); + if (!ret) + return ret; + + /* + * stop when the rest is just zeroes and indicate so + * with a ".." appended + */ + if (i == end && end != pdu_len - 1) + return trace_seq_puts(s, " ..) "); + } + + return trace_seq_puts(s, ") "); +} + +static unsigned int t_sec(int bytes) +{ + return bytes >> 9; +} + +static unsigned int be32_to_cpu(unsigned int val) +{ + unsigned int swap; + + if (tracecmd_host_bigendian()) + return val; + + swap = ((val & 0xffULL) << 24) | + ((val & (0xffULL << 8)) << 8) | + ((val & (0xffULL << 16)) >> 8) | + ((val & (0xffULL << 24)) >> 24); + + return swap; +} + +static unsigned long long be64_to_cpu(unsigned long long val) +{ + unsigned long long swap; + + if (tracecmd_host_bigendian()) + return val; + + swap = ((val & 0xffULL) << 56) | + ((val & (0xffULL << 8)) << 40) | + ((val & (0xffULL << 16)) << 24) | + ((val & (0xffULL << 24)) << 8) | + ((val & (0xffULL << 32)) >> 8) | + ((val & (0xffULL << 40)) >> 24) | + ((val & (0xffULL << 48)) >> 40) | + ((val & (0xffULL << 56)) >> 56); + + return swap; +} + +static unsigned long long get_pdu_int(void *data) +{ + const unsigned long long *val = data; + return be64_to_cpu(*val); +} + +static void get_pdu_remap(void *pdu_data, + struct blk_io_trace_remap *r) +{ + const struct blk_io_trace_remap *__r = pdu_data; + unsigned long long sector_from = __r->sector_from; + + r->device_from = be32_to_cpu(__r->device_from); + r->device_to = be32_to_cpu(__r->device_to); + r->sector_from = be64_to_cpu(sector_from); +} + +static int blk_log_remap(struct trace_seq *s, struct blk_data *data) +{ + struct blk_io_trace_remap r = { .device_from = 0, }; + + get_pdu_remap(data->pdu_data, &r); + return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", + data->sector, t_sec(data->bytes), + MAJOR(r.device_from), MINOR(r.device_from), + (unsigned long long)r.sector_from); +} + +static int blk_log_split(struct trace_seq *s, struct blk_data *data) +{ + const char *cmd; + + cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid); + + return trace_seq_printf(s, "%llu / %llu [%s]\n", data->sector, + get_pdu_int(data->pdu_data), cmd); +} + +static int blk_log_plug(struct trace_seq *s, struct blk_data *data) +{ + const char *cmd; + + cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid); + + return trace_seq_printf(s, "[%s]\n", cmd); +} + +static int blk_log_unplug(struct trace_seq *s, struct blk_data *data) +{ + const char *cmd; + + cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid); + + return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(data->pdu_data)); +} + +static int blk_log_with_error(struct trace_seq *s, struct blk_data *data) +{ + if (data->action & BLK_TC_ACT(BLK_TC_PC)) { + blk_log_dump_pdu(s, data->pdu_data, data->pdu_len); + trace_seq_printf(s, "[%d]\n", data->error); + return 0; + } else { + if (t_sec(data->bytes)) + return trace_seq_printf(s, "%llu + %u [%d]\n", + data->sector, + t_sec(data->bytes), + data->error); + return trace_seq_printf(s, "%llu [%d]\n", + data->sector, data->error); + } +} + +static int blk_log_generic(struct trace_seq *s, struct blk_data *data) +{ + const char *cmd; + + cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid); + + if (data->action & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = trace_seq_printf(s, "%u ", data->bytes); + if (!ret) + return 0; + ret = blk_log_dump_pdu(s, data->pdu_data, data->pdu_len); + if (!ret) + return 0; + return trace_seq_printf(s, "[%s]\n", cmd); + } else { + if (t_sec(data->bytes)) + return trace_seq_printf(s, "%llu + %u [%s]\n", + data->sector, + t_sec(data->bytes), cmd); + return trace_seq_printf(s, "[%s]\n", cmd); + } +} + +static const struct { + const char *act[2]; + int (*print)(struct trace_seq *s, struct blk_data *data); +} what2act[] = { + [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, + [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, + [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, + [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, + [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, + [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, + [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, + [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, + [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, + [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, + [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, + [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, + [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, + [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, + [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, +}; + +static int blktrace_handler(struct trace_seq *s, struct record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val; + void *data = record->data; + struct blk_data blk_data; + unsigned short what; + int long_act = 0; + + field = pevent_find_field(event, "action"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &val)) + return 1; + blk_data.action = val; + + field = pevent_find_field(event, "bytes"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &val)) + return 1; + blk_data.bytes = val; + + field = pevent_find_field(event, "device"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &val)) + return 1; + blk_data.device = val; + + field = pevent_find_field(event, "pdu_len"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &val)) + return 1; + blk_data.pdu_len = val; + + field = pevent_find_field(event, "data"); + if (!field) + return 1; + blk_data.pdu_data = data + field->offset; + + field = pevent_find_field(event, "sector"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &blk_data.sector)) + return 1; + + field = pevent_find_field(event, "pid"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &val)) + return 1; + blk_data.pid = val; + + field = pevent_find_field(event, "error"); + if (!field) + return 1; + if (pevent_read_number_field(field, data, &val)) + return 1; + blk_data.error = val; + + blk_data.event = event; + + + what = blk_data.action & ((1 << BLK_TC_SHIFT) - 1); + + if (blk_data.action == BLK_TN_MESSAGE) { + log_action(s, &blk_data, "m"); + blk_log_msg(s, blk_data.pdu_data, blk_data.pdu_len); + goto out; + } + + if (what == 0 || what >= ARRAY_SIZE(what2act)) + trace_seq_printf(s, "Unknown action %x\n", what); + else { + log_action(s, &blk_data, what2act[what].act[long_act]); + what2act[what].print(s, &blk_data); + } + + out: + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_event_handler(pevent, -1, "ftrace", "blktrace", + blktrace_handler, NULL); + return 0; +} diff --git a/trace-blk-hack.c b/trace-blk-hack.c new file mode 100644 index 0000000..29c3c74 --- /dev/null +++ b/trace-blk-hack.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include +#include "trace-cmd.h" +#include "trace-local.h" + +static const char blk_event_start[] = + "name: blktrace\n" + "ID: %d\n" + "format:\n" + "\tfield:unsigned short common_type;\toffset:0;\tsize:2;\n" + "\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\n" + "\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\n" + "\tfield:int common_pid;\toffset:4;\tsize:4;\n"; + +static const char blk_body[] = "\n" + "\tfield:u64 sector;\toffset:16;\tsize:8;\n" + "\tfield:int bytes;\toffset:24;\tsize:4;\n" + "\tfield:int action;\toffset:28;\tsize:4;\n" + "\tfield:int pid;\toffset:32;\tsize:4;\n" + "\tfield:int device;\toffset:36;\tsize:4;\n" + "\tfield:int cpu;\toffset:40;\tsize:4;\n" + "\tfield:short error;\toffset:44;\tsize:2;\n" + "\tfield:short pdu_len;\toffset:46;\tsize:2;\n" + "\tfield:void data;\toffset:48;\tsize:0;\n" + "\n" + "print fmt: \"%%d\", REC->pid\n"; + +int tracecmd_blk_hack(struct tracecmd_input *handle) +{ + struct pevent *pevent; + struct event_format *event; + struct format_field *field; + char buf[4096]; /* way more than enough! */ + int id; + int l; + int r; + int i; + + pevent = tracecmd_get_pevent(handle); + + /* + * Unfortunately, the TRACE_BLK has changed a bit. + * We need to test if various events exist to try + * to guess what event id TRACE_BLK would be. + */ + + /* It was originally behind the "power" event */ + event = pevent_find_event_by_name(pevent, "ftrace", "power"); + if (event) { + id = event->id + 1; + goto found; + } + + /* + * But the power tracer is now in perf. + * Then it was after kmem_free + */ + event = pevent_find_event_by_name(pevent, "ftrace", "kmem_free"); + if (event) { + id = event->id + 1; + goto found; + } + + /* + * But that then went away. + * Currently it should be behind the user stack. + */ + event = pevent_find_event_by_name(pevent, "ftrace", "user_stack"); + if (event) { + id = event->id + 1; + goto found; + } + /* Give up :( */ + return -1; + + found: + /* + * Blk events are not exported in the events directory. + * This is a hack to attempt to create a block event + * that we can read. + * + * We'll make a format file to look like this: + * + * name: blktrace + * ID: 13 + * format: + * field:unsigned short common_type; offset:0; size:2; + * field:unsigned char common_flags; offset:2; size:1; + * field:unsigned char common_preempt_count; offset:3; size:1; + * field:int common_pid; offset:4; size:4; + * field:int common_lock_depth; offset:8; size:4; + * + * field:u64 sector; offset:16; size:8; + * field:int bytes; offset:32; size:4; + * field:int action; offset:36; size:4; + * field:int pid; offset:40; size:4; + * field:int device; offset:44; size:4; + * field:int cpu; offset:48; size:4; + * field:short error; offset:52; size:2; + * field:short pdu_len; offset:54; size:2; + * field:void data; offset:60; size:0; + * + * print fmt: "%d", REC->pid + * + * Note: the struct blk_io_trace is used directly and + * just the first parts of the struct are not used in order + * to not write over the ftrace data. + */ + + /* search for a ftrace event */ + for (i = 0; i < 13; i++) { + event = pevent_find_event(pevent, i); + if (event) + break; + } + if (!event) + goto fail; + + /* Make sure the common fields exist */ + field = pevent_find_common_field(event, "common_type"); + if (!field || field->offset != 0 || field->size != 2) + goto fail; + field = pevent_find_common_field(event, "common_flags"); + if (!field || field->offset != 2 || field->size != 1) + goto fail; + field = pevent_find_common_field(event, "common_preempt_count"); + if (!field || field->offset != 3 || field->size != 1) + goto fail; + field = pevent_find_common_field(event, "common_pid"); + if (!field || field->offset != 4 || field->size != 4) + goto fail; + r = sprintf(buf, blk_event_start, id); + l = r; + + /* lock depth is optional */ + field = pevent_find_common_field(event, "common_lock_depth"); + if (field) { + if (field->offset != 8 || field->size != 4) + return -1; + r = sprintf(buf+l, "\tfield:int common_lock_depth;\toffset:8;\tsize:4;\n"); + l += r; + } + + r = sprintf(buf+l, blk_body); + + /* Parse this event */ + l += r; + pevent_parse_event(pevent, buf, l, "ftrace"); + + return 0; + + fail: + exit(0); + printf("failed!\n"); + return -1; +} diff --git a/trace-cmd.h b/trace-cmd.h index b3cc6b4..9063c8c 100644 --- a/trace-cmd.h +++ b/trace-cmd.h @@ -214,4 +214,7 @@ void trace_util_load_plugins(struct pevent *pevent, const char *suffix, void *data), void *data); +/* --- Hack! --- */ +int tracecmd_blk_hack(struct tracecmd_input *handle); + #endif /* _TRACE_CMD_H */ diff --git a/trace-input.c b/trace-input.c index b11392e..c3b58b1 100644 --- a/trace-input.c +++ b/trace-input.c @@ -2089,6 +2089,8 @@ int tracecmd_init_data(struct tracecmd_input *handle) return -1; } + tracecmd_blk_hack(handle); + return 0; } -- cgit v1.2.2