aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2011-02-25 21:34:34 -0500
committerSteven Rostedt <rostedt@goodmis.org>2011-02-25 21:40:03 -0500
commit9e0fd22b14805a3a3219a99bc5eccebf70f5484a (patch)
tree08df99aed0c1cd75a5c29f16c77fff6de8f09f3c
parent8d1e65fdb15da0b0061cdb43c938e0882757648a (diff)
trace-cmd: Add hack to report out blktrace
The blktrace tracer never exported its ftrace events via the /debug/tracing/events directory. Not to mention that the blktrace data is much more complex to read out. Add a hack into trace-input that creates an event format that parse-events can read for the blktrace file, and also create a plugin to parse the complex data. Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r--Makefile6
-rw-r--r--plugin_blk.c377
-rw-r--r--trace-blk-hack.c175
-rw-r--r--trace-cmd.h3
-rw-r--r--trace-input.c2
5 files changed, 561 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 277be99..169fcbc 100644
--- a/Makefile
+++ b/Makefile
@@ -272,10 +272,12 @@ KERNEL_SHARK_OBJS = $(TRACE_VIEW_OBJS) $(TRACE_GRAPH_OBJS) $(TRACE_GUI_OBJS) \
272 272
273PEVENT_LIB_OBJS = parse-events.o trace-seq.o parse-filter.o parse-utils.o 273PEVENT_LIB_OBJS = parse-events.o trace-seq.o parse-filter.o parse-utils.o
274TCMD_LIB_OBJS = $(PEVENT_LIB_OBJS) trace-util.o trace-input.o trace-ftrace.o \ 274TCMD_LIB_OBJS = $(PEVENT_LIB_OBJS) trace-util.o trace-input.o trace-ftrace.o \
275 trace-output.o trace-recorder.o trace-restore.o trace-usage.o 275 trace-output.o trace-recorder.o trace-restore.o trace-usage.o \
276 trace-blk-hack.o
276 277
277PLUGIN_OBJS = plugin_hrtimer.o plugin_kmem.o plugin_sched_switch.o \ 278PLUGIN_OBJS = plugin_hrtimer.o plugin_kmem.o plugin_sched_switch.o \
278 plugin_mac80211.o plugin_jbd2.o plugin_function.o plugin_kvm.o 279 plugin_mac80211.o plugin_jbd2.o plugin_function.o plugin_kvm.o \
280 plugin_blk.o
279 281
280PLUGINS := $(PLUGIN_OBJS:.o=.so) 282PLUGINS := $(PLUGIN_OBJS:.o=.so)
281 283
diff --git a/plugin_blk.c b/plugin_blk.c
new file mode 100644
index 0000000..9327b17
--- /dev/null
+++ b/plugin_blk.c
@@ -0,0 +1,377 @@
1/*
2 * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
3 *
4 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation;
8 * version 2.1 of the License (not later!)
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include <linux/blktrace_api.h>
26
27#include "trace-cmd.h"
28
/* A packed device number keeps the minor in its low MINORBITS bits. */
#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)

/* Split a packed device number into its major and minor parts. */
#define MAJOR(dev)	((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev)	((unsigned int) ((dev) & MINORMASK))

/* Element count of a true array (meaningless when given a pointer). */
#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))
34
/*
 * Values extracted from a single blktrace record.  blktrace_handler()
 * fills one of these in and hands it to the per-action print helpers.
 */
struct blk_data {
	unsigned long long	sector;		/* "sector" field of the record */
	struct event_format	*event;		/* event the record belongs to */
	unsigned int		action;		/* "action" field */
	unsigned int		pid;		/* "pid" field */
	unsigned int		device;		/* "device" field, packed major/minor */
	unsigned int		bytes;		/* "bytes" field */
	unsigned int		error;		/* "error" field */
	void			*pdu_data;	/* start of the "data" field */
	unsigned short		pdu_len;	/* "pdu_len" field */
};
46
47static void fill_rwbs(char *rwbs, int action, unsigned int bytes)
48{
49 int i = 0;
50 int tc = action >> BLK_TC_SHIFT;
51
52 if (action == BLK_TN_MESSAGE) {
53 rwbs[i++] = 'N';
54 goto out;
55 }
56
57 if (tc & BLK_TC_DISCARD)
58 rwbs[i++] = 'D';
59 else if (tc & BLK_TC_WRITE)
60 rwbs[i++] = 'W';
61 else if (bytes)
62 rwbs[i++] = 'R';
63 else
64 rwbs[i++] = 'N';
65
66 if (tc & BLK_TC_AHEAD)
67 rwbs[i++] = 'A';
68 if (tc & BLK_TC_BARRIER)
69 rwbs[i++] = 'B';
70 if (tc & BLK_TC_SYNC)
71 rwbs[i++] = 'S';
72 if (tc & BLK_TC_META)
73 rwbs[i++] = 'M';
74out:
75 rwbs[i] = '\0';
76}
77
78static int log_action(struct trace_seq *s, struct blk_data *data,
79 const char *act)
80{
81 char rwbs[6];
82
83 fill_rwbs(rwbs, data->action, data->bytes);
84 return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
85 MAJOR(data->device),
86 MINOR(data->device), act, rwbs);
87}
88
/* Write the first @len bytes of @data to @s as text. */
static void blk_log_msg(struct trace_seq *s, void *data, int len)
{
	char *msg = data;

	trace_seq_printf(s, "%.*s", len, msg);
}
93
/*
 * Hex-dump a PDU as "(xx xx ...) ".
 *
 * A run of trailing zero bytes is abbreviated: the dump stops after the
 * first zero of that run and " ..) " is written instead of ") ".
 *
 * Returns 1 when @pdu_len is zero (nothing is written), 0 when the
 * opening parenthesis or a byte could not be written, and otherwise the
 * return value of the final trace_seq_puts().
 */
static int blk_log_dump_pdu(struct trace_seq *s, const unsigned char *pdu_buf,
			    int pdu_len)
{
	int stop;
	int pos;

	if (pdu_len == 0)
		return 1;

	/* stop ends up one past the last non-zero byte (0 if all are zero) */
	stop = pdu_len - 1;
	while (stop >= 0 && !pdu_buf[stop])
		stop--;
	stop++;

	if (!trace_seq_putc(s, '('))
		return 0;

	for (pos = 0; pos < pdu_len; pos++) {
		const char *sep = pos == 0 ? "" : " ";
		int written;

		written = trace_seq_printf(s, "%s%02x", sep, pdu_buf[pos]);
		if (!written)
			return written;

		/*
		 * Everything after this byte is zero: say so with "..",
		 * unless this byte is already the last one.
		 */
		if (pos == stop && stop != pdu_len - 1)
			return trace_seq_puts(s, " ..) ");
	}

	return trace_seq_puts(s, ") ");
}
128
/*
 * Convert a byte count into a count of 512-byte units (bytes / 512,
 * rounded down).
 *
 * The argument is unsigned, matching the unsigned "bytes" value every
 * caller passes in.  A signed parameter turned counts of 2GiB and above
 * into negative numbers, and the right shift then smeared the sign bit
 * into the result.
 */
static unsigned int t_sec(unsigned int bytes)
{
	return bytes >> 9;
}
133
/*
 * Convert a 32-bit big-endian value to host byte order.
 *
 * The value is returned untouched when tracecmd_host_bigendian() reports
 * a big-endian host; otherwise its four bytes are reversed.
 */
static unsigned int be32_to_cpu(unsigned int val)
{
	if (tracecmd_host_bigendian())
		return val;

	return ((val & 0x000000ffU) << 24) |
	       ((val & 0x0000ff00U) << 8) |
	       ((val & 0x00ff0000U) >> 8) |
	       ((val & 0xff000000U) >> 24);
}
148
/*
 * Convert a 64-bit big-endian value to host byte order.
 *
 * The value is returned untouched when tracecmd_host_bigendian() reports
 * a big-endian host; otherwise its eight bytes are reversed.
 */
static unsigned long long be64_to_cpu(unsigned long long val)
{
	unsigned long long reversed = 0;
	int byte;

	if (tracecmd_host_bigendian())
		return val;

	/* peel bytes off the low end and push them in from the low end */
	for (byte = 0; byte < 8; byte++) {
		reversed = (reversed << 8) | (val & 0xffULL);
		val >>= 8;
	}

	return reversed;
}
167
/*
 * Read the 64-bit big-endian integer stored at @data and return it in
 * host byte order.
 *
 * The bytes are copied out with memcpy() instead of dereferencing @data
 * as an unsigned long long: @data points into the middle of a raw record,
 * so nothing guarantees it is suitably aligned for a 64-bit load.
 */
static unsigned long long get_pdu_int(void *data)
{
	unsigned long long raw;

	memcpy(&raw, data, sizeof(raw));
	return be64_to_cpu(raw);
}
173
174static void get_pdu_remap(void *pdu_data,
175 struct blk_io_trace_remap *r)
176{
177 const struct blk_io_trace_remap *__r = pdu_data;
178 unsigned long long sector_from = __r->sector_from;
179
180 r->device_from = be32_to_cpu(__r->device_from);
181 r->device_to = be32_to_cpu(__r->device_to);
182 r->sector_from = be64_to_cpu(sector_from);
183}
184
185static int blk_log_remap(struct trace_seq *s, struct blk_data *data)
186{
187 struct blk_io_trace_remap r = { .device_from = 0, };
188
189 get_pdu_remap(data->pdu_data, &r);
190 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
191 data->sector, t_sec(data->bytes),
192 MAJOR(r.device_from), MINOR(r.device_from),
193 (unsigned long long)r.sector_from);
194}
195
196static int blk_log_split(struct trace_seq *s, struct blk_data *data)
197{
198 const char *cmd;
199
200 cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid);
201
202 return trace_seq_printf(s, "%llu / %llu [%s]\n", data->sector,
203 get_pdu_int(data->pdu_data), cmd);
204}
205
206static int blk_log_plug(struct trace_seq *s, struct blk_data *data)
207{
208 const char *cmd;
209
210 cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid);
211
212 return trace_seq_printf(s, "[%s]\n", cmd);
213}
214
215static int blk_log_unplug(struct trace_seq *s, struct blk_data *data)
216{
217 const char *cmd;
218
219 cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid);
220
221 return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(data->pdu_data));
222}
223
224static int blk_log_with_error(struct trace_seq *s, struct blk_data *data)
225{
226 if (data->action & BLK_TC_ACT(BLK_TC_PC)) {
227 blk_log_dump_pdu(s, data->pdu_data, data->pdu_len);
228 trace_seq_printf(s, "[%d]\n", data->error);
229 return 0;
230 } else {
231 if (t_sec(data->bytes))
232 return trace_seq_printf(s, "%llu + %u [%d]\n",
233 data->sector,
234 t_sec(data->bytes),
235 data->error);
236 return trace_seq_printf(s, "%llu [%d]\n",
237 data->sector, data->error);
238 }
239}
240
241static int blk_log_generic(struct trace_seq *s, struct blk_data *data)
242{
243 const char *cmd;
244
245 cmd = pevent_data_comm_from_pid(data->event->pevent, data->pid);
246
247 if (data->action & BLK_TC_ACT(BLK_TC_PC)) {
248 int ret;
249
250 ret = trace_seq_printf(s, "%u ", data->bytes);
251 if (!ret)
252 return 0;
253 ret = blk_log_dump_pdu(s, data->pdu_data, data->pdu_len);
254 if (!ret)
255 return 0;
256 return trace_seq_printf(s, "[%s]\n", cmd);
257 } else {
258 if (t_sec(data->bytes))
259 return trace_seq_printf(s, "%llu + %u [%s]\n",
260 data->sector,
261 t_sec(data->bytes), cmd);
262 return trace_seq_printf(s, "[%s]\n", cmd);
263 }
264}
265
266static const struct {
267 const char *act[2];
268 int (*print)(struct trace_seq *s, struct blk_data *data);
269} what2act[] = {
270 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
271 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
272 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
273 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
274 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
275 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
276 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
277 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
278 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
279 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
280 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
281 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
282 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
283 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
284 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
285};
286
287static int blktrace_handler(struct trace_seq *s, struct record *record,
288 struct event_format *event, void *context)
289{
290 struct format_field *field;
291 unsigned long long val;
292 void *data = record->data;
293 struct blk_data blk_data;
294 unsigned short what;
295 int long_act = 0;
296
297 field = pevent_find_field(event, "action");
298 if (!field)
299 return 1;
300 if (pevent_read_number_field(field, data, &val))
301 return 1;
302 blk_data.action = val;
303
304 field = pevent_find_field(event, "bytes");
305 if (!field)
306 return 1;
307 if (pevent_read_number_field(field, data, &val))
308 return 1;
309 blk_data.bytes = val;
310
311 field = pevent_find_field(event, "device");
312 if (!field)
313 return 1;
314 if (pevent_read_number_field(field, data, &val))
315 return 1;
316 blk_data.device = val;
317
318 field = pevent_find_field(event, "pdu_len");
319 if (!field)
320 return 1;
321 if (pevent_read_number_field(field, data, &val))
322 return 1;
323 blk_data.pdu_len = val;
324
325 field = pevent_find_field(event, "data");
326 if (!field)
327 return 1;
328 blk_data.pdu_data = data + field->offset;
329
330 field = pevent_find_field(event, "sector");
331 if (!field)
332 return 1;
333 if (pevent_read_number_field(field, data, &blk_data.sector))
334 return 1;
335
336 field = pevent_find_field(event, "pid");
337 if (!field)
338 return 1;
339 if (pevent_read_number_field(field, data, &val))
340 return 1;
341 blk_data.pid = val;
342
343 field = pevent_find_field(event, "error");
344 if (!field)
345 return 1;
346 if (pevent_read_number_field(field, data, &val))
347 return 1;
348 blk_data.error = val;
349
350 blk_data.event = event;
351
352
353 what = blk_data.action & ((1 << BLK_TC_SHIFT) - 1);
354
355 if (blk_data.action == BLK_TN_MESSAGE) {
356 log_action(s, &blk_data, "m");
357 blk_log_msg(s, blk_data.pdu_data, blk_data.pdu_len);
358 goto out;
359 }
360
361 if (what == 0 || what >= ARRAY_SIZE(what2act))
362 trace_seq_printf(s, "Unknown action %x\n", what);
363 else {
364 log_action(s, &blk_data, what2act[what].act[long_act]);
365 what2act[what].print(s, &blk_data);
366 }
367
368 out:
369 return 0;
370}
371
372int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
373{
374 pevent_register_event_handler(pevent, -1, "ftrace", "blktrace",
375 blktrace_handler, NULL);
376 return 0;
377}
diff --git a/trace-blk-hack.c b/trace-blk-hack.c
new file mode 100644
index 0000000..29c3c74
--- /dev/null
+++ b/trace-blk-hack.c
@@ -0,0 +1,175 @@
1/*
2 * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
3 *
4 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation;
8 * version 2.1 of the License (not later!)
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */
21#include <stdio.h>
22#include "trace-cmd.h"
23#include "trace-local.h"
24
/*
 * First part of the synthesized "blktrace" event format: the event name,
 * its ID and the common fields every ftrace event starts with.
 *
 * This is a printf-style format; the single %d receives the event ID.
 */
static const char blk_event_start[] =
	"name: blktrace\n"
	"ID: %d\n"
	"format:\n"
	"\tfield:unsigned short common_type;\toffset:0;\tsize:2;\n"
	"\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\n"
	"\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\n"
	"\tfield:int common_pid;\toffset:4;\tsize:4;\n";
33
/*
 * Second part of the synthesized "blktrace" event format: the
 * blktrace-specific fields and the print format line.
 *
 * This text is also run through a printf-style formatter, which is why
 * the percent sign in the print format is written as "%%".
 */
static const char blk_body[] =
	"\n"
	"\tfield:u64 sector;\toffset:16;\tsize:8;\n"
	"\tfield:int bytes;\toffset:24;\tsize:4;\n"
	"\tfield:int action;\toffset:28;\tsize:4;\n"
	"\tfield:int pid;\toffset:32;\tsize:4;\n"
	"\tfield:int device;\toffset:36;\tsize:4;\n"
	"\tfield:int cpu;\toffset:40;\tsize:4;\n"
	"\tfield:short error;\toffset:44;\tsize:2;\n"
	"\tfield:short pdu_len;\toffset:46;\tsize:2;\n"
	"\tfield:void data;\toffset:48;\tsize:0;\n"
	"\n"
	"print fmt: \"%%d\", REC->pid\n";
46
47int tracecmd_blk_hack(struct tracecmd_input *handle)
48{
49 struct pevent *pevent;
50 struct event_format *event;
51 struct format_field *field;
52 char buf[4096]; /* way more than enough! */
53 int id;
54 int l;
55 int r;
56 int i;
57
58 pevent = tracecmd_get_pevent(handle);
59
60 /*
61 * Unfortunately, the TRACE_BLK has changed a bit.
62 * We need to test if various events exist to try
63 * to guess what event id TRACE_BLK would be.
64 */
65
66 /* It was originally behind the "power" event */
67 event = pevent_find_event_by_name(pevent, "ftrace", "power");
68 if (event) {
69 id = event->id + 1;
70 goto found;
71 }
72
73 /*
74 * But the power tracer is now in perf.
75 * Then it was after kmem_free
76 */
77 event = pevent_find_event_by_name(pevent, "ftrace", "kmem_free");
78 if (event) {
79 id = event->id + 1;
80 goto found;
81 }
82
83 /*
84 * But that then went away.
85 * Currently it should be behind the user stack.
86 */
87 event = pevent_find_event_by_name(pevent, "ftrace", "user_stack");
88 if (event) {
89 id = event->id + 1;
90 goto found;
91 }
92 /* Give up :( */
93 return -1;
94
95 found:
96 /*
97 * Blk events are not exported in the events directory.
98 * This is a hack to attempt to create a block event
99 * that we can read.
100 *
101 * We'll make a format file to look like this:
102 *
103 * name: blktrace
104 * ID: 13
105 * format:
106 * field:unsigned short common_type; offset:0; size:2;
107 * field:unsigned char common_flags; offset:2; size:1;
108 * field:unsigned char common_preempt_count; offset:3; size:1;
109 * field:int common_pid; offset:4; size:4;
110 * field:int common_lock_depth; offset:8; size:4;
111 *
112 * field:u64 sector; offset:16; size:8;
113 * field:int bytes; offset:32; size:4;
114 * field:int action; offset:36; size:4;
115 * field:int pid; offset:40; size:4;
116 * field:int device; offset:44; size:4;
117 * field:int cpu; offset:48; size:4;
118 * field:short error; offset:52; size:2;
119 * field:short pdu_len; offset:54; size:2;
120 * field:void data; offset:60; size:0;
121 *
122 * print fmt: "%d", REC->pid
123 *
124 * Note: the struct blk_io_trace is used directly and
125 * just the first parts of the struct are not used in order
126 * to not write over the ftrace data.
127 */
128
129 /* search for a ftrace event */
130 for (i = 0; i < 13; i++) {
131 event = pevent_find_event(pevent, i);
132 if (event)
133 break;
134 }
135 if (!event)
136 goto fail;
137
138 /* Make sure the common fields exist */
139 field = pevent_find_common_field(event, "common_type");
140 if (!field || field->offset != 0 || field->size != 2)
141 goto fail;
142 field = pevent_find_common_field(event, "common_flags");
143 if (!field || field->offset != 2 || field->size != 1)
144 goto fail;
145 field = pevent_find_common_field(event, "common_preempt_count");
146 if (!field || field->offset != 3 || field->size != 1)
147 goto fail;
148 field = pevent_find_common_field(event, "common_pid");
149 if (!field || field->offset != 4 || field->size != 4)
150 goto fail;
151 r = sprintf(buf, blk_event_start, id);
152 l = r;
153
154 /* lock depth is optional */
155 field = pevent_find_common_field(event, "common_lock_depth");
156 if (field) {
157 if (field->offset != 8 || field->size != 4)
158 return -1;
159 r = sprintf(buf+l, "\tfield:int common_lock_depth;\toffset:8;\tsize:4;\n");
160 l += r;
161 }
162
163 r = sprintf(buf+l, blk_body);
164
165 /* Parse this event */
166 l += r;
167 pevent_parse_event(pevent, buf, l, "ftrace");
168
169 return 0;
170
171 fail:
172 exit(0);
173 printf("failed!\n");
174 return -1;
175}
diff --git a/trace-cmd.h b/trace-cmd.h
index b3cc6b4..9063c8c 100644
--- a/trace-cmd.h
+++ b/trace-cmd.h
@@ -214,4 +214,7 @@ void trace_util_load_plugins(struct pevent *pevent, const char *suffix,
214 void *data), 214 void *data),
215 void *data); 215 void *data);
216 216
217/* --- Hack! --- */
218int tracecmd_blk_hack(struct tracecmd_input *handle);
219
217#endif /* _TRACE_CMD_H */ 220#endif /* _TRACE_CMD_H */
diff --git a/trace-input.c b/trace-input.c
index b11392e..c3b58b1 100644
--- a/trace-input.c
+++ b/trace-input.c
@@ -2089,6 +2089,8 @@ int tracecmd_init_data(struct tracecmd_input *handle)
2089 return -1; 2089 return -1;
2090 } 2090 }
2091 2091
2092 tracecmd_blk_hack(handle);
2093
2092 return 0; 2094 return 0;
2093} 2095}
2094 2096