diff options
| author | Li Zefan <lizf@cn.fujitsu.com> | 2009-06-09 01:43:05 -0400 |
|---|---|---|
| committer | Steven Rostedt <rostedt@goodmis.org> | 2009-06-09 12:34:23 -0400 |
| commit | 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 (patch) | |
| tree | c7ccabae20e27bbeb08b69a358e8b86c98d1d9f3 /kernel | |
| parent | f57a8a1911342265e7acdc190333c4e9235a6632 (diff) | |
tracing/events: convert block trace points to TRACE_EVENT()
TRACE_EVENT is a more generic way to define tracepoints. Doing so adds
these new capabilities to this tracepoint:
- zero-copy and per-cpu splice() tracing
- binary tracing without printf overhead
- structured logging records exposed under /debug/tracing/events
- trace events embedded in function tracer output and other plugins
- user-defined, per tracepoint filter expressions
...
Cons:
- no dev_t info for the output of plug, unplug_timer and unplug_io events.
no dev_t info for getrq and sleeprq events if bio == NULL.
no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL.
This is mainly because we can't get the deivce from a request queue.
But this may change in the future.
- A packet command is converted to a string in TP_assign, not TP_print.
While blktrace do the convertion just before output.
Since pc requests should be rather rare, this is not a big issue.
- In blktrace, an event can have 2 different print formats, but a TRACE_EVENT
has a unique format, which means we have some unused data in a trace entry.
The overhead is minimized by using __dynamic_array() instead of __array().
I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing:
dd dd + ioctl blktrace dd + TRACE_EVENT (splice)
1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s
2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s
3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s
So the overhead of tracing is very small, and no regression when using
those trace events vs blktrace.
And the binary output of TRACE_EVENT is much smaller than blktrace:
# ls -l -h
-rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0
-rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1
-rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out
Following are some comparisons between TRACE_EVENT and blktrace:
plug:
kjournald-480 [000] 303.084981: block_plug: [kjournald]
kjournald-480 [000] 303.084981: 8,0 P N [kjournald]
unplug_io:
kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1
kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1
remap:
kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384
kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384
bio_backmerge:
kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald]
kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald]
getrq:
kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald]
kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald]
bash-2066 [001] 1072.953770: 8,0 G N [bash]
bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash]
rq_complete:
konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0]
konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0]
ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0]
ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]
rq_insert:
kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald]
kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald]
Changelog from v2 -> v3:
- use the newly introduced __dynamic_array().
Changelog from v1 -> v2:
- use __string() instead of __array() to minimize the memory required
to store hex dump of rq->cmd().
- support large pc requests.
- add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT.
- some cleanups.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/trace/Makefile | 5 | ||||
| -rw-r--r-- | kernel/trace/blktrace.c | 78 |
2 files changed, 81 insertions, 2 deletions
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 06b85850fab4..844164dca90a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
| @@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o | |||
| 45 | obj-$(CONFIG_POWER_TRACER) += trace_power.o | 45 | obj-$(CONFIG_POWER_TRACER) += trace_power.o |
| 46 | obj-$(CONFIG_KMEMTRACE) += kmemtrace.o | 46 | obj-$(CONFIG_KMEMTRACE) += kmemtrace.o |
| 47 | obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o | 47 | obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o |
| 48 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o | 48 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o |
| 49 | ifeq ($(CONFIG_BLOCK),y) | ||
| 50 | obj-$(CONFIG_EVENT_TRACING) += blktrace.o | ||
| 51 | endif | ||
| 49 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o | 52 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o |
| 50 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o | 53 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o |
| 51 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 54 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e3abf55bc8e5..7bd6a9893c24 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -23,10 +23,14 @@ | |||
| 23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
| 24 | #include <linux/debugfs.h> | 24 | #include <linux/debugfs.h> |
| 25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
| 26 | #include <trace/block.h> | ||
| 27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
| 27 | |||
| 28 | #include <trace/events/block.h> | ||
| 29 | |||
| 28 | #include "trace_output.h" | 30 | #include "trace_output.h" |
| 29 | 31 | ||
| 32 | #ifdef CONFIG_BLK_DEV_IO_TRACE | ||
| 33 | |||
| 30 | static unsigned int blktrace_seq __read_mostly = 1; | 34 | static unsigned int blktrace_seq __read_mostly = 1; |
| 31 | 35 | ||
| 32 | static struct trace_array *blk_tr; | 36 | static struct trace_array *blk_tr; |
| @@ -1658,3 +1662,75 @@ int blk_trace_init_sysfs(struct device *dev) | |||
| 1658 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); | 1662 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); |
| 1659 | } | 1663 | } |
| 1660 | 1664 | ||
| 1665 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ | ||
| 1666 | |||
| 1667 | #ifdef CONFIG_EVENT_TRACING | ||
| 1668 | |||
| 1669 | void blk_dump_cmd(char *buf, struct request *rq) | ||
| 1670 | { | ||
| 1671 | int i, end; | ||
| 1672 | int len = rq->cmd_len; | ||
| 1673 | unsigned char *cmd = rq->cmd; | ||
| 1674 | |||
| 1675 | if (!blk_pc_request(rq)) { | ||
| 1676 | buf[0] = '\0'; | ||
| 1677 | return; | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | for (end = len - 1; end >= 0; end--) | ||
| 1681 | if (cmd[end]) | ||
| 1682 | break; | ||
| 1683 | end++; | ||
| 1684 | |||
| 1685 | for (i = 0; i < len; i++) { | ||
| 1686 | buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); | ||
| 1687 | if (i == end && end != len - 1) { | ||
| 1688 | sprintf(buf, " .."); | ||
| 1689 | break; | ||
| 1690 | } | ||
| 1691 | } | ||
| 1692 | } | ||
| 1693 | |||
| 1694 | void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | ||
| 1695 | { | ||
| 1696 | int i = 0; | ||
| 1697 | |||
| 1698 | if (rw & WRITE) | ||
| 1699 | rwbs[i++] = 'W'; | ||
| 1700 | else if (rw & 1 << BIO_RW_DISCARD) | ||
| 1701 | rwbs[i++] = 'D'; | ||
| 1702 | else if (bytes) | ||
| 1703 | rwbs[i++] = 'R'; | ||
| 1704 | else | ||
| 1705 | rwbs[i++] = 'N'; | ||
| 1706 | |||
| 1707 | if (rw & 1 << BIO_RW_AHEAD) | ||
| 1708 | rwbs[i++] = 'A'; | ||
| 1709 | if (rw & 1 << BIO_RW_BARRIER) | ||
| 1710 | rwbs[i++] = 'B'; | ||
| 1711 | if (rw & 1 << BIO_RW_SYNCIO) | ||
| 1712 | rwbs[i++] = 'S'; | ||
| 1713 | if (rw & 1 << BIO_RW_META) | ||
| 1714 | rwbs[i++] = 'M'; | ||
| 1715 | |||
| 1716 | rwbs[i] = '\0'; | ||
| 1717 | } | ||
| 1718 | |||
| 1719 | void blk_fill_rwbs_rq(char *rwbs, struct request *rq) | ||
| 1720 | { | ||
| 1721 | int rw = rq->cmd_flags & 0x03; | ||
| 1722 | int bytes; | ||
| 1723 | |||
| 1724 | if (blk_discard_rq(rq)) | ||
| 1725 | rw |= (1 << BIO_RW_DISCARD); | ||
| 1726 | |||
| 1727 | if (blk_pc_request(rq)) | ||
| 1728 | bytes = rq->data_len; | ||
| 1729 | else | ||
| 1730 | bytes = rq->hard_nr_sectors << 9; | ||
| 1731 | |||
| 1732 | blk_fill_rwbs(rwbs, rw, bytes); | ||
| 1733 | } | ||
| 1734 | |||
| 1735 | #endif /* CONFIG_EVENT_TRACING */ | ||
| 1736 | |||
