diff options
28 files changed, 1312 insertions, 366 deletions
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 57b33edb7ce3..61df77532120 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -410,7 +410,6 @@ int ftrace_disable_ftrace_graph_caller(void) | |||
| 410 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | 410 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) |
| 411 | { | 411 | { |
| 412 | unsigned long old; | 412 | unsigned long old; |
| 413 | unsigned long long calltime; | ||
| 414 | int faulted; | 413 | int faulted; |
| 415 | struct ftrace_graph_ent trace; | 414 | struct ftrace_graph_ent trace; |
| 416 | unsigned long return_hooker = (unsigned long) | 415 | unsigned long return_hooker = (unsigned long) |
| @@ -453,10 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | |||
| 453 | return; | 452 | return; |
| 454 | } | 453 | } |
| 455 | 454 | ||
| 456 | calltime = trace_clock_local(); | 455 | if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { |
| 457 | |||
| 458 | if (ftrace_push_return_trace(old, calltime, | ||
| 459 | self_addr, &trace.depth) == -EBUSY) { | ||
| 460 | *parent = old; | 456 | *parent = old; |
| 461 | return; | 457 | return; |
| 462 | } | 458 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 4558dd3918cf..759095d53a06 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -638,13 +638,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
| 638 | #else | 638 | #else |
| 639 | " pushf\n" | 639 | " pushf\n" |
| 640 | /* | 640 | /* |
| 641 | * Skip cs, ip, orig_ax. | 641 | * Skip cs, ip, orig_ax and gs. |
| 642 | * trampoline_handler() will plug in these values | 642 | * trampoline_handler() will plug in these values |
| 643 | */ | 643 | */ |
| 644 | " subl $12, %esp\n" | 644 | " subl $16, %esp\n" |
| 645 | " pushl %fs\n" | 645 | " pushl %fs\n" |
| 646 | " pushl %ds\n" | ||
| 647 | " pushl %es\n" | 646 | " pushl %es\n" |
| 647 | " pushl %ds\n" | ||
| 648 | " pushl %eax\n" | 648 | " pushl %eax\n" |
| 649 | " pushl %ebp\n" | 649 | " pushl %ebp\n" |
| 650 | " pushl %edi\n" | 650 | " pushl %edi\n" |
| @@ -655,10 +655,10 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
| 655 | " movl %esp, %eax\n" | 655 | " movl %esp, %eax\n" |
| 656 | " call trampoline_handler\n" | 656 | " call trampoline_handler\n" |
| 657 | /* Move flags to cs */ | 657 | /* Move flags to cs */ |
| 658 | " movl 52(%esp), %edx\n" | 658 | " movl 56(%esp), %edx\n" |
| 659 | " movl %edx, 48(%esp)\n" | 659 | " movl %edx, 52(%esp)\n" |
| 660 | /* Replace saved flags with true return address. */ | 660 | /* Replace saved flags with true return address. */ |
| 661 | " movl %eax, 52(%esp)\n" | 661 | " movl %eax, 56(%esp)\n" |
| 662 | " popl %ebx\n" | 662 | " popl %ebx\n" |
| 663 | " popl %ecx\n" | 663 | " popl %ecx\n" |
| 664 | " popl %edx\n" | 664 | " popl %edx\n" |
| @@ -666,8 +666,8 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
| 666 | " popl %edi\n" | 666 | " popl %edi\n" |
| 667 | " popl %ebp\n" | 667 | " popl %ebp\n" |
| 668 | " popl %eax\n" | 668 | " popl %eax\n" |
| 669 | /* Skip ip, orig_ax, es, ds, fs */ | 669 | /* Skip ds, es, fs, gs, orig_ax and ip */ |
| 670 | " addl $20, %esp\n" | 670 | " addl $24, %esp\n" |
| 671 | " popf\n" | 671 | " popf\n" |
| 672 | #endif | 672 | #endif |
| 673 | " ret\n"); | 673 | " ret\n"); |
| @@ -691,6 +691,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 691 | regs->cs = __KERNEL_CS; | 691 | regs->cs = __KERNEL_CS; |
| 692 | #else | 692 | #else |
| 693 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | 693 | regs->cs = __KERNEL_CS | get_kernel_rpl(); |
| 694 | regs->gs = 0; | ||
| 694 | #endif | 695 | #endif |
| 695 | regs->ip = trampoline_address; | 696 | regs->ip = trampoline_address; |
| 696 | regs->orig_ax = ~0UL; | 697 | regs->orig_ax = ~0UL; |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 81ae9ea3c6e1..0662ba6de85a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | 30 | ||
| 31 | static struct vfsmount *debugfs_mount; | 31 | static struct vfsmount *debugfs_mount; |
| 32 | static int debugfs_mount_count; | 32 | static int debugfs_mount_count; |
| 33 | static bool debugfs_registered; | ||
| 33 | 34 | ||
| 34 | static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) | 35 | static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) |
| 35 | { | 36 | { |
| @@ -496,6 +497,16 @@ exit: | |||
| 496 | } | 497 | } |
| 497 | EXPORT_SYMBOL_GPL(debugfs_rename); | 498 | EXPORT_SYMBOL_GPL(debugfs_rename); |
| 498 | 499 | ||
| 500 | /** | ||
| 501 | * debugfs_initialized - Tells whether debugfs has been registered | ||
| 502 | */ | ||
| 503 | bool debugfs_initialized(void) | ||
| 504 | { | ||
| 505 | return debugfs_registered; | ||
| 506 | } | ||
| 507 | EXPORT_SYMBOL_GPL(debugfs_initialized); | ||
| 508 | |||
| 509 | |||
| 499 | static struct kobject *debug_kobj; | 510 | static struct kobject *debug_kobj; |
| 500 | 511 | ||
| 501 | static int __init debugfs_init(void) | 512 | static int __init debugfs_init(void) |
| @@ -509,11 +520,16 @@ static int __init debugfs_init(void) | |||
| 509 | retval = register_filesystem(&debug_fs_type); | 520 | retval = register_filesystem(&debug_fs_type); |
| 510 | if (retval) | 521 | if (retval) |
| 511 | kobject_put(debug_kobj); | 522 | kobject_put(debug_kobj); |
| 523 | else | ||
| 524 | debugfs_registered = true; | ||
| 525 | |||
| 512 | return retval; | 526 | return retval; |
| 513 | } | 527 | } |
| 514 | 528 | ||
| 515 | static void __exit debugfs_exit(void) | 529 | static void __exit debugfs_exit(void) |
| 516 | { | 530 | { |
| 531 | debugfs_registered = false; | ||
| 532 | |||
| 517 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | 533 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); |
| 518 | unregister_filesystem(&debug_fs_type); | 534 | unregister_filesystem(&debug_fs_type); |
| 519 | kobject_put(debug_kobj); | 535 | kobject_put(debug_kobj); |
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index af0e01d4c663..eb5c2ba2f81a 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h | |||
| @@ -71,6 +71,9 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode, | |||
| 71 | struct dentry *debugfs_create_blob(const char *name, mode_t mode, | 71 | struct dentry *debugfs_create_blob(const char *name, mode_t mode, |
| 72 | struct dentry *parent, | 72 | struct dentry *parent, |
| 73 | struct debugfs_blob_wrapper *blob); | 73 | struct debugfs_blob_wrapper *blob); |
| 74 | |||
| 75 | bool debugfs_initialized(void); | ||
| 76 | |||
| 74 | #else | 77 | #else |
| 75 | 78 | ||
| 76 | #include <linux/err.h> | 79 | #include <linux/err.h> |
| @@ -183,6 +186,11 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, | |||
| 183 | return ERR_PTR(-ENODEV); | 186 | return ERR_PTR(-ENODEV); |
| 184 | } | 187 | } |
| 185 | 188 | ||
| 189 | static inline bool debugfs_initialized(void) | ||
| 190 | { | ||
| 191 | return false; | ||
| 192 | } | ||
| 193 | |||
| 186 | #endif | 194 | #endif |
| 187 | 195 | ||
| 188 | #endif | 196 | #endif |
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db3fed630db3..015a3d22cf74 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h | |||
| @@ -145,9 +145,15 @@ enum { | |||
| 145 | }; | 145 | }; |
| 146 | 146 | ||
| 147 | struct dyn_ftrace { | 147 | struct dyn_ftrace { |
| 148 | unsigned long ip; /* address of mcount call-site */ | 148 | union { |
| 149 | unsigned long flags; | 149 | unsigned long ip; /* address of mcount call-site */ |
| 150 | struct dyn_arch_ftrace arch; | 150 | struct dyn_ftrace *freelist; |
| 151 | }; | ||
| 152 | union { | ||
| 153 | unsigned long flags; | ||
| 154 | struct dyn_ftrace *newlist; | ||
| 155 | }; | ||
| 156 | struct dyn_arch_ftrace arch; | ||
| 151 | }; | 157 | }; |
| 152 | 158 | ||
| 153 | int ftrace_force_update(void); | 159 | int ftrace_force_update(void); |
| @@ -369,8 +375,7 @@ struct ftrace_ret_stack { | |||
| 369 | extern void return_to_handler(void); | 375 | extern void return_to_handler(void); |
| 370 | 376 | ||
| 371 | extern int | 377 | extern int |
| 372 | ftrace_push_return_trace(unsigned long ret, unsigned long long time, | 378 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); |
| 373 | unsigned long func, int *depth); | ||
| 374 | extern void | 379 | extern void |
| 375 | ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); | 380 | ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); |
| 376 | 381 | ||
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 9e6052bd1a1c..e1b7b2173885 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h | |||
| @@ -18,10 +18,13 @@ struct ring_buffer_event { | |||
| 18 | /** | 18 | /** |
| 19 | * enum ring_buffer_type - internal ring buffer types | 19 | * enum ring_buffer_type - internal ring buffer types |
| 20 | * | 20 | * |
| 21 | * @RINGBUF_TYPE_PADDING: Left over page padding | 21 | * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event |
| 22 | * array is ignored | 22 | * If time_delta is 0: |
| 23 | * size is variable depending on how much | 23 | * array is ignored |
| 24 | * size is variable depending on how much | ||
| 24 | * padding is needed | 25 | * padding is needed |
| 26 | * If time_delta is non zero: | ||
| 27 | * everything else same as RINGBUF_TYPE_DATA | ||
| 25 | * | 28 | * |
| 26 | * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta | 29 | * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta |
| 27 | * array[0] = time delta (28 .. 59) | 30 | * array[0] = time delta (28 .. 59) |
| @@ -65,6 +68,8 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) | |||
| 65 | return event->time_delta; | 68 | return event->time_delta; |
| 66 | } | 69 | } |
| 67 | 70 | ||
| 71 | void ring_buffer_event_discard(struct ring_buffer_event *event); | ||
| 72 | |||
| 68 | /* | 73 | /* |
| 69 | * size is in bytes for each per CPU buffer. | 74 | * size is in bytes for each per CPU buffer. |
| 70 | */ | 75 | */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 89cd308cc7a5..471e36d30123 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -1409,6 +1409,8 @@ struct task_struct { | |||
| 1409 | int curr_ret_stack; | 1409 | int curr_ret_stack; |
| 1410 | /* Stack of return addresses for return function tracing */ | 1410 | /* Stack of return addresses for return function tracing */ |
| 1411 | struct ftrace_ret_stack *ret_stack; | 1411 | struct ftrace_ret_stack *ret_stack; |
| 1412 | /* time stamp for last schedule */ | ||
| 1413 | unsigned long long ftrace_timestamp; | ||
| 1412 | /* | 1414 | /* |
| 1413 | * Number of functions that haven't been traced | 1415 | * Number of functions that haven't been traced |
| 1414 | * because of depth overrun. | 1416 | * because of depth overrun. |
diff --git a/init/main.c b/init/main.c index 20d784ab5ef8..b0097d2b63ae 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -772,6 +772,7 @@ static void __init do_basic_setup(void) | |||
| 772 | { | 772 | { |
| 773 | rcu_init_sched(); /* needed by module_init stage. */ | 773 | rcu_init_sched(); /* needed by module_init stage. */ |
| 774 | init_workqueues(); | 774 | init_workqueues(); |
| 775 | cpuset_init_smp(); | ||
| 775 | usermodehelper_init(); | 776 | usermodehelper_init(); |
| 776 | driver_init(); | 777 | driver_init(); |
| 777 | init_irq_proc(); | 778 | init_irq_proc(); |
| @@ -865,8 +866,6 @@ static int __init kernel_init(void * unused) | |||
| 865 | smp_init(); | 866 | smp_init(); |
| 866 | sched_init_smp(); | 867 | sched_init_smp(); |
| 867 | 868 | ||
| 868 | cpuset_init_smp(); | ||
| 869 | |||
| 870 | do_basic_setup(); | 869 | do_basic_setup(); |
| 871 | 870 | ||
| 872 | /* | 871 | /* |
diff --git a/kernel/extable.c b/kernel/extable.c index 25d39b0c3a1b..b54a6017b6b5 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 17 | */ | 17 | */ |
| 18 | #include <linux/ftrace.h> | 18 | #include <linux/ftrace.h> |
| 19 | #include <linux/memory.h> | ||
| 19 | #include <linux/module.h> | 20 | #include <linux/module.h> |
| 20 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
| 21 | #include <linux/init.h> | 22 | #include <linux/init.h> |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b0a46f889659..8a4d72931042 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
| @@ -63,7 +63,11 @@ config TRACING | |||
| 63 | # | 63 | # |
| 64 | config TRACING_SUPPORT | 64 | config TRACING_SUPPORT |
| 65 | bool | 65 | bool |
| 66 | depends on TRACE_IRQFLAGS_SUPPORT | 66 | # PPC32 has no irqflags tracing support, but it can use most of the |
| 67 | # tracers anyway, they were tested to build and work. Note that new | ||
| 68 | # exceptions to this list aren't welcomed, better implement the | ||
| 69 | # irqflags tracing for your architecture. | ||
| 70 | depends on TRACE_IRQFLAGS_SUPPORT || PPC32 | ||
| 67 | depends on STACKTRACE_SUPPORT | 71 | depends on STACKTRACE_SUPPORT |
| 68 | default y | 72 | default y |
| 69 | 73 | ||
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e45c206c2f9..2630f5121ec1 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
| @@ -45,5 +45,6 @@ obj-$(CONFIG_EVENT_TRACER) += events.o | |||
| 45 | obj-$(CONFIG_EVENT_TRACER) += trace_export.o | 45 | obj-$(CONFIG_EVENT_TRACER) += trace_export.o |
| 46 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 46 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
| 47 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 47 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o |
| 48 | obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o | ||
| 48 | 49 | ||
| 49 | libftrace-y := ftrace.o | 50 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b171778e3863..947c5b3f90c4 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | static unsigned int blktrace_seq __read_mostly = 1; | 30 | static unsigned int blktrace_seq __read_mostly = 1; |
| 31 | 31 | ||
| 32 | static struct trace_array *blk_tr; | 32 | static struct trace_array *blk_tr; |
| 33 | static int __read_mostly blk_tracer_enabled; | 33 | static bool blk_tracer_enabled __read_mostly; |
| 34 | 34 | ||
| 35 | /* Select an alternative, minimalistic output than the original one */ | 35 | /* Select an alternative, minimalistic output than the original one */ |
| 36 | #define TRACE_BLK_OPT_CLASSIC 0x1 | 36 | #define TRACE_BLK_OPT_CLASSIC 0x1 |
| @@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = { | |||
| 47 | }; | 47 | }; |
| 48 | 48 | ||
| 49 | /* Global reference count of probes */ | 49 | /* Global reference count of probes */ |
| 50 | static DEFINE_MUTEX(blk_probe_mutex); | ||
| 51 | static atomic_t blk_probes_ref = ATOMIC_INIT(0); | 50 | static atomic_t blk_probes_ref = ATOMIC_INIT(0); |
| 52 | 51 | ||
| 53 | static int blk_register_tracepoints(void); | 52 | static void blk_register_tracepoints(void); |
| 54 | static void blk_unregister_tracepoints(void); | 53 | static void blk_unregister_tracepoints(void); |
| 55 | 54 | ||
| 56 | /* | 55 | /* |
| @@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, | |||
| 60 | const void *data, size_t len) | 59 | const void *data, size_t len) |
| 61 | { | 60 | { |
| 62 | struct blk_io_trace *t; | 61 | struct blk_io_trace *t; |
| 62 | struct ring_buffer_event *event = NULL; | ||
| 63 | int pc = 0; | ||
| 64 | int cpu = smp_processor_id(); | ||
| 65 | bool blk_tracer = blk_tracer_enabled; | ||
| 66 | |||
| 67 | if (blk_tracer) { | ||
| 68 | pc = preempt_count(); | ||
| 69 | event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, | ||
| 70 | sizeof(*t) + len, | ||
| 71 | 0, pc); | ||
| 72 | if (!event) | ||
| 73 | return; | ||
| 74 | t = ring_buffer_event_data(event); | ||
| 75 | goto record_it; | ||
| 76 | } | ||
| 63 | 77 | ||
| 64 | if (!bt->rchan) | 78 | if (!bt->rchan) |
| 65 | return; | 79 | return; |
| 66 | 80 | ||
| 67 | t = relay_reserve(bt->rchan, sizeof(*t) + len); | 81 | t = relay_reserve(bt->rchan, sizeof(*t) + len); |
| 68 | if (t) { | 82 | if (t) { |
| 69 | const int cpu = smp_processor_id(); | ||
| 70 | |||
| 71 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; | 83 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; |
| 72 | t->time = ktime_to_ns(ktime_get()); | 84 | t->time = ktime_to_ns(ktime_get()); |
| 85 | record_it: | ||
| 73 | t->device = bt->dev; | 86 | t->device = bt->dev; |
| 74 | t->action = action; | 87 | t->action = action; |
| 75 | t->pid = pid; | 88 | t->pid = pid; |
| 76 | t->cpu = cpu; | 89 | t->cpu = cpu; |
| 77 | t->pdu_len = len; | 90 | t->pdu_len = len; |
| 78 | memcpy((void *) t + sizeof(*t), data, len); | 91 | memcpy((void *) t + sizeof(*t), data, len); |
| 92 | |||
| 93 | if (blk_tracer) | ||
| 94 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); | ||
| 79 | } | 95 | } |
| 80 | } | 96 | } |
| 81 | 97 | ||
| @@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) | |||
| 111 | unsigned long flags; | 127 | unsigned long flags; |
| 112 | char *buf; | 128 | char *buf; |
| 113 | 129 | ||
| 114 | if (blk_tr) { | 130 | if (unlikely(bt->trace_state != Blktrace_running && |
| 115 | va_start(args, fmt); | 131 | !blk_tracer_enabled)) |
| 116 | ftrace_vprintk(fmt, args); | ||
| 117 | va_end(args); | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | |||
| 121 | if (!bt->msg_data) | ||
| 122 | return; | 132 | return; |
| 123 | 133 | ||
| 124 | local_irq_save(flags); | 134 | local_irq_save(flags); |
| @@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
| 148 | /* | 158 | /* |
| 149 | * Data direction bit lookup | 159 | * Data direction bit lookup |
| 150 | */ | 160 | */ |
| 151 | static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), | 161 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
| 152 | BLK_TC_ACT(BLK_TC_WRITE) }; | 162 | BLK_TC_ACT(BLK_TC_WRITE) }; |
| 153 | 163 | ||
| 154 | /* The ilog2() calls fall out because they're constant */ | 164 | /* The ilog2() calls fall out because they're constant */ |
| 155 | #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ | 165 | #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ |
| @@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
| 169 | unsigned long *sequence; | 179 | unsigned long *sequence; |
| 170 | pid_t pid; | 180 | pid_t pid; |
| 171 | int cpu, pc = 0; | 181 | int cpu, pc = 0; |
| 182 | bool blk_tracer = blk_tracer_enabled; | ||
| 172 | 183 | ||
| 173 | if (unlikely(bt->trace_state != Blktrace_running || | 184 | if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) |
| 174 | !blk_tracer_enabled)) | ||
| 175 | return; | 185 | return; |
| 176 | 186 | ||
| 177 | what |= ddir_act[rw & WRITE]; | 187 | what |= ddir_act[rw & WRITE]; |
| @@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
| 186 | return; | 196 | return; |
| 187 | cpu = raw_smp_processor_id(); | 197 | cpu = raw_smp_processor_id(); |
| 188 | 198 | ||
| 189 | if (blk_tr) { | 199 | if (blk_tracer) { |
| 190 | tracing_record_cmdline(current); | 200 | tracing_record_cmdline(current); |
| 191 | 201 | ||
| 192 | pc = preempt_count(); | 202 | pc = preempt_count(); |
| @@ -236,7 +246,7 @@ record_it: | |||
| 236 | if (pdu_len) | 246 | if (pdu_len) |
| 237 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); | 247 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); |
| 238 | 248 | ||
| 239 | if (blk_tr) { | 249 | if (blk_tracer) { |
| 240 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); | 250 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); |
| 241 | return; | 251 | return; |
| 242 | } | 252 | } |
| @@ -248,7 +258,7 @@ record_it: | |||
| 248 | static struct dentry *blk_tree_root; | 258 | static struct dentry *blk_tree_root; |
| 249 | static DEFINE_MUTEX(blk_tree_mutex); | 259 | static DEFINE_MUTEX(blk_tree_mutex); |
| 250 | 260 | ||
| 251 | static void blk_trace_cleanup(struct blk_trace *bt) | 261 | static void blk_trace_free(struct blk_trace *bt) |
| 252 | { | 262 | { |
| 253 | debugfs_remove(bt->msg_file); | 263 | debugfs_remove(bt->msg_file); |
| 254 | debugfs_remove(bt->dropped_file); | 264 | debugfs_remove(bt->dropped_file); |
| @@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt) | |||
| 256 | free_percpu(bt->sequence); | 266 | free_percpu(bt->sequence); |
| 257 | free_percpu(bt->msg_data); | 267 | free_percpu(bt->msg_data); |
| 258 | kfree(bt); | 268 | kfree(bt); |
| 259 | mutex_lock(&blk_probe_mutex); | 269 | } |
| 270 | |||
| 271 | static void blk_trace_cleanup(struct blk_trace *bt) | ||
| 272 | { | ||
| 273 | blk_trace_free(bt); | ||
| 260 | if (atomic_dec_and_test(&blk_probes_ref)) | 274 | if (atomic_dec_and_test(&blk_probes_ref)) |
| 261 | blk_unregister_tracepoints(); | 275 | blk_unregister_tracepoints(); |
| 262 | mutex_unlock(&blk_probe_mutex); | ||
| 263 | } | 276 | } |
| 264 | 277 | ||
| 265 | int blk_trace_remove(struct request_queue *q) | 278 | int blk_trace_remove(struct request_queue *q) |
| @@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q) | |||
| 270 | if (!bt) | 283 | if (!bt) |
| 271 | return -EINVAL; | 284 | return -EINVAL; |
| 272 | 285 | ||
| 273 | if (bt->trace_state == Blktrace_setup || | 286 | if (bt->trace_state != Blktrace_running) |
| 274 | bt->trace_state == Blktrace_stopped) | ||
| 275 | blk_trace_cleanup(bt); | 287 | blk_trace_cleanup(bt); |
| 276 | 288 | ||
| 277 | return 0; | 289 | return 0; |
| @@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
| 414 | if (buts->name[i] == '/') | 426 | if (buts->name[i] == '/') |
| 415 | buts->name[i] = '_'; | 427 | buts->name[i] = '_'; |
| 416 | 428 | ||
| 417 | ret = -ENOMEM; | ||
| 418 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); | 429 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); |
| 419 | if (!bt) | 430 | if (!bt) |
| 420 | goto err; | 431 | return -ENOMEM; |
| 421 | 432 | ||
| 433 | ret = -ENOMEM; | ||
| 422 | bt->sequence = alloc_percpu(unsigned long); | 434 | bt->sequence = alloc_percpu(unsigned long); |
| 423 | if (!bt->sequence) | 435 | if (!bt->sequence) |
| 424 | goto err; | 436 | goto err; |
| @@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
| 429 | 441 | ||
| 430 | ret = -ENOENT; | 442 | ret = -ENOENT; |
| 431 | 443 | ||
| 444 | mutex_lock(&blk_tree_mutex); | ||
| 432 | if (!blk_tree_root) { | 445 | if (!blk_tree_root) { |
| 433 | blk_tree_root = debugfs_create_dir("block", NULL); | 446 | blk_tree_root = debugfs_create_dir("block", NULL); |
| 434 | if (!blk_tree_root) | 447 | if (!blk_tree_root) { |
| 435 | return -ENOMEM; | 448 | mutex_unlock(&blk_tree_mutex); |
| 449 | goto err; | ||
| 450 | } | ||
| 436 | } | 451 | } |
| 452 | mutex_unlock(&blk_tree_mutex); | ||
| 437 | 453 | ||
| 438 | dir = debugfs_create_dir(buts->name, blk_tree_root); | 454 | dir = debugfs_create_dir(buts->name, blk_tree_root); |
| 439 | 455 | ||
| @@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
| 471 | bt->pid = buts->pid; | 487 | bt->pid = buts->pid; |
| 472 | bt->trace_state = Blktrace_setup; | 488 | bt->trace_state = Blktrace_setup; |
| 473 | 489 | ||
| 474 | mutex_lock(&blk_probe_mutex); | ||
| 475 | if (atomic_add_return(1, &blk_probes_ref) == 1) { | ||
| 476 | ret = blk_register_tracepoints(); | ||
| 477 | if (ret) | ||
| 478 | goto probe_err; | ||
| 479 | } | ||
| 480 | mutex_unlock(&blk_probe_mutex); | ||
| 481 | |||
| 482 | ret = -EBUSY; | 490 | ret = -EBUSY; |
| 483 | old_bt = xchg(&q->blk_trace, bt); | 491 | old_bt = xchg(&q->blk_trace, bt); |
| 484 | if (old_bt) { | 492 | if (old_bt) { |
| @@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
| 486 | goto err; | 494 | goto err; |
| 487 | } | 495 | } |
| 488 | 496 | ||
| 497 | if (atomic_inc_return(&blk_probes_ref) == 1) | ||
| 498 | blk_register_tracepoints(); | ||
| 499 | |||
| 489 | return 0; | 500 | return 0; |
| 490 | probe_err: | ||
| 491 | atomic_dec(&blk_probes_ref); | ||
| 492 | mutex_unlock(&blk_probe_mutex); | ||
| 493 | err: | 501 | err: |
| 494 | if (bt) { | 502 | blk_trace_free(bt); |
| 495 | if (bt->msg_file) | ||
| 496 | debugfs_remove(bt->msg_file); | ||
| 497 | if (bt->dropped_file) | ||
| 498 | debugfs_remove(bt->dropped_file); | ||
| 499 | free_percpu(bt->sequence); | ||
| 500 | free_percpu(bt->msg_data); | ||
| 501 | if (bt->rchan) | ||
| 502 | relay_close(bt->rchan); | ||
| 503 | kfree(bt); | ||
| 504 | } | ||
| 505 | return ret; | 503 | return ret; |
| 506 | } | 504 | } |
| 507 | 505 | ||
| @@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q, | |||
| 863 | } | 861 | } |
| 864 | EXPORT_SYMBOL_GPL(blk_add_driver_data); | 862 | EXPORT_SYMBOL_GPL(blk_add_driver_data); |
| 865 | 863 | ||
| 866 | static int blk_register_tracepoints(void) | 864 | static void blk_register_tracepoints(void) |
| 867 | { | 865 | { |
| 868 | int ret; | 866 | int ret; |
| 869 | 867 | ||
| @@ -901,7 +899,6 @@ static int blk_register_tracepoints(void) | |||
| 901 | WARN_ON(ret); | 899 | WARN_ON(ret); |
| 902 | ret = register_trace_block_remap(blk_add_trace_remap); | 900 | ret = register_trace_block_remap(blk_add_trace_remap); |
| 903 | WARN_ON(ret); | 901 | WARN_ON(ret); |
| 904 | return 0; | ||
| 905 | } | 902 | } |
| 906 | 903 | ||
| 907 | static void blk_unregister_tracepoints(void) | 904 | static void blk_unregister_tracepoints(void) |
| @@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void) | |||
| 934 | static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | 931 | static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) |
| 935 | { | 932 | { |
| 936 | int i = 0; | 933 | int i = 0; |
| 934 | int tc = t->action >> BLK_TC_SHIFT; | ||
| 935 | |||
| 936 | if (t->action == BLK_TN_MESSAGE) { | ||
| 937 | rwbs[i++] = 'N'; | ||
| 938 | goto out; | ||
| 939 | } | ||
| 937 | 940 | ||
| 938 | if (t->action & BLK_TC_DISCARD) | 941 | if (tc & BLK_TC_DISCARD) |
| 939 | rwbs[i++] = 'D'; | 942 | rwbs[i++] = 'D'; |
| 940 | else if (t->action & BLK_TC_WRITE) | 943 | else if (tc & BLK_TC_WRITE) |
| 941 | rwbs[i++] = 'W'; | 944 | rwbs[i++] = 'W'; |
| 942 | else if (t->bytes) | 945 | else if (t->bytes) |
| 943 | rwbs[i++] = 'R'; | 946 | rwbs[i++] = 'R'; |
| 944 | else | 947 | else |
| 945 | rwbs[i++] = 'N'; | 948 | rwbs[i++] = 'N'; |
| 946 | 949 | ||
| 947 | if (t->action & BLK_TC_AHEAD) | 950 | if (tc & BLK_TC_AHEAD) |
| 948 | rwbs[i++] = 'A'; | 951 | rwbs[i++] = 'A'; |
| 949 | if (t->action & BLK_TC_BARRIER) | 952 | if (tc & BLK_TC_BARRIER) |
| 950 | rwbs[i++] = 'B'; | 953 | rwbs[i++] = 'B'; |
| 951 | if (t->action & BLK_TC_SYNC) | 954 | if (tc & BLK_TC_SYNC) |
| 952 | rwbs[i++] = 'S'; | 955 | rwbs[i++] = 'S'; |
| 953 | if (t->action & BLK_TC_META) | 956 | if (tc & BLK_TC_META) |
| 954 | rwbs[i++] = 'M'; | 957 | rwbs[i++] = 'M'; |
| 955 | 958 | out: | |
| 956 | rwbs[i] = '\0'; | 959 | rwbs[i] = '\0'; |
| 957 | } | 960 | } |
| 958 | 961 | ||
| @@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent) | |||
| 979 | 982 | ||
| 980 | static inline __u16 t_error(const struct trace_entry *ent) | 983 | static inline __u16 t_error(const struct trace_entry *ent) |
| 981 | { | 984 | { |
| 982 | return te_blk_io_trace(ent)->sector; | 985 | return te_blk_io_trace(ent)->error; |
| 983 | } | 986 | } |
| 984 | 987 | ||
| 985 | static __u64 get_pdu_int(const struct trace_entry *ent) | 988 | static __u64 get_pdu_int(const struct trace_entry *ent) |
| @@ -999,29 +1002,31 @@ static void get_pdu_remap(const struct trace_entry *ent, | |||
| 999 | r->sector = be64_to_cpu(sector); | 1002 | r->sector = be64_to_cpu(sector); |
| 1000 | } | 1003 | } |
| 1001 | 1004 | ||
| 1002 | static int blk_log_action_iter(struct trace_iterator *iter, const char *act) | 1005 | typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); |
| 1006 | |||
| 1007 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | ||
| 1003 | { | 1008 | { |
| 1004 | char rwbs[6]; | 1009 | char rwbs[6]; |
| 1005 | unsigned long long ts = ns2usecs(iter->ts); | 1010 | unsigned long long ts = iter->ts; |
| 1006 | unsigned long usec_rem = do_div(ts, USEC_PER_SEC); | 1011 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); |
| 1007 | unsigned secs = (unsigned long)ts; | 1012 | unsigned secs = (unsigned long)ts; |
| 1008 | const struct trace_entry *ent = iter->ent; | 1013 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
| 1009 | const struct blk_io_trace *t = (const struct blk_io_trace *)ent; | ||
| 1010 | 1014 | ||
| 1011 | fill_rwbs(rwbs, t); | 1015 | fill_rwbs(rwbs, t); |
| 1012 | 1016 | ||
| 1013 | return trace_seq_printf(&iter->seq, | 1017 | return trace_seq_printf(&iter->seq, |
| 1014 | "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", | 1018 | "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", |
| 1015 | MAJOR(t->device), MINOR(t->device), iter->cpu, | 1019 | MAJOR(t->device), MINOR(t->device), iter->cpu, |
| 1016 | secs, usec_rem, ent->pid, act, rwbs); | 1020 | secs, nsec_rem, iter->ent->pid, act, rwbs); |
| 1017 | } | 1021 | } |
| 1018 | 1022 | ||
| 1019 | static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, | 1023 | static int blk_log_action(struct trace_iterator *iter, const char *act) |
| 1020 | const char *act) | ||
| 1021 | { | 1024 | { |
| 1022 | char rwbs[6]; | 1025 | char rwbs[6]; |
| 1026 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | ||
| 1027 | |||
| 1023 | fill_rwbs(rwbs, t); | 1028 | fill_rwbs(rwbs, t); |
| 1024 | return trace_seq_printf(s, "%3d,%-3d %2s %3s ", | 1029 | return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", |
| 1025 | MAJOR(t->device), MINOR(t->device), act, rwbs); | 1030 | MAJOR(t->device), MINOR(t->device), act, rwbs); |
| 1026 | } | 1031 | } |
| 1027 | 1032 | ||
| @@ -1085,6 +1090,17 @@ static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) | |||
| 1085 | get_pdu_int(ent), cmd); | 1090 | get_pdu_int(ent), cmd); |
| 1086 | } | 1091 | } |
| 1087 | 1092 | ||
| 1093 | static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) | ||
| 1094 | { | ||
| 1095 | int ret; | ||
| 1096 | const struct blk_io_trace *t = te_blk_io_trace(ent); | ||
| 1097 | |||
| 1098 | ret = trace_seq_putmem(s, t + 1, t->pdu_len); | ||
| 1099 | if (ret) | ||
| 1100 | return trace_seq_putc(s, '\n'); | ||
| 1101 | return ret; | ||
| 1102 | } | ||
| 1103 | |||
| 1088 | /* | 1104 | /* |
| 1089 | * struct tracer operations | 1105 | * struct tracer operations |
| 1090 | */ | 1106 | */ |
| @@ -1099,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m) | |||
| 1099 | 1115 | ||
| 1100 | static void blk_tracer_start(struct trace_array *tr) | 1116 | static void blk_tracer_start(struct trace_array *tr) |
| 1101 | { | 1117 | { |
| 1102 | mutex_lock(&blk_probe_mutex); | 1118 | blk_tracer_enabled = true; |
| 1103 | if (atomic_add_return(1, &blk_probes_ref) == 1) | ||
| 1104 | if (blk_register_tracepoints()) | ||
| 1105 | atomic_dec(&blk_probes_ref); | ||
| 1106 | mutex_unlock(&blk_probe_mutex); | ||
| 1107 | trace_flags &= ~TRACE_ITER_CONTEXT_INFO; | 1119 | trace_flags &= ~TRACE_ITER_CONTEXT_INFO; |
| 1108 | } | 1120 | } |
| 1109 | 1121 | ||
| @@ -1111,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr) | |||
| 1111 | { | 1123 | { |
| 1112 | blk_tr = tr; | 1124 | blk_tr = tr; |
| 1113 | blk_tracer_start(tr); | 1125 | blk_tracer_start(tr); |
| 1114 | mutex_lock(&blk_probe_mutex); | ||
| 1115 | blk_tracer_enabled++; | ||
| 1116 | mutex_unlock(&blk_probe_mutex); | ||
| 1117 | return 0; | 1126 | return 0; |
| 1118 | } | 1127 | } |
| 1119 | 1128 | ||
| 1120 | static void blk_tracer_stop(struct trace_array *tr) | 1129 | static void blk_tracer_stop(struct trace_array *tr) |
| 1121 | { | 1130 | { |
| 1131 | blk_tracer_enabled = false; | ||
| 1122 | trace_flags |= TRACE_ITER_CONTEXT_INFO; | 1132 | trace_flags |= TRACE_ITER_CONTEXT_INFO; |
| 1123 | mutex_lock(&blk_probe_mutex); | ||
| 1124 | if (atomic_dec_and_test(&blk_probes_ref)) | ||
| 1125 | blk_unregister_tracepoints(); | ||
| 1126 | mutex_unlock(&blk_probe_mutex); | ||
| 1127 | } | 1133 | } |
| 1128 | 1134 | ||
| 1129 | static void blk_tracer_reset(struct trace_array *tr) | 1135 | static void blk_tracer_reset(struct trace_array *tr) |
| 1130 | { | 1136 | { |
| 1131 | if (!atomic_read(&blk_probes_ref)) | ||
| 1132 | return; | ||
| 1133 | |||
| 1134 | mutex_lock(&blk_probe_mutex); | ||
| 1135 | blk_tracer_enabled--; | ||
| 1136 | WARN_ON(blk_tracer_enabled < 0); | ||
| 1137 | mutex_unlock(&blk_probe_mutex); | ||
| 1138 | |||
| 1139 | blk_tracer_stop(tr); | 1137 | blk_tracer_stop(tr); |
| 1140 | } | 1138 | } |
| 1141 | 1139 | ||
| 1142 | static struct { | 1140 | static const struct { |
| 1143 | const char *act[2]; | 1141 | const char *act[2]; |
| 1144 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); | 1142 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); |
| 1145 | } what2act[] __read_mostly = { | 1143 | } what2act[] = { |
| 1146 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, | 1144 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, |
| 1147 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, | 1145 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, |
| 1148 | [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, | 1146 | [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, |
| @@ -1160,29 +1158,48 @@ static struct { | |||
| 1160 | [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, | 1158 | [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, |
| 1161 | }; | 1159 | }; |
| 1162 | 1160 | ||
| 1163 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | 1161 | static enum print_line_t print_one_line(struct trace_iterator *iter, |
| 1164 | int flags) | 1162 | bool classic) |
| 1165 | { | 1163 | { |
| 1166 | struct trace_seq *s = &iter->seq; | 1164 | struct trace_seq *s = &iter->seq; |
| 1167 | const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; | 1165 | const struct blk_io_trace *t; |
| 1168 | const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); | 1166 | u16 what; |
| 1169 | int ret; | 1167 | int ret; |
| 1168 | bool long_act; | ||
| 1169 | blk_log_action_t *log_action; | ||
| 1170 | 1170 | ||
| 1171 | if (!trace_print_context(iter)) | 1171 | t = te_blk_io_trace(iter->ent); |
| 1172 | return TRACE_TYPE_PARTIAL_LINE; | 1172 | what = t->action & ((1 << BLK_TC_SHIFT) - 1); |
| 1173 | long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | ||
| 1174 | log_action = classic ? &blk_log_action_classic : &blk_log_action; | ||
| 1173 | 1175 | ||
| 1174 | if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) | 1176 | if (t->action == BLK_TN_MESSAGE) { |
| 1177 | ret = log_action(iter, long_act ? "message" : "m"); | ||
| 1178 | if (ret) | ||
| 1179 | ret = blk_log_msg(s, iter->ent); | ||
| 1180 | goto out; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) | ||
| 1175 | ret = trace_seq_printf(s, "Bad pc action %x\n", what); | 1184 | ret = trace_seq_printf(s, "Bad pc action %x\n", what); |
| 1176 | else { | 1185 | else { |
| 1177 | const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | 1186 | ret = log_action(iter, what2act[what].act[long_act]); |
| 1178 | ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); | ||
| 1179 | if (ret) | 1187 | if (ret) |
| 1180 | ret = what2act[what].print(s, iter->ent); | 1188 | ret = what2act[what].print(s, iter->ent); |
| 1181 | } | 1189 | } |
| 1182 | 1190 | out: | |
| 1183 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1191 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
| 1184 | } | 1192 | } |
| 1185 | 1193 | ||
| 1194 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | ||
| 1195 | int flags) | ||
| 1196 | { | ||
| 1197 | if (!trace_print_context(iter)) | ||
| 1198 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 1199 | |||
| 1200 | return print_one_line(iter, false); | ||
| 1201 | } | ||
| 1202 | |||
| 1186 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | 1203 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) |
| 1187 | { | 1204 | { |
| 1188 | struct trace_seq *s = &iter->seq; | 1205 | struct trace_seq *s = &iter->seq; |
| @@ -1190,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | |||
| 1190 | const int offset = offsetof(struct blk_io_trace, sector); | 1207 | const int offset = offsetof(struct blk_io_trace, sector); |
| 1191 | struct blk_io_trace old = { | 1208 | struct blk_io_trace old = { |
| 1192 | .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, | 1209 | .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, |
| 1193 | .time = ns2usecs(iter->ts), | 1210 | .time = iter->ts, |
| 1194 | }; | 1211 | }; |
| 1195 | 1212 | ||
| 1196 | if (!trace_seq_putmem(s, &old, offset)) | 1213 | if (!trace_seq_putmem(s, &old, offset)) |
| @@ -1208,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags) | |||
| 1208 | 1225 | ||
| 1209 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) | 1226 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) |
| 1210 | { | 1227 | { |
| 1211 | const struct blk_io_trace *t; | ||
| 1212 | u16 what; | ||
| 1213 | int ret; | ||
| 1214 | |||
| 1215 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) | 1228 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
| 1216 | return TRACE_TYPE_UNHANDLED; | 1229 | return TRACE_TYPE_UNHANDLED; |
| 1217 | 1230 | ||
| 1218 | t = (const struct blk_io_trace *)iter->ent; | 1231 | return print_one_line(iter, true); |
| 1219 | what = t->action & ((1 << BLK_TC_SHIFT) - 1); | ||
| 1220 | |||
| 1221 | if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) | ||
| 1222 | ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); | ||
| 1223 | else { | ||
| 1224 | const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | ||
| 1225 | ret = blk_log_action_iter(iter, what2act[what].act[long_act]); | ||
| 1226 | if (ret) | ||
| 1227 | ret = what2act[what].print(&iter->seq, iter->ent); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
| 1231 | } | 1232 | } |
| 1232 | 1233 | ||
| 1233 | static struct tracer blk_tracer __read_mostly = { | 1234 | static struct tracer blk_tracer __read_mostly = { |
| @@ -1273,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q) | |||
| 1273 | if (bt == NULL) | 1274 | if (bt == NULL) |
| 1274 | return -EINVAL; | 1275 | return -EINVAL; |
| 1275 | 1276 | ||
| 1276 | kfree(bt); | 1277 | if (atomic_dec_and_test(&blk_probes_ref)) |
| 1278 | blk_unregister_tracepoints(); | ||
| 1279 | |||
| 1280 | blk_trace_free(bt); | ||
| 1277 | return 0; | 1281 | return 0; |
| 1278 | } | 1282 | } |
| 1279 | 1283 | ||
| @@ -1283,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q) | |||
| 1283 | static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) | 1287 | static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) |
| 1284 | { | 1288 | { |
| 1285 | struct blk_trace *old_bt, *bt = NULL; | 1289 | struct blk_trace *old_bt, *bt = NULL; |
| 1286 | int ret; | 1290 | int ret = -ENOMEM; |
| 1287 | 1291 | ||
| 1288 | ret = -ENOMEM; | ||
| 1289 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); | 1292 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); |
| 1290 | if (!bt) | 1293 | if (!bt) |
| 1291 | goto err; | 1294 | return -ENOMEM; |
| 1295 | |||
| 1296 | bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); | ||
| 1297 | if (!bt->msg_data) | ||
| 1298 | goto free_bt; | ||
| 1292 | 1299 | ||
| 1293 | bt->dev = dev; | 1300 | bt->dev = dev; |
| 1294 | bt->act_mask = (u16)-1; | 1301 | bt->act_mask = (u16)-1; |
| 1295 | bt->end_lba = -1ULL; | 1302 | bt->end_lba = -1ULL; |
| 1296 | bt->trace_state = Blktrace_running; | ||
| 1297 | 1303 | ||
| 1298 | old_bt = xchg(&q->blk_trace, bt); | 1304 | old_bt = xchg(&q->blk_trace, bt); |
| 1299 | if (old_bt != NULL) { | 1305 | if (old_bt != NULL) { |
| 1300 | (void)xchg(&q->blk_trace, old_bt); | 1306 | (void)xchg(&q->blk_trace, old_bt); |
| 1301 | kfree(bt); | ||
| 1302 | ret = -EBUSY; | 1307 | ret = -EBUSY; |
| 1308 | goto free_bt; | ||
| 1303 | } | 1309 | } |
| 1310 | |||
| 1311 | if (atomic_inc_return(&blk_probes_ref) == 1) | ||
| 1312 | blk_register_tracepoints(); | ||
| 1304 | return 0; | 1313 | return 0; |
| 1305 | err: | 1314 | |
| 1315 | free_bt: | ||
| 1316 | blk_trace_free(bt); | ||
| 1306 | return ret; | 1317 | return ret; |
| 1307 | } | 1318 | } |
| 1308 | 1319 | ||
| @@ -1310,72 +1321,6 @@ err: | |||
| 1310 | * sysfs interface to enable and configure tracing | 1321 | * sysfs interface to enable and configure tracing |
| 1311 | */ | 1322 | */ |
| 1312 | 1323 | ||
| 1313 | static ssize_t sysfs_blk_trace_enable_show(struct device *dev, | ||
| 1314 | struct device_attribute *attr, | ||
| 1315 | char *buf) | ||
| 1316 | { | ||
| 1317 | struct hd_struct *p = dev_to_part(dev); | ||
| 1318 | struct block_device *bdev; | ||
| 1319 | ssize_t ret = -ENXIO; | ||
| 1320 | |||
| 1321 | lock_kernel(); | ||
| 1322 | bdev = bdget(part_devt(p)); | ||
| 1323 | if (bdev != NULL) { | ||
| 1324 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 1325 | |||
| 1326 | if (q != NULL) { | ||
| 1327 | mutex_lock(&bdev->bd_mutex); | ||
| 1328 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | ||
| 1329 | mutex_unlock(&bdev->bd_mutex); | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | bdput(bdev); | ||
| 1333 | } | ||
| 1334 | |||
| 1335 | unlock_kernel(); | ||
| 1336 | return ret; | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | static ssize_t sysfs_blk_trace_enable_store(struct device *dev, | ||
| 1340 | struct device_attribute *attr, | ||
| 1341 | const char *buf, size_t count) | ||
| 1342 | { | ||
| 1343 | struct block_device *bdev; | ||
| 1344 | struct request_queue *q; | ||
| 1345 | struct hd_struct *p; | ||
| 1346 | int value; | ||
| 1347 | ssize_t ret = -ENXIO; | ||
| 1348 | |||
| 1349 | if (count == 0 || sscanf(buf, "%d", &value) != 1) | ||
| 1350 | goto out; | ||
| 1351 | |||
| 1352 | lock_kernel(); | ||
| 1353 | p = dev_to_part(dev); | ||
| 1354 | bdev = bdget(part_devt(p)); | ||
| 1355 | if (bdev == NULL) | ||
| 1356 | goto out_unlock_kernel; | ||
| 1357 | |||
| 1358 | q = bdev_get_queue(bdev); | ||
| 1359 | if (q == NULL) | ||
| 1360 | goto out_bdput; | ||
| 1361 | |||
| 1362 | mutex_lock(&bdev->bd_mutex); | ||
| 1363 | if (value) | ||
| 1364 | ret = blk_trace_setup_queue(q, bdev->bd_dev); | ||
| 1365 | else | ||
| 1366 | ret = blk_trace_remove_queue(q); | ||
| 1367 | mutex_unlock(&bdev->bd_mutex); | ||
| 1368 | |||
| 1369 | if (ret == 0) | ||
| 1370 | ret = count; | ||
| 1371 | out_bdput: | ||
| 1372 | bdput(bdev); | ||
| 1373 | out_unlock_kernel: | ||
| 1374 | unlock_kernel(); | ||
| 1375 | out: | ||
| 1376 | return ret; | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | 1324 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, |
| 1380 | struct device_attribute *attr, | 1325 | struct device_attribute *attr, |
| 1381 | char *buf); | 1326 | char *buf); |
| @@ -1387,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1387 | sysfs_blk_trace_attr_show, \ | 1332 | sysfs_blk_trace_attr_show, \ |
| 1388 | sysfs_blk_trace_attr_store) | 1333 | sysfs_blk_trace_attr_store) |
| 1389 | 1334 | ||
| 1390 | static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, | 1335 | static BLK_TRACE_DEVICE_ATTR(enable); |
| 1391 | sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store); | ||
| 1392 | static BLK_TRACE_DEVICE_ATTR(act_mask); | 1336 | static BLK_TRACE_DEVICE_ATTR(act_mask); |
| 1393 | static BLK_TRACE_DEVICE_ATTR(pid); | 1337 | static BLK_TRACE_DEVICE_ATTR(pid); |
| 1394 | static BLK_TRACE_DEVICE_ATTR(start_lba); | 1338 | static BLK_TRACE_DEVICE_ATTR(start_lba); |
| @@ -1408,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = { | |||
| 1408 | .attrs = blk_trace_attrs, | 1352 | .attrs = blk_trace_attrs, |
| 1409 | }; | 1353 | }; |
| 1410 | 1354 | ||
| 1411 | static int blk_str2act_mask(const char *str) | 1355 | static const struct { |
| 1356 | int mask; | ||
| 1357 | const char *str; | ||
| 1358 | } mask_maps[] = { | ||
| 1359 | { BLK_TC_READ, "read" }, | ||
| 1360 | { BLK_TC_WRITE, "write" }, | ||
| 1361 | { BLK_TC_BARRIER, "barrier" }, | ||
| 1362 | { BLK_TC_SYNC, "sync" }, | ||
| 1363 | { BLK_TC_QUEUE, "queue" }, | ||
| 1364 | { BLK_TC_REQUEUE, "requeue" }, | ||
| 1365 | { BLK_TC_ISSUE, "issue" }, | ||
| 1366 | { BLK_TC_COMPLETE, "complete" }, | ||
| 1367 | { BLK_TC_FS, "fs" }, | ||
| 1368 | { BLK_TC_PC, "pc" }, | ||
| 1369 | { BLK_TC_AHEAD, "ahead" }, | ||
| 1370 | { BLK_TC_META, "meta" }, | ||
| 1371 | { BLK_TC_DISCARD, "discard" }, | ||
| 1372 | { BLK_TC_DRV_DATA, "drv_data" }, | ||
| 1373 | }; | ||
| 1374 | |||
| 1375 | static int blk_trace_str2mask(const char *str) | ||
| 1412 | { | 1376 | { |
| 1377 | int i; | ||
| 1413 | int mask = 0; | 1378 | int mask = 0; |
| 1414 | char *copy = kstrdup(str, GFP_KERNEL), *s; | 1379 | char *s, *token; |
| 1415 | 1380 | ||
| 1416 | if (copy == NULL) | 1381 | s = kstrdup(str, GFP_KERNEL); |
| 1382 | if (s == NULL) | ||
| 1417 | return -ENOMEM; | 1383 | return -ENOMEM; |
| 1418 | 1384 | s = strstrip(s); | |
| 1419 | s = strstrip(copy); | ||
| 1420 | 1385 | ||
| 1421 | while (1) { | 1386 | while (1) { |
| 1422 | char *sep = strchr(s, ','); | 1387 | token = strsep(&s, ","); |
| 1423 | 1388 | if (token == NULL) | |
| 1424 | if (sep != NULL) | ||
| 1425 | *sep = '\0'; | ||
| 1426 | |||
| 1427 | if (strcasecmp(s, "barrier") == 0) | ||
| 1428 | mask |= BLK_TC_BARRIER; | ||
| 1429 | else if (strcasecmp(s, "complete") == 0) | ||
| 1430 | mask |= BLK_TC_COMPLETE; | ||
| 1431 | else if (strcasecmp(s, "fs") == 0) | ||
| 1432 | mask |= BLK_TC_FS; | ||
| 1433 | else if (strcasecmp(s, "issue") == 0) | ||
| 1434 | mask |= BLK_TC_ISSUE; | ||
| 1435 | else if (strcasecmp(s, "pc") == 0) | ||
| 1436 | mask |= BLK_TC_PC; | ||
| 1437 | else if (strcasecmp(s, "queue") == 0) | ||
| 1438 | mask |= BLK_TC_QUEUE; | ||
| 1439 | else if (strcasecmp(s, "read") == 0) | ||
| 1440 | mask |= BLK_TC_READ; | ||
| 1441 | else if (strcasecmp(s, "requeue") == 0) | ||
| 1442 | mask |= BLK_TC_REQUEUE; | ||
| 1443 | else if (strcasecmp(s, "sync") == 0) | ||
| 1444 | mask |= BLK_TC_SYNC; | ||
| 1445 | else if (strcasecmp(s, "write") == 0) | ||
| 1446 | mask |= BLK_TC_WRITE; | ||
| 1447 | |||
| 1448 | if (sep == NULL) | ||
| 1449 | break; | 1389 | break; |
| 1450 | 1390 | ||
| 1451 | s = sep + 1; | 1391 | if (*token == '\0') |
| 1392 | continue; | ||
| 1393 | |||
| 1394 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { | ||
| 1395 | if (strcasecmp(token, mask_maps[i].str) == 0) { | ||
| 1396 | mask |= mask_maps[i].mask; | ||
| 1397 | break; | ||
| 1398 | } | ||
| 1399 | } | ||
| 1400 | if (i == ARRAY_SIZE(mask_maps)) { | ||
| 1401 | mask = -EINVAL; | ||
| 1402 | break; | ||
| 1403 | } | ||
| 1452 | } | 1404 | } |
| 1453 | kfree(copy); | 1405 | kfree(s); |
| 1454 | 1406 | ||
| 1455 | return mask; | 1407 | return mask; |
| 1456 | } | 1408 | } |
| 1457 | 1409 | ||
| 1410 | static ssize_t blk_trace_mask2str(char *buf, int mask) | ||
| 1411 | { | ||
| 1412 | int i; | ||
| 1413 | char *p = buf; | ||
| 1414 | |||
| 1415 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { | ||
| 1416 | if (mask & mask_maps[i].mask) { | ||
| 1417 | p += sprintf(p, "%s%s", | ||
| 1418 | (p == buf) ? "" : ",", mask_maps[i].str); | ||
| 1419 | } | ||
| 1420 | } | ||
| 1421 | *p++ = '\n'; | ||
| 1422 | |||
| 1423 | return p - buf; | ||
| 1424 | } | ||
| 1425 | |||
| 1426 | static struct request_queue *blk_trace_get_queue(struct block_device *bdev) | ||
| 1427 | { | ||
| 1428 | if (bdev->bd_disk == NULL) | ||
| 1429 | return NULL; | ||
| 1430 | |||
| 1431 | return bdev_get_queue(bdev); | ||
| 1432 | } | ||
| 1433 | |||
| 1458 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | 1434 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, |
| 1459 | struct device_attribute *attr, | 1435 | struct device_attribute *attr, |
| 1460 | char *buf) | 1436 | char *buf) |
| @@ -1469,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
| 1469 | if (bdev == NULL) | 1445 | if (bdev == NULL) |
| 1470 | goto out_unlock_kernel; | 1446 | goto out_unlock_kernel; |
| 1471 | 1447 | ||
| 1472 | q = bdev_get_queue(bdev); | 1448 | q = blk_trace_get_queue(bdev); |
| 1473 | if (q == NULL) | 1449 | if (q == NULL) |
| 1474 | goto out_bdput; | 1450 | goto out_bdput; |
| 1451 | |||
| 1475 | mutex_lock(&bdev->bd_mutex); | 1452 | mutex_lock(&bdev->bd_mutex); |
| 1453 | |||
| 1454 | if (attr == &dev_attr_enable) { | ||
| 1455 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | ||
| 1456 | goto out_unlock_bdev; | ||
| 1457 | } | ||
| 1458 | |||
| 1476 | if (q->blk_trace == NULL) | 1459 | if (q->blk_trace == NULL) |
| 1477 | ret = sprintf(buf, "disabled\n"); | 1460 | ret = sprintf(buf, "disabled\n"); |
| 1478 | else if (attr == &dev_attr_act_mask) | 1461 | else if (attr == &dev_attr_act_mask) |
| 1479 | ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); | 1462 | ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); |
| 1480 | else if (attr == &dev_attr_pid) | 1463 | else if (attr == &dev_attr_pid) |
| 1481 | ret = sprintf(buf, "%u\n", q->blk_trace->pid); | 1464 | ret = sprintf(buf, "%u\n", q->blk_trace->pid); |
| 1482 | else if (attr == &dev_attr_start_lba) | 1465 | else if (attr == &dev_attr_start_lba) |
| 1483 | ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); | 1466 | ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); |
| 1484 | else if (attr == &dev_attr_end_lba) | 1467 | else if (attr == &dev_attr_end_lba) |
| 1485 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); | 1468 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); |
| 1469 | |||
| 1470 | out_unlock_bdev: | ||
| 1486 | mutex_unlock(&bdev->bd_mutex); | 1471 | mutex_unlock(&bdev->bd_mutex); |
| 1487 | out_bdput: | 1472 | out_bdput: |
| 1488 | bdput(bdev); | 1473 | bdput(bdev); |
| @@ -1499,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1499 | struct request_queue *q; | 1484 | struct request_queue *q; |
| 1500 | struct hd_struct *p; | 1485 | struct hd_struct *p; |
| 1501 | u64 value; | 1486 | u64 value; |
| 1502 | ssize_t ret = -ENXIO; | 1487 | ssize_t ret = -EINVAL; |
| 1503 | 1488 | ||
| 1504 | if (count == 0) | 1489 | if (count == 0) |
| 1505 | goto out; | 1490 | goto out; |
| @@ -1507,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1507 | if (attr == &dev_attr_act_mask) { | 1492 | if (attr == &dev_attr_act_mask) { |
| 1508 | if (sscanf(buf, "%llx", &value) != 1) { | 1493 | if (sscanf(buf, "%llx", &value) != 1) { |
| 1509 | /* Assume it is a list of trace category names */ | 1494 | /* Assume it is a list of trace category names */ |
| 1510 | value = blk_str2act_mask(buf); | 1495 | ret = blk_trace_str2mask(buf); |
| 1511 | if (value < 0) | 1496 | if (ret < 0) |
| 1512 | goto out; | 1497 | goto out; |
| 1498 | value = ret; | ||
| 1513 | } | 1499 | } |
| 1514 | } else if (sscanf(buf, "%llu", &value) != 1) | 1500 | } else if (sscanf(buf, "%llu", &value) != 1) |
| 1515 | goto out; | 1501 | goto out; |
| 1516 | 1502 | ||
| 1503 | ret = -ENXIO; | ||
| 1504 | |||
| 1517 | lock_kernel(); | 1505 | lock_kernel(); |
| 1518 | p = dev_to_part(dev); | 1506 | p = dev_to_part(dev); |
| 1519 | bdev = bdget(part_devt(p)); | 1507 | bdev = bdget(part_devt(p)); |
| 1520 | if (bdev == NULL) | 1508 | if (bdev == NULL) |
| 1521 | goto out_unlock_kernel; | 1509 | goto out_unlock_kernel; |
| 1522 | 1510 | ||
| 1523 | q = bdev_get_queue(bdev); | 1511 | q = blk_trace_get_queue(bdev); |
| 1524 | if (q == NULL) | 1512 | if (q == NULL) |
| 1525 | goto out_bdput; | 1513 | goto out_bdput; |
| 1526 | 1514 | ||
| 1527 | mutex_lock(&bdev->bd_mutex); | 1515 | mutex_lock(&bdev->bd_mutex); |
| 1516 | |||
| 1517 | if (attr == &dev_attr_enable) { | ||
| 1518 | if (value) | ||
| 1519 | ret = blk_trace_setup_queue(q, bdev->bd_dev); | ||
| 1520 | else | ||
| 1521 | ret = blk_trace_remove_queue(q); | ||
| 1522 | goto out_unlock_bdev; | ||
| 1523 | } | ||
| 1524 | |||
| 1528 | ret = 0; | 1525 | ret = 0; |
| 1529 | if (q->blk_trace == NULL) | 1526 | if (q->blk_trace == NULL) |
| 1530 | ret = blk_trace_setup_queue(q, bdev->bd_dev); | 1527 | ret = blk_trace_setup_queue(q, bdev->bd_dev); |
| @@ -1538,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1538 | q->blk_trace->start_lba = value; | 1535 | q->blk_trace->start_lba = value; |
| 1539 | else if (attr == &dev_attr_end_lba) | 1536 | else if (attr == &dev_attr_end_lba) |
| 1540 | q->blk_trace->end_lba = value; | 1537 | q->blk_trace->end_lba = value; |
| 1541 | ret = count; | ||
| 1542 | } | 1538 | } |
| 1539 | |||
| 1540 | out_unlock_bdev: | ||
| 1543 | mutex_unlock(&bdev->bd_mutex); | 1541 | mutex_unlock(&bdev->bd_mutex); |
| 1544 | out_bdput: | 1542 | out_bdput: |
| 1545 | bdput(bdev); | 1543 | bdput(bdev); |
| 1546 | out_unlock_kernel: | 1544 | out_unlock_kernel: |
| 1547 | unlock_kernel(); | 1545 | unlock_kernel(); |
| 1548 | out: | 1546 | out: |
| 1549 | return ret; | 1547 | return ret ? ret : count; |
| 1550 | } | 1548 | } |
| 1549 | |||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7847806eefef..1752a63f37c0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -29,6 +29,8 @@ | |||
| 29 | #include <linux/list.h> | 29 | #include <linux/list.h> |
| 30 | #include <linux/hash.h> | 30 | #include <linux/hash.h> |
| 31 | 31 | ||
| 32 | #include <trace/sched.h> | ||
| 33 | |||
| 32 | #include <asm/ftrace.h> | 34 | #include <asm/ftrace.h> |
| 33 | 35 | ||
| 34 | #include "trace.h" | 36 | #include "trace.h" |
| @@ -339,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec) | |||
| 339 | 341 | ||
| 340 | static void ftrace_free_rec(struct dyn_ftrace *rec) | 342 | static void ftrace_free_rec(struct dyn_ftrace *rec) |
| 341 | { | 343 | { |
| 342 | rec->ip = (unsigned long)ftrace_free_records; | 344 | rec->freelist = ftrace_free_records; |
| 343 | ftrace_free_records = rec; | 345 | ftrace_free_records = rec; |
| 344 | rec->flags |= FTRACE_FL_FREE; | 346 | rec->flags |= FTRACE_FL_FREE; |
| 345 | } | 347 | } |
| @@ -356,9 +358,14 @@ void ftrace_release(void *start, unsigned long size) | |||
| 356 | 358 | ||
| 357 | mutex_lock(&ftrace_lock); | 359 | mutex_lock(&ftrace_lock); |
| 358 | do_for_each_ftrace_rec(pg, rec) { | 360 | do_for_each_ftrace_rec(pg, rec) { |
| 359 | if ((rec->ip >= s) && (rec->ip < e) && | 361 | if ((rec->ip >= s) && (rec->ip < e)) { |
| 360 | !(rec->flags & FTRACE_FL_FREE)) | 362 | /* |
| 363 | * rec->ip is changed in ftrace_free_rec() | ||
| 364 | * It should not be between s and e if record was freed. | ||
| 365 | */ | ||
| 366 | FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); | ||
| 361 | ftrace_free_rec(rec); | 367 | ftrace_free_rec(rec); |
| 368 | } | ||
| 362 | } while_for_each_ftrace_rec(); | 369 | } while_for_each_ftrace_rec(); |
| 363 | mutex_unlock(&ftrace_lock); | 370 | mutex_unlock(&ftrace_lock); |
| 364 | } | 371 | } |
| @@ -377,7 +384,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) | |||
| 377 | return NULL; | 384 | return NULL; |
| 378 | } | 385 | } |
| 379 | 386 | ||
| 380 | ftrace_free_records = (void *)rec->ip; | 387 | ftrace_free_records = rec->freelist; |
| 381 | memset(rec, 0, sizeof(*rec)); | 388 | memset(rec, 0, sizeof(*rec)); |
| 382 | return rec; | 389 | return rec; |
| 383 | } | 390 | } |
| @@ -409,7 +416,7 @@ ftrace_record_ip(unsigned long ip) | |||
| 409 | return NULL; | 416 | return NULL; |
| 410 | 417 | ||
| 411 | rec->ip = ip; | 418 | rec->ip = ip; |
| 412 | rec->flags = (unsigned long)ftrace_new_addrs; | 419 | rec->newlist = ftrace_new_addrs; |
| 413 | ftrace_new_addrs = rec; | 420 | ftrace_new_addrs = rec; |
| 414 | 421 | ||
| 415 | return rec; | 422 | return rec; |
| @@ -729,7 +736,7 @@ static int ftrace_update_code(struct module *mod) | |||
| 729 | return -1; | 736 | return -1; |
| 730 | 737 | ||
| 731 | p = ftrace_new_addrs; | 738 | p = ftrace_new_addrs; |
| 732 | ftrace_new_addrs = (struct dyn_ftrace *)p->flags; | 739 | ftrace_new_addrs = p->newlist; |
| 733 | p->flags = 0L; | 740 | p->flags = 0L; |
| 734 | 741 | ||
| 735 | /* convert record (i.e, patch mcount-call with NOP) */ | 742 | /* convert record (i.e, patch mcount-call with NOP) */ |
| @@ -2262,7 +2269,7 @@ ftrace_pid_read(struct file *file, char __user *ubuf, | |||
| 2262 | if (ftrace_pid_trace == ftrace_swapper_pid) | 2269 | if (ftrace_pid_trace == ftrace_swapper_pid) |
| 2263 | r = sprintf(buf, "swapper tasks\n"); | 2270 | r = sprintf(buf, "swapper tasks\n"); |
| 2264 | else if (ftrace_pid_trace) | 2271 | else if (ftrace_pid_trace) |
| 2265 | r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); | 2272 | r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); |
| 2266 | else | 2273 | else |
| 2267 | r = sprintf(buf, "no pid\n"); | 2274 | r = sprintf(buf, "no pid\n"); |
| 2268 | 2275 | ||
| @@ -2590,6 +2597,38 @@ free: | |||
| 2590 | return ret; | 2597 | return ret; |
| 2591 | } | 2598 | } |
| 2592 | 2599 | ||
| 2600 | static void | ||
| 2601 | ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, | ||
| 2602 | struct task_struct *next) | ||
| 2603 | { | ||
| 2604 | unsigned long long timestamp; | ||
| 2605 | int index; | ||
| 2606 | |||
| 2607 | /* | ||
| 2608 | * Does the user want to count the time a function was asleep? | ||
| 2609 | * If so, do not update the time stamps. | ||
| 2610 | */ | ||
| 2611 | if (trace_flags & TRACE_ITER_SLEEP_TIME) | ||
| 2612 | return; | ||
| 2613 | |||
| 2614 | timestamp = trace_clock_local(); | ||
| 2615 | |||
| 2616 | prev->ftrace_timestamp = timestamp; | ||
| 2617 | |||
| 2618 | /* only process tasks that we timestamped */ | ||
| 2619 | if (!next->ftrace_timestamp) | ||
| 2620 | return; | ||
| 2621 | |||
| 2622 | /* | ||
| 2623 | * Update all the counters in next to make up for the | ||
| 2624 | * time next was sleeping. | ||
| 2625 | */ | ||
| 2626 | timestamp -= next->ftrace_timestamp; | ||
| 2627 | |||
| 2628 | for (index = next->curr_ret_stack; index >= 0; index--) | ||
| 2629 | next->ret_stack[index].calltime += timestamp; | ||
| 2630 | } | ||
| 2631 | |||
| 2593 | /* Allocate a return stack for each task */ | 2632 | /* Allocate a return stack for each task */ |
| 2594 | static int start_graph_tracing(void) | 2633 | static int start_graph_tracing(void) |
| 2595 | { | 2634 | { |
| @@ -2611,6 +2650,13 @@ static int start_graph_tracing(void) | |||
| 2611 | ret = alloc_retstack_tasklist(ret_stack_list); | 2650 | ret = alloc_retstack_tasklist(ret_stack_list); |
| 2612 | } while (ret == -EAGAIN); | 2651 | } while (ret == -EAGAIN); |
| 2613 | 2652 | ||
| 2653 | if (!ret) { | ||
| 2654 | ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); | ||
| 2655 | if (ret) | ||
| 2656 | pr_info("ftrace_graph: Couldn't activate tracepoint" | ||
| 2657 | " probe to kernel_sched_switch\n"); | ||
| 2658 | } | ||
| 2659 | |||
| 2614 | kfree(ret_stack_list); | 2660 | kfree(ret_stack_list); |
| 2615 | return ret; | 2661 | return ret; |
| 2616 | } | 2662 | } |
| @@ -2643,6 +2689,12 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, | |||
| 2643 | 2689 | ||
| 2644 | mutex_lock(&ftrace_lock); | 2690 | mutex_lock(&ftrace_lock); |
| 2645 | 2691 | ||
| 2692 | /* we currently allow only one tracer registered at a time */ | ||
| 2693 | if (atomic_read(&ftrace_graph_active)) { | ||
| 2694 | ret = -EBUSY; | ||
| 2695 | goto out; | ||
| 2696 | } | ||
| 2697 | |||
| 2646 | ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; | 2698 | ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; |
| 2647 | register_pm_notifier(&ftrace_suspend_notifier); | 2699 | register_pm_notifier(&ftrace_suspend_notifier); |
| 2648 | 2700 | ||
| @@ -2668,6 +2720,7 @@ void unregister_ftrace_graph(void) | |||
| 2668 | mutex_lock(&ftrace_lock); | 2720 | mutex_lock(&ftrace_lock); |
| 2669 | 2721 | ||
| 2670 | atomic_dec(&ftrace_graph_active); | 2722 | atomic_dec(&ftrace_graph_active); |
| 2723 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); | ||
| 2671 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; | 2724 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; |
| 2672 | ftrace_graph_entry = ftrace_graph_entry_stub; | 2725 | ftrace_graph_entry = ftrace_graph_entry_stub; |
| 2673 | ftrace_shutdown(FTRACE_STOP_FUNC_RET); | 2726 | ftrace_shutdown(FTRACE_STOP_FUNC_RET); |
| @@ -2688,6 +2741,7 @@ void ftrace_graph_init_task(struct task_struct *t) | |||
| 2688 | t->curr_ret_stack = -1; | 2741 | t->curr_ret_stack = -1; |
| 2689 | atomic_set(&t->tracing_graph_pause, 0); | 2742 | atomic_set(&t->tracing_graph_pause, 0); |
| 2690 | atomic_set(&t->trace_overrun, 0); | 2743 | atomic_set(&t->trace_overrun, 0); |
| 2744 | t->ftrace_timestamp = 0; | ||
| 2691 | } else | 2745 | } else |
| 2692 | t->ret_stack = NULL; | 2746 | t->ret_stack = NULL; |
| 2693 | } | 2747 | } |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 808b14bbf076..edce2ff38944 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -189,16 +189,65 @@ enum { | |||
| 189 | RB_LEN_TIME_STAMP = 16, | 189 | RB_LEN_TIME_STAMP = 16, |
| 190 | }; | 190 | }; |
| 191 | 191 | ||
| 192 | /* inline for ring buffer fast paths */ | 192 | static inline int rb_null_event(struct ring_buffer_event *event) |
| 193 | { | ||
| 194 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; | ||
| 195 | } | ||
| 196 | |||
| 197 | static inline int rb_discarded_event(struct ring_buffer_event *event) | ||
| 198 | { | ||
| 199 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta; | ||
| 200 | } | ||
| 201 | |||
| 202 | static void rb_event_set_padding(struct ring_buffer_event *event) | ||
| 203 | { | ||
| 204 | event->type = RINGBUF_TYPE_PADDING; | ||
| 205 | event->time_delta = 0; | ||
| 206 | } | ||
| 207 | |||
| 208 | /** | ||
| 209 | * ring_buffer_event_discard - discard an event in the ring buffer | ||
| 210 | * @buffer: the ring buffer | ||
| 211 | * @event: the event to discard | ||
| 212 | * | ||
| 213 | * Sometimes an event that is in the ring buffer needs to be ignored. | ||
| 214 | * This function lets the user discard an event in the ring buffer | ||
| 215 | * and then that event will not be read later. | ||
| 216 | * | ||
| 217 | * Note, it is up to the user to be careful with this, and protect | ||
| 218 | * against races. If the user discards an event that has been consumed | ||
| 219 | * it is possible that it could corrupt the ring buffer. | ||
| 220 | */ | ||
| 221 | void ring_buffer_event_discard(struct ring_buffer_event *event) | ||
| 222 | { | ||
| 223 | event->type = RINGBUF_TYPE_PADDING; | ||
| 224 | /* time delta must be non zero */ | ||
| 225 | if (!event->time_delta) | ||
| 226 | event->time_delta = 1; | ||
| 227 | } | ||
| 228 | |||
| 193 | static unsigned | 229 | static unsigned |
| 194 | rb_event_length(struct ring_buffer_event *event) | 230 | rb_event_data_length(struct ring_buffer_event *event) |
| 195 | { | 231 | { |
| 196 | unsigned length; | 232 | unsigned length; |
| 197 | 233 | ||
| 234 | if (event->len) | ||
| 235 | length = event->len * RB_ALIGNMENT; | ||
| 236 | else | ||
| 237 | length = event->array[0]; | ||
| 238 | return length + RB_EVNT_HDR_SIZE; | ||
| 239 | } | ||
| 240 | |||
| 241 | /* inline for ring buffer fast paths */ | ||
| 242 | static unsigned | ||
| 243 | rb_event_length(struct ring_buffer_event *event) | ||
| 244 | { | ||
| 198 | switch (event->type) { | 245 | switch (event->type) { |
| 199 | case RINGBUF_TYPE_PADDING: | 246 | case RINGBUF_TYPE_PADDING: |
| 200 | /* undefined */ | 247 | if (rb_null_event(event)) |
| 201 | return -1; | 248 | /* undefined */ |
| 249 | return -1; | ||
| 250 | return rb_event_data_length(event); | ||
| 202 | 251 | ||
| 203 | case RINGBUF_TYPE_TIME_EXTEND: | 252 | case RINGBUF_TYPE_TIME_EXTEND: |
| 204 | return RB_LEN_TIME_EXTEND; | 253 | return RB_LEN_TIME_EXTEND; |
| @@ -207,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event) | |||
| 207 | return RB_LEN_TIME_STAMP; | 256 | return RB_LEN_TIME_STAMP; |
| 208 | 257 | ||
| 209 | case RINGBUF_TYPE_DATA: | 258 | case RINGBUF_TYPE_DATA: |
| 210 | if (event->len) | 259 | return rb_event_data_length(event); |
| 211 | length = event->len * RB_ALIGNMENT; | ||
| 212 | else | ||
| 213 | length = event->array[0]; | ||
| 214 | return length + RB_EVNT_HDR_SIZE; | ||
| 215 | default: | 260 | default: |
| 216 | BUG(); | 261 | BUG(); |
| 217 | } | 262 | } |
| @@ -845,11 +890,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 845 | } | 890 | } |
| 846 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | 891 | EXPORT_SYMBOL_GPL(ring_buffer_resize); |
| 847 | 892 | ||
| 848 | static inline int rb_null_event(struct ring_buffer_event *event) | ||
| 849 | { | ||
| 850 | return event->type == RINGBUF_TYPE_PADDING; | ||
| 851 | } | ||
| 852 | |||
| 853 | static inline void * | 893 | static inline void * |
| 854 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | 894 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) |
| 855 | { | 895 | { |
| @@ -1219,7 +1259,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1219 | if (tail < BUF_PAGE_SIZE) { | 1259 | if (tail < BUF_PAGE_SIZE) { |
| 1220 | /* Mark the rest of the page with padding */ | 1260 | /* Mark the rest of the page with padding */ |
| 1221 | event = __rb_page_index(tail_page, tail); | 1261 | event = __rb_page_index(tail_page, tail); |
| 1222 | event->type = RINGBUF_TYPE_PADDING; | 1262 | rb_event_set_padding(event); |
| 1223 | } | 1263 | } |
| 1224 | 1264 | ||
| 1225 | if (tail <= BUF_PAGE_SIZE) | 1265 | if (tail <= BUF_PAGE_SIZE) |
| @@ -1969,7 +2009,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1969 | 2009 | ||
| 1970 | event = rb_reader_event(cpu_buffer); | 2010 | event = rb_reader_event(cpu_buffer); |
| 1971 | 2011 | ||
| 1972 | if (event->type == RINGBUF_TYPE_DATA) | 2012 | if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) |
| 1973 | cpu_buffer->entries--; | 2013 | cpu_buffer->entries--; |
| 1974 | 2014 | ||
| 1975 | rb_update_read_stamp(cpu_buffer, event); | 2015 | rb_update_read_stamp(cpu_buffer, event); |
| @@ -2052,9 +2092,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2052 | 2092 | ||
| 2053 | switch (event->type) { | 2093 | switch (event->type) { |
| 2054 | case RINGBUF_TYPE_PADDING: | 2094 | case RINGBUF_TYPE_PADDING: |
| 2055 | RB_WARN_ON(cpu_buffer, 1); | 2095 | if (rb_null_event(event)) |
| 2096 | RB_WARN_ON(cpu_buffer, 1); | ||
| 2097 | /* | ||
| 2098 | * Because the writer could be discarding every | ||
| 2099 | * event it creates (which would probably be bad) | ||
| 2100 | * if we were to go back to "again" then we may never | ||
| 2101 | * catch up, and will trigger the warn on, or lock | ||
| 2102 | * the box. Return the padding, and we will release | ||
| 2103 | * the current locks, and try again. | ||
| 2104 | */ | ||
| 2056 | rb_advance_reader(cpu_buffer); | 2105 | rb_advance_reader(cpu_buffer); |
| 2057 | return NULL; | 2106 | return event; |
| 2058 | 2107 | ||
| 2059 | case RINGBUF_TYPE_TIME_EXTEND: | 2108 | case RINGBUF_TYPE_TIME_EXTEND: |
| 2060 | /* Internal data, OK to advance */ | 2109 | /* Internal data, OK to advance */ |
| @@ -2115,8 +2164,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2115 | 2164 | ||
| 2116 | switch (event->type) { | 2165 | switch (event->type) { |
| 2117 | case RINGBUF_TYPE_PADDING: | 2166 | case RINGBUF_TYPE_PADDING: |
| 2118 | rb_inc_iter(iter); | 2167 | if (rb_null_event(event)) { |
| 2119 | goto again; | 2168 | rb_inc_iter(iter); |
| 2169 | goto again; | ||
| 2170 | } | ||
| 2171 | rb_advance_iter(iter); | ||
| 2172 | return event; | ||
| 2120 | 2173 | ||
| 2121 | case RINGBUF_TYPE_TIME_EXTEND: | 2174 | case RINGBUF_TYPE_TIME_EXTEND: |
| 2122 | /* Internal data, OK to advance */ | 2175 | /* Internal data, OK to advance */ |
| @@ -2163,10 +2216,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2163 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2216 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 2164 | return NULL; | 2217 | return NULL; |
| 2165 | 2218 | ||
| 2219 | again: | ||
| 2166 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2220 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2167 | event = rb_buffer_peek(buffer, cpu, ts); | 2221 | event = rb_buffer_peek(buffer, cpu, ts); |
| 2168 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2222 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2169 | 2223 | ||
| 2224 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2225 | cpu_relax(); | ||
| 2226 | goto again; | ||
| 2227 | } | ||
| 2228 | |||
| 2170 | return event; | 2229 | return event; |
| 2171 | } | 2230 | } |
| 2172 | 2231 | ||
| @@ -2185,10 +2244,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2185 | struct ring_buffer_event *event; | 2244 | struct ring_buffer_event *event; |
| 2186 | unsigned long flags; | 2245 | unsigned long flags; |
| 2187 | 2246 | ||
| 2247 | again: | ||
| 2188 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2248 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2189 | event = rb_iter_peek(iter, ts); | 2249 | event = rb_iter_peek(iter, ts); |
| 2190 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2250 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2191 | 2251 | ||
| 2252 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2253 | cpu_relax(); | ||
| 2254 | goto again; | ||
| 2255 | } | ||
| 2256 | |||
| 2192 | return event; | 2257 | return event; |
| 2193 | } | 2258 | } |
| 2194 | 2259 | ||
| @@ -2207,6 +2272,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2207 | struct ring_buffer_event *event = NULL; | 2272 | struct ring_buffer_event *event = NULL; |
| 2208 | unsigned long flags; | 2273 | unsigned long flags; |
| 2209 | 2274 | ||
| 2275 | again: | ||
| 2210 | /* might be called in atomic */ | 2276 | /* might be called in atomic */ |
| 2211 | preempt_disable(); | 2277 | preempt_disable(); |
| 2212 | 2278 | ||
| @@ -2228,6 +2294,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2228 | out: | 2294 | out: |
| 2229 | preempt_enable(); | 2295 | preempt_enable(); |
| 2230 | 2296 | ||
| 2297 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2298 | cpu_relax(); | ||
| 2299 | goto again; | ||
| 2300 | } | ||
| 2301 | |||
| 2231 | return event; | 2302 | return event; |
| 2232 | } | 2303 | } |
| 2233 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 2304 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
| @@ -2306,6 +2377,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2306 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 2377 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
| 2307 | unsigned long flags; | 2378 | unsigned long flags; |
| 2308 | 2379 | ||
| 2380 | again: | ||
| 2309 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2381 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2310 | event = rb_iter_peek(iter, ts); | 2382 | event = rb_iter_peek(iter, ts); |
| 2311 | if (!event) | 2383 | if (!event) |
| @@ -2315,6 +2387,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2315 | out: | 2387 | out: |
| 2316 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2388 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2317 | 2389 | ||
| 2390 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2391 | cpu_relax(); | ||
| 2392 | goto again; | ||
| 2393 | } | ||
| 2394 | |||
| 2318 | return event; | 2395 | return event; |
| 2319 | } | 2396 | } |
| 2320 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 2397 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6fac0ffe6f0..a0174a40c563 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -255,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | |||
| 255 | 255 | ||
| 256 | /* trace_flags holds trace_options default values */ | 256 | /* trace_flags holds trace_options default values */ |
| 257 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | 257 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | |
| 258 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; | 258 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; |
| 259 | 259 | ||
| 260 | /** | 260 | /** |
| 261 | * trace_wake_up - wake up tasks waiting for trace input | 261 | * trace_wake_up - wake up tasks waiting for trace input |
| @@ -316,6 +316,7 @@ static const char *trace_options[] = { | |||
| 316 | "context-info", | 316 | "context-info", |
| 317 | "latency-format", | 317 | "latency-format", |
| 318 | "global-clock", | 318 | "global-clock", |
| 319 | "sleep-time", | ||
| 319 | NULL | 320 | NULL |
| 320 | }; | 321 | }; |
| 321 | 322 | ||
| @@ -382,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) | |||
| 382 | return cnt; | 383 | return cnt; |
| 383 | } | 384 | } |
| 384 | 385 | ||
| 385 | ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) | 386 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) |
| 386 | { | 387 | { |
| 387 | int len; | 388 | int len; |
| 388 | void *ret; | 389 | void *ret; |
| @@ -860,15 +861,25 @@ static void ftrace_trace_stack(struct trace_array *tr, | |||
| 860 | static void ftrace_trace_userstack(struct trace_array *tr, | 861 | static void ftrace_trace_userstack(struct trace_array *tr, |
| 861 | unsigned long flags, int pc); | 862 | unsigned long flags, int pc); |
| 862 | 863 | ||
| 863 | void trace_buffer_unlock_commit(struct trace_array *tr, | 864 | static inline void __trace_buffer_unlock_commit(struct trace_array *tr, |
| 864 | struct ring_buffer_event *event, | 865 | struct ring_buffer_event *event, |
| 865 | unsigned long flags, int pc) | 866 | unsigned long flags, int pc, |
| 867 | int wake) | ||
| 866 | { | 868 | { |
| 867 | ring_buffer_unlock_commit(tr->buffer, event); | 869 | ring_buffer_unlock_commit(tr->buffer, event); |
| 868 | 870 | ||
| 869 | ftrace_trace_stack(tr, flags, 6, pc); | 871 | ftrace_trace_stack(tr, flags, 6, pc); |
| 870 | ftrace_trace_userstack(tr, flags, pc); | 872 | ftrace_trace_userstack(tr, flags, pc); |
| 871 | trace_wake_up(); | 873 | |
| 874 | if (wake) | ||
| 875 | trace_wake_up(); | ||
| 876 | } | ||
| 877 | |||
| 878 | void trace_buffer_unlock_commit(struct trace_array *tr, | ||
| 879 | struct ring_buffer_event *event, | ||
| 880 | unsigned long flags, int pc) | ||
| 881 | { | ||
| 882 | __trace_buffer_unlock_commit(tr, event, flags, pc, 1); | ||
| 872 | } | 883 | } |
| 873 | 884 | ||
| 874 | struct ring_buffer_event * | 885 | struct ring_buffer_event * |
| @@ -882,7 +893,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, | |||
| 882 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, | 893 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, |
| 883 | unsigned long flags, int pc) | 894 | unsigned long flags, int pc) |
| 884 | { | 895 | { |
| 885 | return trace_buffer_unlock_commit(&global_trace, event, flags, pc); | 896 | return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); |
| 897 | } | ||
| 898 | |||
| 899 | void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, | ||
| 900 | unsigned long flags, int pc) | ||
| 901 | { | ||
| 902 | return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); | ||
| 886 | } | 903 | } |
| 887 | 904 | ||
| 888 | void | 905 | void |
| @@ -908,7 +925,7 @@ trace_function(struct trace_array *tr, | |||
| 908 | } | 925 | } |
| 909 | 926 | ||
| 910 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 927 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
| 911 | static void __trace_graph_entry(struct trace_array *tr, | 928 | static int __trace_graph_entry(struct trace_array *tr, |
| 912 | struct ftrace_graph_ent *trace, | 929 | struct ftrace_graph_ent *trace, |
| 913 | unsigned long flags, | 930 | unsigned long flags, |
| 914 | int pc) | 931 | int pc) |
| @@ -917,15 +934,17 @@ static void __trace_graph_entry(struct trace_array *tr, | |||
| 917 | struct ftrace_graph_ent_entry *entry; | 934 | struct ftrace_graph_ent_entry *entry; |
| 918 | 935 | ||
| 919 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | 936 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) |
| 920 | return; | 937 | return 0; |
| 921 | 938 | ||
| 922 | event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, | 939 | event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, |
| 923 | sizeof(*entry), flags, pc); | 940 | sizeof(*entry), flags, pc); |
| 924 | if (!event) | 941 | if (!event) |
| 925 | return; | 942 | return 0; |
| 926 | entry = ring_buffer_event_data(event); | 943 | entry = ring_buffer_event_data(event); |
| 927 | entry->graph_ent = *trace; | 944 | entry->graph_ent = *trace; |
| 928 | ring_buffer_unlock_commit(global_trace.buffer, event); | 945 | ring_buffer_unlock_commit(global_trace.buffer, event); |
| 946 | |||
| 947 | return 1; | ||
| 929 | } | 948 | } |
| 930 | 949 | ||
| 931 | static void __trace_graph_return(struct trace_array *tr, | 950 | static void __trace_graph_return(struct trace_array *tr, |
| @@ -1146,6 +1165,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
| 1146 | struct trace_array_cpu *data; | 1165 | struct trace_array_cpu *data; |
| 1147 | unsigned long flags; | 1166 | unsigned long flags; |
| 1148 | long disabled; | 1167 | long disabled; |
| 1168 | int ret; | ||
| 1149 | int cpu; | 1169 | int cpu; |
| 1150 | int pc; | 1170 | int pc; |
| 1151 | 1171 | ||
| @@ -1161,15 +1181,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
| 1161 | disabled = atomic_inc_return(&data->disabled); | 1181 | disabled = atomic_inc_return(&data->disabled); |
| 1162 | if (likely(disabled == 1)) { | 1182 | if (likely(disabled == 1)) { |
| 1163 | pc = preempt_count(); | 1183 | pc = preempt_count(); |
| 1164 | __trace_graph_entry(tr, trace, flags, pc); | 1184 | ret = __trace_graph_entry(tr, trace, flags, pc); |
| 1185 | } else { | ||
| 1186 | ret = 0; | ||
| 1165 | } | 1187 | } |
| 1166 | /* Only do the atomic if it is not already set */ | 1188 | /* Only do the atomic if it is not already set */ |
| 1167 | if (!test_tsk_trace_graph(current)) | 1189 | if (!test_tsk_trace_graph(current)) |
| 1168 | set_tsk_trace_graph(current); | 1190 | set_tsk_trace_graph(current); |
| 1191 | |||
| 1169 | atomic_dec(&data->disabled); | 1192 | atomic_dec(&data->disabled); |
| 1170 | local_irq_restore(flags); | 1193 | local_irq_restore(flags); |
| 1171 | 1194 | ||
| 1172 | return 1; | 1195 | return ret; |
| 1173 | } | 1196 | } |
| 1174 | 1197 | ||
| 1175 | void trace_graph_return(struct ftrace_graph_ret *trace) | 1198 | void trace_graph_return(struct ftrace_graph_ret *trace) |
| @@ -3513,6 +3536,9 @@ struct dentry *tracing_init_dentry(void) | |||
| 3513 | if (d_tracer) | 3536 | if (d_tracer) |
| 3514 | return d_tracer; | 3537 | return d_tracer; |
| 3515 | 3538 | ||
| 3539 | if (!debugfs_initialized()) | ||
| 3540 | return NULL; | ||
| 3541 | |||
| 3516 | d_tracer = debugfs_create_dir("tracing", NULL); | 3542 | d_tracer = debugfs_create_dir("tracing", NULL); |
| 3517 | 3543 | ||
| 3518 | if (!d_tracer && !once) { | 3544 | if (!d_tracer && !once) { |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be200..cb0ce3fc36d3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -483,6 +483,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, | |||
| 483 | unsigned long flags, int pc); | 483 | unsigned long flags, int pc); |
| 484 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, | 484 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, |
| 485 | unsigned long flags, int pc); | 485 | unsigned long flags, int pc); |
| 486 | void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, | ||
| 487 | unsigned long flags, int pc); | ||
| 486 | 488 | ||
| 487 | struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, | 489 | struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, |
| 488 | struct trace_array_cpu *data); | 490 | struct trace_array_cpu *data); |
| @@ -683,6 +685,7 @@ enum trace_iterator_flags { | |||
| 683 | TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ | 685 | TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ |
| 684 | TRACE_ITER_LATENCY_FMT = 0x40000, | 686 | TRACE_ITER_LATENCY_FMT = 0x40000, |
| 685 | TRACE_ITER_GLOBAL_CLK = 0x80000, | 687 | TRACE_ITER_GLOBAL_CLK = 0x80000, |
| 688 | TRACE_ITER_SLEEP_TIME = 0x100000, | ||
| 686 | }; | 689 | }; |
| 687 | 690 | ||
| 688 | /* | 691 | /* |
| @@ -775,16 +778,27 @@ enum { | |||
| 775 | TRACE_EVENT_TYPE_RAW = 2, | 778 | TRACE_EVENT_TYPE_RAW = 2, |
| 776 | }; | 779 | }; |
| 777 | 780 | ||
| 781 | struct ftrace_event_field { | ||
| 782 | struct list_head link; | ||
| 783 | char *name; | ||
| 784 | char *type; | ||
| 785 | int offset; | ||
| 786 | int size; | ||
| 787 | }; | ||
| 788 | |||
| 778 | struct ftrace_event_call { | 789 | struct ftrace_event_call { |
| 779 | char *name; | 790 | char *name; |
| 780 | char *system; | 791 | char *system; |
| 781 | struct dentry *dir; | 792 | struct dentry *dir; |
| 782 | int enabled; | 793 | int enabled; |
| 783 | int (*regfunc)(void); | 794 | int (*regfunc)(void); |
| 784 | void (*unregfunc)(void); | 795 | void (*unregfunc)(void); |
| 785 | int id; | 796 | int id; |
| 786 | int (*raw_init)(void); | 797 | int (*raw_init)(void); |
| 787 | int (*show_format)(struct trace_seq *s); | 798 | int (*show_format)(struct trace_seq *s); |
| 799 | int (*define_fields)(void); | ||
| 800 | struct list_head fields; | ||
| 801 | struct filter_pred **preds; | ||
| 788 | 802 | ||
| 789 | #ifdef CONFIG_EVENT_PROFILE | 803 | #ifdef CONFIG_EVENT_PROFILE |
| 790 | atomic_t profile_count; | 804 | atomic_t profile_count; |
| @@ -793,6 +807,51 @@ struct ftrace_event_call { | |||
| 793 | #endif | 807 | #endif |
| 794 | }; | 808 | }; |
| 795 | 809 | ||
| 810 | struct event_subsystem { | ||
| 811 | struct list_head list; | ||
| 812 | const char *name; | ||
| 813 | struct dentry *entry; | ||
| 814 | struct filter_pred **preds; | ||
| 815 | }; | ||
| 816 | |||
| 817 | #define events_for_each(event) \ | ||
| 818 | for (event = __start_ftrace_events; \ | ||
| 819 | (unsigned long)event < (unsigned long)__stop_ftrace_events; \ | ||
| 820 | event++) | ||
| 821 | |||
| 822 | #define MAX_FILTER_PRED 8 | ||
| 823 | |||
| 824 | struct filter_pred; | ||
| 825 | |||
| 826 | typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); | ||
| 827 | |||
| 828 | struct filter_pred { | ||
| 829 | filter_pred_fn_t fn; | ||
| 830 | u64 val; | ||
| 831 | char *str_val; | ||
| 832 | int str_len; | ||
| 833 | char *field_name; | ||
| 834 | int offset; | ||
| 835 | int not; | ||
| 836 | int or; | ||
| 837 | int compound; | ||
| 838 | int clear; | ||
| 839 | }; | ||
| 840 | |||
| 841 | int trace_define_field(struct ftrace_event_call *call, char *type, | ||
| 842 | char *name, int offset, int size); | ||
| 843 | extern void filter_free_pred(struct filter_pred *pred); | ||
| 844 | extern void filter_print_preds(struct filter_pred **preds, | ||
| 845 | struct trace_seq *s); | ||
| 846 | extern int filter_parse(char **pbuf, struct filter_pred *pred); | ||
| 847 | extern int filter_add_pred(struct ftrace_event_call *call, | ||
| 848 | struct filter_pred *pred); | ||
| 849 | extern void filter_free_preds(struct ftrace_event_call *call); | ||
| 850 | extern int filter_match_preds(struct ftrace_event_call *call, void *rec); | ||
| 851 | extern void filter_free_subsystem_preds(struct event_subsystem *system); | ||
| 852 | extern int filter_add_subsystem_pred(struct event_subsystem *system, | ||
| 853 | struct filter_pred *pred); | ||
| 854 | |||
| 796 | void event_trace_printk(unsigned long ip, const char *fmt, ...); | 855 | void event_trace_printk(unsigned long ip, const char *fmt, ...); |
| 797 | extern struct ftrace_event_call __start_ftrace_events[]; | 856 | extern struct ftrace_event_call __start_ftrace_events[]; |
| 798 | extern struct ftrace_event_call __stop_ftrace_events[]; | 857 | extern struct ftrace_event_call __stop_ftrace_events[]; |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 05b176abfd30..b588fd81f7f9 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/ktime.h> | 20 | #include <linux/ktime.h> |
| 21 | #include <linux/trace_clock.h> | ||
| 21 | 22 | ||
| 22 | /* | 23 | /* |
| 23 | * trace_clock_local(): the simplest and least coherent tracing clock. | 24 | * trace_clock_local(): the simplest and least coherent tracing clock. |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3047b56f6637..64ec4d278ffb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -19,6 +19,39 @@ | |||
| 19 | 19 | ||
| 20 | static DEFINE_MUTEX(event_mutex); | 20 | static DEFINE_MUTEX(event_mutex); |
| 21 | 21 | ||
| 22 | int trace_define_field(struct ftrace_event_call *call, char *type, | ||
| 23 | char *name, int offset, int size) | ||
| 24 | { | ||
| 25 | struct ftrace_event_field *field; | ||
| 26 | |||
| 27 | field = kzalloc(sizeof(*field), GFP_KERNEL); | ||
| 28 | if (!field) | ||
| 29 | goto err; | ||
| 30 | |||
| 31 | field->name = kstrdup(name, GFP_KERNEL); | ||
| 32 | if (!field->name) | ||
| 33 | goto err; | ||
| 34 | |||
| 35 | field->type = kstrdup(type, GFP_KERNEL); | ||
| 36 | if (!field->type) | ||
| 37 | goto err; | ||
| 38 | |||
| 39 | field->offset = offset; | ||
| 40 | field->size = size; | ||
| 41 | list_add(&field->link, &call->fields); | ||
| 42 | |||
| 43 | return 0; | ||
| 44 | |||
| 45 | err: | ||
| 46 | if (field) { | ||
| 47 | kfree(field->name); | ||
| 48 | kfree(field->type); | ||
| 49 | } | ||
| 50 | kfree(field); | ||
| 51 | |||
| 52 | return -ENOMEM; | ||
| 53 | } | ||
| 54 | |||
| 22 | static void ftrace_clear_events(void) | 55 | static void ftrace_clear_events(void) |
| 23 | { | 56 | { |
| 24 | struct ftrace_event_call *call = (void *)__start_ftrace_events; | 57 | struct ftrace_event_call *call = (void *)__start_ftrace_events; |
| @@ -343,7 +376,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 343 | 376 | ||
| 344 | #undef FIELD | 377 | #undef FIELD |
| 345 | #define FIELD(type, name) \ | 378 | #define FIELD(type, name) \ |
| 346 | #type, #name, offsetof(typeof(field), name), sizeof(field.name) | 379 | #type, "common_" #name, offsetof(typeof(field), name), \ |
| 380 | sizeof(field.name) | ||
| 347 | 381 | ||
| 348 | static int trace_write_header(struct trace_seq *s) | 382 | static int trace_write_header(struct trace_seq *s) |
| 349 | { | 383 | { |
| @@ -430,6 +464,139 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | |||
| 430 | return r; | 464 | return r; |
| 431 | } | 465 | } |
| 432 | 466 | ||
| 467 | static ssize_t | ||
| 468 | event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | ||
| 469 | loff_t *ppos) | ||
| 470 | { | ||
| 471 | struct ftrace_event_call *call = filp->private_data; | ||
| 472 | struct trace_seq *s; | ||
| 473 | int r; | ||
| 474 | |||
| 475 | if (*ppos) | ||
| 476 | return 0; | ||
| 477 | |||
| 478 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
| 479 | if (!s) | ||
| 480 | return -ENOMEM; | ||
| 481 | |||
| 482 | trace_seq_init(s); | ||
| 483 | |||
| 484 | filter_print_preds(call->preds, s); | ||
| 485 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | ||
| 486 | |||
| 487 | kfree(s); | ||
| 488 | |||
| 489 | return r; | ||
| 490 | } | ||
| 491 | |||
| 492 | static ssize_t | ||
| 493 | event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | ||
| 494 | loff_t *ppos) | ||
| 495 | { | ||
| 496 | struct ftrace_event_call *call = filp->private_data; | ||
| 497 | char buf[64], *pbuf = buf; | ||
| 498 | struct filter_pred *pred; | ||
| 499 | int err; | ||
| 500 | |||
| 501 | if (cnt >= sizeof(buf)) | ||
| 502 | return -EINVAL; | ||
| 503 | |||
| 504 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 505 | return -EFAULT; | ||
| 506 | |||
| 507 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | ||
| 508 | if (!pred) | ||
| 509 | return -ENOMEM; | ||
| 510 | |||
| 511 | err = filter_parse(&pbuf, pred); | ||
| 512 | if (err < 0) { | ||
| 513 | filter_free_pred(pred); | ||
| 514 | return err; | ||
| 515 | } | ||
| 516 | |||
| 517 | if (pred->clear) { | ||
| 518 | filter_free_preds(call); | ||
| 519 | filter_free_pred(pred); | ||
| 520 | return cnt; | ||
| 521 | } | ||
| 522 | |||
| 523 | if (filter_add_pred(call, pred)) { | ||
| 524 | filter_free_pred(pred); | ||
| 525 | return -EINVAL; | ||
| 526 | } | ||
| 527 | |||
| 528 | *ppos += cnt; | ||
| 529 | |||
| 530 | return cnt; | ||
| 531 | } | ||
| 532 | |||
| 533 | static ssize_t | ||
| 534 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | ||
| 535 | loff_t *ppos) | ||
| 536 | { | ||
| 537 | struct event_subsystem *system = filp->private_data; | ||
| 538 | struct trace_seq *s; | ||
| 539 | int r; | ||
| 540 | |||
| 541 | if (*ppos) | ||
| 542 | return 0; | ||
| 543 | |||
| 544 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
| 545 | if (!s) | ||
| 546 | return -ENOMEM; | ||
| 547 | |||
| 548 | trace_seq_init(s); | ||
| 549 | |||
| 550 | filter_print_preds(system->preds, s); | ||
| 551 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | ||
| 552 | |||
| 553 | kfree(s); | ||
| 554 | |||
| 555 | return r; | ||
| 556 | } | ||
| 557 | |||
| 558 | static ssize_t | ||
| 559 | subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | ||
| 560 | loff_t *ppos) | ||
| 561 | { | ||
| 562 | struct event_subsystem *system = filp->private_data; | ||
| 563 | char buf[64], *pbuf = buf; | ||
| 564 | struct filter_pred *pred; | ||
| 565 | int err; | ||
| 566 | |||
| 567 | if (cnt >= sizeof(buf)) | ||
| 568 | return -EINVAL; | ||
| 569 | |||
| 570 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 571 | return -EFAULT; | ||
| 572 | |||
| 573 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | ||
| 574 | if (!pred) | ||
| 575 | return -ENOMEM; | ||
| 576 | |||
| 577 | err = filter_parse(&pbuf, pred); | ||
| 578 | if (err < 0) { | ||
| 579 | filter_free_pred(pred); | ||
| 580 | return err; | ||
| 581 | } | ||
| 582 | |||
| 583 | if (pred->clear) { | ||
| 584 | filter_free_subsystem_preds(system); | ||
| 585 | filter_free_pred(pred); | ||
| 586 | return cnt; | ||
| 587 | } | ||
| 588 | |||
| 589 | if (filter_add_subsystem_pred(system, pred)) { | ||
| 590 | filter_free_subsystem_preds(system); | ||
| 591 | filter_free_pred(pred); | ||
| 592 | return -EINVAL; | ||
| 593 | } | ||
| 594 | |||
| 595 | *ppos += cnt; | ||
| 596 | |||
| 597 | return cnt; | ||
| 598 | } | ||
| 599 | |||
| 433 | static const struct seq_operations show_event_seq_ops = { | 600 | static const struct seq_operations show_event_seq_ops = { |
| 434 | .start = t_start, | 601 | .start = t_start, |
| 435 | .next = t_next, | 602 | .next = t_next, |
| @@ -475,6 +642,18 @@ static const struct file_operations ftrace_event_id_fops = { | |||
| 475 | .read = event_id_read, | 642 | .read = event_id_read, |
| 476 | }; | 643 | }; |
| 477 | 644 | ||
| 645 | static const struct file_operations ftrace_event_filter_fops = { | ||
| 646 | .open = tracing_open_generic, | ||
| 647 | .read = event_filter_read, | ||
| 648 | .write = event_filter_write, | ||
| 649 | }; | ||
| 650 | |||
| 651 | static const struct file_operations ftrace_subsystem_filter_fops = { | ||
| 652 | .open = tracing_open_generic, | ||
| 653 | .read = subsystem_filter_read, | ||
| 654 | .write = subsystem_filter_write, | ||
| 655 | }; | ||
| 656 | |||
| 478 | static struct dentry *event_trace_events_dir(void) | 657 | static struct dentry *event_trace_events_dir(void) |
| 479 | { | 658 | { |
| 480 | static struct dentry *d_tracer; | 659 | static struct dentry *d_tracer; |
| @@ -495,12 +674,6 @@ static struct dentry *event_trace_events_dir(void) | |||
| 495 | return d_events; | 674 | return d_events; |
| 496 | } | 675 | } |
| 497 | 676 | ||
| 498 | struct event_subsystem { | ||
| 499 | struct list_head list; | ||
| 500 | const char *name; | ||
| 501 | struct dentry *entry; | ||
| 502 | }; | ||
| 503 | |||
| 504 | static LIST_HEAD(event_subsystems); | 677 | static LIST_HEAD(event_subsystems); |
| 505 | 678 | ||
| 506 | static struct dentry * | 679 | static struct dentry * |
| @@ -533,6 +706,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
| 533 | system->name = name; | 706 | system->name = name; |
| 534 | list_add(&system->list, &event_subsystems); | 707 | list_add(&system->list, &event_subsystems); |
| 535 | 708 | ||
| 709 | system->preds = NULL; | ||
| 710 | |||
| 536 | return system->entry; | 711 | return system->entry; |
| 537 | } | 712 | } |
| 538 | 713 | ||
| @@ -581,6 +756,20 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) | |||
| 581 | call->name); | 756 | call->name); |
| 582 | } | 757 | } |
| 583 | 758 | ||
| 759 | if (call->define_fields) { | ||
| 760 | ret = call->define_fields(); | ||
| 761 | if (ret < 0) { | ||
| 762 | pr_warning("Could not initialize trace point" | ||
| 763 | " events/%s\n", call->name); | ||
| 764 | return ret; | ||
| 765 | } | ||
| 766 | entry = debugfs_create_file("filter", 0644, call->dir, call, | ||
| 767 | &ftrace_event_filter_fops); | ||
| 768 | if (!entry) | ||
| 769 | pr_warning("Could not create debugfs " | ||
| 770 | "'%s/filter' entry\n", call->name); | ||
| 771 | } | ||
| 772 | |||
| 584 | /* A trace may not want to export its format */ | 773 | /* A trace may not want to export its format */ |
| 585 | if (!call->show_format) | 774 | if (!call->show_format) |
| 586 | return 0; | 775 | return 0; |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c new file mode 100644 index 000000000000..026be412f356 --- /dev/null +++ b/kernel/trace/trace_events_filter.c | |||
| @@ -0,0 +1,427 @@ | |||
| 1 | /* | ||
| 2 | * trace_events_filter - generic event filtering | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 17 | * | ||
| 18 | * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/debugfs.h> | ||
| 22 | #include <linux/uaccess.h> | ||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/ctype.h> | ||
| 25 | |||
| 26 | #include "trace.h" | ||
| 27 | #include "trace_output.h" | ||
| 28 | |||
| 29 | static int filter_pred_64(struct filter_pred *pred, void *event) | ||
| 30 | { | ||
| 31 | u64 *addr = (u64 *)(event + pred->offset); | ||
| 32 | u64 val = (u64)pred->val; | ||
| 33 | int match; | ||
| 34 | |||
| 35 | match = (val == *addr) ^ pred->not; | ||
| 36 | |||
| 37 | return match; | ||
| 38 | } | ||
| 39 | |||
| 40 | static int filter_pred_32(struct filter_pred *pred, void *event) | ||
| 41 | { | ||
| 42 | u32 *addr = (u32 *)(event + pred->offset); | ||
| 43 | u32 val = (u32)pred->val; | ||
| 44 | int match; | ||
| 45 | |||
| 46 | match = (val == *addr) ^ pred->not; | ||
| 47 | |||
| 48 | return match; | ||
| 49 | } | ||
| 50 | |||
| 51 | static int filter_pred_16(struct filter_pred *pred, void *event) | ||
| 52 | { | ||
| 53 | u16 *addr = (u16 *)(event + pred->offset); | ||
| 54 | u16 val = (u16)pred->val; | ||
| 55 | int match; | ||
| 56 | |||
| 57 | match = (val == *addr) ^ pred->not; | ||
| 58 | |||
| 59 | return match; | ||
| 60 | } | ||
| 61 | |||
| 62 | static int filter_pred_8(struct filter_pred *pred, void *event) | ||
| 63 | { | ||
| 64 | u8 *addr = (u8 *)(event + pred->offset); | ||
| 65 | u8 val = (u8)pred->val; | ||
| 66 | int match; | ||
| 67 | |||
| 68 | match = (val == *addr) ^ pred->not; | ||
| 69 | |||
| 70 | return match; | ||
| 71 | } | ||
| 72 | |||
| 73 | static int filter_pred_string(struct filter_pred *pred, void *event) | ||
| 74 | { | ||
| 75 | char *addr = (char *)(event + pred->offset); | ||
| 76 | int cmp, match; | ||
| 77 | |||
| 78 | cmp = strncmp(addr, pred->str_val, pred->str_len); | ||
| 79 | |||
| 80 | match = (!cmp) ^ pred->not; | ||
| 81 | |||
| 82 | return match; | ||
| 83 | } | ||
| 84 | |||
| 85 | /* return 1 if event matches, 0 otherwise (discard) */ | ||
| 86 | int filter_match_preds(struct ftrace_event_call *call, void *rec) | ||
| 87 | { | ||
| 88 | int i, matched, and_failed = 0; | ||
| 89 | struct filter_pred *pred; | ||
| 90 | |||
| 91 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
| 92 | if (call->preds[i]) { | ||
| 93 | pred = call->preds[i]; | ||
| 94 | if (and_failed && !pred->or) | ||
| 95 | continue; | ||
| 96 | matched = pred->fn(pred, rec); | ||
| 97 | if (!matched && !pred->or) { | ||
| 98 | and_failed = 1; | ||
| 99 | continue; | ||
| 100 | } else if (matched && pred->or) | ||
| 101 | return 1; | ||
| 102 | } else | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | |||
| 106 | if (and_failed) | ||
| 107 | return 0; | ||
| 108 | |||
| 109 | return 1; | ||
| 110 | } | ||
| 111 | |||
| 112 | void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) | ||
| 113 | { | ||
| 114 | char *field_name; | ||
| 115 | struct filter_pred *pred; | ||
| 116 | int i; | ||
| 117 | |||
| 118 | if (!preds) { | ||
| 119 | trace_seq_printf(s, "none\n"); | ||
| 120 | return; | ||
| 121 | } | ||
| 122 | |||
| 123 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
| 124 | if (preds[i]) { | ||
| 125 | pred = preds[i]; | ||
| 126 | field_name = pred->field_name; | ||
| 127 | if (i) | ||
| 128 | trace_seq_printf(s, pred->or ? "|| " : "&& "); | ||
| 129 | trace_seq_printf(s, "%s ", field_name); | ||
| 130 | trace_seq_printf(s, pred->not ? "!= " : "== "); | ||
| 131 | if (pred->str_val) | ||
| 132 | trace_seq_printf(s, "%s\n", pred->str_val); | ||
| 133 | else | ||
| 134 | trace_seq_printf(s, "%llu\n", pred->val); | ||
| 135 | } else | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | |||
| 140 | static struct ftrace_event_field * | ||
| 141 | find_event_field(struct ftrace_event_call *call, char *name) | ||
| 142 | { | ||
| 143 | struct ftrace_event_field *field; | ||
| 144 | |||
| 145 | list_for_each_entry(field, &call->fields, link) { | ||
| 146 | if (!strcmp(field->name, name)) | ||
| 147 | return field; | ||
| 148 | } | ||
| 149 | |||
| 150 | return NULL; | ||
| 151 | } | ||
| 152 | |||
| 153 | void filter_free_pred(struct filter_pred *pred) | ||
| 154 | { | ||
| 155 | if (!pred) | ||
| 156 | return; | ||
| 157 | |||
| 158 | kfree(pred->field_name); | ||
| 159 | kfree(pred->str_val); | ||
| 160 | kfree(pred); | ||
| 161 | } | ||
| 162 | |||
| 163 | void filter_free_preds(struct ftrace_event_call *call) | ||
| 164 | { | ||
| 165 | int i; | ||
| 166 | |||
| 167 | if (call->preds) { | ||
| 168 | for (i = 0; i < MAX_FILTER_PRED; i++) | ||
| 169 | filter_free_pred(call->preds[i]); | ||
| 170 | kfree(call->preds); | ||
| 171 | call->preds = NULL; | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | void filter_free_subsystem_preds(struct event_subsystem *system) | ||
| 176 | { | ||
| 177 | struct ftrace_event_call *call = __start_ftrace_events; | ||
| 178 | int i; | ||
| 179 | |||
| 180 | if (system->preds) { | ||
| 181 | for (i = 0; i < MAX_FILTER_PRED; i++) | ||
| 182 | filter_free_pred(system->preds[i]); | ||
| 183 | kfree(system->preds); | ||
| 184 | system->preds = NULL; | ||
| 185 | } | ||
| 186 | |||
| 187 | events_for_each(call) { | ||
| 188 | if (!call->name || !call->regfunc) | ||
| 189 | continue; | ||
| 190 | |||
| 191 | if (!strcmp(call->system, system->name)) | ||
| 192 | filter_free_preds(call); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | static int __filter_add_pred(struct ftrace_event_call *call, | ||
| 197 | struct filter_pred *pred) | ||
| 198 | { | ||
| 199 | int i; | ||
| 200 | |||
| 201 | if (call->preds && !pred->compound) | ||
| 202 | filter_free_preds(call); | ||
| 203 | |||
| 204 | if (!call->preds) { | ||
| 205 | call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), | ||
| 206 | GFP_KERNEL); | ||
| 207 | if (!call->preds) | ||
| 208 | return -ENOMEM; | ||
| 209 | } | ||
| 210 | |||
| 211 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
| 212 | if (!call->preds[i]) { | ||
| 213 | call->preds[i] = pred; | ||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | return -ENOMEM; | ||
| 219 | } | ||
| 220 | |||
/*
 * Decide from a field's type string whether it is treated as a string:
 * the type must contain both a '[' character and the substring "char".
 *
 * Returns 1 for a string field, 0 otherwise.
 */
static int is_string_field(const char *type)
{
	return (strchr(type, '[') && strstr(type, "char")) ? 1 : 0;
}
| 228 | |||
| 229 | int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) | ||
| 230 | { | ||
| 231 | struct ftrace_event_field *field; | ||
| 232 | |||
| 233 | field = find_event_field(call, pred->field_name); | ||
| 234 | if (!field) | ||
| 235 | return -EINVAL; | ||
| 236 | |||
| 237 | pred->offset = field->offset; | ||
| 238 | |||
| 239 | if (is_string_field(field->type)) { | ||
| 240 | if (!pred->str_val) | ||
| 241 | return -EINVAL; | ||
| 242 | pred->fn = filter_pred_string; | ||
| 243 | pred->str_len = field->size; | ||
| 244 | return __filter_add_pred(call, pred); | ||
| 245 | } else { | ||
| 246 | if (pred->str_val) | ||
| 247 | return -EINVAL; | ||
| 248 | } | ||
| 249 | |||
| 250 | switch (field->size) { | ||
| 251 | case 8: | ||
| 252 | pred->fn = filter_pred_64; | ||
| 253 | break; | ||
| 254 | case 4: | ||
| 255 | pred->fn = filter_pred_32; | ||
| 256 | break; | ||
| 257 | case 2: | ||
| 258 | pred->fn = filter_pred_16; | ||
| 259 | break; | ||
| 260 | case 1: | ||
| 261 | pred->fn = filter_pred_8; | ||
| 262 | break; | ||
| 263 | default: | ||
| 264 | return -EINVAL; | ||
| 265 | } | ||
| 266 | |||
| 267 | return __filter_add_pred(call, pred); | ||
| 268 | } | ||
| 269 | |||
| 270 | static struct filter_pred *copy_pred(struct filter_pred *pred) | ||
| 271 | { | ||
| 272 | struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); | ||
| 273 | if (!new_pred) | ||
| 274 | return NULL; | ||
| 275 | |||
| 276 | memcpy(new_pred, pred, sizeof(*pred)); | ||
| 277 | |||
| 278 | if (pred->field_name) { | ||
| 279 | new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); | ||
| 280 | if (!new_pred->field_name) { | ||
| 281 | kfree(new_pred); | ||
| 282 | return NULL; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | if (pred->str_val) { | ||
| 287 | new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); | ||
| 288 | if (!new_pred->str_val) { | ||
| 289 | filter_free_pred(new_pred); | ||
| 290 | return NULL; | ||
| 291 | } | ||
| 292 | } | ||
| 293 | |||
| 294 | return new_pred; | ||
| 295 | } | ||
| 296 | |||
| 297 | int filter_add_subsystem_pred(struct event_subsystem *system, | ||
| 298 | struct filter_pred *pred) | ||
| 299 | { | ||
| 300 | struct ftrace_event_call *call = __start_ftrace_events; | ||
| 301 | struct filter_pred *event_pred; | ||
| 302 | int i; | ||
| 303 | |||
| 304 | if (system->preds && !pred->compound) | ||
| 305 | filter_free_subsystem_preds(system); | ||
| 306 | |||
| 307 | if (!system->preds) { | ||
| 308 | system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), | ||
| 309 | GFP_KERNEL); | ||
| 310 | if (!system->preds) | ||
| 311 | return -ENOMEM; | ||
| 312 | } | ||
| 313 | |||
| 314 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
| 315 | if (!system->preds[i]) { | ||
| 316 | system->preds[i] = pred; | ||
| 317 | break; | ||
| 318 | } | ||
| 319 | } | ||
| 320 | |||
| 321 | if (i == MAX_FILTER_PRED) | ||
| 322 | return -EINVAL; | ||
| 323 | |||
| 324 | events_for_each(call) { | ||
| 325 | int err; | ||
| 326 | |||
| 327 | if (!call->name || !call->regfunc) | ||
| 328 | continue; | ||
| 329 | |||
| 330 | if (strcmp(call->system, system->name)) | ||
| 331 | continue; | ||
| 332 | |||
| 333 | if (!find_event_field(call, pred->field_name)) | ||
| 334 | continue; | ||
| 335 | |||
| 336 | event_pred = copy_pred(pred); | ||
| 337 | if (!event_pred) | ||
| 338 | goto oom; | ||
| 339 | |||
| 340 | err = filter_add_pred(call, event_pred); | ||
| 341 | if (err) | ||
| 342 | filter_free_pred(event_pred); | ||
| 343 | if (err == -ENOMEM) | ||
| 344 | goto oom; | ||
| 345 | } | ||
| 346 | |||
| 347 | return 0; | ||
| 348 | |||
| 349 | oom: | ||
| 350 | system->preds[i] = NULL; | ||
| 351 | return -ENOMEM; | ||
| 352 | } | ||
| 353 | |||
| 354 | int filter_parse(char **pbuf, struct filter_pred *pred) | ||
| 355 | { | ||
| 356 | char *tmp, *tok, *val_str = NULL; | ||
| 357 | int tok_n = 0; | ||
| 358 | |||
| 359 | /* field ==/!= number, or/and field ==/!= number, number */ | ||
| 360 | while ((tok = strsep(pbuf, " \n"))) { | ||
| 361 | if (tok_n == 0) { | ||
| 362 | if (!strcmp(tok, "0")) { | ||
| 363 | pred->clear = 1; | ||
| 364 | return 0; | ||
| 365 | } else if (!strcmp(tok, "&&")) { | ||
| 366 | pred->or = 0; | ||
| 367 | pred->compound = 1; | ||
| 368 | } else if (!strcmp(tok, "||")) { | ||
| 369 | pred->or = 1; | ||
| 370 | pred->compound = 1; | ||
| 371 | } else | ||
| 372 | pred->field_name = tok; | ||
| 373 | tok_n = 1; | ||
| 374 | continue; | ||
| 375 | } | ||
| 376 | if (tok_n == 1) { | ||
| 377 | if (!pred->field_name) | ||
| 378 | pred->field_name = tok; | ||
| 379 | else if (!strcmp(tok, "!=")) | ||
| 380 | pred->not = 1; | ||
| 381 | else if (!strcmp(tok, "==")) | ||
| 382 | pred->not = 0; | ||
| 383 | else { | ||
| 384 | pred->field_name = NULL; | ||
| 385 | return -EINVAL; | ||
| 386 | } | ||
| 387 | tok_n = 2; | ||
| 388 | continue; | ||
| 389 | } | ||
| 390 | if (tok_n == 2) { | ||
| 391 | if (pred->compound) { | ||
| 392 | if (!strcmp(tok, "!=")) | ||
| 393 | pred->not = 1; | ||
| 394 | else if (!strcmp(tok, "==")) | ||
| 395 | pred->not = 0; | ||
| 396 | else { | ||
| 397 | pred->field_name = NULL; | ||
| 398 | return -EINVAL; | ||
| 399 | } | ||
| 400 | } else { | ||
| 401 | val_str = tok; | ||
| 402 | break; /* done */ | ||
| 403 | } | ||
| 404 | tok_n = 3; | ||
| 405 | continue; | ||
| 406 | } | ||
| 407 | if (tok_n == 3) { | ||
| 408 | val_str = tok; | ||
| 409 | break; /* done */ | ||
| 410 | } | ||
| 411 | } | ||
| 412 | |||
| 413 | pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); | ||
| 414 | if (!pred->field_name) | ||
| 415 | return -ENOMEM; | ||
| 416 | |||
| 417 | pred->val = simple_strtoull(val_str, &tmp, 10); | ||
| 418 | if (tmp == val_str) { | ||
| 419 | pred->str_val = kstrdup(val_str, GFP_KERNEL); | ||
| 420 | if (!pred->str_val) | ||
| 421 | return -ENOMEM; | ||
| 422 | } | ||
| 423 | |||
| 424 | return 0; | ||
| 425 | } | ||
| 426 | |||
| 427 | |||
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 5117c43f5c67..30743f7d4110 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h | |||
| @@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \ | |||
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | #include <trace/trace_event_types.h> | 131 | #include <trace/trace_event_types.h> |
| 132 | |||
| 133 | #undef __field | ||
| 134 | #define __field(type, item) \ | ||
| 135 | ret = trace_define_field(event_call, #type, #item, \ | ||
| 136 | offsetof(typeof(field), item), \ | ||
| 137 | sizeof(field.item)); \ | ||
| 138 | if (ret) \ | ||
| 139 | return ret; | ||
| 140 | |||
| 141 | #undef __array | ||
| 142 | #define __array(type, item, len) \ | ||
| 143 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | ||
| 144 | offsetof(typeof(field), item), \ | ||
| 145 | sizeof(field.item)); \ | ||
| 146 | if (ret) \ | ||
| 147 | return ret; | ||
| 148 | |||
| 149 | #define __common_field(type, item) \ | ||
| 150 | ret = trace_define_field(event_call, #type, "common_" #item, \ | ||
| 151 | offsetof(typeof(field.ent), item), \ | ||
| 152 | sizeof(field.ent.item)); \ | ||
| 153 | if (ret) \ | ||
| 154 | return ret; | ||
| 155 | |||
| 156 | #undef TRACE_EVENT | ||
| 157 | #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ | ||
| 158 | int \ | ||
| 159 | ftrace_define_fields_##call(void) \ | ||
| 160 | { \ | ||
| 161 | struct ftrace_raw_##call field; \ | ||
| 162 | struct ftrace_event_call *event_call = &event_##call; \ | ||
| 163 | int ret; \ | ||
| 164 | \ | ||
| 165 | __common_field(unsigned char, type); \ | ||
| 166 | __common_field(unsigned char, flags); \ | ||
| 167 | __common_field(unsigned char, preempt_count); \ | ||
| 168 | __common_field(int, pid); \ | ||
| 169 | __common_field(int, tgid); \ | ||
| 170 | \ | ||
| 171 | tstruct; \ | ||
| 172 | \ | ||
| 173 | return ret; \ | ||
| 174 | } | ||
| 175 | |||
| 176 | #include <trace/trace_event_types.h> | ||
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 6b3261ca988c..9d2fa78cecca 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h | |||
| @@ -204,6 +204,7 @@ static struct ftrace_event_call event_##call; \ | |||
| 204 | \ | 204 | \ |
| 205 | static void ftrace_raw_event_##call(proto) \ | 205 | static void ftrace_raw_event_##call(proto) \ |
| 206 | { \ | 206 | { \ |
| 207 | struct ftrace_event_call *call = &event_##call; \ | ||
| 207 | struct ring_buffer_event *event; \ | 208 | struct ring_buffer_event *event; \ |
| 208 | struct ftrace_raw_##call *entry; \ | 209 | struct ftrace_raw_##call *entry; \ |
| 209 | unsigned long irq_flags; \ | 210 | unsigned long irq_flags; \ |
| @@ -221,7 +222,11 @@ static void ftrace_raw_event_##call(proto) \ | |||
| 221 | \ | 222 | \ |
| 222 | assign; \ | 223 | assign; \ |
| 223 | \ | 224 | \ |
| 224 | trace_current_buffer_unlock_commit(event, irq_flags, pc); \ | 225 | if (call->preds && !filter_match_preds(call, entry)) \ |
| 226 | ring_buffer_event_discard(event); \ | ||
| 227 | \ | ||
| 228 | trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ | ||
| 229 | \ | ||
| 225 | } \ | 230 | } \ |
| 226 | \ | 231 | \ |
| 227 | static int ftrace_raw_reg_event_##call(void) \ | 232 | static int ftrace_raw_reg_event_##call(void) \ |
| @@ -252,6 +257,7 @@ static int ftrace_raw_init_event_##call(void) \ | |||
| 252 | if (!id) \ | 257 | if (!id) \ |
| 253 | return -ENODEV; \ | 258 | return -ENODEV; \ |
| 254 | event_##call.id = id; \ | 259 | event_##call.id = id; \ |
| 260 | INIT_LIST_HEAD(&event_##call.fields); \ | ||
| 255 | return 0; \ | 261 | return 0; \ |
| 256 | } \ | 262 | } \ |
| 257 | \ | 263 | \ |
| @@ -264,6 +270,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
| 264 | .regfunc = ftrace_raw_reg_event_##call, \ | 270 | .regfunc = ftrace_raw_reg_event_##call, \ |
| 265 | .unregfunc = ftrace_raw_unreg_event_##call, \ | 271 | .unregfunc = ftrace_raw_unreg_event_##call, \ |
| 266 | .show_format = ftrace_format_##call, \ | 272 | .show_format = ftrace_format_##call, \ |
| 273 | .define_fields = ftrace_define_fields_##call, \ | ||
| 267 | _TRACE_PROFILE_INIT(call) \ | 274 | _TRACE_PROFILE_INIT(call) \ |
| 268 | } | 275 | } |
| 269 | 276 | ||
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e876816fa8e7..d28687e7b3a7 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -57,9 +57,9 @@ static struct tracer_flags tracer_flags = { | |||
| 57 | 57 | ||
| 58 | /* Add a function return address to the trace stack on thread info.*/ | 58 | /* Add a function return address to the trace stack on thread info.*/ |
| 59 | int | 59 | int |
| 60 | ftrace_push_return_trace(unsigned long ret, unsigned long long time, | 60 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) |
| 61 | unsigned long func, int *depth) | ||
| 62 | { | 61 | { |
| 62 | unsigned long long calltime; | ||
| 63 | int index; | 63 | int index; |
| 64 | 64 | ||
| 65 | if (!current->ret_stack) | 65 | if (!current->ret_stack) |
| @@ -71,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time, | |||
| 71 | return -EBUSY; | 71 | return -EBUSY; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | calltime = trace_clock_local(); | ||
| 75 | |||
| 74 | index = ++current->curr_ret_stack; | 76 | index = ++current->curr_ret_stack; |
| 75 | barrier(); | 77 | barrier(); |
| 76 | current->ret_stack[index].ret = ret; | 78 | current->ret_stack[index].ret = ret; |
| 77 | current->ret_stack[index].func = func; | 79 | current->ret_stack[index].func = func; |
| 78 | current->ret_stack[index].calltime = time; | 80 | current->ret_stack[index].calltime = calltime; |
| 79 | *depth = index; | 81 | *depth = index; |
| 80 | 82 | ||
| 81 | return 0; | 83 | return 0; |
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 9aa84bde23cd..394f94417e2f 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c | |||
| @@ -91,6 +91,7 @@ struct tracer nop_trace __read_mostly = | |||
| 91 | .name = "nop", | 91 | .name = "nop", |
| 92 | .init = nop_trace_init, | 92 | .init = nop_trace_init, |
| 93 | .reset = nop_trace_reset, | 93 | .reset = nop_trace_reset, |
| 94 | .wait_pipe = poll_wait_pipe, | ||
| 94 | #ifdef CONFIG_FTRACE_SELFTEST | 95 | #ifdef CONFIG_FTRACE_SELFTEST |
| 95 | .selftest = trace_selftest_startup_nop, | 96 | .selftest = trace_selftest_startup_nop, |
| 96 | #endif | 97 | #endif |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 19261fdd2455..d72b9a63b247 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -137,7 +137,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) | |||
| 137 | return 1; | 137 | return 1; |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) | 140 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) |
| 141 | { | 141 | { |
| 142 | if (len > ((PAGE_SIZE - 1) - s->len)) | 142 | if (len > ((PAGE_SIZE - 1) - s->len)) |
| 143 | return 0; | 143 | return 0; |
| @@ -148,10 +148,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) | |||
| 148 | return len; | 148 | return len; |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) | 151 | int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) |
| 152 | { | 152 | { |
| 153 | unsigned char hex[HEX_CHARS]; | 153 | unsigned char hex[HEX_CHARS]; |
| 154 | unsigned char *data = mem; | 154 | const unsigned char *data = mem; |
| 155 | int i, j; | 155 | int i, j; |
| 156 | 156 | ||
| 157 | #ifdef __BIG_ENDIAN | 157 | #ifdef __BIG_ENDIAN |
| @@ -167,6 +167,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) | |||
| 167 | return trace_seq_putmem(s, hex, j); | 167 | return trace_seq_putmem(s, hex, j); |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | void *trace_seq_reserve(struct trace_seq *s, size_t len) | ||
| 171 | { | ||
| 172 | void *ret; | ||
| 173 | |||
| 174 | if (len > ((PAGE_SIZE - 1) - s->len)) | ||
| 175 | return NULL; | ||
| 176 | |||
| 177 | ret = s->buffer + s->len; | ||
| 178 | s->len += len; | ||
| 179 | |||
| 180 | return ret; | ||
| 181 | } | ||
| 182 | |||
| 170 | int trace_seq_path(struct trace_seq *s, struct path *path) | 183 | int trace_seq_path(struct trace_seq *s, struct path *path) |
| 171 | { | 184 | { |
| 172 | unsigned char *p; | 185 | unsigned char *p; |
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 35c422fb51a9..e0bde39c2dd9 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h | |||
| @@ -29,24 +29,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, | |||
| 29 | unsigned long sym_flags); | 29 | unsigned long sym_flags); |
| 30 | extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, | 30 | extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, |
| 31 | size_t cnt); | 31 | size_t cnt); |
| 32 | int trace_seq_puts(struct trace_seq *s, const char *str); | 32 | extern int trace_seq_puts(struct trace_seq *s, const char *str); |
| 33 | int trace_seq_putc(struct trace_seq *s, unsigned char c); | 33 | extern int trace_seq_putc(struct trace_seq *s, unsigned char c); |
| 34 | int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); | 34 | extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); |
| 35 | int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); | 35 | extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, |
| 36 | int trace_seq_path(struct trace_seq *s, struct path *path); | 36 | size_t len); |
| 37 | int seq_print_userip_objs(const struct userstack_entry *entry, | 37 | extern void *trace_seq_reserve(struct trace_seq *s, size_t len); |
| 38 | struct trace_seq *s, unsigned long sym_flags); | 38 | extern int trace_seq_path(struct trace_seq *s, struct path *path); |
| 39 | int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | 39 | extern int seq_print_userip_objs(const struct userstack_entry *entry, |
| 40 | unsigned long ip, unsigned long sym_flags); | 40 | struct trace_seq *s, unsigned long sym_flags); |
| 41 | extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | ||
| 42 | unsigned long ip, unsigned long sym_flags); | ||
| 41 | 43 | ||
| 42 | int trace_print_context(struct trace_iterator *iter); | 44 | extern int trace_print_context(struct trace_iterator *iter); |
| 43 | int trace_print_lat_context(struct trace_iterator *iter); | 45 | extern int trace_print_lat_context(struct trace_iterator *iter); |
| 44 | 46 | ||
| 45 | struct trace_event *ftrace_find_event(int type); | 47 | extern struct trace_event *ftrace_find_event(int type); |
| 46 | int register_ftrace_event(struct trace_event *event); | 48 | extern int register_ftrace_event(struct trace_event *event); |
| 47 | int unregister_ftrace_event(struct trace_event *event); | 49 | extern int unregister_ftrace_event(struct trace_event *event); |
| 48 | 50 | ||
| 49 | enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); | 51 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, |
| 52 | int flags); | ||
| 50 | 53 | ||
| 51 | #define MAX_MEMHEX_BYTES 8 | 54 | #define MAX_MEMHEX_BYTES 8 |
| 52 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) | 55 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) |
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 39310e3434ee..acdebd771a93 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c | |||
| @@ -75,7 +75,7 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
| 75 | { | 75 | { |
| 76 | struct trace_stat_list *iter_entry, *new_entry; | 76 | struct trace_stat_list *iter_entry, *new_entry; |
| 77 | struct tracer_stat *ts = session->ts; | 77 | struct tracer_stat *ts = session->ts; |
| 78 | void *prev_stat; | 78 | void *stat; |
| 79 | int ret = 0; | 79 | int ret = 0; |
| 80 | int i; | 80 | int i; |
| 81 | 81 | ||
| @@ -85,6 +85,10 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
| 85 | if (!ts->stat_cmp) | 85 | if (!ts->stat_cmp) |
| 86 | ts->stat_cmp = dummy_cmp; | 86 | ts->stat_cmp = dummy_cmp; |
| 87 | 87 | ||
| 88 | stat = ts->stat_start(); | ||
| 89 | if (!stat) | ||
| 90 | goto exit; | ||
| 91 | |||
| 88 | /* | 92 | /* |
| 89 | * The first entry. Actually this is the second, but the first | 93 | * The first entry. Actually this is the second, but the first |
| 90 | * one (the stat_list head) is pointless. | 94 | * one (the stat_list head) is pointless. |
| @@ -99,14 +103,19 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
| 99 | 103 | ||
| 100 | list_add(&new_entry->list, &session->stat_list); | 104 | list_add(&new_entry->list, &session->stat_list); |
| 101 | 105 | ||
| 102 | new_entry->stat = ts->stat_start(); | 106 | new_entry->stat = stat; |
| 103 | prev_stat = new_entry->stat; | ||
| 104 | 107 | ||
| 105 | /* | 108 | /* |
| 106 | * Iterate over the tracer stat entries and store them in a sorted | 109 | * Iterate over the tracer stat entries and store them in a sorted |
| 107 | * list. | 110 | * list. |
| 108 | */ | 111 | */ |
| 109 | for (i = 1; ; i++) { | 112 | for (i = 1; ; i++) { |
| 113 | stat = ts->stat_next(stat, i); | ||
| 114 | |||
| 115 | /* End of insertion */ | ||
| 116 | if (!stat) | ||
| 117 | break; | ||
| 118 | |||
| 110 | new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); | 119 | new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); |
| 111 | if (!new_entry) { | 120 | if (!new_entry) { |
| 112 | ret = -ENOMEM; | 121 | ret = -ENOMEM; |
| @@ -114,31 +123,23 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
| 114 | } | 123 | } |
| 115 | 124 | ||
| 116 | INIT_LIST_HEAD(&new_entry->list); | 125 | INIT_LIST_HEAD(&new_entry->list); |
| 117 | new_entry->stat = ts->stat_next(prev_stat, i); | 126 | new_entry->stat = stat; |
| 118 | 127 | ||
| 119 | /* End of insertion */ | 128 | list_for_each_entry_reverse(iter_entry, &session->stat_list, |
| 120 | if (!new_entry->stat) | 129 | list) { |
| 121 | break; | ||
| 122 | |||
| 123 | list_for_each_entry(iter_entry, &session->stat_list, list) { | ||
| 124 | 130 | ||
| 125 | /* Insertion with a descendent sorting */ | 131 | /* Insertion with a descendent sorting */ |
| 126 | if (ts->stat_cmp(new_entry->stat, | 132 | if (ts->stat_cmp(iter_entry->stat, |
| 127 | iter_entry->stat) > 0) { | 133 | new_entry->stat) >= 0) { |
| 128 | |||
| 129 | list_add_tail(&new_entry->list, | ||
| 130 | &iter_entry->list); | ||
| 131 | break; | ||
| 132 | 134 | ||
| 133 | /* The current smaller value */ | ||
| 134 | } else if (list_is_last(&iter_entry->list, | ||
| 135 | &session->stat_list)) { | ||
| 136 | list_add(&new_entry->list, &iter_entry->list); | 135 | list_add(&new_entry->list, &iter_entry->list); |
| 137 | break; | 136 | break; |
| 138 | } | 137 | } |
| 139 | } | 138 | } |
| 140 | 139 | ||
| 141 | prev_stat = new_entry->stat; | 140 | /* The current larger value */ |
| 141 | if (list_empty(&new_entry->list)) | ||
| 142 | list_add(&new_entry->list, &session->stat_list); | ||
| 142 | } | 143 | } |
| 143 | exit: | 144 | exit: |
| 144 | mutex_unlock(&session->stat_mutex); | 145 | mutex_unlock(&session->stat_mutex); |
| @@ -160,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos) | |||
| 160 | 161 | ||
| 161 | /* If we are in the beginning of the file, print the headers */ | 162 | /* If we are in the beginning of the file, print the headers */ |
| 162 | if (!*pos && session->ts->stat_headers) | 163 | if (!*pos && session->ts->stat_headers) |
| 163 | session->ts->stat_headers(s); | 164 | return SEQ_START_TOKEN; |
| 164 | 165 | ||
| 165 | return seq_list_start(&session->stat_list, *pos); | 166 | return seq_list_start(&session->stat_list, *pos); |
| 166 | } | 167 | } |
| @@ -169,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) | |||
| 169 | { | 170 | { |
| 170 | struct tracer_stat_session *session = s->private; | 171 | struct tracer_stat_session *session = s->private; |
| 171 | 172 | ||
| 173 | if (p == SEQ_START_TOKEN) | ||
| 174 | return seq_list_start(&session->stat_list, *pos); | ||
| 175 | |||
| 172 | return seq_list_next(p, &session->stat_list, pos); | 176 | return seq_list_next(p, &session->stat_list, pos); |
| 173 | } | 177 | } |
| 174 | 178 | ||
| @@ -183,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v) | |||
| 183 | struct tracer_stat_session *session = s->private; | 187 | struct tracer_stat_session *session = s->private; |
| 184 | struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); | 188 | struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); |
| 185 | 189 | ||
| 190 | if (v == SEQ_START_TOKEN) | ||
| 191 | return session->ts->stat_headers(s); | ||
| 192 | |||
| 186 | return session->ts->stat_show(s, l->stat); | 193 | return session->ts->stat_show(s, l->stat); |
| 187 | } | 194 | } |
| 188 | 195 | ||
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 9ab035b58cf1..797201e4a137 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c | |||
| @@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p) | |||
| 196 | struct pid *pid; | 196 | struct pid *pid; |
| 197 | struct task_struct *tsk; | 197 | struct task_struct *tsk; |
| 198 | 198 | ||
| 199 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | ||
| 200 | if (&cws->list == workqueue_cpu_stat(cpu)->list.next) | ||
| 201 | seq_printf(s, "\n"); | ||
| 202 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); | ||
| 203 | |||
| 199 | pid = find_get_pid(cws->pid); | 204 | pid = find_get_pid(cws->pid); |
| 200 | if (pid) { | 205 | if (pid) { |
| 201 | tsk = get_pid_task(pid, PIDTYPE_PID); | 206 | tsk = get_pid_task(pid, PIDTYPE_PID); |
| @@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p) | |||
| 208 | put_pid(pid); | 213 | put_pid(pid); |
| 209 | } | 214 | } |
| 210 | 215 | ||
| 211 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | ||
| 212 | if (&cws->list == workqueue_cpu_stat(cpu)->list.next) | ||
| 213 | seq_printf(s, "\n"); | ||
| 214 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); | ||
| 215 | |||
| 216 | return 0; | 216 | return 0; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | static int workqueue_stat_headers(struct seq_file *s) | 219 | static int workqueue_stat_headers(struct seq_file *s) |
| 220 | { | 220 | { |
| 221 | seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); | 221 | seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); |
| 222 | seq_printf(s, "# | | | |\n\n"); | 222 | seq_printf(s, "# | | | |\n"); |
| 223 | return 0; | 223 | return 0; |
| 224 | } | 224 | } |
| 225 | 225 | ||
diff --git a/mm/memory.c b/mm/memory.c index dfc9e4ea4e8b..baa999e87cd2 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -48,8 +48,6 @@ | |||
| 48 | #include <linux/rmap.h> | 48 | #include <linux/rmap.h> |
| 49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
| 50 | #include <linux/delayacct.h> | 50 | #include <linux/delayacct.h> |
| 51 | #include <linux/kprobes.h> | ||
| 52 | #include <linux/mutex.h> | ||
| 53 | #include <linux/init.h> | 51 | #include <linux/init.h> |
| 54 | #include <linux/writeback.h> | 52 | #include <linux/writeback.h> |
| 55 | #include <linux/memcontrol.h> | 53 | #include <linux/memcontrol.h> |
